From 397e0ec2cc1bcde3d73b4e884de01e3fb54e0207 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 24 Nov 2017 17:36:01 -0800 Subject: [PATCH 001/884] Add DT_HALF support for SpaceToDepth on GPU This fix tries to address the issue raised in 14871 where there was no DT_HALF support for SpaceToDepth on GPU. This fix adds DT_HALF support on GPU and adds additional test cases. This fix fixes 14871. Signed-off-by: Yong Tang --- tensorflow/core/kernels/spacetodepth_op.cc | 3 +++ tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index 23df1c35e5..d93a2a9bad 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); +REGISTER_KERNEL_BUILDER( + Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), + SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index a1a01e8813..e841472972 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -225,6 +225,10 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; +// Instantiate the GPU implementations for Eigen::Half. +template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. 
template struct functor::SpaceToDepthOpFunctor; -- GitLab From 1d77785e9e13241cb318edce4661e0bdc2dd3095 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 24 Nov 2017 17:37:27 -0800 Subject: [PATCH 002/884] Add test cases for DT_HALF support for SpaceToDepth on GPU. Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/spacetodepth_op_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py index 3c98a685e0..4af0e6f9db 100644 --- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py +++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py @@ -34,8 +34,8 @@ from tensorflow.python.platform import tf_logging class SpaceToDepthTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.space_to_depth(input_nhwc, block_size) @@ -58,6 +58,12 @@ class SpaceToDepthTest(test.TestCase): x_out = [[[[1, 2, 3, 4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1], [2]], [[3], [4]]]] + block_size = 2 + x_out = [[[[1, 2, 3, 4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. 
def testLargerInput2x2(self): -- GitLab From 3e6edce1f41a79ca83358b14af9230826e871b66 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 24 Nov 2017 17:50:04 -0800 Subject: [PATCH 003/884] Address `Eigen::Half` -> `Eigen::half` Signed-off-by: Yong Tang --- tensorflow/core/kernels/spacetodepth_op.cc | 4 ++-- tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/kernels/spacetodepth_op.cc b/tensorflow/core/kernels/spacetodepth_op.cc index d93a2a9bad..e59adfc6ac 100644 --- a/tensorflow/core/kernels/spacetodepth_op.cc +++ b/tensorflow/core/kernels/spacetodepth_op.cc @@ -188,8 +188,8 @@ REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); REGISTER_KERNEL_BUILDER( - Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), - SpaceToDepthOp); + Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), + SpaceToDepthOp); REGISTER_KERNEL_BUILDER( Name("SpaceToDepth").Device(DEVICE_GPU).TypeConstraint("T"), SpaceToDepthOp); diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index e841472972..8466fa192f 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -225,9 +225,9 @@ struct SpaceToDepthOpFunctor { template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; -// Instantiate the GPU implementations for Eigen::Half. -template struct functor::SpaceToDepthOpFunctor; -template struct functor::SpaceToDepthOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. 
template struct functor::SpaceToDepthOpFunctor; -- GitLab From 17b982cad07799feeb00614b0faeba4cf95474c2 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 25 Nov 2017 17:33:43 -0800 Subject: [PATCH 004/884] Add DT_HALF support for DepthToSpace on GPU Signed-off-by: Yong Tang --- tensorflow/core/kernels/depthtospace_op.cc | 3 +++ tensorflow/core/kernels/depthtospace_op_gpu.cu.cc | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/tensorflow/core/kernels/depthtospace_op.cc b/tensorflow/core/kernels/depthtospace_op.cc index 39aa3e9eb0..b74a09e2cb 100644 --- a/tensorflow/core/kernels/depthtospace_op.cc +++ b/tensorflow/core/kernels/depthtospace_op.cc @@ -187,6 +187,9 @@ TF_CALL_ALL_TYPES(REGISTER); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); +REGISTER_KERNEL_BUILDER( + Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), + DepthToSpaceOp); REGISTER_KERNEL_BUILDER( Name("DepthToSpace").Device(DEVICE_GPU).TypeConstraint("T"), DepthToSpaceOp); diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 7a66285383..2d39abce16 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -229,6 +229,10 @@ struct DepthToSpaceOpFunctor { template struct functor::DepthToSpaceOpFunctor; template struct functor::DepthToSpaceOpFunctor; +// Instantiate the GPU implementations for Eigen::half. +template struct functor::DepthToSpaceOpFunctor; +template struct functor::DepthToSpaceOpFunctor; + // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::DepthToSpaceOpFunctor; -- GitLab From 1100256692a2b130f3ef2b4e36cd5b63241672ce Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 25 Nov 2017 17:34:14 -0800 Subject: [PATCH 005/884] Add test cases for DT_HALF support with DepthToSpace on GPU. 
Signed-off-by: Yong Tang --- tensorflow/python/kernel_tests/depthtospace_op_test.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py index 7df2366954..f03ad85f17 100644 --- a/tensorflow/python/kernel_tests/depthtospace_op_test.py +++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py @@ -35,8 +35,8 @@ from tensorflow.python.platform import tf_logging class DepthToSpaceTest(test.TestCase): - def _testOne(self, inputs, block_size, outputs): - input_nhwc = math_ops.to_float(inputs) + def _testOne(self, inputs, block_size, outputs, dtype=dtypes.float32): + input_nhwc = math_ops.cast(inputs, dtype) with self.test_session(use_gpu=False): # test NHWC (default) on CPU x_tf = array_ops.depth_to_space(input_nhwc, block_size) @@ -59,6 +59,12 @@ class DepthToSpaceTest(test.TestCase): x_out = [[[[1], [2]], [[3], [4]]]] self._testOne(x_np, block_size, x_out) + def testBasicFloat16(self): + x_np = [[[[1, 2, 3, 4]]]] + block_size = 2 + x_out = [[[[1], [2]], [[3], [4]]]] + self._testOne(x_np, block_size, x_out, dtype=dtypes.float16) + # Tests for larger input dimensions. To make sure elements are # correctly ordered spatially. 
def testBlockSize2(self): -- GitLab From 7a590cd8ea21ae085845efc6d9b1724d42800659 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 19 Jan 2018 19:13:43 -0800 Subject: [PATCH 006/884] Turn the op_performance_data proto lib into a header only library by default PiperOrigin-RevId: 182621348 Signed-off-by: Jie --- tensorflow/core/BUILD | 6 +++-- tensorflow/core/grappler/costs/BUILD | 24 +++++++++---------- .../core/platform/default/build_config.bzl | 8 +++++++ tensorflow/python/BUILD | 4 ++-- 4 files changed, 26 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 579174efa3..f2f66fc567 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -136,6 +136,8 @@ load( "tf_nano_proto_library", "tf_protos_all", "tf_protos_all_impl", + "tf_protos_grappler", + "tf_protos_grappler_impl", ) load( "//tensorflow/core:platform/default/build_config_root.bzl", @@ -1529,7 +1531,7 @@ cc_library( "@snappy", "@zlib_archive//:zlib", "@protobuf_archive//:protobuf", - ] + tf_protos_all_impl(), + ] + tf_protos_all_impl() + tf_protos_grappler_impl(), ) # File compiled with extra flags to get cpu-specific acceleration. 
@@ -2094,7 +2096,7 @@ tf_cuda_library( ":core_cpu_base", ":proto_text", "//tensorflow/core/grappler:grappler_item", - ] + if_static([":core_cpu_impl"]) + tf_protos_all(), + ] + if_static([":core_cpu_impl"]) + tf_protos_all() + tf_protos_grappler(), ) tf_cuda_library( diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 7abc155c19..0fe01e9c9e 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -1,6 +1,10 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cuda_library", "tf_cc_test") +load( + "//tensorflow/core:platform/default/build_config.bzl", + "tf_protos_grappler", +) filegroup( name = "all_files", @@ -37,6 +41,7 @@ tf_proto_library( name = "op_performance_data", srcs = ["op_performance_data.proto"], cc_api_version = 2, + default_header = True, protodeps = tf_additional_all_protos(), visibility = ["//visibility:public"], ) @@ -47,7 +52,6 @@ cc_library( hdrs = ["graph_properties.h"], visibility = ["//visibility:public"], deps = [ - ":op_performance_data_cc", ":utils", "//tensorflow/core:core_cpu_base", "//tensorflow/core:framework", @@ -55,7 +59,7 @@ cc_library( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:cluster", - ], + ] + tf_protos_grappler(), ) tf_cc_test( @@ -135,7 +139,7 @@ tf_cuda_library( hdrs = ["utils.h"], visibility = ["//visibility:public"], deps = [ - ":op_performance_data_cc", + "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:graph", "//tensorflow/core:lib", @@ -143,8 +147,7 @@ tf_cuda_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/clusters:utils", - "//third_party/eigen3", - ], + ] + tf_protos_grappler(), ) tf_cc_test( @@ -207,9 +210,8 @@ cc_library( hdrs = ["op_context.h"], visibility = ["//visibility:public"], deps = [ - ":op_performance_data_cc", 
"//tensorflow/core:protos_all_cc", - ], + ] + tf_protos_grappler(), ) cc_library( @@ -276,12 +278,11 @@ cc_library( deps = [ ":cost_estimator", ":op_context", - ":op_performance_data_cc", + "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler/clusters:utils", - "//third_party/eigen3", - ], + ] + tf_protos_grappler(), ) tf_cc_test( @@ -305,7 +306,6 @@ cc_library( ":cost_estimator", ":graph_properties", ":op_level_cost_estimator", - ":op_performance_data_cc", ":utils", ":virtual_placer", ":virtual_scheduler", @@ -314,7 +314,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", - ], + ] + tf_protos_grappler(), ) tf_cc_test( diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index e9c510c93c..2102c5cca3 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -378,6 +378,14 @@ def tf_protos_all(): extra_deps=tf_protos_all_impl(), otherwise=["//tensorflow/core:protos_all_cc"]) +def tf_protos_grappler_impl(): + return ["//tensorflow/core/grappler/costs:op_performance_data_cc_impl"] + +def tf_protos_grappler(): + return if_static( + extra_deps=tf_protos_grappler_impl(), + otherwise=["//tensorflow/core/grappler/costs:op_performance_data_cc"]) + def tf_env_time_hdrs(): return [ "platform/env_time.h", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3493ed76f3..dbb29d9878 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -32,6 +32,7 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library_py") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_lib_deps") load("//tensorflow/core:platform/default/build_config.bzl", "tf_additional_all_protos") 
+load("//tensorflow/core:platform/default/build_config.bzl", "tf_protos_grappler") load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_plugin_deps") load("//tensorflow/python:build_defs.bzl", "tf_gen_op_wrapper_private_py") load("//tensorflow/core:platform/default/build_config_root.bzl", "tf_additional_verbs_deps") @@ -209,9 +210,8 @@ cc_library( "//tensorflow/core/grappler/costs:analytical_cost_estimator", "//tensorflow/core/grappler/costs:cost_estimator", "//tensorflow/core/grappler/costs:measuring_cost_estimator", - "//tensorflow/core/grappler/costs:op_performance_data_cc", "//tensorflow/core/grappler/costs:utils", - ], + ] + tf_protos_grappler(), ) cc_library( -- GitLab From 550a8fa4e9a29bde527730eb45bcbfb7e9067436 Mon Sep 17 00:00:00 2001 From: Jie Date: Mon, 22 Jan 2018 18:07:49 -0800 Subject: [PATCH 007/884] [Update] Refactor optimization pass through grappler tensorflow fixed dependency issues in core/grappler/constant_folding removed python calls for optimization(layout/constfold), moved optimization to convert_graph.cc bug: dependency issue with //tensorflow/core/grappler/clusters:single_machine TODO: shape inference through grappler. cluster for optimization pass. 
--- tensorflow/contrib/tensorrt/BUILD | 6 +- .../contrib/tensorrt/convert/convert_graph.cc | 56 +++++++++++++++++-- .../contrib/tensorrt/python/trt_convert.py | 36 ++++++------ 3 files changed, 76 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 723c9f5434..1cb916e4c3 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -192,7 +192,11 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core:framework_headers_lib", "//tensorflow/core:core_cpu_base", - #"//third_party/eigen3", + "//tensorflow/core/grappler/optimizers:constant_folding", + "//tensorflow/core/grappler/optimizers:layout_optimizer", + "//tensorflow/core/grappler/clusters:virtual_cluster", + "//tensorflow/core/grappler:devices", + #"//tensorflow/core/grappler/clusters:single_machine", ], ) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 29aa555467..c1948c8144 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -40,6 +40,15 @@ limitations under the License. 
#include "tensorflow/core/platform/logging.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) +#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" +#include "tensorflow/core/grappler/devices.h" +//#include "tensorflow/core/grappler/clusters/single_machine.h" +#include "tensorflow/core/grappler/clusters/virtual_cluster.h" +#include "tensorflow/core/protobuf/device_properties.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils.h" + //------------------------------------------------------------------------------ namespace tensorrt { namespace convert { @@ -199,9 +208,48 @@ tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size, tensorflow::GraphDef* new_graph_def) { + + // optimization pass + tensorflow::grappler::GrapplerItem item; + item.fetch = output_names; + tensorflow::GraphDef gdef; + + // layout optimization + item.graph = graph_def; + tensorflow::grappler::LayoutOptimizer optimizer; + tensorflow::grappler::Cluster* gCluster; + + // virtual cluster + tensorflow::DeviceProperties device_properties; + device_properties.set_type("GPU"); + device_properties.mutable_environment()->insert({"architecture", "6"}); + gCluster = + new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); + + // single machine + int num_cpu_cores = tensorflow::grappler::GetNumAvailableLogicalCPUCores(); + int num_gpus = tensorflow::grappler::GetNumAvailableGPUs(); + LOG(DEBUG) << "cpu_cores: " << num_cpu_cores; + LOG(DEBUG) << "gpus: " << num_gpus; + // int timeout_s = 60 * 10; + // gCluster = new tensorflow::grappler::SingleMachine( + // timeout_s, num_cpu_cores, num_gpus); + + tensorflow::Status status = optimizer.Optimize(gCluster, item, &gdef); + + if (status !=tensorflow::Status::OK()) + return 
status; + + // constant folding + item.graph = gdef; + tensorflow::grappler::ConstantFolding fold(nullptr); + status = fold.Optimize(nullptr, item, &gdef); + if (status !=tensorflow::Status::OK()) + return status; + ShapeMap shape_map; TF_RETURN_IF_ERROR( - tensorflow::trt::inferShapes(graph_def, output_names, shape_map)); + tensorflow::trt::inferShapes(gdef, output_names, shape_map)); std::stringstream oss; for (auto& n : shape_map) { // nodes oss << " Node= " << n.first << ", "; @@ -213,10 +261,10 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), - graph_def.library()); + gdef.library()); tensorflow::Graph graph(flib); TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), graph_def, &graph)); + tensorflow::GraphConstructorOptions(), gdef, &graph)); // Segment the graph into subgraphs that can be converted to TensorRT tensorrt::segment::SegmentOptions segment_options; @@ -227,7 +275,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( segment_options.minimum_segment_size = 2; tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( - graph_def, IsTensorRTCandidate, segment_options, &segments)); + gdef, IsTensorRTCandidate, segment_options, &segments)); if (segments.size() > 1) { // LOG(WARNING) << "Multiple TensorRT candidate subgraphs were found, " //<< "but only the first can be converted."; diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index a66afa8d05..354f0c8b42 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -48,25 +48,27 @@ def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace # output_graph_def_string = trt_convert( # input_graph_def_string,outputs, # max_batch_size,max_workspace_size, status) - g = tf.Graph() - 
with g.as_default(): - tf.import_graph_def(input_graph_def, name="") - rewriter_config = rewriter_config_pb2.RewriterConfig() - rewriter_config.optimizers.append('layout') - rewriter_config.optimizers.append('constfold') + # g = tf.Graph() + # with g.as_default(): + # tf.import_graph_def(input_graph_def, name="") + # rewriter_config = rewriter_config_pb2.RewriterConfig() + # rewriter_config.optimizers.append('layout') + # rewriter_config.optimizers.append('constfold') - # mark output nodes as fetch - train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) - for node_name in outputs: - out_node = g.get_operation_by_name(node_name) - for i in range(0,len(out_node.outputs)): - train_op.append(out_node.outputs[0]) + # # mark output nodes as fetch + # train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) + # for node_name in outputs: + # out_node = g.get_operation_by_name(node_name) + # for i in range(0,len(out_node.outputs)): + # train_op.append(out_node.outputs[0]) - # constant folding - mg = meta_graph.create_meta_graph_def(graph=g) - meta_graph.add_collection_def(mg, ops.GraphKeys.TRAIN_OP) - optimized_graph_def_str = \ - tf_optimizer.OptimizeGraph(rewriter_config, mg).SerializeToString() + # # constant folding + # mg = meta_graph.create_meta_graph_def(graph=g) + # meta_graph.add_collection_def(mg, ops.GraphKeys.TRAIN_OP) + # optimized_graph_def_str = \ + # tf_optimizer.OptimizeGraph(rewriter_config, mg).SerializeToString() + + optimized_graph_def_str = input_graph_def.SerializeToString() # TODO(sami): Fix this when we can return status from C++ library # There is a problem with the TF internal library setup that doesn't allow us to return a status object from C++. 
-- GitLab From da188d378bc6826a8f182b42aa8175a932a0c2f8 Mon Sep 17 00:00:00 2001 From: Jie Date: Tue, 23 Jan 2018 17:23:00 -0800 Subject: [PATCH 008/884] [UPDATE] Refactoring shape inference Removed shape refiner and apply shape inference through grappler/costs/graph_properties Currently using static shape inference --- tensorflow/contrib/tensorrt/BUILD | 3 +- .../contrib/tensorrt/convert/convert_graph.cc | 39 +++--- .../contrib/tensorrt/convert/convert_nodes.cc | 24 ++-- .../contrib/tensorrt/convert/convert_nodes.h | 5 +- .../contrib/tensorrt/convert/inferShapes.cc | 125 ------------------ .../contrib/tensorrt/convert/inferShapes.h | 39 ------ 6 files changed, 40 insertions(+), 195 deletions(-) delete mode 100644 tensorflow/contrib/tensorrt/convert/inferShapes.cc delete mode 100644 tensorflow/contrib/tensorrt/convert/inferShapes.h diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 1cb916e4c3..f92b60b03a 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -174,12 +174,10 @@ cc_library( "convert/convert_nodes.cc", "convert/convert_graph.cc", "segment/segment.cc", - "convert/inferShapes.cc", ], hdrs=[ "convert/convert_nodes.h", "convert/convert_graph.h", - "convert/inferShapes.h", "segment/segment.h", "segment/union_find.h", ], @@ -196,6 +194,7 @@ cc_library( "//tensorflow/core/grappler/optimizers:layout_optimizer", "//tensorflow/core/grappler/clusters:virtual_cluster", "//tensorflow/core/grappler:devices", + "//tensorflow/core/grappler/costs:graph_properties", #"//tensorflow/core/grappler/clusters:single_machine", ], ) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index c1948c8144..e90790716c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -28,7 +28,6 @@ limitations under the License. 
#include "NvInfer.h" #include "tensorflow/contrib/tensorrt/convert/convert_nodes.h" -#include "tensorflow/contrib/tensorrt/convert/inferShapes.h" #include "tensorflow/contrib/tensorrt/segment/segment.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def.pb.h" @@ -49,6 +48,8 @@ limitations under the License. #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" + //------------------------------------------------------------------------------ namespace tensorrt { namespace convert { @@ -123,7 +124,8 @@ std::unordered_map> BuildTensorNameMap( tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::Graph& graph, const std::vector& output_names, const std::set& subgraph_node_ids, size_t max_batch_size, - size_t max_workspace_size, const ShapeMap& shape_map) { + size_t max_workspace_size, + const tensorflow::grappler::GraphProperties& graph_properties) { tensorflow::EdgeSet subgraph_incoming_edges; GetSubGraphIncomingEdges(graph, subgraph_node_ids, &subgraph_incoming_edges); @@ -161,7 +163,7 @@ tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::NodeDef trt_node_def; TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size, shape_map, &trt_node_def)); + max_batch_size, max_workspace_size, graph_properties, &trt_node_def)); tensorflow::Status status; tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); @@ -246,19 +248,24 @@ tensorflow::Status ConvertGraphDefToTensorRT( status = fold.Optimize(nullptr, item, &gdef); if (status !=tensorflow::Status::OK()) return status; + + // AJ refactoring shape inference through grappler/GraphProperties. 
+ tensorflow::grappler::GraphProperties static_graph_properties(item); + static_graph_properties.InferStatically(false); + // TF_CHECK_OK(static_graph_prop.InferStatically(false)); + // ShapeMap shape_map; + // TF_RETURN_IF_ERROR( + // tensorflow::trt::inferShapes(gdef, output_names, shape_map)); + // std::stringstream oss; + // for (auto& n : shape_map) { // nodes + // oss << " Node= " << n.first << ", "; + // for (auto o : n.second) { // outputs + // oss << o.first.DebugString() << " T= " << o.second << ", "; + // } + // LOG(DEBUG) << oss.str(); + // oss.str(""); + // } - ShapeMap shape_map; - TF_RETURN_IF_ERROR( - tensorflow::trt::inferShapes(gdef, output_names, shape_map)); - std::stringstream oss; - for (auto& n : shape_map) { // nodes - oss << " Node= " << n.first << ", "; - for (auto o : n.second) { // outputs - oss << o.first.DebugString() << " T= " << o.second << ", "; - } - LOG(DEBUG) << oss.str(); - oss.str(""); - } // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -291,7 +298,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, shape_map)); + max_workspace_size, static_graph_properties)); } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 83f78d7eff..6c77cdc0b6 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1548,7 +1548,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( const tensorflow::Graph& graph, const std::set& subgraph_node_ids, const std::vector>& input_inds, const std::vector>& output_inds, size_t max_batch_size, - size_t max_workspace_size, const ShapeMap& shape_map, + size_t max_workspace_size, + const 
tensorflow::grappler::GraphProperties& graph_properties, tensorflow::NodeDef* trt_node) { // Visit nodes in reverse topological order and construct the TRT network. @@ -1605,20 +1606,20 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_names.push_back(node_name); // insert original node name without port // TODO(jie): alternative :) // tensorflow::DataType tf_dtype = node->output_type(output_idx); - if (shape_map.count(node_name) == 0) + if (!graph_properties.HasOutputProperties(node_name)) return tensorflow::errors::Internal("failed to find input node: " + node_name); - auto input_entry_vec = shape_map.at(node_name); - if (static_cast(input_entry_vec.size()) < output_idx) + auto op_info_vec = graph_properties.GetOutputProperties(node_name); + if (static_cast(op_info_vec.size()) < output_idx) return tensorflow::errors::Internal( "accessing output index of: " + std::to_string(output_idx) + ", at node: " + node_name + "with output entry from shape_map: " + - std::to_string(input_entry_vec.size())); + std::to_string(op_info_vec.size())); - auto input_entry = input_entry_vec.at(output_idx); + auto op_info = op_info_vec.at(output_idx); - tensorflow::DataType tf_dtype = input_entry.second; + tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); @@ -1627,15 +1628,16 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "accessing output index of: " << std::to_string(output_idx) << ", at node: " << node_name << "with output entry from shape_map: " - << std::to_string(input_entry_vec.size()); + << std::to_string(op_info_vec.size()); + // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; - for (int i = 1; i < input_entry.first.dims(); i++) { + for (int i = 1; i < op_info.shape().dim_size(); i++) { LOG(DEBUG) << "dimension: " << i - << " , size: " << 
input_entry.first.dim_size(i); - input_dim_psuedo_chw.d[i - 1] = input_entry.first.dim_size(i); + << " , size: " << op_info.shape().dim(i).size(); + input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); } // TODO(ben,jie): proper way to restore input tensor name? diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index a624582dec..dc59c37892 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -20,10 +20,10 @@ limitations under the License. #include #include -#include "tensorflow/contrib/tensorrt/convert/inferShapes.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/grappler/costs/graph_properties.h" namespace tensorrt { namespace convert { @@ -34,7 +34,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_inds, // {node_id, output_idx} const std::vector>& output_inds, // {node_id, output_idx} - size_t max_batch_size, size_t max_workspace_size, const ShapeMap& shape_map, + size_t max_batch_size, size_t max_workspace_size, + const tensorflow::grappler::GraphProperties& graph_prop, tensorflow::NodeDef* trt_node); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/inferShapes.cc b/tensorflow/contrib/tensorrt/convert/inferShapes.cc deleted file mode 100644 index c7f0f0023d..0000000000 --- a/tensorflow/contrib/tensorrt/convert/inferShapes.cc +++ /dev/null @@ -1,125 +0,0 @@ -/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/contrib/tensorrt/convert/inferShapes.h" -#include -#include "tensorflow/core/common_runtime/shape_refiner.h" -#include "tensorflow/core/framework/node_def.pb.h" -#include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/framework/tensor.h" -#include "tensorflow/core/framework/types.pb_text.h" -#include "tensorflow/core/graph/algorithm.h" -#include "tensorflow/core/graph/graph.h" -#include "tensorflow/core/graph/graph_constructor.h" -#include "tensorflow/core/lib/core/errors.h" -#include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/platform/logging.h" - -#define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) - -namespace tensorflow { -namespace trt { -std::vector getTypes(const tensorflow::OpDef& op, - const tensorflow::NodeDef& nd, - bool inp = true) { - const auto& attrMap = nd.attr(); - auto getType = [&attrMap](decltype( - op.input_arg(0)) a) -> std::vector { - std::vector tvec; - if (!a.type_list_attr().empty()) { // get the list types - const auto& tl = attrMap.at(a.type_list_attr()).list(); - int tsize = tl.type_size(); - tvec.reserve(tsize); - for (int t = 0; t < tsize; t++) { - tvec.push_back(tl.type(t)); - } - return tvec; - } - tensorflow::DataType cType = tensorflow::DT_INVALID; - if (a.type() != tensorflow::DT_INVALID) { // get defined types - cType = a.type(); - } else if (!a.type_attr().empty()) { - cType = attrMap.at(a.type_attr()).type(); - } - if (!a.number_attr().empty()) { // numbertypes 
- int64 nTensors = attrMap.at(a.number_attr()).i(); - tvec = std::vector(nTensors, cType); - return tvec; - } - tvec.push_back(cType); - return tvec; - }; - std::vector types; - if (inp) { - int n_inputs = op.input_arg_size(); - for (int i = 0; i < n_inputs; i++) { - auto tout = getType(op.input_arg(i)); - LOG(DEBUG) << "Node= " << nd.name() << " #inputs" << tout.size(); - types.insert(types.end(), tout.begin(), tout.end()); - } - } else { - int n_outputs = op.output_arg_size(); - // types.resize(n_outputs); - for (int i = 0; i < n_outputs; i++) { - auto tout = getType(op.output_arg(i)); - LOG(DEBUG) << "Node= " << nd.name() << " #outputs" << tout.size(); - types.insert(types.end(), tout.begin(), tout.end()); - } - } - return types; -} - -tensorflow::Status inferShapes(const tensorflow::GraphDef& graph_def, - const std::vector& output_names, - ShapeMap& shapes) { - tensorflow::Graph g(OpRegistry::Global()); - TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( - tensorflow::GraphConstructorOptions(), graph_def, &g)); - std::vector POnodes; - tensorflow::GetPostOrder(g, &POnodes); - tensorflow::ShapeRefiner refiner(graph_def.versions().producer(), - OpRegistry::Global()); - for (auto n = POnodes.rbegin(); n != POnodes.rend(); ++n) { - TF_CHECK_OK(refiner.AddNode(*n)); - } - - auto shape2PTS = [](tensorflow::shape_inference::InferenceContext* ic, - const tensorflow::shape_inference::ShapeHandle& sh) - -> tensorflow::PartialTensorShape { - std::vector dims; - int64 rank = ic->Rank(sh); - for (int64 i = 0; i < rank; i++) { - auto dh = ic->Dim(sh, i); - dims.push_back(ic->Value(dh)); - } - return tensorflow::PartialTensorShape(dims); - }; - for (const auto& n : POnodes) { - auto ic = refiner.GetContext(n); - if (ic) { - int nOuts = ic->num_outputs(); - auto types = getTypes(n->op_def(), n->def(), false); - std::vector< - std::pair> - SAT; - for (int i = 0; i < nOuts; i++) { - auto PTS = shape2PTS(ic, ic->output(i)); - SAT.push_back({PTS, types.at(i)}); - } - 
shapes[n->name()] = SAT; - } else { - LOG(WARNING) << "Node " << n->name() << " doesn't have InferenceContext!"; - } - } - return tensorflow::Status::OK(); -} -} // namespace trt -} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/inferShapes.h b/tensorflow/contrib/tensorrt/convert/inferShapes.h deleted file mode 100644 index b94f1ee893..0000000000 --- a/tensorflow/contrib/tensorrt/convert/inferShapes.h +++ /dev/null @@ -1,39 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#ifndef TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_ -#define TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_ - -#include -#include -#include -#include - -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/tensor_shape.h" -#include "tensorflow/core/lib/core/status.h" - -typedef std::unordered_map>> - ShapeMap; -namespace tensorflow { -namespace trt { -tensorflow::Status inferShapes(const tensorflow::GraphDef& graph_def, - const std::vector& output_names, - ShapeMap& shapes); -} -} // namespace tensorflow - -#endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_INFERSHAPES_H_ -- GitLab From ccb555f1e7947785763cf65a6713634a85c72607 Mon Sep 17 00:00:00 2001 From: Jie Date: Wed, 24 Jan 2018 16:32:02 -0800 Subject: [PATCH 009/884] [BUG_FIX] 'Mean' converter ConvertReduce fixed 1. permutation index 2. 
output tensor pushed back into map --- tensorflow/contrib/tensorrt/convert/convert_graph.cc | 2 +- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index e90790716c..16d6e6ec7d 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -60,7 +60,7 @@ static std::unordered_set output_nodes; bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", - "Add", "Mul", "Sub", "Rsqrt", "Pad" // "Placeholder" ,"Mean" + "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean" // TODO(ben,jie): ... }; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 6c77cdc0b6..6a93edfb47 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1334,7 +1334,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, int nbDims = dims.nbDims + 1; TRT_ShapedWeights index_list = inputs.at(1).weights(); - + TFAttrs attrs(node_def); // TODO(jie): handle data type // auto data_type = attrs.get("T"); @@ -1372,7 +1372,9 @@ tensorflow::Status ConvertReduce(Converter& ctx, if (index_list_data[i] == 0) return tensorflow::errors::InvalidArgument("TRT cannot reduce at 0, at" + node_def.name()); - if (index_list_data[i] == 1) permuted_index = 1; + if (index_list_data[i] == 1) + permuted_index = 1; + idx_set.emplace(index_list_data[i]); } @@ -1380,7 +1382,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nbDims; i++) { - if (idx_set.count(i)) { + if (idx_set.count(i)==0) { permuted_index 
= i; break; } @@ -1415,6 +1417,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, output_tensor = ctx.transposeTensor( const_cast(output_tensor), permutation_order); } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); } -- GitLab From e1eb01e5edf1b5814d7f50e8bcdf910c02a49256 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 24 Jan 2018 19:29:22 -0800 Subject: [PATCH 010/884] Adding Resources for calibration and execution --- tensorflow/contrib/tensorrt/BUILD | 21 ++++++ .../contrib/tensorrt/convert/convert_nodes.cc | 1 + .../tensorrt/resources/TRTInt8Calibrator.cc | 65 +++++++++++++++++++ .../tensorrt/resources/TRTInt8Calibrator.h | 40 ++++++++++++ .../tensorrt/resources/TRTResourceManager.cc | 18 +++++ .../tensorrt/resources/TRTResourceManager.h | 37 +++++++++++ .../contrib/tensorrt/resources/TRTResources.h | 32 +++++++++ 7 files changed, 214 insertions(+) create mode 100644 tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc create mode 100644 tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h create mode 100644 tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc create mode 100644 tensorflow/contrib/tensorrt/resources/TRTResourceManager.h create mode 100644 tensorflow/contrib/tensorrt/resources/TRTResources.h diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 1cb916e4c3..37aa573cdb 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -168,6 +168,26 @@ tf_py_wrap_cc( ], ) +cc_library( + name = "trt_resources", + srcs = [ + "resources/TRTInt8Calibrator.cc", + "resources/TRTResourceManager.cc", + ], + hdrs = [ + "resources/TRTInt8Calibrator.h", + "resources/TRTResourceManager.h", + "resources/TRTResources.h", + ], + deps = [ + "@local_config_tensorrt//:tensorrt", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + "//tensorflow/core:core_cpu_base", + + ], +) + cc_library( name= 
"trt_conversion", srcs=[ @@ -188,6 +208,7 @@ cc_library( "@protobuf_archive//:protobuf_headers", "@nsync//:nsync_headers", ":trt_logging", + ":trt_resources", "//tensorflow/core:framework_lite", "//tensorflow/core:protos_all_cc", "//tensorflow/core:framework_headers_lib", diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 83f78d7eff..3684ac8e78 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -39,6 +39,7 @@ limitations under the License. #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) // Check if the types are equal. Cast to int first so that failure log message diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc new file mode 100644 index 0000000000..3c94b52ea6 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -0,0 +1,65 @@ +// +// Created by skama on 1/24/18. 
+// + +#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" + +#include +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace trt { + +int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } + +bool TRTInt8Calibrator::setBatch( + const std::unordered_map& data) { + while (calib_running_.load( + std::memory_order_acquire)) { // wait while calibration is running + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + } + for (const auto it : data) { + auto devptr = dev_buffers_.find(it.first); + if (devptr == dev_buffers_.end()) { + LOG(FATAL) << "FATAL input name '" << it.first + << "' does not match with the buffer names"; + } + const auto& d = devptr->second; + auto status = + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyHostToDevice); + if (status != 0) { + LOG(FATAL) << "cudaMemcpy for '" << it.first << "' failed with " + << status; + } + } + calib_running_.store(true, std::memory_order_release); // release builder + cond_.notify_all(); + return true; +} + +bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, + int nbBindings) { + calib_running_.store(false, std::memory_order_release); // wait for new batch + cond_.notify_all(); + while (!calib_running_.load( + std::memory_order_acquire)) { // wait until new batch arrives + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + } + if (done_) { + return false; + } + for (int i = 0; i < nbBindings; i++) { + auto it = dev_buffers_.find(names[i]); + if (it == dev_buffers_.end()) { + LOG(FATAL) << "Calibration engine asked for unknown tensor name '" + << names[i] << "' at position " << i; + } + bindings[i] = it->second.first; + } + return true; +} + +} // namespace trt +} // namespace tensorflow \ No newline at end of file diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h new file mode 
100644 index 0000000000..b0e904b666 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h @@ -0,0 +1,40 @@ +// +// Created by skama on 1/24/18. +// + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ + +#include +#include +#include +#include +#include +#include "tensorflow/core/platform/mutex.h" +namespace tensorflow { +namespace trt { + +struct TRTInt8Calibrator : public nvinfer1::IInt8Calibrator { + public: + TRTInt8Calibrator(const std::unordered_map< + std::string, std::pair>& dev_buffers, + int batch_size) + : batch_size_(batch_size), + done_(false), + dev_buffers_(dev_buffers), + calib_running_(false){}; + int getBatchSize() const; + bool getBatch(void* bindings[], const char* names[], int nbBindings) override; + bool setBatch(const std::unordered_map &data); + void setDone(){done_=true;} + private: + int batch_size_; + tensorflow::mutex cond_mtx_; + tensorflow::condition_variable cond_; + bool done_; + std::unordered_map> dev_buffers_; + std::atomic_bool calib_running_; +}; +} // namespace trt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc new file mode 100644 index 0000000000..b060295301 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc @@ -0,0 +1,18 @@ +// +// Created by skama on 1/23/18. +// + +#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" + + +std::shared_ptr tensorflow::trt::TRTResourceManager::getManager(const std::string &mgr_name) { + // mutex is held for lookup only. Most instantiations where mutex will be held longer + // will be during op creation and should be ok. 
+ tensorflow::mutex_lock lock(map_mutex_); + auto s=managers_.find(mgr_name); + if(s==managers_.end()){ + auto it=managers_.emplace(mgr_name,std::make_shared(mgr_name)); + return it.first->second; + } + return s->second; +} diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h new file mode 100644 index 0000000000..5ec66ab582 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h @@ -0,0 +1,37 @@ +// +// Created by skama on 1/23/18. +// + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCEMANAGER_H_ + +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCE_TRTRESOURCEMANAGER_H_ +#include + +#include +#include +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +namespace trt { +class TRTResourceManager { + TRTResourceManager() = default; + + public: + static std::shared_ptr instance() { + static std::shared_ptr instance_( + new TRTResourceManager); + return instance_; + } + // returns a manager for given op, if it doesn't exists it creates one + std::shared_ptr getManager( + const std::string& op_name); + + private: + std::unordered_map> + managers_; + tensorflow::mutex map_mutex_; +}; +} // namespace trt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCEMANAGER_H_ diff --git a/tensorflow/contrib/tensorrt/resources/TRTResources.h b/tensorflow/contrib/tensorrt/resources/TRTResources.h new file mode 100644 index 0000000000..2b65017943 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/TRTResources.h @@ -0,0 +1,32 @@ +// +// Created by skama on 1/23/18. 
+// + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ + +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ + +#include +#include +#include "tensorflow/contrib/tensorrt/resourcemgr/TRTInt8Calibrator.h" +#include "tensorflow/core/framework/resource_mgr.h" + +namespace tensorflow { +namespace trt { + +struct TRTCalibrationResource : public tensorflow::ResourceBase { + TRTCalibrationResource():calibrator(nullptr), builder(nullptr), thr(nullptr){}; + TRTInt8Calibrator* calibrator; + nvinfer1::IBuilder* builder; + std::thread *thr; +}; + +struct TRTEngineResource:public tensorflow::ResourceBase{ + TRTEngineResource():runtime(nullptr), ctx(nullptr){}; + nvinfer1::IRuntime *runtime; + nvinfer1::IExecutionContext *ctx; +}; + +} +} +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ -- GitLab From 6ea7a24c615e7cd9445395539a37e67cb74eede2 Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 25 Jan 2018 15:14:50 -0800 Subject: [PATCH 011/884] [UPDATE] Converter update ConcatV2 AvgPool inception_v1 passed --- .../contrib/tensorrt/convert/convert_graph.cc | 3 +- .../contrib/tensorrt/convert/convert_nodes.cc | 122 +++++++++++++++++- 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 16d6e6ec7d..2b6a26491b 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -60,7 +60,8 @@ static std::unordered_set output_nodes; bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", - "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean" + "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", + "AvgPool", "ConcatV2" // TODO(ben,jie): ... 
}; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 86c43d960a..ff2e37b7da 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1093,6 +1093,8 @@ tensorflow::Status ConvertPool(Converter& ctx, // TODO(jie): support other pooling type if (node_def.op() == "MaxPool") type = nvinfer1::PoolingType::kMAX; + else if (node_def.op() == "AvgPool") + type = nvinfer1::PoolingType::kAVERAGE; else return tensorflow::errors::Unimplemented("only supports Max pool"); @@ -1253,6 +1255,25 @@ tensorflow::Status ConvertConst(Converter& ctx, // weights = ctx.get_temp_weights(dtype, scalar_shape); // std::memcpy(const_cast(weights.values), // weights_tensor.float_val().data(), weights.size_bytes()); + } else if (!weights_tensor.int_val().empty()) { + LOG(DEBUG) << "int!!!" << node_def.name(); + nvinfer1::Dims scalar_shape; + if (tensor.dims() > 0) { + LOG(DEBUG) << "dimensions: " << tensor.dims(); + weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), + get_tensor_shape(tensor)); + } else { + LOG(DEBUG) << "dimensions: " << tensor.dims(); + scalar_shape.nbDims = 1; + scalar_shape.d[0] = 1; + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { + scalar_shape.d[i] = 0; + scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; + } + weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), + scalar_shape); + } } else if (!weights_tensor.tensor_content().empty()) { LOG(DEBUG) << "TENSOR!!!" 
<< node_def.name(); weights = TRT_ShapedWeights(dtype, weights_tensor.tensor_content().data(), @@ -1261,6 +1282,7 @@ tensorflow::Status ConvertConst(Converter& ctx, return tensorflow::errors::Unimplemented( "not supported constant type, at " + node_def.name()); } + // pass the output outputs->push_back(TRT_TensorOrWeights(weights)); return tensorflow::Status::OK(); @@ -1522,19 +1544,115 @@ tensorflow::Status ConvertPad(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConcat( + Converter& ctx, tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + + // not including the last input (axis) here + int input_size = static_cast(inputs.size()) - 1; + + if (!inputs.at(0).is_tensor()) + return tensorflow::errors::InvalidArgument( + "Concat in TRT support only Tensor input, at " + node_def.name()); + + // We are retrieving the axis + TRT_ShapedWeights axis = inputs.at(input_size).weights(); + + TFAttrs attrs(node_def); + auto attr_size = attrs.at("N")->i(); + auto data_type = attrs.get("T"); + auto index_type = attrs.get("Tidx"); + + // TODO(jie): handle data type + // Only expect to handle INT32 as index attributes for now + if (index_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented("Tidx supports only DT_INT32, at " + + node_def.name()); + + int index = + *(static_cast(const_cast(axis.values_))); + + // TODO(jie): early termination with no-op (attr_size==1) + + auto dim = inputs.at(0).tensor()->getDimensions(); + // dimension check + if (index > dim.nbDims + 1) + return tensorflow::errors::InvalidArgument( + "Concatenate on axis out of dimension range, at " + + node_def.name()); + + if (index == 0) + return tensorflow::errors::InvalidArgument( + "Concatenate on batch dimension not supported, at " + + node_def.name()); + + // incase we need permutation; + std::vector permutation_order(dim.nbDims+1); + + for (int i=0; i inputs_vec; + // Shap chack (all input tensor should 
have same shape) + // starting from 0 since we are probably also doing transpose here; + for (int i=0; i < input_size; i++) { + auto tensor_i = inputs.at(i).tensor(); + auto dim_i = tensor_i->getDimensions(); + if ( dim_i.nbDims != dim.nbDims ) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent dimensions, at " + + node_def.name()); + + for (int j=0; j < dim.nbDims; j++) { + // check dimension consistency on non-concatenate axis + if (j != index-1 && dim_i.d[j] != dim.d[j]) + return tensorflow::errors::InvalidArgument( + "Concatenate receives inputs with inconsistent shape, at" + + node_def.name()); + } + + // TRT does concatenation only on channel! + if (index != 1) + tensor_i = ctx.transposeTensor(const_cast(tensor_i), + permutation_order); + + inputs_vec.push_back(tensor_i); + } + + // nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + nvinfer1::IConcatenationLayer* layer = ctx.network()->addConcatenation( + const_cast(inputs_vec.data()), + inputs_vec.size()); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + if (index != 1) + { + output_tensor= ctx.transposeTensor(output_tensor, permutation_order); + } + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; _op_registry["Conv2D"] = ConvertConv2D; _op_registry["Relu"] = ConvertActivation; _op_registry["MaxPool"] = ConvertPool; + _op_registry["AvgPool"] = ConvertPool; // This could be really handled as ConvertBinary _op_registry["BiasAdd"] = ConvertScale; _op_registry["Const"] = ConvertConst; // _op_registry["MatMul"] = ConvertFullyConnected; // not used in vgg // TODO(ben,jie): this is a temp hack. 
_op_registry["Identity"] = ConvertIdentity; // Identity should be removed - // _op_registry["AvgPool"] = ConvertPool; // resnet_50_v1 slim implementation _op_registry["Add"] = ConvertBinary; @@ -1544,6 +1662,8 @@ void Converter::register_op_converters() { _op_registry["Mean"] = ConvertReduce; _op_registry["Pad"] = ConvertPad; // TODO(ben,jie): Add more ops + + _op_registry["ConcatV2"] = ConvertConcat; } } // namespace -- GitLab From cf30a7549e026d5c50117ae011af2b0148a81a89 Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 25 Jan 2018 17:21:07 -0800 Subject: [PATCH 012/884] [UPDATE] Converter update Grouped convolution support added (depthwise as a special case) inception_v2 passed --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 220 +++++++++++------- 2 files changed, 140 insertions(+), 82 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 2b6a26491b..c7fa4144b1 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2" + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" // TODO(ben,jie): ... 
}; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index ff2e37b7da..ff47cdfe4a 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -366,15 +366,20 @@ void reorder4(nvinfer1::DimsNCHW shape, T const* idata, } void reorder_rsck_to_kcrs(TRT_ShapedWeights const& iweights, - TRT_ShapedWeights* oweights) { + TRT_ShapedWeights* oweights, int nbGroups) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; - int c = iweights.shape_.d[2]; - int k = iweights.shape_.d[3]; - oweights->shape_.d[0] = k; - oweights->shape_.d[1] = c; + // TRT requires GKcRS, while TF depthwise has RSCK + // where c=1, C=G + LOG(DEBUG) << "nbGroups: " << nbGroups; + int c = iweights.shape_.d[2]/nbGroups; + LOG(DEBUG) << "c" << iweights.shape_.d[2] << " then " << c; + int k = iweights.shape_.d[3]*nbGroups; + LOG(DEBUG) << "k" << iweights.shape_.d[3] << " then " << k; + oweights->shape_.d[0] = k/nbGroups; + oweights->shape_.d[1] = c*nbGroups; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; // nvinfer1::DimsNCHW istrides = {1, s, c*r*s, r*s}; @@ -911,87 +916,23 @@ tensorflow::Status BinaryTensorOpWeight( return tensorflow::Status::OK(); } -tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, tensorflow::NodeDef const& node_def, - const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, - std::vector* outputs) { - static const std::unordered_map - ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - // {"max", nvinfer1::ElementWiseOperation::kMAX}, - // {"min", nvinfer1::ElementWiseOperation::kMIN}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; - - // FIXME assume 
type matches input weights - // get trt type & shape - TFAttrs attrs(node_def); - // maybe this part has to be moved into the block of rsqrt later - nvinfer1::DataType dtype = attrs.get("T"); - - // check type consistency - CHECK_EQ_TYPE(tensor_l->getType(), dtype); - CHECK_EQ_TYPE(tensor_r->getType(), dtype); - auto op_pair = ops.find(node_def.op()); - if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " not supported at: " + node_def.name()); - - nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( - *const_cast(tensor_l), - *const_cast(tensor_r), op_pair->second); - - nvinfer1::ITensor* output_tensor = layer->getOutput(0); - - // pass the output - outputs->push_back(TRT_TensorOrWeights(output_tensor)); - return tensorflow::Status::OK(); -} - -tensorflow::Status ConvertPlaceholder( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, - std::vector* outputs) { - LOG(DEBUG) << "Placeholder should have been replace already"; - return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); - // OK this make sense since we are supposed to replace it with input - TFAttrs attrs(node_def); - nvinfer1::DataType dtype = attrs.get("dtype"); - nvinfer1::Dims dims = attrs.get("shape"); - - dims.nbDims--; - for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; - - nvinfer1::ITensor* output = - ctx.network()->addInput(node_def.name().c_str(), dtype, dims); - if (!output) { - return tensorflow::errors::InvalidArgument("Failed to create Input layer"); - } - outputs->push_back(TRT_TensorOrWeights(output)); - return tensorflow::Status::OK(); -} +enum class ConvolutionType { + DEFAULT, + DEPTHWISE_CONV +}; -tensorflow::Status ConvertConv2D(Converter& ctx, +tensorflow::Status ConvertConv2DHelper( + Converter& ctx, tensorflow::NodeDef const& node_def, std::vector const& inputs, - std::vector* outputs) { + std::vector* outputs, + int group // group ==0 specifies 
depthwise conv + ) { nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - // nvinfer1::ITensor* tensor = inputs.at(0).tensor(); - // TODO(jie): handle NHWC/NCHW transpose; - TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); - TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - reorder_rsck_to_kcrs(weights_rsck, &weights); - TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0]; - nvinfer1::DimsHW kernel_size; - kernel_size.h() = weights.shape_.d[2]; - kernel_size.w() = weights.shape_.d[3]; - LOG(DEBUG) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); + TFAttrs attrs(node_def); + int c_index = 1; int h_index = 2; int w_index = 3; auto data_format = attrs.get("data_format"); @@ -1000,17 +941,36 @@ tensorflow::Status ConvertConv2D(Converter& ctx, {0, 3, 1, 2}); h_index = 1; w_index = 2; + c_index = 3; // TODO(jie): transpose it } else { LOG(DEBUG) << "NCHW !!!!"; } + + // tensor after transpose (NCHW) + auto tensor_dim = tensor->getDimensions(); + + int nbGroups = group; + if (nbGroups == 0) // depthwise convolution + nbGroups = tensor_dim.d[0]; + LOG(DEBUG) << "groups count: " << nbGroups; + + TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); + reorder_rsck_to_kcrs(weights_rsck, &weights, nbGroups); + TRT_ShapedWeights biases(weights.type_); + int noutput = weights.shape_.d[0] * nbGroups; + nvinfer1::DimsHW kernel_size; + kernel_size.h() = weights.shape_.d[2]; + kernel_size.w() = weights.shape_.d[3]; + LOG(DEBUG) << "kernel size: " << kernel_size.h() << ", " << kernel_size.w(); + // TODO(jie): stride. 
(NHWC/NCHW) auto tf_stride = attrs.get>("strides"); LOG(DEBUG) << "h_INDEX" << h_index << ", w_index " << w_index; LOG(DEBUG) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); - auto tensor_dim = tensor->getDimensions(); std::vector> padding; // TODO(jie): padding. if (attrs.get("padding") == "SAME") { @@ -1055,6 +1015,7 @@ tensorflow::Status ConvertConv2D(Converter& ctx, layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); + layer->setNbGroups(nbGroups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); @@ -1071,6 +1032,102 @@ tensorflow::Status ConvertConv2D(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertConv2DHelper( + Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs, + ConvolutionType type) { + switch(type) { + case ConvolutionType::DEFAULT: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 1); + case ConvolutionType::DEPTHWISE_CONV: + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, 0); + } + return tensorflow::errors::Unimplemented( + "unsupported convolution type at, " + node_def.name()); +} + +tensorflow::Status BinaryTensorOpTensor( + Converter& ctx, tensorflow::NodeDef const& node_def, + const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, + std::vector* outputs) { + static const std::unordered_map + ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + // {"max", nvinfer1::ElementWiseOperation::kMAX}, + // {"min", nvinfer1::ElementWiseOperation::kMIN}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; + + // FIXME assume type matches input weights + // get trt type & shape + TFAttrs attrs(node_def); + // 
maybe this part has to be moved into the block of rsqrt later + nvinfer1::DataType dtype = attrs.get("T"); + + // check type consistency + CHECK_EQ_TYPE(tensor_l->getType(), dtype); + CHECK_EQ_TYPE(tensor_r->getType(), dtype); + auto op_pair = ops.find(node_def.op()); + if (op_pair == ops.end()) + return tensorflow::errors::Unimplemented( + "binary op: " + node_def.op() + + " not supported at: " + node_def.name()); + + nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( + *const_cast(tensor_l), + *const_cast(tensor_r), op_pair->second); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + + // pass the output + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertPlaceholder( + Converter& ctx, tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + LOG(DEBUG) << "Placeholder should have been replace already"; + return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); + // OK this make sense since we are supposed to replace it with input + TFAttrs attrs(node_def); + nvinfer1::DataType dtype = attrs.get("dtype"); + nvinfer1::Dims dims = attrs.get("shape"); + + dims.nbDims--; + for (int i = 0; i < dims.nbDims; i++) dims.d[i] = dims.d[i + 1]; + + nvinfer1::ITensor* output = + ctx.network()->addInput(node_def.name().c_str(), dtype, dims); + if (!output) { + return tensorflow::errors::InvalidArgument("Failed to create Input layer"); + } + outputs->push_back(TRT_TensorOrWeights(output)); + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertConv2D(Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + return ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEFAULT); +} + +tensorflow::Status ConvertConv2DDepthwise( + Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + return 
ConvertConv2DHelper(ctx, node_def, inputs, outputs, + ConvolutionType::DEPTHWISE_CONV); +} + tensorflow::Status ConvertPool(Converter& ctx, tensorflow::NodeDef const& node_def, std::vector const& inputs, @@ -1644,6 +1701,7 @@ void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; _op_registry["Conv2D"] = ConvertConv2D; + _op_registry["DepthwiseConv2dNative"] = ConvertConv2DDepthwise; _op_registry["Relu"] = ConvertActivation; _op_registry["MaxPool"] = ConvertPool; _op_registry["AvgPool"] = ConvertPool; -- GitLab From 51ce6cf02c0a445e1a7c89225353ff20fdb538cb Mon Sep 17 00:00:00 2001 From: Jie Date: Tue, 30 Jan 2018 10:43:21 -0800 Subject: [PATCH 013/884] [DEBUG] Converter update 1. ConvertConst float length doesn't match tensor shape. handling default broadcast. -> fixed resnet_200 2. Control dependency edge normalizing (remove '^' prefix) -> fixed inception_resnet_v2 --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 39 +++++++++++++------ 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index c7fa4144b1..185451e28b 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -280,7 +280,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( for (auto node : output_names) output_nodes.insert(node); // TODO(sami): this should be passed as a knob!!!! 
- segment_options.minimum_segment_size = 2; + segment_options.minimum_segment_size = 10; tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index ff47cdfe4a..6cdfc837fc 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -437,8 +437,14 @@ class Converter { tensorflow::NodeDef const& node_def) { std::vector inputs; for (auto const& input_name : node_def.input()) { - LOG(DEBUG) << "retrieve input: " << input_name; - inputs.push_back(_trt_tensors.at(input_name)); + std::string name = input_name[0] == '^'? input_name.substr(1) : input_name; + LOG(DEBUG) << "retrieve input: " << name; + if (_trt_tensors.count(name)) { + inputs.push_back(_trt_tensors.at(name)); + } else { + LOG(FATAL) << "input: " << name << "not availabled for node at, " + << node_def.name(); + } } return inputs; } @@ -462,6 +468,7 @@ class Converter { } tensorflow::Status convert_node(tensorflow::NodeDef const& node_def) { + //LOG(DEBUG) << node_def.DebugString(); std::vector inputs = this->get_inputs(node_def); std::string op = node_def.op(); if (!_op_registry.count(op)) { @@ -1292,20 +1299,24 @@ tensorflow::Status ConvertConst(Converter& ctx, nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { LOG(DEBUG) << "dimensions: " << tensor.dims(); - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - get_tensor_shape(tensor)); + scalar_shape = get_tensor_shape(tensor); + if (get_shape_size(scalar_shape) != weights_tensor.float_val_size()) { + LOG(FATAL) << "Broadcast on weights not supported, at: " + << node_def.name(); + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - scalar_shape.d[0] = 1; + // no dimension provided. 
flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), - scalar_shape); } + weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + scalar_shape); // LOG(INFO) << " add: " << weights_tensor.float_val().data(); // LOG(INFO) << " value: " << (*weights_tensor.float_val().data()); @@ -1317,20 +1328,24 @@ tensorflow::Status ConvertConst(Converter& ctx, nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { LOG(DEBUG) << "dimensions: " << tensor.dims(); - weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), - get_tensor_shape(tensor)); + scalar_shape = get_tensor_shape(tensor); + if (get_shape_size(scalar_shape) != weights_tensor.int_val_size()) { + LOG(FATAL) << "Broadcast on weights not supported, at: " + << node_def.name(); + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; - scalar_shape.d[0] = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; for (int i = 1; i < nvinfer1::Dims::MAX_DIMS; i++) { scalar_shape.d[i] = 0; scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } - weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), - scalar_shape); } + weights = TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), + scalar_shape); } else if (!weights_tensor.tensor_content().empty()) { LOG(DEBUG) << "TENSOR!!!" 
<< node_def.name(); weights = TRT_ShapedWeights(dtype, weights_tensor.tensor_content().data(), -- GitLab From 359329893e9db38d08be605bad85c3d3eef1a4cd Mon Sep 17 00:00:00 2001 From: Jie Date: Tue, 30 Jan 2018 21:31:10 -0800 Subject: [PATCH 014/884] [Debug + Feature] Feature: input tensor shape inference passing output_edge_map to allow ops absorbed by TRT subgraph to infer shape without running another shape infer Debug: fixed BiasAdd broadcasting Debug: fixed rewiring input edge to TRT_ENGINE_OP TODO: incoming edge check (shape / dimension) TRT dimension requirement for 3.1 makes input tensor with 2 dimension (NC) tricky to interpret. --- .../contrib/tensorrt/convert/convert_graph.cc | 38 +++++- .../contrib/tensorrt/convert/convert_nodes.cc | 118 ++++++++++++++---- .../contrib/tensorrt/convert/convert_nodes.h | 1 + .../contrib/tensorrt/kernels/trt_engine_op.cc | 3 +- 4 files changed, 134 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 185451e28b..258a850b21 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -77,8 +77,10 @@ void GetSubGraphIncomingEdges(tensorflow::Graph const& graph, for (tensorflow::Edge const* edge : node->in_edges()) { if (!subgraph_node_ids.count(edge->src()->id()) && !edge->src()->IsSource()) { - LOG(DEBUG) << edge->src()->name() << ", "; + LOG(DEBUG) << edge->src()->name() << " Y, "; incoming_edges->insert(edge); + } else { + LOG(DEBUG) << edge->src()->name() << " N, "; } } } @@ -93,7 +95,10 @@ void GetSubGraphOutgoingEdges(tensorflow::Graph const& graph, for (tensorflow::Edge const* edge : node->out_edges()) { if (!subgraph_node_ids.count(edge->dst()->id()) && !edge->dst()->IsSink()) { + LOG(DEBUG) << edge->dst()->name() << " Y, "; outgoing_edges->insert(edge); + } else { + LOG(DEBUG) << edge->dst()->name() << " N, "; } } } @@ -126,6 +131,7 @@ 
tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::Graph& graph, const std::vector& output_names, const std::set& subgraph_node_ids, size_t max_batch_size, size_t max_workspace_size, + std::unordered_map>* output_edge_map, const tensorflow::grappler::GraphProperties& graph_properties) { tensorflow::EdgeSet subgraph_incoming_edges; GetSubGraphIncomingEdges(graph, subgraph_node_ids, &subgraph_incoming_edges); @@ -164,10 +170,32 @@ tensorflow::Status ConvertSubGraphToTensorRT( tensorflow::NodeDef trt_node_def; TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size, graph_properties, &trt_node_def)); + max_batch_size, max_workspace_size, graph_properties, output_edge_map, + &trt_node_def)); tensorflow::Status status; tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); + // AddNode does not wire edges. + // Re-map incoming edges to use the new TRT node instead of the orig subgraph + std::map, int> subgraph_edge_to_input_map; + for (size_t i=0; i old_src = {edge->src()->id(), edge->src_output()}; + int new_src_output = subgraph_edge_to_input_map.at(old_src); + graph.AddEdge( + edge->src(), edge->src_output(), trt_node, new_src_output); + graph.RemoveEdge(edge); + } + + + LOG(DEBUG) << "new wiring edges: " << trt_node->in_edges().size(); + for (tensorflow::Edge const* edge : trt_node->in_edges()) { + LOG(DEBUG) << edge->src()->name() << " port: " << edge->src_output(); + } + TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph @@ -176,6 +204,7 @@ tensorflow::Status ConvertSubGraphToTensorRT( subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); + LOG(DEBUG) << "OUT going edge size: " << subgraph_outgoing_edges.size(); for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = 
subgraph_edge_to_output_map.at(old_src); @@ -280,7 +309,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( for (auto node : output_names) output_nodes.insert(node); // TODO(sami): this should be passed as a knob!!!! - segment_options.minimum_segment_size = 10; + segment_options.minimum_segment_size = 2; tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -292,6 +321,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); + std::unordered_map> output_edge_map; for (std::set const& subgraph_node_names : segments) { std::set subgraph_node_ids; for (std::string const& node_name : subgraph_node_names) { @@ -299,7 +329,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, static_graph_properties)); + max_workspace_size, &output_edge_map, static_graph_properties)); } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 6cdfc837fc..bf6a9be8be 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -437,6 +437,17 @@ class Converter { tensorflow::NodeDef const& node_def) { std::vector inputs; for (auto const& input_name : node_def.input()) { + /************************************************************************* + * TODO(jie) handle case 1) here + * Normalizes the inputs and extracts associated metadata: + * 1) Inputs can contain a colon followed by a suffix of characters. + * That suffix may be a single number (e.g. inputName:1) or several + * word characters separated from a number by a colon + * (e.g. inputName:foo:1). 
The + * latter case is used to denote inputs and outputs of functions. + * 2) Control dependency inputs contain caret at the beginning and we + * remove this and annotate the edge as a control dependency. + ************************************************************************/ std::string name = input_name[0] == '^'? input_name.substr(1) : input_name; LOG(DEBUG) << "retrieve input: " << name; if (_trt_tensors.count(name)) { @@ -1261,9 +1272,26 @@ tensorflow::Status ConvertScale(Converter& ctx, } else { LOG(DEBUG) << "NCHW !!!!"; } + + auto dims = tensor->getDimensions(); + LOG(DEBUG) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + LOG(DEBUG) << "i: " << dims.d[i]; + } + dims = weights.shape_; + LOG(DEBUG) << "tensor dimensions: " << dims.nbDims; + for (int i = 0; i < dims.nbDims; i++) { + LOG(DEBUG) << "i: " << dims.d[i]; + } + + nvinfer1::ScaleMode mode = nvinfer1::ScaleMode::kCHANNEL; + if (weights.shape_.d[0] == 1) { + mode = nvinfer1::ScaleMode::kUNIFORM; + } + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( - *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - weights, empty_weights, empty_weights); + *const_cast(tensor), mode, weights, + empty_weights, empty_weights); nvinfer1::ITensor* output_tensor = layer->getOutput(0); if (data_format == "NHWC") { @@ -1299,11 +1327,21 @@ tensorflow::Status ConvertConst(Converter& ctx, nvinfer1::Dims scalar_shape; if (tensor.dims() > 0) { LOG(DEBUG) << "dimensions: " << tensor.dims(); + LOG(DEBUG) << "size: " << weights_tensor.float_val_size(); scalar_shape = get_tensor_shape(tensor); + for (int i=0; i < scalar_shape.nbDims; i++) LOG(DEBUG) << scalar_shape.d[i]; if (get_shape_size(scalar_shape) != weights_tensor.float_val_size()) { - LOG(FATAL) << "Broadcast on weights not supported, at: " - << node_def.name(); + if (weights_tensor.float_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.float_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. 
flatten it + scalar_shape.d[0] = weights_tensor.float_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); } + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; @@ -1330,9 +1368,17 @@ tensorflow::Status ConvertConst(Converter& ctx, LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape = get_tensor_shape(tensor); if (get_shape_size(scalar_shape) != weights_tensor.int_val_size()) { - LOG(FATAL) << "Broadcast on weights not supported, at: " - << node_def.name(); + if (weights_tensor.int_val_size() == 1 || + scalar_shape.d[0] == weights_tensor.int_val_size()) { + scalar_shape.nbDims = 1; + // no dimension provided. flatten it + scalar_shape.d[0] = weights_tensor.int_val_size(); + scalar_shape.type[0] = nvinfer1::DimensionType::kSPATIAL; + } else { + LOG(FATAL) << "Broadcast on weights only supports kCHANNEL and" + << " kUNIFORM, at: " << node_def.name(); } + } } else { LOG(DEBUG) << "dimensions: " << tensor.dims(); scalar_shape.nbDims = 1; @@ -1747,6 +1793,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( const std::vector>& output_inds, size_t max_batch_size, size_t max_workspace_size, const tensorflow::grappler::GraphProperties& graph_properties, + std::unordered_map>* output_edge_map, tensorflow::NodeDef* trt_node) { // Visit nodes in reverse topological order and construct the TRT network. 
@@ -1800,21 +1847,39 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( int output_idx = input.second; tensorflow::Node* node = graph.FindNodeId(node_id); auto node_name = node->name(); - input_names.push_back(node_name); // insert original node name without port + // input_names should use the node name in the graph + // insert original node name without port + input_names.push_back(node_name); + + auto tensor_name = node_name; + if (output_idx != 0) + tensor_name = tensor_name + ":" + std::to_string(output_idx); + + LOG(DEBUG) << "input name: " << node_name << " tensor_name: " << tensor_name << " idx: " << output_idx; + + auto shape_inference_node_name = node_name; + auto shape_inference_output_idx = output_idx; + // rewire the shape inference to original node in the graph + if (output_edge_map->count(tensor_name)) { + shape_inference_node_name = output_edge_map->at(tensor_name).second; + shape_inference_output_idx = output_edge_map->at(tensor_name).first; + } + LOG(DEBUG) << "shapeinference name: " << shape_inference_node_name << " idx: " << shape_inference_output_idx; + // TODO(jie): alternative :) - // tensorflow::DataType tf_dtype = node->output_type(output_idx); - if (!graph_properties.HasOutputProperties(node_name)) + // tensorflow::DataType tf_dtype = node->output_type(); + if (!graph_properties.HasOutputProperties(shape_inference_node_name)) return tensorflow::errors::Internal("failed to find input node: " + - node_name); + shape_inference_node_name); - auto op_info_vec = graph_properties.GetOutputProperties(node_name); - if (static_cast(op_info_vec.size()) < output_idx) + auto op_info_vec = graph_properties.GetOutputProperties(shape_inference_node_name); + if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) return tensorflow::errors::Internal( - "accessing output index of: " + std::to_string(output_idx) + - ", at node: " + node_name + "with output entry from shape_map: " + + "accessing output index of: " + 
std::to_string(shape_inference_output_idx) + + ", at node: " + shape_inference_node_name + " with output entry from shape_map: " + std::to_string(op_info_vec.size())); - auto op_info = op_info_vec.at(output_idx); + auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); input_dtypes.push_back(tf_dtype); @@ -1822,9 +1887,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(convert_dtype(tf_dtype, &dtype)); - LOG(DEBUG) << "accessing output index of: " << std::to_string(output_idx) - << ", at node: " << node_name - << "with output entry from shape_map: " + LOG(DEBUG) << "accessing output index of: " << std::to_string(shape_inference_output_idx) + << ", at node: " << shape_inference_node_name + << " with output entry from shape_map: " << std::to_string(op_info_vec.size()); // TODO(ben,jie): update TRT input format/dimension @@ -1866,15 +1931,26 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "finished conversion"; + // TODO(sami,ben,jie): proper naming! + static int static_id = 0; + std::string engine_name = "my_trt_op" + std::to_string(static_id++); + // Gather output metadata std::vector output_names; std::vector output_dtypes; + int trt_engine_op_output_idx = 0; for (std::pair const& output : output_inds) { int node_id = output.first; int output_idx = output.second; tensorflow::Node* node = graph.FindNodeId(node_id); std::string op_name = node->name(); std::string tensor_name = op_name; + + output_edge_map->insert( + {trt_engine_op_output_idx == 0 ? 
+ engine_name : engine_name + std::to_string(trt_engine_op_output_idx), + {output_idx, tensor_name}}); + if (output_idx != 0) tensor_name = tensor_name + ":" + std::to_string(output_idx); LOG(DEBUG) << "output tensor name: " << tensor_name; @@ -1923,12 +1999,12 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(INFO) << "finished engine"; // Build the TRT op - // TODO(sami,ben,jie): proper naming! - static int static_id = 0; tensorflow::NodeDefBuilder op_builder( - "my_trt_op" + std::to_string(static_id++), "TRTEngineOp"); + engine_name, "TRTEngineOp"); std::vector income_edges; + LOG(DEBUG) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { + LOG(DEBUG) << "input edges: " << std::to_string(i) << " " << input_names.at(i); int output_idx = input_inds.at(i).second; // we wired up the input here already, it is redundant to do it again in // ConvertSubGraphToTensorRT(convert_graph.cc) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index dc59c37892..23ca9fcc82 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -36,6 +36,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( output_inds, // {node_id, output_idx} size_t max_batch_size, size_t max_workspace_size, const tensorflow::grappler::GraphProperties& graph_prop, + std::unordered_map>* output_edge_map, tensorflow::NodeDef* trt_node); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index a1524a592a..445900f08c 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -175,7 +175,8 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->CudaStreamMemberHack())); trt_context_ptr_->enqueue(nbBatch, &buffers[0], *stream, nullptr); - 
cudaStreamSynchronize(*stream); + // sync should be done by TF. + //cudaStreamSynchronize(*stream); } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); -- GitLab From c91050a97b9816627865dd367c93c3ef88ca212f Mon Sep 17 00:00:00 2001 From: Jie Date: Wed, 31 Jan 2018 14:35:49 -0800 Subject: [PATCH 015/884] [Feature] subgraph conversion graceful failure conversion failure would result in skipping current subgraph. incoming edge check. require subgraph with incoming edge passing 4 dimensional tensor. TODO binary op -> still needs transpose (since current layout optimization is not working properly --- .../contrib/tensorrt/convert/convert_graph.cc | 17 +++++++++++++---- .../contrib/tensorrt/convert/convert_nodes.cc | 9 ++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 258a850b21..34a2e9ce6a 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -143,6 +143,7 @@ tensorflow::Status ConvertSubGraphToTensorRT( for (tensorflow::Edge const* edge : subgraph_incoming_edges) { subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } + std::set> subgraph_outputs_set; // Collect outputs referenced from output_names auto output_name_to_index_map = BuildTensorNameMap(output_names); @@ -168,11 +169,11 @@ tensorflow::Status ConvertSubGraphToTensorRT( subgraph_outputs_set.begin(), subgraph_outputs_set.end()); // Build TensorRT node and add it to the graph tensorflow::NodeDef trt_node_def; + tensorflow::Status status; TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, max_batch_size, max_workspace_size, graph_properties, output_edge_map, &trt_node_def)); - tensorflow::Status status; tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); // AddNode does not wire 
edges. @@ -253,6 +254,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( // virtual cluster tensorflow::DeviceProperties device_properties; + device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); gCluster = @@ -322,14 +324,21 @@ tensorflow::Status ConvertGraphDefToTensorRT( std::unordered_map node_map; TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); std::unordered_map> output_edge_map; + int count = 0; for (std::set const& subgraph_node_names : segments) { std::set subgraph_node_ids; for (std::string const& node_name : subgraph_node_names) { subgraph_node_ids.insert(node_map.at(node_name)->id()); } - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( - graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, &output_edge_map, static_graph_properties)); + tensorflow::Status status = + ConvertSubGraphToTensorRT(graph, output_names, subgraph_node_ids, + max_batch_size, max_workspace_size, &output_edge_map, + static_graph_properties); + if ( status != tensorflow::Status::OK()) { + LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count + << " due to: \n" << status.ToString() << "SKIPPING......"; + } + count++; } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index bf6a9be8be..da6252b25d 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1866,8 +1866,6 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } LOG(DEBUG) << "shapeinference name: " << shape_inference_node_name << " idx: " << shape_inference_output_idx; - // TODO(jie): alternative :) - // tensorflow::DataType tf_dtype = node->output_type(); if (!graph_properties.HasOutputProperties(shape_inference_node_name)) return tensorflow::errors::Internal("failed to find input node: " + shape_inference_node_name); 
@@ -1885,7 +1883,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_dtypes.push_back(tf_dtype); nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); - TF_CHECK_OK(convert_dtype(tf_dtype, &dtype)); + TF_RETURN_IF_ERROR(convert_dtype(tf_dtype, &dtype)); LOG(DEBUG) << "accessing output index of: " << std::to_string(shape_inference_output_idx) << ", at node: " << shape_inference_node_name @@ -1896,6 +1894,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + // TODO(jie): TRT 3.x only support 4 dimensional input tensor. + // update the code once TRT 4.0 comes out. + if (op_info.shape().dim_size() != 4) + return tensorflow::errors::Unimplemented("require 4 dimensional input"); + for (int i = 1; i < op_info.shape().dim_size(); i++) { LOG(DEBUG) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); -- GitLab From 45487b143f890eac31844bfdea171954ddae9e38 Mon Sep 17 00:00:00 2001 From: Jie Date: Wed, 31 Jan 2018 21:13:07 -0800 Subject: [PATCH 016/884] [UPDATE] 1. debug binary ops: transpose added again since TF layout optimization is not sufficient 2. debug consecutive trt_engine_op binding names TODO: binding names + input wiring needs refactoring Also change the trt_engine_op attrs (input/output nodes might not be necessary --- .../contrib/tensorrt/convert/convert_nodes.cc | 99 ++++++++++++------- 1 file changed, 63 insertions(+), 36 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index da6252b25d..5df1132f01 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -449,6 +449,10 @@ class Converter { * remove this and annotate the edge as a control dependency. ************************************************************************/ std::string name = input_name[0] == '^'? 
input_name.substr(1) : input_name; + auto first = name.find_first_of(':'); + if (first != std::string::npos && first+2 == name.size() && name[first+1]=='0') + name.erase(first); + LOG(DEBUG) << "retrieve input: " << name; if (_trt_tensors.count(name)) { inputs.push_back(_trt_tensors.at(name)); @@ -833,9 +837,12 @@ tensorflow::Status BinaryTensorOpWeight( auto dims_w = weights.shape_; auto dims_t = tensor->getDimensions(); - // default to channel-wise + // default to element-wise auto scale_mode = nvinfer1::ScaleMode::kELEMENTWISE; + // TODO(jie): maybe use a permuatation instead to support more cases; + bool permutation_flag = false; + /* if (weights.count() == 1) { LOG(DEBUG) << "UNIFORM"; @@ -857,44 +864,63 @@ tensorflow::Status BinaryTensorOpWeight( scale_mode = nvinfer1::ScaleMode::kUNIFORM; } else { // no broadcasting on Batch dimension; - assert(dims_w.d[0]==1); - - // broadcasting on Channel dimension only allowed in kUNIFORM - assert(dims_w.d[1]==dims_t.d[0]); - assert(dims_w.nbDims==dims_t.nbDims); - - // default is element; - for (int i=2; i permutation(dims_t.nbDims + 1); - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { - // we swap the last dimension into channel for trt. - // because of tensorflow default broadcasting rules. - for (int i = 0; i < static_cast(permutation.size()); i++) { - permutation[i] = i; + if (permutation_flag) { + if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { + // we swap the last dimension into channel for trt. + // because of tensorflow default broadcasting rules. 
+ for (int i = 0; i < static_cast(permutation.size()); i++) { + permutation[i] = i; + } + permutation[1] = dims_t.nbDims; + permutation[dims_t.nbDims] = 1; + tensor = ctx.transposeTensor(const_cast(tensor), + permutation); + } else { + return tensorflow::errors::InvalidArgument( + "Transpose cannot be applied, " + node_def.name()); } - permutation[1] = dims_t.nbDims; - permutation[dims_t.nbDims] = 1; - tensor = ctx.transposeTensor(const_cast(tensor), - permutation); } - */ // prepare weights TRT_ShapedWeights shiftWeights(weights.type_); @@ -923,11 +949,9 @@ tensorflow::Status BinaryTensorOpWeight( nvinfer1::ITensor* output_tensor = layer->getOutput(0); // transpose back dimension - /* - if (scale_mode == nvinfer1::ScaleMode::kCHANNEL && dims_t.nbDims > 1) { + if (permutation_flag) { output_tensor = ctx.transposeTensor(output_tensor, permutation); } - */ // pass the output outputs->push_back(TRT_TensorOrWeights(output_tensor)); @@ -1847,9 +1871,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( int output_idx = input.second; tensorflow::Node* node = graph.FindNodeId(node_id); auto node_name = node->name(); + // input_names should use the node name in the graph + // here it should be the input tensor name -> matching the binding // insert original node name without port - input_names.push_back(node_name); + // input_names.push_back(node_name); auto tensor_name = node_name; if (output_idx != 0) @@ -1910,6 +1936,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( if (output_idx != 0) input_tensor_name = node_name + ":" + std::to_string(output_idx); + input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -1951,9 +1978,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( output_edge_map->insert( {trt_engine_op_output_idx == 0 ? 
- engine_name : engine_name + std::to_string(trt_engine_op_output_idx), + engine_name : engine_name + ":" + std::to_string(trt_engine_op_output_idx), {output_idx, tensor_name}}); - + trt_engine_op_output_idx++; if (output_idx != 0) tensor_name = tensor_name + ":" + std::to_string(output_idx); LOG(DEBUG) << "output tensor name: " << tensor_name; @@ -1999,7 +2026,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // engine_out << engine_plan_string; // engine_out.close(); - LOG(INFO) << "finished engine"; + LOG(INFO) << "finished engine" << engine_name; // Build the TRT op tensorflow::NodeDefBuilder op_builder( -- GitLab From 10a642da150356d1072e9a5197967f3f3a2bcd7b Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 1 Feb 2018 07:13:40 -0800 Subject: [PATCH 017/884] [UPDATE] converter update: MatMul added TODO: reshape --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 67 ++++++++++++++++++- 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 34a2e9ce6a..254a428104 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2", "DepthwiseConv2dNative" + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" , "MatMul" // TODO(ben,jie): ... 
}; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 5df1132f01..6c0ee5e527 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -347,7 +347,7 @@ template <> tensorflow::DataType TFAttrs::get(std::string key) const { return this->at(key)->type(); } - +// TODO(jie): reorder4 & reorder2 should be merged? template void reorder4(nvinfer1::DimsNCHW shape, T const* idata, nvinfer1::DimsNCHW istrides, T* odata, @@ -365,6 +365,38 @@ void reorder4(nvinfer1::DimsNCHW shape, T const* idata, } } +template +void reorder2(nvinfer1::DimsHW shape, T const* idata, + nvinfer1::DimsHW istrides, T* odata, + nvinfer1::DimsHW ostrides) { + for (int h = 0; h < shape.h(); ++h) { + for (int w = 0; w < shape.w(); ++w) { + odata[h * ostrides.h() + w * ostrides.w()] + = idata[h * istrides.h() + w * istrides.w()]; // source is read through the input strides; using ostrides here made this a plain copy (no re-layout) + } + } +} + +// TODO(jie): fail to tensorflow!! 
+void reorder_ck_to_kc(TRT_ShapedWeights const& iweights, + TRT_ShapedWeights* oweights) { + int c = iweights.shape_.d[0]; + int k = iweights.shape_.d[1]; + oweights->shape_.d[0] = k; + oweights->shape_.d[1] = c; + nvinfer1::DimsHW istrides = {1, k}; + nvinfer1::DimsHW ostrides = {c, 1}; + switch (iweights.type_) { + case tensorflow::DataType::DT_FLOAT: + reorder2( + {k, c}, static_cast(iweights.values_), istrides, + static_cast(const_cast(oweights->values_)), ostrides); + break; + default: + LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; + } +} + void reorder_rsck_to_kcrs(TRT_ShapedWeights const& iweights, TRT_ShapedWeights* oweights, int nbGroups) { CHECK_EQ(iweights.type_, oweights->type_); @@ -382,7 +414,6 @@ void reorder_rsck_to_kcrs(TRT_ShapedWeights const& iweights, oweights->shape_.d[1] = c*nbGroups; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; - // nvinfer1::DimsNCHW istrides = {1, s, c*r*s, r*s}; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { @@ -1782,6 +1813,37 @@ tensorflow::Status ConvertConcat( return tensorflow::Status::OK(); } +tensorflow::Status ConvertMatMul( + Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + + // TODO(jie): transpose! 
+ TFAttrs attrs(node_def); + //bool transpose_w = bool(attrs->at("transpose_b")->i()); + + // tensor after transpose (NCHW) + auto tensor_dim = tensor->getDimensions(); + + TRT_ShapedWeights weights_ck = inputs.at(1).weights(); + TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); + reorder_ck_to_kc(weights_ck, &weights); + TRT_ShapedWeights biases(weights.type_); + + int noutput = weights.shape_.d[0]; + + nvinfer1::IFullyConnectedLayer* layer = + ctx.network()->addFullyConnected(*const_cast(tensor), + noutput, weights, biases); + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); + +} + void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; @@ -1804,6 +1866,7 @@ void Converter::register_op_converters() { _op_registry["Rsqrt"] = ConvertUnary; _op_registry["Mean"] = ConvertReduce; _op_registry["Pad"] = ConvertPad; + _op_registry["MatMul"] = ConvertMatMul; // TODO(ben,jie): Add more ops _op_registry["ConcatV2"] = ConvertConcat; -- GitLab From c5d9369831bfcb66ea54f06349ebae5979c4912d Mon Sep 17 00:00:00 2001 From: Jie Date: Thu, 1 Feb 2018 09:43:24 -0800 Subject: [PATCH 018/884] [debug] binary op mode/dimension bug fixed TODO: reshape / debug Matmul --- .../contrib/tensorrt/convert/convert_graph.cc | 3 ++- .../contrib/tensorrt/convert/convert_nodes.cc | 24 ++++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 254a428104..e9ab542f31 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,8 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", "Mul", 
"Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2", "DepthwiseConv2dNative" , "MatMul" + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" //, "MatMul", + //"Reshape" // TODO(ben,jie): ... }; if (output_nodes.count(node_def.name())) return false; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 6c0ee5e527..c697093d12 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -897,17 +897,22 @@ tensorflow::Status BinaryTensorOpWeight( // no broadcasting on Batch dimension; LOG(DEBUG) << "WEIGHTS DIM: " << dims_w.nbDims << " tensor DIM: " << dims_t.nbDims; - if (dims_w.nbDims==dims_t.nbDims && dims_w.d[0]==1) { - for (int i=1; i Date: Thu, 1 Feb 2018 11:07:53 -0800 Subject: [PATCH 019/884] [update] converter update: reshape implemented. I cannot support reshape or MatMul at this moment because of the backend. TODO: wait until TRT 4.0 for backend support on reshape. --- .../contrib/tensorrt/convert/convert_graph.cc | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 72 ++++++++++++++++++- 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index e9ab542f31..573394f309 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -61,7 +61,7 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { static const std::set candidate_ops = { "Identity", "Const", "Conv2D", "MaxPool", "BiasAdd", "Relu", "Add", "Mul", "Sub", "Rsqrt", "Pad" , "Mean", - "AvgPool", "ConcatV2", "DepthwiseConv2dNative" //, "MatMul", + "AvgPool", "ConcatV2", "DepthwiseConv2dNative" //, "MatMul", //"Reshape" // TODO(ben,jie): ... 
}; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index c697093d12..09c1b959ce 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1849,6 +1849,76 @@ tensorflow::Status ConvertMatMul( } +tensorflow::Status ConvertReshape(Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + if (inputs.size() != 2 || !inputs.at(0).is_tensor() || + !inputs.at(1).is_weights()) + return tensorflow::errors::InvalidArgument( + "Input expects tensor and weights, at" + node_def.name()); + + // implement tensor binaryOp weight [channel wise] for now; + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + auto dims = tensor->getDimensions(); + // restore implicit batch dimension + int nbDims = dims.nbDims + 1; + + TRT_ShapedWeights shape = inputs.at(1).weights(); + + TFAttrs attrs(node_def); + + auto padding_type = attrs.get("Tshape"); + + if (shape.shape_.nbDims != 1) + return tensorflow::errors::InvalidArgument( + "reshape new shape is not 1 dimensional, at " + node_def.name()); + + // Only expect to handle INT32 as attributes for now + if (padding_type != tensorflow::DataType::DT_INT32) + return tensorflow::errors::Unimplemented( + "reshape new shape supports only DT_INT32, at "+ node_def.name()); + + auto shape_data = static_cast(const_cast(shape.values_)); + + if (shape_data[0] != -1) + return tensorflow::errors::InvalidArgument( + "reshape new shape first dimension is not -1, at "+ node_def.name()); + + auto shape_num_dims = shape.shape_.d[0]; + LOG(DEBUG) << "shape dimensions: " << shape_num_dims; + int volume_w = 1; + for (int i = 1; i < shape.shape_.d[0]; i++) + volume_w *= shape_data[i]; + + int volume_t = 1; + for (int i = 0; i < dims.nbDims; i++) + volume_t *= dims.d[i]; + + LOG(DEBUG) << "volume: " << volume_t << " volume weights: " << volume_w; + if (volume_w != 
volume_t) + return tensorflow::errors::InvalidArgument( + "volume does not agree between tensor and new shape, at "+ node_def.name()); + + nvinfer1::IShuffleLayer* layer = + ctx.network()->addShuffle(*const_cast(tensor)); + + nvinfer1::Dims reshapeDims; + LOG(DEBUG) << "new dimension: " << shape_num_dims-1; + reshapeDims.nbDims = shape_num_dims-1; + for (int32_t i = 0; i < reshapeDims.nbDims; ++i) { + reshapeDims.d[i] = shape_data[i+1]; + } + layer->setReshapeDimensions(reshapeDims); + LOG(DEBUG) << "new dimension: " << shape_num_dims-1; + + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + auto dims_output = output_tensor->getDimensions(); + LOG(DEBUG) << "output tensor dimension:" << dims_output.nbDims; + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + void Converter::register_op_converters() { // vgg_16 slim implementation _op_registry["Placeholder"] = ConvertPlaceholder; @@ -1875,7 +1945,7 @@ void Converter::register_op_converters() { _op_registry["ConcatV2"] = ConvertConcat; _op_registry["MatMul"] = ConvertMatMul; - //_op_registry["Reshape"] = ConvertReshape; + _op_registry["Reshape"] = ConvertReshape; } } // namespace -- GitLab From 28c52d14afb5a54930bcca0db60c9d5068a2c63e Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 5 Feb 2018 09:55:39 -0800 Subject: [PATCH 020/884] WIP: adding int8 calibration --- tensorflow/contrib/tensorrt/BUILD | 4 + .../contrib/tensorrt/convert/convert_graph.cc | 167 +++++++++---- .../contrib/tensorrt/convert/convert_graph.h | 3 +- .../contrib/tensorrt/convert/convert_nodes.cc | 230 ++++++++++++++++-- .../contrib/tensorrt/convert/convert_nodes.h | 38 ++- .../contrib/tensorrt/kernels/trt_calib_op.cc | 68 ++++++ .../contrib/tensorrt/kernels/trt_calib_op.h | 35 +++ .../contrib/tensorrt/kernels/trt_engine_op.cc | 6 +- .../contrib/tensorrt/ops/trt_calib_op.cc | 34 +++ .../contrib/tensorrt/python/trt_convert.py | 4 +- .../tensorrt/resources/TRTInt8Calibrator.cc | 2 +- 
.../contrib/tensorrt/resources/TRTResources.h | 35 ++- tensorflow/contrib/tensorrt/trt_conversion.i | 9 +- 13 files changed, 543 insertions(+), 92 deletions(-) create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.h create mode 100644 tensorflow/contrib/tensorrt/ops/trt_calib_op.cc diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index c10e85cffa..bcb8573045 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -60,9 +60,11 @@ tf_kernel_library( name = "trt_engine_op_kernel", srcs = [ "kernels/trt_engine_op.cc", + "kernels/trt_calib_op.cc", ], hdrs=[ "kernels/trt_engine_op.h", + "kernels/trt_calib_op.h", ], gpu_srcs = [ ], @@ -82,6 +84,7 @@ tf_kernel_library( tf_gen_op_libs( op_lib_names = [ "trt_engine_op", + "trt_calib_op", ], deps=[ "@local_config_tensorrt//:tensorrt", @@ -108,6 +111,7 @@ tf_gen_op_wrapper_py( name = "trt_engine_op", deps = [ ":trt_engine_op_op_lib", + ":trt_calib_op_op_lib", ":trt_shape_function", ], ) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 16d6e6ec7d..d14abf14dd 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -40,9 +40,8 @@ limitations under the License. 
#define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) #include "tensorflow/core/grappler/optimizers/constant_folding.h" -#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" +#include "tensorflow/core/grappler/optimizers/layout_optimizer.h" #include "tensorflow/core/grappler/devices.h" -//#include "tensorflow/core/grappler/clusters/single_machine.h" #include "tensorflow/core/grappler/clusters/virtual_cluster.h" #include "tensorflow/core/protobuf/device_properties.pb.h" #include "tensorflow/core/grappler/grappler_item.h" @@ -121,73 +120,146 @@ std::unordered_map> BuildTensorNameMap( return result; } -tensorflow::Status ConvertSubGraphToTensorRT( - tensorflow::Graph& graph, const std::vector& output_names, - const std::set& subgraph_node_ids, size_t max_batch_size, - size_t max_workspace_size, - const tensorflow::grappler::GraphProperties& graph_properties) { - tensorflow::EdgeSet subgraph_incoming_edges; - GetSubGraphIncomingEdges(graph, subgraph_node_ids, &subgraph_incoming_edges); +struct ConvertGraphParams{ + ConvertGraphParams(tensorflow::Graph &graph_, + const std::vector &output_names_, + const std::set& subgraph_node_ids_, + size_t max_batch_size_, + size_t max_workspace_size_, + const tensorflow::grappler::GraphProperties &graph_properties_, + bool int8_ + ):graph(graph_),output_names(output_names_),subgraph_node_ids(subgraph_node_ids_), + max_batch_size(max_batch_size_),max_workspace_size(max_workspace_size_), + graph_properties(graph_properties_),int8(int8_){ + + } - std::vector> subgraph_inputs; + tensorflow::Graph& graph; + const std::vector& output_names; + const std::set& subgraph_node_ids; + size_t max_batch_size; + size_t max_workspace_size; + const tensorflow::grappler::GraphProperties& graph_properties; + bool int8; + std::vector> subgraph_inputs; + std::vector> subgraph_outputs; + tensorflow::EdgeSet subgraph_incoming_edges; + tensorflow::EdgeSet subgraph_outgoing_edges; +}; +tensorflow::Status 
FillSubGraphEdgeSets(ConvertGraphParams &p){ - // Collect inputs by looking for incoming edges - for (tensorflow::Edge const* edge : subgraph_incoming_edges) { - subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); + GetSubGraphIncomingEdges(p.graph, p.subgraph_node_ids, &p.subgraph_incoming_edges); + for (tensorflow::Edge const* edge : p.subgraph_incoming_edges) { + p.subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } + auto output_name_to_index_map = BuildTensorNameMap(p.output_names); std::set> subgraph_outputs_set; - // Collect outputs referenced from output_names - auto output_name_to_index_map = BuildTensorNameMap(output_names); - // for (int node_id : subgraph_node_ids_no_placeholder) { - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = graph.FindNodeId(node_id); + + for (int node_id : p.subgraph_node_ids) { + tensorflow::Node* node = p.graph.FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - // Collect outputs referenced from outgoing edges - tensorflow::EdgeSet subgraph_outgoing_edges; - // GetSubGraphOutgoingEdges(graph, subgraph_node_ids_no_placeholder, - // &subgraph_outgoing_edges); - GetSubGraphOutgoingEdges(graph, subgraph_node_ids, &subgraph_outgoing_edges); - for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { + + GetSubGraphOutgoingEdges(p.graph, p.subgraph_node_ids, &p.subgraph_outgoing_edges); + for (tensorflow::Edge const* edge : p.subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - // Impose an ordering on the outputs - std::vector> subgraph_outputs( + p.subgraph_outputs.reserve(subgraph_outputs_set.size()); + p.subgraph_outputs.insert(p.subgraph_outputs.begin(), subgraph_outputs_set.begin(), subgraph_outputs_set.end()); - // Build TensorRT node and add it to the graph + return 
tensorflow::Status::OK(); + +}; + +tensorflow::Status GetCalibNode(ConvertGraphParams *params){ + + FillSubGraphEdgeSets(*params); tensorflow::NodeDef trt_node_def; - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef( - graph, subgraph_node_ids, subgraph_inputs, subgraph_outputs, - max_batch_size, max_workspace_size, graph_properties, &trt_node_def)); + + SubGraphParams s(params->graph, params->subgraph_node_ids, params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size, params->graph_properties, &trt_node_def); + TF_RETURN_IF_ERROR(InjectCalibrationNode(s)); tensorflow::Status status; - tensorflow::Node* trt_node = graph.AddNode(trt_node_def, &status); + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); + + TF_RETURN_IF_ERROR(status); + + for (auto inp_port: params->subgraph_inputs) { // loop over incoming edges and attach them to calib node + tensorflow::Node * in_node =params->graph.FindNodeId(inp_port.first); + params->graph.UpdateEdge(trt_node, inp_port.second, in_node, inp_port.second); + } + return tensorflow::Status::OK(); +} + +tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params ) { + +// tensorflow::EdgeSet subgraph_incoming_edges; +// +// std::vector> subgraph_inputs; +// +// +// // Collect inputs by looking for incoming edges +// for (tensorflow::Edge const* edge : subgraph_incoming_edges) { +// subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); +// } +// std::set> subgraph_outputs_set; +// // Collect outputs referenced from output_names +// auto output_name_to_index_map = BuildTensorNameMap(output_names); +// for (int node_id : subgraph_node_ids) { +// tensorflow::Node* node = graph.FindNodeId(node_id); +// if (output_name_to_index_map.count(node->name())) { +// for (int index : output_name_to_index_map.at(node->name())) { +// subgraph_outputs_set.insert({node_id, index}); +// } +// } +// } +// // Collect outputs referenced from outgoing edges 
+// tensorflow::EdgeSet subgraph_outgoing_edges; +// // GetSubGraphOutgoingEdges(graph, subgraph_node_ids_no_placeholder, +// // &subgraph_outgoing_edges); +// GetSubGraphOutgoingEdges(graph, subgraph_node_ids, &subgraph_outgoing_edges); +// for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { +// subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); +// } +// // Impose an ordering on the outputs +// std::vector> subgraph_outputs( +// subgraph_outputs_set.begin(), subgraph_outputs_set.end()); +// // Build TensorRT node and add it to the graph + FillSubGraphEdgeSets(*params); + tensorflow::NodeDef trt_node_def; + + SubGraphParams s(params->graph, params->subgraph_node_ids, params->subgraph_inputs, params->subgraph_outputs, + params->max_batch_size, params->max_workspace_size, params->graph_properties, &trt_node_def); + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); + tensorflow::Status status; + tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); TF_RETURN_IF_ERROR(status); // Re-map outgoing edges to use the new TRT node instead of the orig subgraph std::map, int> subgraph_edge_to_output_map; - for (size_t i = 0; i < subgraph_outputs.size(); ++i) { - subgraph_edge_to_output_map.insert({subgraph_outputs.at(i), i}); + for (size_t i = 0; i < params->subgraph_outputs.size(); ++i) { + subgraph_edge_to_output_map.insert({params->subgraph_outputs.at(i), i}); } TF_RETURN_IF_ERROR(status); - for (tensorflow::Edge const* edge : subgraph_outgoing_edges) { + for (tensorflow::Edge const* edge : params->subgraph_outgoing_edges) { std::pair old_src = {edge->src()->id(), edge->src_output()}; int new_src_output = subgraph_edge_to_output_map.at(old_src); - graph.UpdateEdge(trt_node, new_src_output, edge->dst(), edge->dst_input()); + params->graph.UpdateEdge(trt_node, new_src_output, edge->dst(), edge->dst_input()); } // Remove the original subgraph - for (int node_id : subgraph_node_ids) { - tensorflow::Node* node = 
graph.FindNodeId(node_id); + for (int node_id : params->subgraph_node_ids) { + tensorflow::Node* node = params->graph.FindNodeId(node_id); // Don't remove the input placeholders if (node->type_string() == "Placeholder") { continue; } - graph.RemoveNode(node); + params->graph.RemoveNode(node); } return tensorflow::Status::OK(); } @@ -209,7 +281,9 @@ tensorflow::Status BuildNodeMap( tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size, tensorflow::GraphDef* new_graph_def) { + size_t max_workspace_size, + tensorflow::GraphDef* new_graph_def, + bool int8=false) { // optimization pass tensorflow::grappler::GrapplerItem item; @@ -246,9 +320,9 @@ tensorflow::Status ConvertGraphDefToTensorRT( item.graph = gdef; tensorflow::grappler::ConstantFolding fold(nullptr); status = fold.Optimize(nullptr, item, &gdef); - if (status !=tensorflow::Status::OK()) + if (status !=tensorflow::Status::OK()) { return status; - + } // AJ refactoring shape inference through grappler/GraphProperties. 
tensorflow::grappler::GraphProperties static_graph_properties(item); static_graph_properties.InferStatically(false); @@ -296,9 +370,14 @@ tensorflow::Status ConvertGraphDefToTensorRT( for (std::string const& node_name : subgraph_node_names) { subgraph_node_ids.insert(node_map.at(node_name)->id()); } - TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT( - graph, output_names, subgraph_node_ids, max_batch_size, - max_workspace_size, static_graph_properties)); + + ConvertGraphParams p(graph,output_names,subgraph_node_ids,max_batch_size,max_workspace_size, + static_graph_properties,int8); + if(int8) { + TF_RETURN_IF_ERROR(GetCalibNode(&p)); + } else{ + TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRT(&p)); + } } graph.ToGraphDef(new_graph_def); return tensorflow::Status::OK(); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index cd713de888..4ac33cf128 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -27,7 +27,8 @@ namespace convert { tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size, tensorflow::GraphDef* new_graph_def); + size_t max_workspace_size, + tensorflow::GraphDef* new_graph_def,bool int8); } } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 86c43d960a..d54c88d9f3 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -40,6 +40,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResources.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) // Check if the types are equal. Cast to int first so that failure log message @@ -1547,23 +1548,216 @@ void Converter::register_op_converters() { } } // namespace +tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams &s ){ + return tensorflow::errors::Unimplemented("Not implemented yet"); +} + +tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams &s){ + // Visit nodes in reverse topological order and construct the TRT network. + + // Toposort + std::vector order_vec; + tensorflow::GetPostOrder(s.graph, &order_vec); + // Select just the subgraph + std::list order; + for (tensorflow::Node* node : order_vec) { + if (s.subgraph_node_ids.count(node->id())) { + // order.push_back(node); + order.push_front(node); // we want topological order to contstruct the + // network layer by layer + } + } + // topological order is needed to build TRT network + LOG(DEBUG) << "BUILDING 1"; + static int static_id = 0; + std::string calib_op_name=std::string("my_trt_calib_op_") + std::to_string(static_id++); + + + LOG(DEBUG) << "BUILDING 2"; + auto trt_rmgr=tensorflow::trt::TRTResourceManager::instance(); + auto op_rmgr=trt_rmgr->getManager("TRTCalibOps"); + auto op_res=new tensorflow::trt::TRTCalibrationResource(); + TF_CHECK_OK(op_rmgr->Create(calib_op_name,calib_op_name,op_res)); + op_res->logger=new tensorflow::tensorrt::Logger(); + op_res->builder = nvinfer1::createInferBuilder(*(op_res->logger)); + + if (!op_res->builder) { + return tensorflow::errors::Internal( + "failed to create TensorRT builder object"); + } + + LOG(DEBUG) << "BUILDING 3"; + + op_res->network = op_res->builder->createNetwork(); + if (!op_res->network) { + return 
tensorflow::errors::Internal( + "failed to create TensorRT network object"); + } + + LOG(DEBUG) << "BUILDING 4"; + + // Build the network + Converter converter(op_res->network); + + LOG(DEBUG) << "BUILDING 5"; + std::vector input_names; + std::vector input_dtypes; + for (std::pair const& input : s.input_inds) { + LOG(DEBUG) << "parsing input!!!!!"; + int node_id = input.first; + int output_idx = input.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + auto node_name = node->name(); + input_names.push_back(node_name); // insert original node name without port + // TODO(jie): alternative :) + // tensorflow::DataType tf_dtype = node->output_type(output_idx); + if (!s.graph_properties.HasOutputProperties(node_name)) + return tensorflow::errors::Internal("failed to find input node: " + + node_name); + + auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); + if (static_cast(op_info_vec.size()) < output_idx) + return tensorflow::errors::Internal( + "accessing output index of: " + std::to_string(output_idx) + + ", at node: " + node_name + "with output entry from shape_map: " + + std::to_string(op_info_vec.size())); + + auto op_info = op_info_vec.at(output_idx); + + tensorflow::DataType tf_dtype = op_info.dtype(); + input_dtypes.push_back(tf_dtype); + + nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); + TF_CHECK_OK(convert_dtype(tf_dtype, &dtype)); + + LOG(DEBUG) << "accessing output index of: " << std::to_string(output_idx) + << ", at node: " << node_name + << "with output entry from shape_map: " + << std::to_string(op_info_vec.size()); + + // TODO(ben,jie): update TRT input format/dimension + nvinfer1::DimsCHW input_dim_psuedo_chw; + for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; + + for (int i = 1; i < op_info.shape().dim_size(); i++) { + LOG(DEBUG) << "dimension: " << i + << " , size: " << op_info.shape().dim(i).size(); + input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); + } + + // TODO(ben,jie): proper way 
to restore input tensor name? + auto input_tensor_name = node_name; + if (output_idx != 0) + input_tensor_name = node_name + ":" + std::to_string(output_idx); + + nvinfer1::ITensor* input_tensor = converter.network()->addInput( + input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); + + if (!input_tensor) + return tensorflow::errors::InvalidArgument( + "Failed to create Input layer"); + LOG(DEBUG) << "input tensor name :" << input_tensor_name; + + if (!converter.insert_input_tensor(input_tensor_name, input_tensor)) + return tensorflow::errors::AlreadyExists( + "output tensor already exists for op: " + input_tensor_name); + } + + LOG(DEBUG) << "finished sorting"; + + for (const tensorflow::Node* node : order) { + tensorflow::NodeDef const& node_def = node->def(); + LOG(DEBUG) << "converting node: " << node_def.name() << " , " + << node_def.op(); + TF_RETURN_IF_ERROR(converter.convert_node(node_def)); + } + + LOG(DEBUG) << "finished conversion"; + + // Gather output metadata + std::vector output_names; + std::vector output_dtypes; + for (std::pair const& output : s.output_inds) { + int node_id = output.first; + int output_idx = output.second; + tensorflow::Node* node = s.graph.FindNodeId(node_id); + std::string op_name = node->name(); + std::string tensor_name = op_name; + if (output_idx != 0) + tensor_name = tensor_name + ":" + std::to_string(output_idx); + LOG(DEBUG) << "output tensor name: " << tensor_name; + output_names.push_back(tensor_name); + auto tensor_or_weights = converter.get_tensor(tensor_name); + if (!tensor_or_weights.is_tensor()) { + return tensorflow::errors::InvalidArgument( + "Output node is weights not tensor"); + } + nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); + if (!tensor) { + return tensorflow::errors::NotFound("Output tensor not found: " + + tensor_name); + } + converter.network()->markOutput(*tensor); + tensorflow::DataType tf_dtype = node->output_type(output_idx); + output_dtypes.push_back(tf_dtype); + nvinfer1::DataType 
trt_dtype = nvinfer1::DataType::kFLOAT; + TF_RETURN_IF_ERROR(convert_dtype(tf_dtype, &trt_dtype)); + tensor->setType(trt_dtype); + } + + LOG(DEBUG) << "finished output"; + + // Build the engine + op_res->builder->setMaxBatchSize(s.max_batch_size); + op_res->builder->setMaxWorkspaceSize(s.max_workspace_size); + + // Build the TRT op + // TODO(sami,ben,jie): proper naming! + tensorflow::NodeDefBuilder op_builder( + calib_op_name, "TRTCalibOp"); + std::vector income_edges; + for (size_t i = 0; i < input_names.size(); ++i) { + int output_idx = s.input_inds.at(i).second; + // we wired up the input here already, it is redundant to do it again in + // ConvertSubGraphToTensorRT(convert_graph.cc) + auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(input_names.at(i), + output_idx, input_dtypes.at(i)); + income_edges.push_back(incoming_edge); + } + tensorflow::gtl::ArraySlice + input_list(income_edges); + op_builder.Input(input_list); + std::vector segment_names; + segment_names.reserve(s.subgraph_node_ids.size()); + for(int i : s.subgraph_node_ids){ + auto node=s.graph.FindNodeId(i); + segment_names.push_back(node->name()); + } + LOG(INFO) << "finished op preparation"; + + auto status = op_builder.Attr("segment_names", segment_names ) + .Attr("segment_output_names", output_names) + .Finalize(s.trt_node); + + LOG(INFO) << status.ToString(); + LOG(INFO) << "finished op building"; + + return tensorflow::Status::OK(); + +} tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& input_inds, - const std::vector>& output_inds, size_t max_batch_size, - size_t max_workspace_size, - const tensorflow::grappler::GraphProperties& graph_properties, - tensorflow::NodeDef* trt_node) { + tensorrt::convert::SubGraphParams &s +) { // Visit nodes in reverse topological order and construct the TRT network. 
// Toposort std::vector order_vec; - tensorflow::GetPostOrder(graph, &order_vec); + tensorflow::GetPostOrder(s.graph, &order_vec); // Select just the subgraph std::list order; for (tensorflow::Node* node : order_vec) { - if (subgraph_node_ids.count(node->id())) { + if (s.subgraph_node_ids.count(node->id())) { // order.push_back(node); order.push_front(node); // we want topological order to contstruct the // network layer by layer @@ -1601,20 +1795,20 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "BUILDING 5"; std::vector input_names; std::vector input_dtypes; - for (std::pair const& input : input_inds) { + for (std::pair const& input : s.input_inds) { LOG(DEBUG) << "parsing input!!!!!"; int node_id = input.first; int output_idx = input.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); auto node_name = node->name(); input_names.push_back(node_name); // insert original node name without port // TODO(jie): alternative :) // tensorflow::DataType tf_dtype = node->output_type(output_idx); - if (!graph_properties.HasOutputProperties(node_name)) + if (!s.graph_properties.HasOutputProperties(node_name)) return tensorflow::errors::Internal("failed to find input node: " + node_name); - auto op_info_vec = graph_properties.GetOutputProperties(node_name); + auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); if (static_cast(op_info_vec.size()) < output_idx) return tensorflow::errors::Internal( "accessing output index of: " + std::to_string(output_idx) + @@ -1676,10 +1870,10 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Gather output metadata std::vector output_names; std::vector output_dtypes; - for (std::pair const& output : output_inds) { + for (std::pair const& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; - tensorflow::Node* node = graph.FindNodeId(node_id); + tensorflow::Node* node = s.graph.FindNodeId(node_id); 
std::string op_name = node->name(); std::string tensor_name = op_name; if (output_idx != 0) @@ -1707,8 +1901,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( LOG(DEBUG) << "finished output"; // Build the engine - trt_builder->setMaxBatchSize(max_batch_size); - trt_builder->setMaxWorkspaceSize(max_workspace_size); + trt_builder->setMaxBatchSize(s.max_batch_size); + trt_builder->setMaxWorkspaceSize(s.max_workspace_size); LOG(INFO) << "starting build engine"; // TODO(ben,jie): half2 and int8 mode support std::string engine_plan_string; @@ -1736,7 +1930,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( "my_trt_op" + std::to_string(static_id++), "TRTEngineOp"); std::vector income_edges; for (size_t i = 0; i < input_names.size(); ++i) { - int output_idx = input_inds.at(i).second; + int output_idx = s.input_inds.at(i).second; // we wired up the input here already, it is redundant to do it again in // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut(input_names.at(i), @@ -1753,7 +1947,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( .Attr("input_nodes", input_names) .Attr("output_nodes", output_names) .Attr("OutT", output_dtypes) - .Finalize(trt_node); + .Finalize(s.trt_node); LOG(INFO) << status.ToString(); LOG(INFO) << "finished op building"; diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index dc59c37892..9f552d0990 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -28,15 +28,37 @@ limitations under the License. 
namespace tensorrt { namespace convert { +struct SubGraphParams{ + SubGraphParams(const tensorflow::Graph &graph_, + const std::set &subgraph_node_ids_, + const std::vector> &input_inds_, + const std::vector> &output_inds_, + size_t max_batch_size_, + size_t max_workspace_size_, + const tensorflow::grappler::GraphProperties &graph_properties_, + tensorflow::NodeDef* trt_node_, + bool int8_=false):graph(graph_), subgraph_node_ids(subgraph_node_ids_), + input_inds(input_inds_),output_inds(output_inds_), + max_batch_size(max_batch_size_), + max_workspace_size(max_workspace_size_), + graph_properties(graph_properties_), + trt_node(trt_node_),int8(int8_){} + + const tensorflow::Graph &graph; + const std::set& subgraph_node_ids; + const std::vector>& input_inds; // {node_id, output_idx} + const std::vector>& output_inds; // {node_id, output_idx} + size_t max_batch_size; + size_t max_workspace_size; + const tensorflow::grappler::GraphProperties& graph_properties; + tensorflow::NodeDef* trt_node; + const bool int8; +}; + tensorflow::Status ConvertSubGraphToTensorRTNodeDef( - const tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& - input_inds, // {node_id, output_idx} - const std::vector>& - output_inds, // {node_id, output_idx} - size_t max_batch_size, size_t max_workspace_size, - const tensorflow::grappler::GraphProperties& graph_prop, - tensorflow::NodeDef* trt_node); + SubGraphParams & params + ); +tensorflow::Status InjectCalibrationNode(SubGraphParams ¶ms); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc new file mode 100644 index 0000000000..6fdb583b9a --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -0,0 +1,68 @@ +// +// Created by skama on 1/25/18. 
+// + +#include "tensorflow/contrib/tensorrt/kernels/trt_calib_op.h" +#include +#include +#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" +#include "tensorflow/contrib/tensorrt/resources/TRTResources.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" +namespace tensorflow{ +namespace trt{ +TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context){ + OP_REQUIRES_OK(context, + context->GetAttr("segment_nodes", &segment_nodes_)); + OP_REQUIRES_OK(context, context->GetAttr("input_names", &input_names_)); + dev_tensors_.resize(segment_nodes_.size()); + +}; + +void TRTCalibOp::Compute(OpKernelContext *ctx) { + auto trt_rm = tensorflow::trt::TRTResourceManager::instance(); + auto resmgr = trt_rm->getManager(name()); + TRTCalibrationResource *calibRes= nullptr; + auto status=resmgr->Lookup(name(), name(), &calibRes); + if (status.ok()){ + int batchSize=ctx->input(0).dim_size(0); + int numInputs=ctx->num_inputs(); + if ( calibRes->calibrator == nullptr){// first run + for(int i = 0 ; i < numInputs; i++){ + const Tensor& t=ctx->input(i); + OP_REQUIRES_OK(ctx, ctx->allocate_persistent(t.dtype(), t.shape(),&dev_tensors_.at(i), nullptr)); + const auto dTensor=dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(),dTensor->TotalBytes()); + auto dType=t.dtype(); + void* devAddr=(void*)dTensor->flat::Type>().data(); + device_buffers_.emplace({input_names_.at(i),std::make_pair(devAddr,dTensor->TotalBytes())}); + } + calibRes->calibrator=new TRTInt8Calibrator(device_buffers_,batchSize); + auto builder=calibRes->builder; + calibRes->thr=new std::thread([calibRes](){ + calibRes->engine=calibRes->builder->buildCudaEngine(*calibRes->network); // will loop until we terminate calibrator + }); + } + std::unordered_map 
input_data; + for(int i = 0; i < numInputs; i++){ + const Tensor& t = ctx->input(i); + auto dType = t.dtype(); + void* data_address = (void*)t.flat::Type>().data(); + const auto dTensor = dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), dTensor->TotalBytes()); // use the tensor so FW keeps it + input_data.emplace(input_names_.at(i), data_address); + ctx->set_output(i,t); + } + calibRes->calibrator->setBatch(input_data); + }else{ + ctx->SetStatus(status); + return; + } + +}; + +} +} \ No newline at end of file diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h new file mode 100644 index 0000000000..aefafb29d5 --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h @@ -0,0 +1,35 @@ +// +// Created by skama on 1/25/18. +// + +#ifndef TFGITHUB_TRT_CALIB_OP_H +#define TFGITHUB_TRT_CALIB_OP_H + +#include +#include +#include +#include +#include +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" + +namespace tensorflow { +namespace trt { +class TRTCalibOp: public OpKernel { +public: + explicit TRTCalibOp(OpKernelConstruction* context); + + void Compute(OpKernelContext* context) override; + + private: + std::vector segment_nodes_; + std::vector input_names_; + std::vector shapes_; + std::unordered_map> device_buffers_; + std::vector dev_tensors_; + +}; +} +} +#endif //TFGITHUB_TRT_CALIB_OP_H diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index a1524a592a..54b8d0d431 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,8 +24,8 @@ limitations under the License. 
namespace tensorflow { static ::tensorflow::tensorrt::Logger gLogger; -using namespace nvinfer1; - +using IRuntime=nvinfer1::IRuntime; +using Dims=nvinfer1::Dims; namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { @@ -44,7 +44,7 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager - IRuntime* infer = createInferRuntime(gLogger); + IRuntime* infer = nvinfer1::createInferRuntime(gLogger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); diff --git a/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc new file mode 100644 index 0000000000..ddf2baa526 --- /dev/null +++ b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc @@ -0,0 +1,34 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +namespace tensorflow { + + +REGISTER_OP("TRTCalibOp") + .Attr("segment_nodes: list(string)") // names of the ops in segment + .Attr("segment_output_names: list(string)") // names of the output ops in segment + .Attr("InT: list({int8, float16, float32})") + .Input("in_tensor: InT") + .Output("out_tensor: InT") + .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) { + for (int i = 0; i < c->num_inputs(); i++){ + c->set_output(i, c->input(i)); + } + return Status::OK(); + }); + +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 354f0c8b42..5aba371a03 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -30,7 +30,7 @@ from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops -def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace_size=2<<20): +def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace_size=2<<20, int8=False): """Python wrapper for the TRT transormation. @@ -76,7 +76,7 @@ def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace # transformed graphs protobuf string. 
out = trt_convert( optimized_graph_def_str ,outputs, - max_batch_size,max_workspace_size) + max_batch_size,max_workspace_size,int8) status = out[0] output_graph_def_string = out[1] del optimized_graph_def_str #save some memory diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc index 3c94b52ea6..fe414c45ce 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -9,7 +9,7 @@ namespace tensorflow { namespace trt { - +// set the batch size before constructing the thread to execute engine int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } bool TRTInt8Calibrator::setBatch( diff --git a/tensorflow/contrib/tensorrt/resources/TRTResources.h b/tensorflow/contrib/tensorrt/resources/TRTResources.h index 2b65017943..2fe78b882d 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTResources.h +++ b/tensorflow/contrib/tensorrt/resources/TRTResources.h @@ -6,27 +6,40 @@ #define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ -#include #include -#include "tensorflow/contrib/tensorrt/resourcemgr/TRTInt8Calibrator.h" +#include +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" #include "tensorflow/core/framework/resource_mgr.h" namespace tensorflow { namespace trt { struct TRTCalibrationResource : public tensorflow::ResourceBase { - TRTCalibrationResource():calibrator(nullptr), builder(nullptr), thr(nullptr){}; + TRTCalibrationResource() + : calibrator(nullptr), + builder(nullptr), + network(nullptr), + engine(nullptr), + logger(nullptr), + thr(nullptr) {} + string DebugString() override { + return ""; + } TRTInt8Calibrator* calibrator; nvinfer1::IBuilder* builder; - std::thread *thr; + nvinfer1::INetworkDefinition* network; + nvinfer1::ICudaEngine* engine; + tensorflow::tensorrt::Logger* logger; + std::thread* thr; }; 
-struct TRTEngineResource:public tensorflow::ResourceBase{ - TRTEngineResource():runtime(nullptr), ctx(nullptr){}; - nvinfer1::IRuntime *runtime; - nvinfer1::IExecutionContext *ctx; +struct TRTEngineResource : public tensorflow::ResourceBase { + TRTEngineResource() : runtime(nullptr), ctx(nullptr){}; + nvinfer1::IRuntime* runtime; + nvinfer1::IExecutionContext* ctx; }; -} -} -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ +} // namespace trt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 5f8e73a59f..3e8baf91ae 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -28,7 +28,8 @@ std::pair trt_convert(string graph_def_string,//const tensorflow::GraphDef& std::vector output_names, size_t max_batch_size, - size_t max_workspace_size + size_t max_workspace_size_bytes, + bool int8 // unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. 
These ops are included @@ -57,8 +58,8 @@ tensorrt::convert::ConvertGraphDefToTensorRT(graph_def, output_names, max_batch_size, - max_workspace_size, - &outGraph); + max_workspace_size_bytes, + &outGraph,int8); if (!conversion_status.ok()) { auto retCode=(int)conversion_status.code(); char buff[2000]; @@ -79,6 +80,6 @@ std::pair trt_convert(string graph_def_string, std::vector output_names, size_t max_batch_size, - size_t max_workspace_size); + size_t max_workspace_size,bool int8); %unignoreall -- GitLab From adaabc11680fa2823d029cf67214b23fa6652a4b Mon Sep 17 00:00:00 2001 From: Jie Date: Mon, 5 Feb 2018 18:56:48 -0800 Subject: [PATCH 021/884] [DEBUG] multiple GPU crash with [cuda_illigal_memory_address] added cudaSetDevice before ICudaEngine::createExecutionContext() To make sure TRT engine gets allocated on the same GPU (to access IO memory) --- .../contrib/tensorrt/kernels/trt_engine_op.cc | 26 ++++++++++++++++--- .../contrib/tensorrt/segment/segment.cc | 10 ------- 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 445900f08c..81fd4c9747 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -44,11 +44,22 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken // from resourcemanager + // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same + // gpu where the input/output is also located. 
+ int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; + cudaSetDevice(gpu_id); + int device; + cudaGetDevice(&device); + if (gpu_id != device) + LOG(FATAL) << "set device failed!"; + IRuntime* infer = createInferRuntime(gLogger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); trt_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); + + // trt_context_ptr_.reset(nullptr); // runtime is safe to delete after engine creation infer->destroy(); std::stringstream oss; @@ -103,12 +114,16 @@ void TRTEngineOp::Compute(OpKernelContext* context) { const TensorShape& input_shape = input_tensor.shape(); if (i == 0) { nbBatch = input_shape.dim_size(0); + if (nbBatch > trt_engine_ptr_->getMaxBatchSize()) + LOG(FATAL) << "input tensor batch larger than max_batch_size: " + << trt_engine_ptr_->getMaxBatchSize(); } else if (nbBatch != input_shape.dim_size(0)) { valid = false; break; } // int64 input_shape.dim_size(int d) // int input_shape.dims() + LOG(INFO) << "INPUT BINDING index: " << bindingIndex << " with name: " << input_nodes_[i]; switch (trt_engine_ptr_->getBindingDataType(bindingIndex)) { case nvinfer1::DataType::kFLOAT: LOG(INFO) << "float"; @@ -125,7 +140,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } } - if (!valid) LOG(WARNING) << "input data inconsistent batch size"; + if (!valid) LOG(FATAL) << "input data inconsistent batch size"; for (int i = 0; i < static_cast(output_nodes_.size()); i++) { // This is bad that we have to reallocate output buffer every run. 
@@ -135,7 +150,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { TensorShape output_shape; if (bindingIndex != -1) { - LOG(INFO) << "got binding " << bindingIndex; + LOG(INFO) << "got binding " << bindingIndex << " with name: " << output_nodes_[i]; auto dims = trt_engine_ptr_->getBindingDimensions(bindingIndex); std::vector trt_shape(dims.nbDims + 1); trt_shape[0] = nbBatch; @@ -167,6 +182,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { break; } } + LOG(INFO) << "getting stream"; // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files const cudaStream_t* stream = CHECK_NOTNULL( reinterpret_cast(context->op_device_context() @@ -174,9 +190,11 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->implementation() ->CudaStreamMemberHack())); - trt_context_ptr_->enqueue(nbBatch, &buffers[0], *stream, nullptr); + // TODO(jie): trt enqueue does not return error + LOG(INFO) << "enqueue returns: " << trt_context_ptr_->enqueue(nbBatch, &buffers[0], *stream, nullptr); + LOG(INFO) << "all good"; // sync should be done by TF. 
- //cudaStreamSynchronize(*stream); + // cudaStreamSynchronize(*stream); } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); diff --git a/tensorflow/contrib/tensorrt/segment/segment.cc b/tensorflow/contrib/tensorrt/segment/segment.cc index 41da528247..d749d0d0e8 100644 --- a/tensorflow/contrib/tensorrt/segment/segment.cc +++ b/tensorflow/contrib/tensorrt/segment/segment.cc @@ -220,16 +220,6 @@ tensorflow::Status SegmentGraph( } } - // Cleanup the graph to remove disconnected nodes before outputting - if (VLOG_IS_ON(2)) { - for (tensorflow::Node* node : graph.nodes()) { - if ((node->in_edges().size() == 0) && (node->out_edges().size() == 0)) { - graph.RemoveNode(node); - } - } - // tensorflow::DumpGraph("Post-Segment", &graph); - } - // Convert the segments into the expected return format for (const auto& itr : sg_map) { const auto& segment_node_names = itr.second; -- GitLab From 0b8492b612eef6057440c4d1fe5dca41cacf5d6d Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 12 Feb 2018 18:40:07 -0800 Subject: [PATCH 022/884] Debugging calibration --- .../contrib/tensorrt/convert/convert_graph.cc | 28 +++- .../contrib/tensorrt/convert/convert_graph.h | 5 +- .../contrib/tensorrt/convert/convert_nodes.cc | 147 +++++++++++++++++- .../contrib/tensorrt/convert/convert_nodes.h | 8 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 22 +-- .../contrib/tensorrt/python/__init__.py | 1 + .../contrib/tensorrt/python/trt_convert.py | 20 ++- .../tensorrt/resources/TRTInt8Calibrator.cc | 65 +++++++- .../tensorrt/resources/TRTInt8Calibrator.h | 9 +- tensorflow/contrib/tensorrt/trt_conversion.i | 135 ++++++++++------ 10 files changed, 363 insertions(+), 77 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 494920fb7c..8aa4e42fa6 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -216,11 +216,11 @@ 
tensorflow::Status GetCalibNode(ConvertGraphParams* params) { TF_RETURN_IF_ERROR(status); for (auto in_edge: params->subgraph_incoming_edges) { // loop over incoming edges and attach them to calib node - tensorflow::Node* src_node = in_edge->src(); + // tensorflow::Node* src_node = in_edge->src(); auto src_output=in_edge->src_output(); auto dst_node=in_edge->dst(); auto dst_input=in_edge->dst_input(); - VLOG(0)<<" update edge "<name()<<":"< "<name()<<":"<name()<<":"< "<name()<<":"<graph.UpdateEdge(trt_node, src_output, dst_node, dst_input); } @@ -330,6 +330,30 @@ tensorflow::Status BuildNodeMap( } } // namespace +tensorflow::Status ConvertCalibGraphToInferGraph( + const tensorflow::GraphDef& graph_def, + tensorflow::GraphDef* infer_graph){ + VLOG(0)<<"Starting Calib Conversion"; + tensorflow::Graph graph(tensorflow::OpRegistry::Global()); + TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( + tensorflow::GraphConstructorOptions(), graph_def, &graph)); + // get calib nodes + std::vector calibNodes; + for(auto node : graph.op_nodes()){ + if(node->type_string()=="TRTCalibOp"){ + VLOG(1)<<"Found Calib Node"; + calibNodes.push_back(node); + } + } + VLOG(0)<<"Num Calib nodes in graph= "<& output_names, size_t max_batch_size, diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 4e70fb00f9..588cecf8dd 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -40,6 +40,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/logging.h" #define _TF_LOG_DEBUG ::tensorflow::internal::LogMessage(__FILE__, __LINE__, -1) @@ -299,6 +300,11 @@ std::vector TFAttrs::get>(std::string key) const { return std::vector(attr.begin(), attr.end()); } template <> +std::vector TFAttrs::get>(std::string key) const { + auto attr = this->at(key)->list().s(); + return std::vector(attr.begin(), attr.end()); +} +template <> nvinfer1::Dims TFAttrs::get(std::string key) const { auto values = this->get>(key); nvinfer1::Dims dims; @@ -1938,6 +1944,125 @@ void Converter::register_op_converters() { tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { return tensorflow::errors::Unimplemented("Not implemented yet"); } +tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph &graph, + tensorflow::Node *c_node) { + const auto ndef=c_node->def(); + + TFAttrs attrs(ndef); + std::vector segment_nodes(attrs.get>("segment_nodes")); + std::vector output_nodes(attrs.get>("segment_output_names")); + std::vector input_names(attrs.get>("input_names")); + std::string res_name = attrs.get("resource_name"); + VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; + std::string engine_name="my_trt_op"; + { + const auto node_id=tensorflow::str_util::Split(res_name,"_"); + engine_name+=node_id.back(); + } + std::map nodeMaps; + + for(auto n: graph.op_nodes()){ + nodeMaps.insert({n->name(),n}); + } + VLOG(1)<<"Output Nodes:"; + std::vector out_types; + std::vector out_edges; + for(auto &i : output_nodes ){ + auto node_port=tensorflow::str_util::Split(i,":"); + VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); + auto out_node_name = node_port.at(0); + if(node_port.size()>1){ + VLOG(1) << "Multi port output" << node_port.at(0) << + " " << 
node_port.at(1) << " size=" << node_port.size(); + } + auto nodeIt=nodeMaps.find(out_node_name); + if(nodeIt!=nodeMaps.end()){ + tensorflow::Node* outNode=nodeIt->second; + int port=0; + if(node_port.size()==2){ + port=std::strtoul(node_port.at(1).c_str(),nullptr,10); + out_types.push_back(outNode->output_type(port)); + }else{ + out_types.push_back(outNode->output_type(0)); + } + for(auto outEdge : outNode->out_edges()){ + if(outEdge->src_output()==port){ + out_edges.push_back(outEdge); + break; + } + } + }else{ + LOG(WARNING)<<" couldn't find output node "<getManager("TRTCalibOps"); + tensorflow::trt::TRTCalibrationResource* calibRes = nullptr; + auto status = resmgr->Lookup(res_name, res_name, &calibRes); + if(!status.ok() || !calibRes->calibrator){ + return tensorflow::errors::FailedPrecondition("You must run calibration"\ + " and inference conversion in the same proces"); + } + + calibRes->calibrator->setDone(); + VLOG(1)<<"Waiting for calibration thread to join"; + calibRes->thr->join(); + delete calibRes->thr; + if(!calibRes->engine){ + LOG(FATAL)<<"Calibration failed!, engine is nullptr"; + } + auto engine_plan_string=calibRes->engine->serialize(); + calibRes->engine->destroy(); + calibRes->network->destroy(); + calibRes->builder->destroy(); + calibRes->thr= nullptr; + calibRes->engine= nullptr; + calibRes->builder= nullptr; + tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); + std::vector income_edges; + for(const auto in_edge : c_node->in_edges()){ + auto src=in_edge->src(); + int dest_port=in_edge->dst_input(); + income_edges.emplace_back(src->name(),in_edge->src_output(),c_node->input_type(dest_port)); + } + tensorflow::gtl::ArraySlice input_list( + income_edges); + op_builder.Input(input_list); + tensorflow::NodeDef engine_node; + status = op_builder.Attr("serialized_engine", engine_plan_string) + .Attr("input_nodes", input_names) + .Attr("output_nodes", output_nodes) + .Attr("OutT", out_types) + .Finalize(&engine_node); + 
if(!status.ok()){ + LOG(ERROR)<<"Engine Node creation failed"; + return status; + } + auto trt_engine_node=graph.AddNode(engine_node,&status); + TF_CHECK_OK(status); + for(size_t i=0;idst()->name() << " port " + << out_edges.at(i)->dst_input(); + TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, + out_edges.at(i)->dst(), + out_edges.at(i)->dst_input())); + } + VLOG(1) << "Segment nodes:"; + for (auto &i : segment_nodes){ + VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); + auto it=nodeMaps.find(i); + if(it!=nodeMaps.end()){ + graph.RemoveNode(it->second); + } + } + return tensorflow::Status::OK(); +} tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Visit nodes in reverse topological order and construct the TRT network. @@ -1958,13 +2083,15 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { LOG(DEBUG) << "BUILDING 1"; static int static_id = 0; std::string calib_op_name = - std::string("my_trt_calib_op_") + std::to_string(static_id++); - + std::string("my_trt_calib_op_") + std::to_string(static_id); + std::string engine_name = + std::string("my_trt_op") + std::to_string(static_id); + static_id++; LOG(DEBUG) << "BUILDING 2"; auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); auto op_res = new tensorflow::trt::TRTCalibrationResource(); - VLOG(0)<<"SAMI Creating calibresource "<Create(calib_op_name, calib_op_name, op_res)); op_res->logger = new tensorflow::tensorrt::Logger(); op_res->builder = nvinfer1::createInferBuilder(*(op_res->logger)); @@ -2065,15 +2192,23 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Gather output metadata std::vector output_names; std::vector output_dtypes; + int trt_engine_op_output_idx = 0; for (std::pair const& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); 
std::string op_name = node->name(); std::string tensor_name = op_name; + + s.output_edge_map->insert( + {trt_engine_op_output_idx == 0 + ? engine_name + : engine_name + ":" + std::to_string(trt_engine_op_output_idx), + {output_idx, tensor_name}}); + trt_engine_op_output_idx++; if (output_idx != 0) tensor_name = tensor_name + ":" + std::to_string(output_idx); - LOG(DEBUG) << "output tensor name: " << tensor_name; + VLOG(1) << "output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); if (!tensor_or_weights.is_tensor()) { @@ -2083,7 +2218,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); + tensor_name); } converter.network()->markOutput(*tensor); tensorflow::DataType tf_dtype = node->output_type(output_idx); @@ -2109,7 +2244,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // ConvertSubGraphToTensorRT(convert_graph.cc) auto incoming_edge = tensorflow::NodeDefBuilder::NodeOut( input_names.at(i), output_idx, input_dtypes.at(i)); - VLOG(0) << calib_op_name << " input " << i << " = " << input_names.at(i) + VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) << ":" << output_idx <<" dType= "<< tensorflow::DataTypeString(input_dtypes.at(i)); income_edges.push_back(incoming_edge); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 2f754968dc..71f61e2dc4 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -31,7 +31,7 @@ namespace tensorrt { namespace convert { struct SubGraphParams { - SubGraphParams(const tensorflow::Graph& graph_, + SubGraphParams(tensorflow::Graph& graph_, const std::set& subgraph_node_ids_, 
const std::vector>& input_inds_, const std::vector>& output_inds_, @@ -52,7 +52,7 @@ struct SubGraphParams { trt_node(trt_node_), int8(int8_) {} - const tensorflow::Graph& graph; + tensorflow::Graph& graph; const std::set& subgraph_node_ids; const std::vector>& input_inds; // {node_id, output_idx} const std::vector>& output_inds; // {node_id, output_idx} @@ -64,8 +64,10 @@ struct SubGraphParams { const bool int8; }; -tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); +tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams ¶ms); tensorflow::Status InjectCalibrationNode(SubGraphParams& params); +tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, + tensorflow::Node* c_node); } // namespace convert } // namespace tensorrt diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index 4996b3cd40..41906b6090 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -43,23 +43,22 @@ TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context) { } void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { auto trt_rm = tensorflow::trt::TRTResourceManager::instance(); - VLOG(0) << "Op Name= " << name() << " nodedef name= " << repo_name; + VLOG(2) << "Op Name= " << name() << " nodedef name= " << repo_name; auto resmgr = trt_rm->getManager("TRTCalibOps"); tensorflow::trt::TRTCalibrationResource* calibRes = nullptr; auto status = resmgr->Lookup(repo_name, repo_name, &calibRes); - VLOG(0) << "SAMI status " << status.ToString(); if (status.ok()) { int batchSize = ctx->input(0).dim_size(0); - VLOG(0) << "SAMI Batchsize= " << batchSize; + VLOG(2) << "SAMI Batchsize= " << batchSize; int numInputs = ctx->num_inputs(); - VLOG(0) << "SAMI numInputs= " << numInputs; + VLOG(2) << "SAMI numInputs= " << numInputs; dev_tensors_.resize(numInputs); if (calibRes->calibrator == 
nullptr) { - VLOG(0) << " Constructing calibrator"; + VLOG(1) << " Constructing calibrator"; // first run for (int i = 0; i < numInputs; i++) { const tensorflow::Tensor& t = ctx->input(i); - VLOG(0) << "Tensor " << i << " " << t.shape().DebugString(); + VLOG(1) << "Tensor " << i << " " << t.shape().DebugString(); OP_REQUIRES_OK(ctx, ctx->allocate_persistent(t.dtype(), t.shape(), &dev_tensors_.at(i), nullptr)); @@ -73,11 +72,14 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { } calibRes->calibrator = new TRTInt8Calibrator(device_buffers_, batchSize); calibRes->thr = new std::thread([calibRes]() { + VLOG(0)<<"Starting calibration thread, Calibration Resource @ "<builder->setInt8Calibrator(calibRes->calibrator); + calibRes->builder->setInt8Mode(true); calibRes->engine = calibRes->builder->buildCudaEngine( *calibRes->network); // will loop until we terminate calibrator - VLOG(1) << "Calibration loop terminated"; + VLOG(0) << "SAMI Calibration loop terminated"; }); - VLOG(0) << "SAMI intialized calibrator resource"; + VLOG(0) << "SAMI initialized calibrator resource"; } std::unordered_map input_data; @@ -92,9 +94,9 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { input_data.emplace(input_names_.at(i), data_address); ctx->set_output(i, t); } - VLOG(0) << "Filled map"; + VLOG(1) << "Filled map for sending"; calibRes->calibrator->setBatch(input_data); - VLOG(0) << "Passed calibration data"; + VLOG(1) << "Passed calibration data"; } else { ctx->SetStatus(status); return; diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 4aeea48515..9eb589664c 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -5,4 +5,5 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import from tensorflow.contrib.tensorrt.python.ops import trt_engine_op from tensorflow.contrib.tensorrt.python.trt_convert import 
CreateInferenceGraph +from tensorflow.contrib.tensorrt.python.trt_convert import CalibGraphToInferGraph # pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 5aba371a03..18ea6c83cc 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl -from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert +from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert,calib_convert from tensorflow.python.util import compat import tensorflow as tf from tensorflow.python.grappler import tf_optimizer @@ -91,3 +91,21 @@ def CreateInferenceGraph(input_graph_def, outputs,max_batch_size=1,max_workspace output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string #save some memory return output_graph_def + +def CalibGraphToInferGraph(calibration_graph_def): + graph_str=calibration_graph_def.SerializeToString() + out=calib_convert(graph_str) + status=out[0] + output_graph_def_string = out[1] + del graph_str #save some memory + if len(status) < 2: + raise _impl.UnknownError(None,None,status) + if status[:2] != "OK": + msg=status.split(";") + if len(msg) == 1: + raise RuntimeError("Status message is malformed {}".format(status)) + raise _impl._make_specific_exception(None,None,";".join(msg[1:]), int(msg[0])) + output_graph_def = graph_pb2.GraphDef() + output_graph_def.ParseFromString(output_graph_def_string) + del output_graph_def_string #save some memory + return output_graph_def diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc index 10d9350d7a..e1ab243b07 100644 --- 
a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -5,6 +5,10 @@ #include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" #include +#include +#include +#include + #include "tensorflow/core/platform/logging.h" namespace tensorflow { @@ -12,26 +16,67 @@ namespace trt { // set the batch size before constructing the thread to execute engine int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } +TRTInt8Calibrator::TRTInt8Calibrator(const std::unordered_map< + std::string, std::pair>& dev_buffers, + int batch_size) + : batch_size_(batch_size), + done_(false), + dev_buffers_(dev_buffers), + calib_running_(false){ + cudaPointerAttributes pa; + int devid=-1; + cudaGetDevice(&devid); + VLOG(0)<<"Constructing calibrator with batch size "<& data) { + VLOG(1)<<"SAMI SAMI Waiting to set new batch"; + if(done_)return false; while (calib_running_.load( std::memory_order_acquire)) { // wait while calibration is running tensorflow::mutex_lock l(cond_mtx_); cond_.wait_for(l, std::chrono::milliseconds(50)); + if(done_)return false; } + VLOG(1)<<"Set Batch Waiting finished"; for (const auto it : data) { + auto devptr = dev_buffers_.find(it.first); if (devptr == dev_buffers_.end()) { LOG(FATAL) << "FATAL input name '" << it.first << "' does not match with the buffer names"; } + cudaPointerAttributes pa; const auto& d = devptr->second; + VLOG(1)<<"cuda memcopy buff name= "<second.first; bindings[i] = it->second.first; + float f[2]; + f[0]=3.; + f[1]=0.14159; + auto status=cudaMemcpy(f,bindings[i],sizeof(float)*2,cudaMemcpyDeviceToHost); + int devid=-1; + cudaGetDevice(&devid); + VLOG(0)<<"SAMI ORDER GETTING, Data in perm storage [0]="<>& dev_buffers, - int batch_size) - : batch_size_(batch_size), - done_(false), - dev_buffers_(dev_buffers), - calib_running_(false){}; + int batch_size); int getBatchSize() const; bool getBatch(void* bindings[], const char* names[], int nbBindings) 
override; bool setBatch(const std::unordered_map &data); void setDone(){done_=true;} const void *readCalibrationCache(std::size_t &length) override; void writeCalibrationCache(const void *ptr, std::size_t length) override; + ~TRTInt8Calibrator(); private: int batch_size_; tensorflow::mutex cond_mtx_; tensorflow::condition_variable cond_; bool done_; - std::unordered_map> dev_buffers_; + const std::unordered_map> dev_buffers_; std::atomic_bool calib_running_; }; } // namespace trt diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 3e8baf91ae..ee87d7fae1 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -23,58 +23,98 @@ %ignoreall %unignore tensorflow; %unignore trt_convert; +%unignore calib_convert; %{ - std::pair trt_convert(string graph_def_string,//const tensorflow::GraphDef& - std::vector output_names, - size_t max_batch_size, - size_t max_workspace_size_bytes, - bool int8 - // unfortunately we can't use TF_Status here since it - // is in c/c_api and brings in a lot of other libraries - // which in turn declare ops. These ops are included - // statically in our library and cause an abort when - // module is loaded due to double registration - // until Tensorflow properly exposes these headers - // we have to work around this by returning a string - // and converting it to exception on python side. - //,TF_Status* out_status) { - ) { - string out_status; +std::pair trt_convert(string graph_def_string,//const tensorflow::GraphDef& + std::vector output_names, + size_t max_batch_size, + size_t max_workspace_size_bytes, + bool int8 + // unfortunately we can't use TF_Status here since it + // is in c/c_api and brings in a lot of other libraries + // which in turn declare ops. 
These ops are included + // statically in our library and cause an abort when + // module is loaded due to double registration + // until Tensorflow properly exposes these headers + // we have to work around this by returning a string + // and converting it to exception on python side. + //,TF_Status* out_status) { +) { + string out_status; - tensorflow::GraphDef graph_def; - if (!graph_def.ParseFromString(graph_def_string)) { - out_status="InvalidArgument;Couldn't interpret input as a GraphDef"; - return std::pair{out_status,""}; - } + tensorflow::GraphDef graph_def; + if (!graph_def.ParseFromString(graph_def_string)) { + out_status="InvalidArgument;Couldn't interpret input as a GraphDef"; + return std::pair{out_status,""}; + } - if (!output_names.size()) { - out_status="InvalidArgument;Size of the output_names vector is 0"; - return std::pair{out_status,""}; - //return ""; - } - tensorflow::GraphDef outGraph; - tensorflow::Status conversion_status = + if (!output_names.size()) { + out_status="InvalidArgument;Size of the output_names vector is 0"; + return std::pair{out_status,""}; + //return ""; + } + tensorflow::GraphDef outGraph; + tensorflow::Status conversion_status = tensorrt::convert::ConvertGraphDefToTensorRT(graph_def, - output_names, - max_batch_size, - max_workspace_size_bytes, - &outGraph,int8); - if (!conversion_status.ok()) { - auto retCode=(int)conversion_status.code(); - char buff[2000]; - snprintf(buff,2000,"%d;%s",retCode,conversion_status.error_message().c_str()); - out_status=buff; - return std::pair{out_status,""}; - } - string result; - if (!outGraph.SerializeToString(&result)) { - out_status="InvalidArgument;Couldn't serialize output as a GraphDef"; - return std::pair{out_status,""}; - } - out_status="OK;All good!"; - return std::pair{out_status,result}; + output_names, + max_batch_size, + max_workspace_size_bytes, + &outGraph,int8); + if (!conversion_status.ok()) { + auto retCode=(int)conversion_status.code(); + char buff[2000]; + 
snprintf(buff,2000,"%d;%s",retCode,conversion_status.error_message().c_str()); + out_status=buff; + return std::pair{out_status,""}; + } + string result; + if (!outGraph.SerializeToString(&result)) { + out_status="InvalidArgument;Couldn't serialize output as a GraphDef"; + return std::pair{out_status,""}; + } + out_status="OK;All good!"; + return std::pair{out_status,result}; +} + +std::pair calib_convert(string graph_def_string // const tensorflow::GraphDef& + // unfortunately we can't use TF_Status here since it + // is in c/c_api and brings in a lot of other libraries + // which in turn declare ops. These ops are included + // statically in our library and cause an abort when + // module is loaded due to double registration + // until Tensorflow properly exposes these headers + // we have to work around this by returning a string + // and converting it to exception on python side. + //,TF_Status* out_status) { +) { + string out_status; + + tensorflow::GraphDef graph_def; + if (!graph_def.ParseFromString(graph_def_string)) { + out_status="InvalidArgument;Couldn't interpret input as a GraphDef"; + return std::pair{out_status,""}; } + + tensorflow::GraphDef outGraph; + tensorflow::Status conversion_status = + tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, + &outGraph); + if (!conversion_status.ok()) { + auto retCode=(int)conversion_status.code(); + char buff[2000]; + snprintf(buff,2000,"%d;%s",retCode,conversion_status.error_message().c_str()); + out_status=buff; + return std::pair{out_status,""}; + } + string result; + if (!outGraph.SerializeToString(&result)) { + out_status="InvalidArgument;Couldn't serialize output as a GraphDef"; + return std::pair{out_status,""}; + } + out_status="OK;All good!"; + return std::pair{out_status,result}; +} %} std::pair trt_convert(string graph_def_string, @@ -82,4 +122,7 @@ std::pair trt_convert(string graph_def_string, size_t max_batch_size, size_t max_workspace_size,bool int8); +std::pair calib_convert(string 
graph_def_string); + + %unignoreall -- GitLab From ca19b32e4d1574ad29e36dbc164c320aeca80d47 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Wed, 14 Feb 2018 00:13:00 -0800 Subject: [PATCH 023/884] cifar 10 divergance fix and batchnorm unit test fix --- .../core/kernels/mkl_fused_batch_norm_op.cc | 96 +++++++++++++------ tensorflow/core/kernels/mkl_relu_op.cc | 20 +++- 2 files changed, 81 insertions(+), 35 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 8313224d7f..b7dee3fb3e 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -1110,19 +1110,12 @@ class MklFusedBatchNormGradOp : public OpKernel { return; } - if (dnn_shape_src.IsMklTensor()) - depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); - else - ExtractParams(context); - - memory::format format_m; if (dnn_shape_src.IsMklTensor()) { - if (dnn_shape_src.IsTensorInNCHWFormat()) - format_m = memory::format::nchw; - else - format_m = memory::format::nhwc; + depth_ = dnn_shape_src.DimSize(MklDnnDims::Dim_C); + } else if (dnn_shape_diff_dst.IsMklTensor()) { + depth_ = dnn_shape_diff_dst.DimSize(MklDnnDims::Dim_C); } else { - format_m = TFDataFormatToMklDnnDataFormat(tensor_format_); + ExtractParams(context); } MklDnnData src(&cpu_engine); @@ -1146,20 +1139,20 @@ class MklFusedBatchNormGradOp : public OpKernel { diff_dst_dims = TFShapeToMklDnnDimsInNCHW(diff_dst_tensor.shape(), tensor_format_); - // set src and diff_dst primitives + // set src and diff_dst primitives according to input layout memory::desc src_md({}, memory::data_undef, memory::format_undef); memory::desc diff_dst_md({}, memory::data_undef, memory::format_undef); - if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { - if (dnn_shape_src.IsMklTensor()) { - src_md = dnn_shape_src.GetMklLayout(); - diff_dst_md = src_md; - } else { - diff_dst_md = 
dnn_shape_diff_dst.GetMklLayout(); - src_md = diff_dst_md; - } + if (dnn_shape_src.IsMklTensor()) { + src_md = dnn_shape_src.GetMklLayout(); } else { - src_md = memory::desc(src_dims, MklDnnType(), format_m); - diff_dst_md = src_md; + src_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + if (dnn_shape_diff_dst.IsMklTensor()) { + diff_dst_md = dnn_shape_diff_dst.GetMklLayout(); + } else { + diff_dst_md = memory::desc(diff_dst_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); } src.SetUsrMem(src_md, &src_tensor); diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor); @@ -1211,28 +1204,64 @@ class MklFusedBatchNormGradOp : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + + // MKL-DNN's BN primitive not provide API to fetch internal format + // set common_md as OpMem + // src and diff_dst will reorder to common_md + // diff_src will set as common_md + memory::desc common_md({}, memory::data_undef, memory::format_undef); + if (dnn_shape_src.IsMklTensor() || dnn_shape_diff_dst.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor()) { + common_md = dnn_shape_src.GetMklLayout(); + } else { + common_md = dnn_shape_diff_dst.GetMklLayout(); + } + } else { + common_md = memory::desc(src_dims, MklDnnType(), + TFDataFormatToMklDnnDataFormat(tensor_format_)); + } + // if any of src and diff_dst as mkl layout, + // then we set diff_src as mkl layout + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); - auto diff_src_pd = bnrm_fwd_pd.dst_primitive_desc(); + // set diff_src's mkl layout as common_md + auto diff_src_pd = memory::primitive_desc(common_md, cpu_engine); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), src_dims, - format_m); - 
dnn_shape_diff_src.SetTfDimOrder(dnn_shape_src.GetDimension(), - tensor_format_); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_src.GetDimension(), + src_dims, + dnn_shape_src.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_src.GetDimension(), + tensor_format_); + } else { + dnn_shape_diff_src.SetTfLayout( + dnn_shape_diff_dst.GetDimension(), + src_dims, + dnn_shape_diff_dst.GetTfDataFormat()); + dnn_shape_diff_src.SetTfDimOrder( + dnn_shape_diff_dst.GetDimension(), + tensor_format_); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyont should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, tf_shape_diff_src, dnn_shape_diff_src); - diff_src.SetUsrMem(src_md, diff_src_tensor); + // set diff_src + diff_src.SetUsrMem(common_md, diff_src_tensor); prop_kind pk = prop_kind::backward; auto bnrm_bwd_desc = batch_normalization_backward::desc( - pk, diff_src.GetUsrMemDesc(), src.GetUsrMemDesc(), epsilon_, + pk, common_md, common_md, epsilon_, /* for inference, specify use_global_stats 1. 
on fwd prop, use mean and variance provided as inputs @@ -1245,11 +1274,16 @@ class MklFusedBatchNormGradOp : public OpKernel { auto bnrm_bwd_pd = batch_normalization_backward::primitive_desc( bnrm_bwd_desc, cpu_engine, bnrm_fwd_pd); + std::vector net; + src.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + diff_dst.CheckReorderToOpMem(memory::primitive_desc(common_md, + cpu_engine), &net); + auto bnrm_bwd_op = batch_normalization_backward( bnrm_bwd_pd, src.GetOpMem(), mean.GetOpMem(), variance.GetOpMem(), diff_dst.GetOpMem(), weights_m, diff_src.GetOpMem(), diff_weights_m); - std::vector net; net.push_back(bnrm_bwd_op); stream(stream::kind::eager).submit(net).wait(); diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 51db3991e2..924b9da7e0 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -368,8 +368,11 @@ void MklReluGradOp::Compute(OpKernelContext* context) { mkl_context.MklCleanup(); } + + #else // INTEL_MKL_ML + template class MklReluOpBase : public OpKernel { public: @@ -579,17 +582,26 @@ class MklReluGradOpBase : public OpKernel { // allocate diff_src tensor MklDnnShape dnn_shape_diff_src; TensorShape tf_shape_diff_src; - if (dnn_shape_src.IsMklTensor()) { + if (dnn_shape_src.IsMklTensor() || + dnn_shape_diff_dst.IsMklTensor()) { dnn_shape_diff_src.SetMklTensor(true); auto diff_src_pd = relu_bwd_pd.diff_src_primitive_desc(); dnn_shape_diff_src.SetMklLayout(&diff_src_pd); dnn_shape_diff_src.SetElemType(MklDnnType()); - dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), - dnn_shape_src.GetSizesAsMklDnnDims(), - dnn_shape_src.GetTfDataFormat()); + if (dnn_shape_src.IsMklTensor()) { + dnn_shape_diff_src.SetTfLayout(dnn_shape_src.GetDimension(), + dnn_shape_src.GetSizesAsMklDnnDims(), + dnn_shape_src.GetTfDataFormat()); + } else { + dnn_shape_diff_src.SetTfLayout(dnn_shape_diff_dst.GetDimension(), + 
dnn_shape_diff_dst.GetSizesAsMklDnnDims(), + dnn_shape_diff_dst.GetTfDataFormat()); + } tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); + // both src and diff_dst are tf layout, + // so get tf shape from anyone should be ok tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, -- GitLab From c6cd20dbcaaa601977d1b63ab17e04d137de5133 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Thu, 15 Feb 2018 19:01:57 -0800 Subject: [PATCH 024/884] Add node converter for FusedBatchNorm op --- .../contrib/tensorrt/convert/convert_graph.cc | 9 ++- .../contrib/tensorrt/convert/convert_nodes.cc | 67 +++++++++++++++++++ 2 files changed, 73 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 31ba30b2d9..8c0aada355 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -68,9 +68,12 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { "Mean", "AvgPool", "ConcatV2", - "DepthwiseConv2dNative" //, "MatMul", - //"Reshape" - // TODO(ben,jie): ... + "DepthwiseConv2dNative", + "FusedBatchNorm", + "FusedBatchNormV2", + //, "MatMul", + //"Reshape" + // TODO(ben,jie): ... 
}; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) return candidate_ops.count(node_def.op()); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index ea0eb480f2..e3b16126f1 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -276,6 +276,17 @@ template <> tensorflow::DataType TFAttrs::get(string key) const { return this->at(key)->type(); } + +template <> +float TFAttrs::get(string key) const { + return this->at(key)->f(); +} + +template <> +bool TFAttrs::get(string key) const { + return this->at(key)->b(); +} + // TODO(jie): reorder4 & reorder2 should be merged? template void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, @@ -1703,6 +1714,60 @@ tensorflow::Status ConvertConcat(Converter& ctx, return tensorflow::Status::OK(); } +tensorflow::Status ConvertFusedBatchNorm(Converter& ctx, + tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { + TFAttrs attrs(node_def); + float epsilon = attrs.get("epsilon"); + auto data_format = attrs.get("data_format"); + if (data_format != "NCHW" ) { + return tensorflow::errors::Unimplemented( + "only data_format=NCHW is supported, at " + node_def.name()); + } + bool is_training = attrs.get("is_training"); + if (is_training) { + return tensorflow::errors::Unimplemented( + "only is_training=false is supported, at " + node_def.name()); + } + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + TRT_ShapedWeights scale_weights = inputs.at(1).weights(); + TRT_ShapedWeights offset_weights = inputs.at(2).weights(); + TRT_ShapedWeights mean_weights = inputs.at(3).weights(); + TRT_ShapedWeights variance_weights = inputs.at(4).weights(); + TRT_ShapedWeights dummy_power_weights(scale_weights.type_); + TRT_ShapedWeights combined_scale_weights = + ctx.get_temp_weights_like(scale_weights); + TRT_ShapedWeights 
combined_offset_weights = + ctx.get_temp_weights_like(offset_weights); + size_t nweight = scale_weights.count(); + if (scale_weights.type_ != tensorflow::DataType::DT_FLOAT || + offset_weights.type_ != tensorflow::DataType::DT_FLOAT || + mean_weights.type_ != tensorflow::DataType::DT_FLOAT || + variance_weights.type_ != tensorflow::DataType::DT_FLOAT) { + return tensorflow::errors::Unimplemented( + "only float32 weights data type is supported, at " + node_def.name()); + } + for (size_t i=0; i(scale_weights.GetValues()))[i]; + float offset = (static_cast(offset_weights.GetValues()))[i]; + float mean = (static_cast(mean_weights.GetValues()))[i]; + float variance = (static_cast(variance_weights.GetValues()))[i]; + float& combined_scale_ref = const_cast( + static_cast(combined_scale_weights.GetValues()))[i]; + float& combined_offset_ref = const_cast( + static_cast(combined_offset_weights.GetValues()))[i]; + combined_scale_ref = scale / sqrtf(variance + epsilon); + combined_offset_ref = offset - mean * combined_scale_ref; + } + nvinfer1::IScaleLayer* layer = ctx.network()->addScale( + *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, + combined_offset_weights, combined_scale_weights, dummy_power_weights); + nvinfer1::ITensor* output_tensor = layer->getOutput(0); + outputs->push_back(TRT_TensorOrWeights(output_tensor)); + return tensorflow::Status::OK(); +} + tensorflow::Status ConvertMatMul(Converter& ctx, tensorflow::NodeDef const& node_def, std::vector const& inputs, @@ -1827,6 +1892,8 @@ void Converter::register_op_converters() { op_registry_["ConcatV2"] = ConvertConcat; op_registry_["MatMul"] = ConvertMatMul; op_registry_["Reshape"] = ConvertReshape; + op_registry_["FusedBatchNorm"] = ConvertFusedBatchNorm; + op_registry_["FusedBatchNormV2"] = ConvertFusedBatchNorm; } } // namespace -- GitLab From 1ef6ea549a48170dd139206a4962c3c493b3edc4 Mon Sep 17 00:00:00 2001 From: Seungil You <31752931+si-you@users.noreply.github.com> Date: Wed, 21 Feb 2018 03:09:01 +0900 
Subject: [PATCH 025/884] Add clean_dep to tf_cc_test. (#17036) --- tensorflow/tensorflow.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 23d11c88ed..9b38eaddb7 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -605,7 +605,7 @@ def tf_cc_test(name, srcs=srcs + tf_binary_additional_srcs(), copts=tf_copts() + extra_copts, linkopts=select({ - "//tensorflow:android": [ + clean_dep("//tensorflow:android"): [ "-pie", ], clean_dep("//tensorflow:windows"): [], -- GitLab From e92b71e476acbe9d50048e0992ded9ba961f724c Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Wed, 21 Feb 2018 13:15:12 -0800 Subject: [PATCH 026/884] locally caching weights for calibration --- .../contrib/tensorrt/convert/convert_graph.cc | 14 +++- .../contrib/tensorrt/convert/convert_nodes.cc | 73 +++++++++++----- .../contrib/tensorrt/kernels/trt_calib_op.cc | 26 +++--- .../contrib/tensorrt/kernels/trt_engine_op.cc | 10 +-- .../tensorrt/resources/TRTInt8Calibrator.cc | 84 +++++++++++-------- .../tensorrt/resources/TRTInt8Calibrator.h | 4 +- .../contrib/tensorrt/resources/TRTResources.h | 25 +++++- 7 files changed, 162 insertions(+), 74 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 8c0aada355..b364ffc86b 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -315,13 +315,14 @@ tensorflow::Status ConvertCalibGraphToInferGraph( TF_RETURN_IF_ERROR( tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); } + graph.ToGraphDef(infer_graph); return tensorflow::Status::OK(); } tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, - size_t max_workspace_size, tensorflow::GraphDef* new_graph_def, + size_t max_workspace_size_bytes, tensorflow::GraphDef* 
new_graph_def, int precision_mode = 0) { // optimization pass tensorflow::grappler::GrapplerItem item; @@ -385,13 +386,22 @@ tensorflow::Status ConvertGraphDefToTensorRT( TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); std::unordered_map> output_edge_map; int count = 0; + float total_num_nodes_in_segments=0.; + for(auto s:segments){ + total_num_nodes_in_segments+=s.size(); + } for (const std::set& subgraph_node_names : segments) { std::set subgraph_node_ids; + size_t max_mem_per_engine=max_workspace_size_bytes* + ((float)subgraph_node_names.size()/total_num_nodes_in_segments); + std::stringstream oss; for (const string& node_name : subgraph_node_names) { + oss<<" "<id()); } + VLOG(2)<<"Subgraph nodes"< op_registry_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; - + tensorflow::trt::TRTWeightStore* weight_store_; void register_op_converters(); - std::vector get_inputs( const tensorflow::NodeDef& node_def) { std::vector inputs; @@ -432,17 +430,19 @@ class Converter { } public: - explicit Converter(nvinfer1::INetworkDefinition* trt_network) - : trt_network_(trt_network) { + explicit Converter(nvinfer1::INetworkDefinition* trt_network, + tensorflow::trt::TRTWeightStore* ws) + : trt_network_(trt_network),weight_store_(ws) { this->register_op_converters(); } - + tensorflow::trt::TRTWeightStore* weight_store(){return weight_store_;} TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 
0 means type error - temp_bufs_.push_back(std::vector(weights.size_bytes())); - weights.SetValues(temp_bufs_.back().data()); + weight_store_->store_.push_back(std::vector(weights.size_bytes())); + //temp_bufs_.push_back(std::vector(weights.size_bytes())); + weights.SetValues(weight_store_->store_.back().data()); return weights; } @@ -1010,7 +1010,7 @@ tensorflow::Status ConvertConv2DHelper( nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); - VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] + VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1]<<", " << dim_after.d[2] << ", " << dim_after.d[3]; if (data_format == "NHWC") { @@ -1319,7 +1319,14 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - weights = TRT_ShapedWeights(dtype, weights_tensor.float_val().data(), + size_t lenData=tensorflow::DataTypeSize(dtype); + for(int i=0;istore_.push_back(std::vector(lenData)); + void* dst=static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data(weights_tensor.float_val().begin(), + weights_tensor.float_val().end()); // make a local copy first to flatten + memcpy(dst,tensor_data.data(),lenData);// store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); // LOG(INFO) << " add: " << weights_tensor.float_val().data(); // LOG(INFO) << " value: " << (*weights_tensor.float_val().data()); @@ -1356,8 +1363,17 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - weights = - TRT_ShapedWeights(dtype, weights_tensor.int_val().data(), scalar_shape); + size_t lenData=tensorflow::DataTypeSize(dtype); + for(int i=0;istore_.push_back(std::vector(lenData)); + void* dst=static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data(weights_tensor.int_val().begin(), + weights_tensor.int_val().end()); // make a local copy 
first to flatten doesn't have to be contigous + memcpy(dst,tensor_data.data(),lenTensor);// store into weight store + weights = TRT_ShapedWeights(dtype, dst, + scalar_shape); } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" << node_def.name(); const auto& content = weights_tensor.tensor_content(); @@ -1965,13 +1981,14 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph &graph, } calibRes->calibrator->setDone(); - VLOG(1)<<"Waiting for calibration thread to join"; calibRes->thr->join(); delete calibRes->thr; if(!calibRes->engine){ LOG(FATAL)<<"Calibration failed!, engine is nullptr"; } - auto engine_plan_string=calibRes->engine->serialize(); + auto weight_rmgr=trt_rm->getManager("WeightStore"); + TF_CHECK_OK(weight_rmgr->Delete(res_name,res_name)); + auto engine_plan=calibRes->engine->serialize(); calibRes->engine->destroy(); calibRes->network->destroy(); calibRes->builder->destroy(); @@ -1989,6 +2006,9 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph &graph, income_edges); op_builder.Input(input_list); tensorflow::NodeDef engine_node; + const char* engine_plan_data = + static_cast(engine_plan->data()); + string engine_plan_string(engine_plan_data, engine_plan_data + engine_plan->size()); status = op_builder.Attr("serialized_engine", engine_plan_string) .Attr("input_nodes", input_names) .Attr("output_nodes", output_nodes) @@ -2017,6 +2037,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph &graph, graph.RemoveNode(it->second); } } + graph.RemoveNode(c_node); return tensorflow::Status::OK(); } @@ -2068,7 +2089,10 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { VLOG(2) << "BUILDING 4"; // Build the network - Converter converter(op_res->network); + auto weight_rmgr=trt_rmgr->getManager("WeightStore"); + auto ws=new tensorflow::trt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); + Converter 
converter(op_res->network,ws); VLOG(2) << "BUILDING 5"; std::vector input_names; @@ -2259,9 +2283,15 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( return tensorflow::errors::Internal( "Failed to create TensorRT network object"); } - + static int static_id = 0; + string engine_name = tensorflow::strings::StrCat("my_trt_op", static_id++); + auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); + auto weight_rmgr=trt_rmgr->getManager("WeightStore"); + auto ws=new tensorflow::trt::TRTWeightStore(); + TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); + // Build the network - Converter converter(trt_network.get()); + Converter converter(trt_network.get(),ws); std::vector input_names; std::vector input_dtypes; @@ -2360,8 +2390,6 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( VLOG(2) << "Finished conversion"; // TODO(sami,ben,jie): proper naming! - static int static_id = 0; - string engine_name = tensorflow::strings::StrCat("my_trt_op", static_id++); // Gather output metadata std::vector output_names; @@ -2409,8 +2437,10 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // Build the engine trt_builder->setMaxBatchSize(s.max_batch_size); trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); + VLOG(0)<<"Max batch size= "<buildCudaEngine(*converter.network())); VLOG(0) << "Built network"; + if(trt_engine.get()==nullptr){ + return tensorflow::errors::Internal("Engine building failure"); + } auto engine_plan = infer_object(trt_engine->serialize()); VLOG(0) << "Serialized engine"; const char* engine_plan_data = @@ -2426,7 +2459,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - + weight_rmgr->Delete(engine_name,engine_name); LOG(INFO) << "finished engine " << engine_name; // Build the TRT op diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index 
7cd41c4933..c6eba15711 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -48,12 +48,10 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { tensorflow::trt::TRTCalibrationResource* calibRes = nullptr; auto status = resmgr->Lookup(repo_name, repo_name, &calibRes); if (status.ok()) { - int batchSize = ctx->input(0).dim_size(0); - VLOG(2) << "SAMI Batchsize= " << batchSize; int numInputs = ctx->num_inputs(); - VLOG(2) << "SAMI numInputs= " << numInputs; - dev_tensors_.resize(numInputs); if (calibRes->calibrator == nullptr) { + dev_tensors_.resize(numInputs); + int batchSize = ctx->input(0).dim_size(0); VLOG(1) << " Constructing calibrator"; // first run for (int i = 0; i < numInputs; i++) { @@ -65,19 +63,20 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { const auto dTensor = dev_tensors_.at(i).AccessTensor(ctx); CHECK_EQ(t.TotalBytes(), dTensor->TotalBytes()); void* devAddr = nullptr; - GET_TENSOR_ADDRESS(dTensor, devAddr) + GET_TENSOR_ADDRESS(dTensor, devAddr); device_buffers_.emplace( input_names_.at(i), std::pair(devAddr, dTensor->TotalBytes())); } - calibRes->calibrator = new TRTInt8Calibrator(device_buffers_, batchSize); - calibRes->thr = new std::thread([calibRes]() { + calibRes->calibrator = new TRTInt8Calibrator(device_buffers_, batchSize,repo_name); + string label(repo_name); + calibRes->thr = new std::thread([calibRes,label]() { VLOG(0)<<"Starting calibration thread, Calibration Resource @ "<builder->setInt8Calibrator(calibRes->calibrator); calibRes->builder->setInt8Mode(true); calibRes->engine = calibRes->builder->buildCudaEngine( *calibRes->network); // will loop until we terminate calibrator - VLOG(0) << "SAMI Calibration loop terminated"; + VLOG(0) << "SAMI Calibration loop terminated "<TotalBytes()); // use the tensor so FW keeps it + if(VLOG_IS_ON(1)){ + void* devAddr = nullptr; + GET_TENSOR_ADDRESS(dTensor, devAddr); + 
if(devAddr!=device_buffers_.at(input_names_.at(i)).first){ + LOG(WARNING)<<"Device address is different!"; + } + } input_data.emplace(input_names_.at(i), data_address); ctx->set_output(i, t); } - VLOG(1) << "Filled map for sending"; + VLOG(2) << "Filled map for sending"; calibRes->calibrator->setBatch(input_data); - VLOG(1) << "Passed calibration data"; + VLOG(2) << "Passed calibration data"; } else { ctx->SetStatus(status); return; diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index e4e8ab9e0a..bab650186a 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -19,8 +19,8 @@ limitations under the License. #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" -#if GOOGLE_CUDA -#if GOOGLE_TENSORRT +//#if GOOGLE_CUDA +//#if GOOGLE_TENSORRT #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { @@ -84,7 +84,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } // int64 input_shape.dim_size(int d) // int input_shape.dims() - LOG(INFO) << "INPUT BINDING index: " << binding_index << " with name: " << input_nodes_[i]; switch (trt_engine_ptr_->getBindingDataType(binding_index)) { case nvinfer1::DataType::kFLOAT: buffers[binding_index] = (void*)(input_tensor.flat().data()); @@ -134,7 +133,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { break; } } - LOG(INFO) << "getting stream"; // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files const cudaStream_t* stream = CHECK_NOTNULL( reinterpret_cast(context->op_device_context() @@ -154,5 +152,5 @@ REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); } // namespace tensorrt } // namespace tensorflow -#endif // GOOGLE_TENSORRT -#endif // GOOGLE_CUDA +//#endif // GOOGLE_TENSORRT +//#endif // GOOGLE_CUDA diff --git 
a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc index f5dc4886af..3ab47f4176 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -4,7 +4,7 @@ #include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" -#include +#include "cuda_runtime_api.h" #include #include #include @@ -18,28 +18,30 @@ int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } TRTInt8Calibrator::TRTInt8Calibrator(const std::unordered_map< string, std::pair>& dev_buffers, - int batch_size) + int batch_size, + string engineName) : batch_size_(batch_size), done_(false), dev_buffers_(dev_buffers), - calib_running_(false){ + calib_running_(false), + engine_name_(engineName){ cudaPointerAttributes pa; int devid=-1; cudaGetDevice(&devid); VLOG(0)<<"Constructing calibrator with batch size "<& data) { - VLOG(1)<<"SAMI SAMI Waiting to set new batch"; + VLOG(1)<<"SAMI SAMI "<second; - VLOG(1)<<"cuda memcopy buff name= "<second.first; - bindings[i] = it->second.first; - float f[2]; - f[0]=3.; - f[1]=0.14159; - auto status=cudaMemcpy(f,bindings[i],sizeof(float)*2,cudaMemcpyDeviceToHost); - int devid=-1; - cudaGetDevice(&devid); - VLOG(0)<<"SAMI ORDER GETTING, Data in perm storage [0]="<second.first; + float f[2]; + f[0]=3.; + f[1]=0.14159; + auto status=cudaMemcpy(f,bindings[i],sizeof(float)*2,cudaMemcpyDeviceToHost); + if(status!=cudaSuccess){ + VLOG(0)<<"Memcopy failed!"; + } + int devid=-1; + cudaGetDevice(&devid); + VLOG(1)<<"ORDER GETTING, "<>& dev_buffers, - int batch_size); + int batch_size, + string engineName); int getBatchSize() const; bool getBatch(void* bindings[], const char* names[], int nbBindings) override; bool setBatch(const std::unordered_map &data); @@ -33,6 +34,7 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { bool done_; const std::unordered_map> dev_buffers_; std::atomic_bool 
calib_running_; + string engine_name_; }; } // namespace trt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/TRTResources.h b/tensorflow/contrib/tensorrt/resources/TRTResources.h index cd23100af8..655ff672b3 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTResources.h +++ b/tensorflow/contrib/tensorrt/resources/TRTResources.h @@ -9,6 +9,8 @@ #include #include #include "tensorrt/include/NvInfer.h" +#include +#include #include #include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" @@ -16,7 +18,6 @@ namespace tensorflow { namespace trt { - struct TRTCalibrationResource : public tensorflow::ResourceBase { TRTCalibrationResource() : calibrator(nullptr), @@ -24,7 +25,8 @@ struct TRTCalibrationResource : public tensorflow::ResourceBase { network(nullptr), engine(nullptr), logger(nullptr), - thr(nullptr) {} + thr(nullptr) + {} string DebugString() override { std::stringstream oss; #define VALID_OR_NULL(ptr) (!ptr ? "nullptr" : std::hex<<(void)ptr<> store_; + string DebugString() override { + std::stringstream oss; + size_t lenBytes = 0; + for(const auto& v:store_){ + lenBytes += v.size()*sizeof(uint8_t); + } + oss<<" Number of entries = "< Date: Wed, 21 Feb 2018 13:19:49 -0800 Subject: [PATCH 027/884] Merge test local (#17174) * Add filepaths to test_local support. PiperOrigin-RevId: 184602010 * Update local_test.sh --- tensorflow/tools/dist_test/local_test.sh | 33 ++++++++++++++---------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 7d7f92d246..b87232b0e5 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -24,19 +24,20 @@ # 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container # and run the distributed test suite. 
# -# Usage: local_test.sh +# Usage: local_test.sh # [--leave_container_running] # [--model_name ] # [--num_workers ] # [--num_parameter_servers ] # [--sync_replicas] # -# E.g., local_test.sh --model_name CENSUS_WIDENDEEP -# local_test.sh --num_workers 3 --num_parameter_servers 3 +# E.g., local_test.sh --model_name CENSUS_WIDENDEEP +# local_test.sh --num_workers 3 --num_parameter_servers 3 # # Arguments: -# -# Specify custom TensorFlow whl file URL to install in the test Docker image. +# whl_file_location: URL from which the TensorFlow whl file will be acquired. +# E.g.: https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl +# E.g.: /path/to/folder/tensorflow-0.11.0rc1-cp27-none-linux_x86_64.whl # # --leave_container_running: Do not stop the docker-in-docker container after # the termination of the tests, e.g., for debugging @@ -81,9 +82,9 @@ NUM_WORKERS=2 NUM_PARAMETER_SERVERS=2 SYNC_REPLICAS_FLAG="" -WHL_URL=${1} -if [[ -z "${WHL_URL}" ]]; then - die "whl file URL is not specified" +WHL_FILE_LOCATION=${1} +if [[ -z "${WHL_FILE_LOCATION}" ]]; then + die "whl file location is not specified" fi while true; do @@ -98,8 +99,8 @@ while true; do NUM_PARAMETER_SERVERS=$2 elif [[ $1 == "--sync_replicas" ]]; then SYNC_REPLICAS_FLAG="--sync_replicas" - elif [[ $1 == "--whl_url" ]]; then - WHL_URL=$2 + elif [[ $1 == "--whl_file_location" ]]; then + WHL_FILE_LOCATION=$2 fi shift @@ -130,15 +131,19 @@ fi # Create docker build context directory. BUILD_DIR=$(mktemp -d) echo "" -echo "Using whl file URL: ${WHL_URL}" +echo "Using whl file location: ${WHL_FILE_LOCATION}" echo "Building in temporary directory: ${BUILD_DIR}" cp -r ${DIR}/* "${BUILD_DIR}"/ || \ die "Failed to copy files to ${BUILD_DIR}" -# Download whl file into the build context directory. 
-wget -P "${BUILD_DIR}" ${WHL_URL} || \ - die "Failed to download tensorflow whl file from URL: ${WHL_URL}" +if [[ $WHL_FILE_LOCATION =~ 'http://' || $WHL_FILE_LOCATION =~ 'https://' ]]; then + # Download whl file into the build context directory. + wget -P "${BUILD_DIR}" "${WHL_FILE_LOCATION}" || \ + die "Failed to download tensorflow whl file from URL: ${WHL_FILE_LOCATION}" +else + cp "${WHL_FILE_LOCATION}" "${BUILD_DIR}" +fi # Build docker image for test. docker build ${NO_CACHE_FLAG} -t ${DOCKER_IMG_NAME} \ -- GitLab From 9cfa96fdf6cfa10e7cdd97f4dd2e0fd644fb5c02 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Wed, 21 Feb 2018 15:26:19 -0800 Subject: [PATCH 028/884] Fix name scope of generated TensorRT engine ops - These now inherit the common name scope of their constituent ops. --- .../contrib/tensorrt/convert/convert_nodes.cc | 30 ++++++++++++++++--- 1 file changed, 26 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 3a9a281a3f..1d285ce55a 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2252,6 +2252,18 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { return tensorflow::Status::OK(); } +string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { + size_t last_scope_separator = 0; + for (size_t i=0; iname(); + } + for (const tensorflow::Node* node : order) { + subgraph_name_scope = GetCommonNameScope( + subgraph_name_scope, node->name()); + } static int static_id = 0; - string engine_name = tensorflow::strings::StrCat("my_trt_op", static_id++); + // TODO(sami,ben,jie): proper naming! 
+ string engine_name = + tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op"); + engine_name = tensorflow::strings::StrCat(engine_name, static_id++); auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); auto weight_rmgr=trt_rmgr->getManager("WeightStore"); auto ws=new tensorflow::trt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); - + // Build the network Converter converter(trt_network.get(),ws); @@ -2389,8 +2413,6 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( VLOG(2) << "Finished conversion"; - // TODO(sami,ben,jie): proper naming! - // Gather output metadata std::vector output_names; std::vector output_dtypes; -- GitLab From 2956ecbb336464512df0127c6372f47ea9a1e2a7 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Wed, 21 Feb 2018 15:30:15 -0800 Subject: [PATCH 029/884] Add layout optimization for reduce ops w/ keepdims - Allows NHWC -> NCHW transposes to be propagated through reduce ops that have the attribute keepdims=true. This avoids redundant transposes at the end of some models such as Slim's resnet50. 
--- .../core/grappler/optimizers/layout_optimizer.cc | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/layout_optimizer.cc b/tensorflow/core/grappler/optimizers/layout_optimizer.cc index 5a62b77327..4342179176 100644 --- a/tensorflow/core/grappler/optimizers/layout_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/layout_optimizer.cc @@ -1789,12 +1789,18 @@ class ReduceProcessor : public AgnosticNodeProcessor { return Status::OK(); } - Status AddLayoutTransposeToOutputs() override { return Status::OK(); } + Status AddLayoutTransposeToOutputs() override { + if ((IsAlongNHW() || IsAlongHW() || IsAlongC()) && KeepDims()) { + return AgnosticNodeProcessor::AddLayoutTransposeToOutputs(); + } else { + return Status::OK(); + } + } private: bool IsReduceAxisSupported() const { return IsAlongAllFourDims() || IsAlongHWC() || - ((IsAlongNHW() || IsAlongHW() || IsAlongC()) && !KeepDims()); + IsAlongNHW() || IsAlongHW() || IsAlongC(); } bool IsAlongAxis(const std::vector& axis) const { -- GitLab From 3407102e2a6973a9504f582f6fd8b6df5b6bb63a Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 21 Feb 2018 16:54:00 -0800 Subject: [PATCH 030/884] Disabling kmeans tests for release testing on kokoro. 
(#17181) --- tensorflow/contrib/factorization/BUILD | 5 ++++- tensorflow/contrib/learn/BUILD | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/factorization/BUILD b/tensorflow/contrib/factorization/BUILD index 180f1b68f3..c56c92a0a4 100644 --- a/tensorflow/contrib/factorization/BUILD +++ b/tensorflow/contrib/factorization/BUILD @@ -223,7 +223,10 @@ py_test( srcs = ["python/ops/kmeans_test.py"], shard_count = 4, srcs_version = "PY2AND3", - tags = ["notsan"], # b/67512932 + tags = [ + "nomac", # b/73741358 + "notsan", # b/67512932 + ], deps = [ ":factorization_py", ":factorization_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO", diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 3c782b54a8..7562190eab 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -425,6 +425,7 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", + tags = ["nomac"], # b/73741358 deps = [ ":learn", "//tensorflow/python:array_ops", -- GitLab From 29a6f0c47b9e7d4b74785cc4a95890eb04aa7bbe Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 25 Nov 2017 17:37:10 -0800 Subject: [PATCH 031/884] Sanitize with clang-format -i --style=Google Signed-off-by: Yong Tang --- tensorflow/core/kernels/depthtospace_op_gpu.cu.cc | 6 ++++-- tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 2d39abce16..71ea550a4e 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -230,8 +230,10 @@ template struct functor::DepthToSpaceOpFunctor; template struct functor::DepthToSpaceOpFunctor; // Instantiate the GPU implementations for Eigen::half. 
-template struct functor::DepthToSpaceOpFunctor; -template struct functor::DepthToSpaceOpFunctor; +template struct functor::DepthToSpaceOpFunctor; +template struct functor::DepthToSpaceOpFunctor; // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::DepthToSpaceOpFunctor; diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index 8466fa192f..33cb2baa6c 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -226,8 +226,10 @@ template struct functor::SpaceToDepthOpFunctor; template struct functor::SpaceToDepthOpFunctor; // Instantiate the GPU implementations for Eigen::half. -template struct functor::SpaceToDepthOpFunctor; -template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; +template struct functor::SpaceToDepthOpFunctor; // NCHW_VECT_C with 4 x qint8 can be treated as NCHW int32. template struct functor::SpaceToDepthOpFunctor; -- GitLab From 9042cd1045e1f9436fd2cd02fdf162ea502ef342 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 22 Feb 2018 16:32:32 -0800 Subject: [PATCH 032/884] Ran clang-format --- .../contrib/tensorrt/convert/convert_graph.cc | 22 +- .../contrib/tensorrt/convert/convert_nodes.cc | 336 +++++++++--------- .../contrib/tensorrt/convert/convert_nodes.h | 20 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 45 ++- .../contrib/tensorrt/kernels/trt_calib_op.h | 42 ++- .../contrib/tensorrt/kernels/trt_engine_op.cc | 12 +- .../tensorrt/resources/TRTInt8Calibrator.cc | 146 ++++---- .../tensorrt/resources/TRTInt8Calibrator.h | 35 +- .../tensorrt/resources/TRTResourceManager.cc | 36 +- .../tensorrt/resources/TRTResourceManager.h | 20 +- .../contrib/tensorrt/resources/TRTResources.h | 69 ++-- 11 files changed, 444 insertions(+), 339 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc 
b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index b364ffc86b..23ebaf35ba 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -140,8 +140,7 @@ struct ConvertGraphParams { const std::set& subgraph_node_ids_, size_t max_batch_size_, size_t max_workspace_size_bytes_, const tensorflow::grappler::GraphProperties& graph_properties_, - std::unordered_map>* - output_edge_map_, + std::unordered_map>* output_edge_map_, int precision_mode_) : graph(graph_), output_names(output_names_), @@ -183,7 +182,7 @@ tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams& p) { } GetSubGraphOutgoingEdges(p.graph, p.subgraph_node_ids, &p.subgraph_outgoing_edges); - for (const tensorflow::Edge *edge : p.subgraph_outgoing_edges) { + for (const tensorflow::Edge* edge : p.subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } p.subgraph_outputs.reserve(subgraph_outputs_set.size()); @@ -229,7 +228,7 @@ tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { params->subgraph_inputs, params->subgraph_outputs, params->max_batch_size, params->max_workspace_size_bytes, params->graph_properties, params->output_edge_map, - &trt_node_def,params->precision_mode); + &trt_node_def, params->precision_mode); TF_RETURN_IF_ERROR(ConvertSubGraphToTensorRTNodeDef(s)); tensorflow::Status status; tensorflow::Node* trt_node = params->graph.AddNode(trt_node_def, &status); @@ -386,20 +385,21 @@ tensorflow::Status ConvertGraphDefToTensorRT( TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map)); std::unordered_map> output_edge_map; int count = 0; - float total_num_nodes_in_segments=0.; - for(auto s:segments){ - total_num_nodes_in_segments+=s.size(); + float total_num_nodes_in_segments = 0.; + for (auto s : segments) { + total_num_nodes_in_segments += s.size(); } for (const std::set& subgraph_node_names : segments) { std::set subgraph_node_ids; - size_t 
max_mem_per_engine=max_workspace_size_bytes* - ((float)subgraph_node_names.size()/total_num_nodes_in_segments); + size_t max_mem_per_engine = + max_workspace_size_bytes * + ((float)subgraph_node_names.size() / total_num_nodes_in_segments); std::stringstream oss; for (const string& node_name : subgraph_node_names) { - oss<<" "<id()); } - VLOG(2)<<"Subgraph nodes"< void reorder2(nvinfer1::DimsHW shape, T const* idata, nvinfer1::DimsHW istrides, T* odata, nvinfer1::DimsHW ostrides) { @@ -327,7 +326,8 @@ void reorder_ck_to_kc(TRT_ShapedWeights const& iweights, nvinfer1::DimsHW ostrides = {c, 1}; switch (iweights.type_) { case tensorflow::DataType::DT_FLOAT: - reorder2({k, c}, static_cast(iweights.GetValues()), istrides, + reorder2({k, c}, static_cast(iweights.GetValues()), + istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; @@ -337,7 +337,7 @@ void reorder_ck_to_kc(TRT_ShapedWeights const& iweights, } void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights, int nbGroups) { + TRT_ShapedWeights* oweights, int nbGroups) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; @@ -411,8 +411,7 @@ class Converter { * 2) Control dependency inputs contain caret at the beginning and we * remove this and annotate the edge as a control dependency. ************************************************************************/ - string name = - input_name[0] == '^' ? input_name.substr(1) : input_name; + string name = input_name[0] == '^' ? 
input_name.substr(1) : input_name; auto first = name.find_first_of(':'); if (first != string::npos && first + 2 == name.size() && name[first + 1] == '0') @@ -431,17 +430,17 @@ class Converter { public: explicit Converter(nvinfer1::INetworkDefinition* trt_network, - tensorflow::trt::TRTWeightStore* ws) - : trt_network_(trt_network),weight_store_(ws) { + tensorflow::trt::TRTWeightStore* ws) + : trt_network_(trt_network), weight_store_(ws) { this->register_op_converters(); } - tensorflow::trt::TRTWeightStore* weight_store(){return weight_store_;} + tensorflow::trt::TRTWeightStore* weight_store() { return weight_store_; } TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 0 means type error weight_store_->store_.push_back(std::vector(weights.size_bytes())); - //temp_bufs_.push_back(std::vector(weights.size_bytes())); + // temp_bufs_.push_back(std::vector(weights.size_bytes())); weights.SetValues(weight_store_->store_.back().data()); return weights; } @@ -816,12 +815,12 @@ tensorflow::Status BinaryTensorOpWeight( } else { // no broadcasting on Batch dimension; VLOG(2) << "WEIGHTS DIM: " << dims_w.nbDims - << " tensor DIM: " << dims_t.nbDims; + << " tensor DIM: " << dims_t.nbDims; if (dims_w.nbDims == dims_t.nbDims + 1) { if (dims_w.d[0] == 1) { - for (int i = 1; i < dims_w.nbDims; i++){ + for (int i = 1; i < dims_w.nbDims; i++) { dims_w.d[i - 1] = dims_w.d[i]; - } + } dims_w.nbDims--; } else { return tensorflow::errors::InvalidArgument( @@ -963,7 +962,7 @@ tensorflow::Status ConvertConv2DHelper( auto tf_stride = attrs.get>("strides"); VLOG(2) << "h_INDEX" << h_index << ", w_index " << w_index; VLOG(2) << "stride!!!: " << tf_stride[0] << tf_stride[1] << tf_stride[2] - << tf_stride[3]; + << tf_stride[3]; nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]); std::vector> padding; @@ -1010,7 +1009,7 @@ tensorflow::Status ConvertConv2DHelper( 
nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); - VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1]<<", " + VLOG(2) << "TENSOR out: " << dim_after.d[0] << ", " << dim_after.d[1] << ", " << dim_after.d[2] << ", " << dim_after.d[3]; if (data_format == "NHWC") { @@ -1041,15 +1040,14 @@ tensorflow::Status BinaryTensorOpTensor( Converter& ctx, tensorflow::NodeDef const& node_def, const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, std::vector* outputs) { - static const std::unordered_map - ops{ - {"Add", nvinfer1::ElementWiseOperation::kSUM}, - {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - // {"max", nvinfer1::ElementWiseOperation::kMAX}, - // {"min", nvinfer1::ElementWiseOperation::kMIN}, - {"Sub", nvinfer1::ElementWiseOperation::kSUB}, - {"Div", nvinfer1::ElementWiseOperation::kDIV}, - }; + static const std::unordered_map ops{ + {"Add", nvinfer1::ElementWiseOperation::kSUM}, + {"Mul", nvinfer1::ElementWiseOperation::kPROD}, + // {"max", nvinfer1::ElementWiseOperation::kMAX}, + // {"min", nvinfer1::ElementWiseOperation::kMIN}, + {"Sub", nvinfer1::ElementWiseOperation::kSUB}, + {"Div", nvinfer1::ElementWiseOperation::kDIV}, + }; // FIXME assume type matches input weights // get trt type & shape @@ -1319,15 +1317,16 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - size_t lenData=tensorflow::DataTypeSize(dtype); - for(int i=0;istore_.push_back(std::vector(lenData)); - void* dst=static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data(weights_tensor.float_val().begin(), - weights_tensor.float_val().end()); // make a local copy first to flatten - memcpy(dst,tensor_data.data(),lenData);// store into weight store - weights = TRT_ShapedWeights(dtype, dst, - scalar_shape); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + 
weights_tensor.float_val().begin(), + weights_tensor.float_val() + .end()); // make a local copy first to flatten + memcpy(dst, tensor_data.data(), lenData); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); // LOG(INFO) << " add: " << weights_tensor.float_val().data(); // LOG(INFO) << " value: " << (*weights_tensor.float_val().data()); @@ -1363,17 +1362,18 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - size_t lenData=tensorflow::DataTypeSize(dtype); - for(int i=0;istore_.push_back(std::vector(lenData)); - void* dst=static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data(weights_tensor.int_val().begin(), - weights_tensor.int_val().end()); // make a local copy first to flatten doesn't have to be contigous - memcpy(dst,tensor_data.data(),lenTensor);// store into weight store - weights = TRT_ShapedWeights(dtype, dst, - scalar_shape); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.int_val().begin(), + weights_tensor.int_val().end()); // make a local copy first to flatten + // doesn't have to be contigous + memcpy(dst, tensor_data.data(), lenTensor); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" 
<< node_def.name(); const auto& content = weights_tensor.tensor_content(); @@ -1505,7 +1505,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, nvinfer1::DimsHW pool_kernel; if (permuted_index == 1) { for (int i = 2; i < nb_dims; i++) { - if (idx_set.count(i) == 0 ) { + if (idx_set.count(i) == 0) { permuted_index = i; break; } @@ -1730,26 +1730,26 @@ tensorflow::Status ConvertConcat(Converter& ctx, return tensorflow::Status::OK(); } -tensorflow::Status ConvertFusedBatchNorm(Converter& ctx, - tensorflow::NodeDef const& node_def, - std::vector const& inputs, - std::vector* outputs) { +tensorflow::Status ConvertFusedBatchNorm( + Converter& ctx, tensorflow::NodeDef const& node_def, + std::vector const& inputs, + std::vector* outputs) { TFAttrs attrs(node_def); float epsilon = attrs.get("epsilon"); auto data_format = attrs.get("data_format"); - if (data_format != "NCHW" ) { + if (data_format != "NCHW") { return tensorflow::errors::Unimplemented( - "only data_format=NCHW is supported, at " + node_def.name()); + "only data_format=NCHW is supported, at " + node_def.name()); } bool is_training = attrs.get("is_training"); if (is_training) { return tensorflow::errors::Unimplemented( - "only is_training=false is supported, at " + node_def.name()); + "only is_training=false is supported, at " + node_def.name()); } - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); - TRT_ShapedWeights scale_weights = inputs.at(1).weights(); - TRT_ShapedWeights offset_weights = inputs.at(2).weights(); - TRT_ShapedWeights mean_weights = inputs.at(3).weights(); + nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + TRT_ShapedWeights scale_weights = inputs.at(1).weights(); + TRT_ShapedWeights offset_weights = inputs.at(2).weights(); + TRT_ShapedWeights mean_weights = inputs.at(3).weights(); TRT_ShapedWeights variance_weights = inputs.at(4).weights(); TRT_ShapedWeights dummy_power_weights(scale_weights.type_); TRT_ShapedWeights combined_scale_weights = @@ -1757,23 +1757,24 @@ 
tensorflow::Status ConvertFusedBatchNorm(Converter& ctx, TRT_ShapedWeights combined_offset_weights = ctx.get_temp_weights_like(offset_weights); size_t nweight = scale_weights.count(); - if (scale_weights.type_ != tensorflow::DataType::DT_FLOAT || - offset_weights.type_ != tensorflow::DataType::DT_FLOAT || - mean_weights.type_ != tensorflow::DataType::DT_FLOAT || + if (scale_weights.type_ != tensorflow::DataType::DT_FLOAT || + offset_weights.type_ != tensorflow::DataType::DT_FLOAT || + mean_weights.type_ != tensorflow::DataType::DT_FLOAT || variance_weights.type_ != tensorflow::DataType::DT_FLOAT) { return tensorflow::errors::Unimplemented( - "only float32 weights data type is supported, at " + node_def.name()); - } - for (size_t i=0; i(scale_weights.GetValues()))[i]; - float offset = (static_cast(offset_weights.GetValues()))[i]; - float mean = (static_cast(mean_weights.GetValues()))[i]; - float variance = (static_cast(variance_weights.GetValues()))[i]; + "only float32 weights data type is supported, at " + node_def.name()); + } + for (size_t i = 0; i < nweight; ++i) { + float scale = (static_cast(scale_weights.GetValues()))[i]; + float offset = (static_cast(offset_weights.GetValues()))[i]; + float mean = (static_cast(mean_weights.GetValues()))[i]; + float variance = + (static_cast(variance_weights.GetValues()))[i]; float& combined_scale_ref = const_cast( static_cast(combined_scale_weights.GetValues()))[i]; - float& combined_offset_ref = const_cast( + float& combined_offset_ref = const_cast( static_cast(combined_offset_weights.GetValues()))[i]; - combined_scale_ref = scale / sqrtf(variance + epsilon); + combined_scale_ref = scale / sqrtf(variance + epsilon); combined_offset_ref = offset - mean * combined_scale_ref; } nvinfer1::IScaleLayer* layer = ctx.network()->addScale( @@ -1916,124 +1917,129 @@ void Converter::register_op_converters() { tensorflow::Status GetTensorRTGraph(tensorrt::convert::SubGraphParams& s) { return tensorflow::errors::Unimplemented("Not 
implemented yet"); } -tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph &graph, - tensorflow::Node *c_node) { - const auto ndef=c_node->def(); +tensorflow::Status ConvertCalibrationNodeToEngineNode( + tensorflow::Graph& graph, tensorflow::Node* c_node) { + const auto ndef = c_node->def(); TFAttrs attrs(ndef); - std::vector segment_nodes(attrs.get>("segment_nodes")); - std::vector output_nodes(attrs.get>("segment_output_names")); - std::vector input_names(attrs.get>("input_names")); + std::vector segment_nodes( + attrs.get>("segment_nodes")); + std::vector output_nodes( + attrs.get>("segment_output_names")); + std::vector input_names( + attrs.get>("input_names")); string res_name = attrs.get("resource_name"); VLOG(1) << "Node name " << c_node->name() << " res_name " << res_name; - string engine_name="my_trt_op"; + string engine_name = "my_trt_op"; { - const auto node_id=tensorflow::str_util::Split(res_name,"_"); - engine_name+=node_id.back(); + const auto node_id = tensorflow::str_util::Split(res_name, "_"); + engine_name += node_id.back(); } - std::map nodeMaps; + std::map nodeMaps; - for(auto n: graph.op_nodes()){ - nodeMaps.insert({n->name(),n}); + for (auto n : graph.op_nodes()) { + nodeMaps.insert({n->name(), n}); } - VLOG(1)<<"Output Nodes:"; + VLOG(1) << "Output Nodes:"; std::vector out_types; std::vector out_edges; - for(auto &i : output_nodes ){ - auto node_port=tensorflow::str_util::Split(i,":"); + for (auto& i : output_nodes) { + auto node_port = tensorflow::str_util::Split(i, ":"); VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); auto out_node_name = node_port.at(0); - if(node_port.size()>1){ - VLOG(1) << "Multi port output" << node_port.at(0) << - " " << node_port.at(1) << " size=" << node_port.size(); + if (node_port.size() > 1) { + VLOG(1) << "Multi port output" << node_port.at(0) << " " + << node_port.at(1) << " size=" << node_port.size(); } - auto nodeIt=nodeMaps.find(out_node_name); - if(nodeIt!=nodeMaps.end()){ - 
tensorflow::Node* outNode=nodeIt->second; - int port=0; - if(node_port.size()==2){ - port=std::strtoul(node_port.at(1).c_str(),nullptr,10); + auto nodeIt = nodeMaps.find(out_node_name); + if (nodeIt != nodeMaps.end()) { + tensorflow::Node* outNode = nodeIt->second; + int port = 0; + if (node_port.size() == 2) { + port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); out_types.push_back(outNode->output_type(port)); - }else{ + } else { out_types.push_back(outNode->output_type(0)); } - for(auto outEdge : outNode->out_edges()){ - if(outEdge->src_output()==port){ + for (auto outEdge : outNode->out_edges()) { + if (outEdge->src_output() == port) { out_edges.push_back(outEdge); break; } } - }else{ - LOG(WARNING)<<" couldn't find output node "<getManager("TRTCalibOps"); tensorflow::trt::TRTCalibrationResource* calibRes = nullptr; auto status = resmgr->Lookup(res_name, res_name, &calibRes); - if(!status.ok() || !calibRes->calibrator){ - return tensorflow::errors::FailedPrecondition("You must run calibration"\ - " and inference conversion in the same proces"); + if (!status.ok() || !calibRes->calibrator) { + return tensorflow::errors::FailedPrecondition( + "You must run calibration" + " and inference conversion in the same proces"); } calibRes->calibrator->setDone(); calibRes->thr->join(); delete calibRes->thr; - if(!calibRes->engine){ - LOG(FATAL)<<"Calibration failed!, engine is nullptr"; + if (!calibRes->engine) { + LOG(FATAL) << "Calibration failed!, engine is nullptr"; } - auto weight_rmgr=trt_rm->getManager("WeightStore"); - TF_CHECK_OK(weight_rmgr->Delete(res_name,res_name)); - auto engine_plan=calibRes->engine->serialize(); + auto weight_rmgr = trt_rm->getManager("WeightStore"); + TF_CHECK_OK( + weight_rmgr->Delete(res_name, res_name)); + auto engine_plan = calibRes->engine->serialize(); calibRes->engine->destroy(); calibRes->network->destroy(); calibRes->builder->destroy(); - calibRes->thr= nullptr; - calibRes->engine= nullptr; - calibRes->builder= nullptr; + 
calibRes->thr = nullptr; + calibRes->engine = nullptr; + calibRes->builder = nullptr; tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector income_edges; - for(const auto in_edge : c_node->in_edges()){ - auto src=in_edge->src(); - int dest_port=in_edge->dst_input(); - income_edges.emplace_back(src->name(),in_edge->src_output(),c_node->input_type(dest_port)); + for (const auto in_edge : c_node->in_edges()) { + auto src = in_edge->src(); + int dest_port = in_edge->dst_input(); + income_edges.emplace_back(src->name(), in_edge->src_output(), + c_node->input_type(dest_port)); } tensorflow::gtl::ArraySlice input_list( income_edges); op_builder.Input(input_list); tensorflow::NodeDef engine_node; - const char* engine_plan_data = - static_cast(engine_plan->data()); - string engine_plan_string(engine_plan_data, engine_plan_data + engine_plan->size()); + const char* engine_plan_data = static_cast(engine_plan->data()); + string engine_plan_string(engine_plan_data, + engine_plan_data + engine_plan->size()); status = op_builder.Attr("serialized_engine", engine_plan_string) - .Attr("input_nodes", input_names) - .Attr("output_nodes", output_nodes) - .Attr("OutT", out_types) - .Finalize(&engine_node); - if(!status.ok()){ - LOG(ERROR)<<"Engine Node creation failed"; + .Attr("input_nodes", input_names) + .Attr("output_nodes", output_nodes) + .Attr("OutT", out_types) + .Finalize(&engine_node); + if (!status.ok()) { + LOG(ERROR) << "Engine Node creation failed"; return status; } - auto trt_engine_node=graph.AddNode(engine_node,&status); + auto trt_engine_node = graph.AddNode(engine_node, &status); TF_CHECK_OK(status); - for(size_t i=0;idst()->name() << " port " - << out_edges.at(i)->dst_input(); + for (size_t i = 0; i < out_edges.size(); i++) { + VLOG(1) << "Connecting trt_engine_node output " << i << " with " + << out_edges.at(i)->dst()->name() << " port " + << out_edges.at(i)->dst_input(); TF_RETURN_IF_ERROR(graph.UpdateEdge(trt_engine_node, i, 
out_edges.at(i)->dst(), out_edges.at(i)->dst_input())); } VLOG(1) << "Segment nodes:"; - for (auto &i : segment_nodes){ + for (auto& i : segment_nodes) { VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); - auto it=nodeMaps.find(i); - if(it!=nodeMaps.end()){ + auto it = nodeMaps.find(i); + if (it != nodeMaps.end()) { graph.RemoveNode(it->second); } } @@ -2068,7 +2074,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); auto op_res = new tensorflow::trt::TRTCalibrationResource(); - VLOG(1)<<"SAMI Creating calibresource "<Create(calib_op_name, calib_op_name, op_res)); op_res->logger = new tensorflow::tensorrt::Logger(); op_res->builder = nvinfer1::createInferBuilder(*(op_res->logger)); @@ -2089,10 +2095,10 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { VLOG(2) << "BUILDING 4"; // Build the network - auto weight_rmgr=trt_rmgr->getManager("WeightStore"); - auto ws=new tensorflow::trt::TRTWeightStore(); + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::trt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network,ws); + Converter converter(op_res->network, ws); VLOG(2) << "BUILDING 5"; std::vector input_names; @@ -2126,9 +2132,9 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); VLOG(2) << "accessing output index of: " << std::to_string(output_idx) - << ", at node: " << node_name - << "with output entry from shape_map: " - << std::to_string(op_info_vec.size()); + << ", at node: " << node_name + << "with output entry from shape_map: " + << std::to_string(op_info_vec.size()); // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; @@ -2136,7 +2142,7 @@ 
tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); + << " , size: " << op_info.shape().dim(i).size(); input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); } @@ -2162,8 +2168,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { for (const tensorflow::Node* node : order) { tensorflow::NodeDef const& node_def = node->def(); - VLOG(2) << "converting node: " << node_def.name() << " , " - << node_def.op(); + VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); TF_RETURN_IF_ERROR(converter.convert_node(node_def)); } @@ -2182,8 +2187,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { s.output_edge_map->insert( {trt_engine_op_output_idx == 0 - ? engine_name - : engine_name + ":" + std::to_string(trt_engine_op_output_idx), + ? engine_name + : engine_name + ":" + std::to_string(trt_engine_op_output_idx), {output_idx, tensor_name}}); trt_engine_op_output_idx++; if (output_idx != 0) @@ -2198,7 +2203,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { nvinfer1::ITensor* tensor = tensor_or_weights.tensor(); if (!tensor) { return tensorflow::errors::NotFound("Output tensor not found: " + - tensor_name); + tensor_name); } converter.network()->markOutput(*tensor); tensorflow::DataType tf_dtype = node->output_type(output_idx); @@ -2226,7 +2231,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { input_names.at(i), output_idx, input_dtypes.at(i)); VLOG(1) << calib_op_name << " input " << i << " = " << input_names.at(i) << ":" << output_idx - <<" dType= "<< tensorflow::DataTypeString(input_dtypes.at(i)); + << " dType= " << tensorflow::DataTypeString(input_dtypes.at(i)); income_edges.push_back(incoming_edge); } tensorflow::gtl::ArraySlice input_list( @@ 
-2241,9 +2246,9 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { LOG(INFO) << "finished op preparation"; auto status = op_builder.Attr("segment_nodes", segment_names) - .Attr("input_names",input_names) + .Attr("input_names", input_names) .Attr("segment_output_names", output_names) - .Attr("resource_name",calib_op_name) + .Attr("resource_name", calib_op_name) .Finalize(s.trt_node); LOG(INFO) << status.ToString(); @@ -2254,7 +2259,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { size_t last_scope_separator = 0; - for (size_t i=0; iname(); } for (const tensorflow::Node* node : order) { - subgraph_name_scope = GetCommonNameScope( - subgraph_name_scope, node->name()); + subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); } static int static_id = 0; // TODO(sami,ben,jie): proper naming! @@ -2310,12 +2314,12 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op"); engine_name = tensorflow::strings::StrCat(engine_name, static_id++); auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); - auto weight_rmgr=trt_rmgr->getManager("WeightStore"); - auto ws=new tensorflow::trt::TRTWeightStore(); + auto weight_rmgr = trt_rmgr->getManager("WeightStore"); + auto ws = new tensorflow::trt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); // Build the network - Converter converter(trt_network.get(),ws); + Converter converter(trt_network.get(), ws); std::vector input_names; std::vector input_dtypes; @@ -2333,7 +2337,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( tensor_name = tensor_name + ":" + std::to_string(output_idx); VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name - << " idx: " << output_idx; + << " idx: " << output_idx; auto shape_inference_node_name = node_name; auto 
shape_inference_output_idx = output_idx; @@ -2343,7 +2347,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; } VLOG(2) << "shapeinference name: " << shape_inference_node_name - << " idx: " << shape_inference_output_idx; + << " idx: " << shape_inference_output_idx; if (!s.graph_properties.HasOutputProperties(shape_inference_node_name)) return tensorflow::errors::Internal("failed to find input node: " + @@ -2380,7 +2384,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i - << " , size: " << op_info.shape().dim(i).size(); + << " , size: " << op_info.shape().dim(i).size(); input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); } @@ -2427,11 +2431,13 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( s.output_edge_map->insert( {trt_engine_op_output_idx == 0 ? engine_name - : tensorflow::strings::StrCat(engine_name,":",trt_engine_op_output_idx), + : tensorflow::strings::StrCat(engine_name, ":", + trt_engine_op_output_idx), {output_idx, tensor_name}}); trt_engine_op_output_idx++; if (output_idx != 0) - tensorflow::strings::StrAppend(&tensor_name, ":" ,std::to_string(output_idx)); + tensorflow::strings::StrAppend(&tensor_name, ":", + std::to_string(output_idx)); VLOG(2) << "Output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); @@ -2455,14 +2461,14 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( VLOG(2) << "Finished output"; // TODO(jie): static_id is not thread safe. 
- // Build the engine trt_builder->setMaxBatchSize(s.max_batch_size); trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); - VLOG(0)<<"Max batch size= "<buildCudaEngine(*converter.network())); VLOG(0) << "Built network"; - if(trt_engine.get()==nullptr){ + if (trt_engine.get() == nullptr) { return tensorflow::errors::Internal("Engine building failure"); } auto engine_plan = infer_object(trt_engine->serialize()); @@ -2481,7 +2487,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - weight_rmgr->Delete(engine_name,engine_name); + weight_rmgr->Delete(engine_name, + engine_name); LOG(INFO) << "finished engine " << engine_name; // Build the TRT op @@ -2489,8 +2496,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( std::vector income_edges; VLOG(2) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { - VLOG(2) << "input edges: " << std::to_string(i) << " " - << input_names.at(i); + VLOG(2) << "input edges: " << std::to_string(i) << " " << input_names.at(i); int output_idx = s.input_inds.at(i).second; // we wired up the input here already, it is redundant to do it again in // ConvertSubGraphToTensorRT(convert_graph.cc) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 49e060a553..7e9f8a9b4b 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -35,16 +35,14 @@ namespace tensorrt { namespace convert { struct SubGraphParams { - SubGraphParams(tensorflow::Graph& graph_, - const std::set& subgraph_node_ids_, - const std::vector>& input_inds_, - const std::vector>& output_inds_, - size_t max_batch_size_, size_t max_workspace_size_bytes_, - const tensorflow::grappler::GraphProperties& graph_properties_, - std::unordered_map>* - output_edge_map_, - tensorflow::NodeDef* trt_node_, - 
int precision_mode_ = 0) + SubGraphParams( + tensorflow::Graph& graph_, const std::set& subgraph_node_ids_, + const std::vector>& input_inds_, + const std::vector>& output_inds_, + size_t max_batch_size_, size_t max_workspace_size_bytes_, + const tensorflow::grappler::GraphProperties& graph_properties_, + std::unordered_map>* output_edge_map_, + tensorflow::NodeDef* trt_node_, int precision_mode_ = 0) : graph(graph_), subgraph_node_ids(subgraph_node_ids_), input_inds(input_inds_), @@ -68,7 +66,7 @@ struct SubGraphParams { const int precision_mode; }; -tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams ¶ms); +tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); tensorflow::Status InjectCalibrationNode(SubGraphParams& params); tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, tensorflow::Node* c_node); diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index c6eba15711..d0c7e00428 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -1,10 +1,19 @@ -// -// Created by skama on 1/25/18. -// +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ #include "tensorflow/contrib/tensorrt/kernels/trt_calib_op.h" -#include "tensorrt/include/NvInfer.h" -#include #include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" #include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" #include "tensorflow/contrib/tensorrt/resources/TRTResources.h" @@ -14,6 +23,11 @@ #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" +#include "tensorrt/include/NvInfer.h" + namespace tensorflow { namespace trt { TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context) { @@ -68,15 +82,17 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { input_names_.at(i), std::pair(devAddr, dTensor->TotalBytes())); } - calibRes->calibrator = new TRTInt8Calibrator(device_buffers_, batchSize,repo_name); + calibRes->calibrator = + new TRTInt8Calibrator(device_buffers_, batchSize, repo_name); string label(repo_name); - calibRes->thr = new std::thread([calibRes,label]() { - VLOG(0)<<"Starting calibration thread, Calibration Resource @ "<thr = new std::thread([calibRes, label]() { + VLOG(0) << "Starting calibration thread, Calibration Resource @ " + << calibRes; calibRes->builder->setInt8Calibrator(calibRes->calibrator); calibRes->builder->setInt8Mode(true); calibRes->engine = calibRes->builder->buildCudaEngine( *calibRes->network); // will loop until we terminate calibrator - VLOG(0) << "SAMI Calibration loop terminated "<TotalBytes()); // use the tensor so FW keeps it - if(VLOG_IS_ON(1)){ + if (VLOG_IS_ON(1)) { void* devAddr = nullptr; GET_TENSOR_ADDRESS(dTensor, devAddr); - if(devAddr!=device_buffers_.at(input_names_.at(i)).first){ - LOG(WARNING)<<"Device address is different!"; + if (devAddr != device_buffers_.at(input_names_.at(i)).first) { + LOG(WARNING) << "Device address is different!"; } } 
input_data.emplace(input_names_.at(i), data_address); @@ -110,8 +126,11 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { }; #undef TYPECASE +#undef GET_TENSOR_ADDRESS REGISTER_KERNEL_BUILDER(Name("TRTCalibOp").Device(DEVICE_GPU), TRTCalibOp); } // namespace trt -} // namespace tensorflow \ No newline at end of file +} // namespace tensorflow +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h index 792e7bae4c..7423223582 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h @@ -1,23 +1,36 @@ -// -// Created by skama on 1/25/18. -// +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. -#ifndef TFGITHUB_TRT_CALIB_OP_H -#define TFGITHUB_TRT_CALIB_OP_H +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_TRT_CALIB_OP_H +#define TENSORFLOW_CONTRIB_TENSORRT_TRT_CALIB_OP_H #include #include -#include -#include #include +#include +#include #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_shape.h" - +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT namespace tensorflow { namespace trt { -class TRTCalibOp: public OpKernel { -public: +// TODO(sami): Convert this to async kernel! 
+class TRTCalibOp : public OpKernel { + public: explicit TRTCalibOp(OpKernelConstruction* context); void Compute(OpKernelContext* context) override; @@ -29,8 +42,9 @@ public: std::vector shapes_; std::unordered_map> device_buffers_; std::vector dev_tensors_; - }; -} -} -#endif //TFGITHUB_TRT_CALIB_OP_H +} // namespace trt +} // namespace tensorflow +#endif +#endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_TRT_CALIB_OP_H diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index bab650186a..f8360ac547 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -26,8 +26,8 @@ limitations under the License. namespace tensorflow { static ::tensorflow::tensorrt::Logger gLogger; -using IRuntime=nvinfer1::IRuntime; -using Dims=nvinfer1::Dims; +using IRuntime = nvinfer1::IRuntime; +using Dims = nvinfer1::Dims; namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { @@ -50,8 +50,7 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { cudaSetDevice(gpu_id); int device; cudaGetDevice(&device); - if (gpu_id != device) - LOG(FATAL) << "set device failed!"; + if (gpu_id != device) LOG(FATAL) << "set device failed!"; IRuntime* infer = nvinfer1::createInferRuntime(gLogger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( @@ -77,7 +76,7 @@ void TRTEngineOp::Compute(OpKernelContext* context) { num_batch = input_shape.dim_size(0); if (num_batch > trt_engine_ptr_->getMaxBatchSize()) LOG(FATAL) << "input tensor batch larger than max_batch_size: " - << trt_engine_ptr_->getMaxBatchSize(); + << trt_engine_ptr_->getMaxBatchSize(); } else if (num_batch != input_shape.dim_size(0)) { valid = false; break; @@ -141,7 +140,8 @@ void TRTEngineOp::Compute(OpKernelContext* context) { ->CudaStreamMemberHack())); // TODO(jie): trt enqueue does not return error - auto 
ret=trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], *stream, nullptr); + auto ret = trt_execution_context_ptr_->enqueue(num_batch, &buffers[0], + *stream, nullptr); VLOG(2) << "enqueue returns: " << ret; // sync should be done by TF. // cudaStreamSynchronize(*stream); diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc index 3ab47f4176..57677a327d 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc @@ -1,13 +1,24 @@ -// -// Created by skama on 1/24/18. -// +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ #include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" -#include "cuda_runtime_api.h" #include #include #include +#include "cuda_runtime_api.h" #include "tensorflow/core/platform/logging.h" @@ -16,80 +27,90 @@ namespace trt { // set the batch size before constructing the thread to execute engine int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } -TRTInt8Calibrator::TRTInt8Calibrator(const std::unordered_map< - string, std::pair>& dev_buffers, - int batch_size, - string engineName) +TRTInt8Calibrator::TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engineName) : batch_size_(batch_size), done_(false), dev_buffers_(dev_buffers), calib_running_(false), - engine_name_(engineName){ + engine_name_(engineName) { cudaPointerAttributes pa; - int devid=-1; + int devid = -1; cudaGetDevice(&devid); - VLOG(0)<<"Constructing calibrator with batch size "<& data) { - VLOG(1)<<"SAMI SAMI "<second; - if(VLOG_IS_ON(1)){ + if (VLOG_IS_ON(1)) { cudaPointerAttributes pa; - VLOG(1)<<"cuda memcopy "<second.first; - if (VLOG_IS_ON(1)){ - VLOG(1)<<"Setting buffer "<< i <<" named=" << names[i] <<" @ "<second.first; + if (VLOG_IS_ON(1)) { + VLOG(1) << "Setting buffer " << i << " named=" << names[i] << " @ " + << it->second.first; float f[2]; - f[0]=3.; - f[1]=0.14159; - auto status=cudaMemcpy(f,bindings[i],sizeof(float)*2,cudaMemcpyDeviceToHost); - if(status!=cudaSuccess){ - VLOG(0)<<"Memcopy failed!"; + f[0] = 3.; + f[1] = 0.14159; + auto status = + cudaMemcpy(f, bindings[i], sizeof(float) * 2, cudaMemcpyDeviceToHost); + if (status != cudaSuccess) { + VLOG(0) << "Memcopy failed!"; } - int devid=-1; + int devid = -1; cudaGetDevice(&devid); - VLOG(1)<<"ORDER GETTING, "< #include #include #include #include "tensorflow/core/platform/mutex.h" +#include "tensorrt/include/NvInfer.h" namespace tensorflow { namespace trt { struct TRTInt8Calibrator : 
public nvinfer1::IInt8EntropyCalibrator { public: - TRTInt8Calibrator(const std::unordered_map< - string, std::pair>& dev_buffers, - int batch_size, - string engineName); + TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engineName); int getBatchSize() const; bool getBatch(void* bindings[], const char* names[], int nbBindings) override; - bool setBatch(const std::unordered_map &data); - void setDone(){done_=true;} - const void *readCalibrationCache(std::size_t &length) override; - void writeCalibrationCache(const void *ptr, std::size_t length) override; + bool setBatch(const std::unordered_map& data); + void setDone() { done_ = true; } + const void* readCalibrationCache(std::size_t& length) override; + void writeCalibrationCache(const void* ptr, std::size_t length) override; ~TRTInt8Calibrator(); + private: int batch_size_; tensorflow::mutex cond_mtx_; diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc index 62d27c1104..3eea23b1b8 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc +++ b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc @@ -1,21 +1,33 @@ -// -// Created by skama on 1/23/18. -// +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ #include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" #include "tensorflow/core/platform/default/logging.h" - -std::shared_ptr tensorflow::trt::TRTResourceManager::getManager(const std::string &mgr_name) { - // mutex is held for lookup only. Most instantiations where mutex will be held longer - // will be during op creation and should be ok. +std::shared_ptr +tensorflow::trt::TRTResourceManager::getManager(const std::string& mgr_name) { + // mutex is held for lookup only. Most instantiations where mutex will be held + // longer will be during op creation and should be ok. tensorflow::mutex_lock lock(map_mutex_); - auto s=managers_.find(mgr_name); - if(s==managers_.end()){ - auto it=managers_.emplace(mgr_name,std::make_shared(mgr_name)); - VLOG(0)<<"Returning a new manager "<(mgr_name)); + VLOG(0) << "Returning a new manager " << mgr_name; return it.first->second; } - VLOG(1)<<"Returning old manager "<second; } diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h index e3b50093e7..d482c7d526 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h +++ b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h @@ -1,6 +1,17 @@ -// -// Created by skama on 1/23/18. -// +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ #ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCEMANAGER_H_ @@ -24,8 +35,7 @@ class TRTResourceManager { return instance_; } // returns a manager for given op, if it doesn't exists it creates one - std::shared_ptr getManager( - const string& op_name); + std::shared_ptr getManager(const string& op_name); private: std::unordered_map> diff --git a/tensorflow/contrib/tensorrt/resources/TRTResources.h b/tensorflow/contrib/tensorrt/resources/TRTResources.h index 655ff672b3..20ccf0f9d4 100644 --- a/tensorflow/contrib/tensorrt/resources/TRTResources.h +++ b/tensorflow/contrib/tensorrt/resources/TRTResources.h @@ -1,20 +1,31 @@ -// -// Created by skama on 1/23/18. -// +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ #ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ #define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ -#include -#include -#include "tensorrt/include/NvInfer.h" -#include #include +#include +#include #include +#include #include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" #include "tensorflow/core/framework/resource_mgr.h" +#include "tensorrt/include/NvInfer.h" namespace tensorflow { namespace trt { @@ -25,52 +36,52 @@ struct TRTCalibrationResource : public tensorflow::ResourceBase { network(nullptr), engine(nullptr), logger(nullptr), - thr(nullptr) - {} + thr(nullptr) {} string DebugString() override { std::stringstream oss; -#define VALID_OR_NULL(ptr) (!ptr ? "nullptr" : std::hex<<(void)ptr<> store_; string DebugString() override { std::stringstream oss; size_t lenBytes = 0; - for(const auto& v:store_){ - lenBytes += v.size()*sizeof(uint8_t); + for (const auto& v : store_) { + lenBytes += v.size() * sizeof(uint8_t); } - oss<<" Number of entries = "< Date: Thu, 22 Feb 2018 07:21:39 -0800 Subject: [PATCH 033/884] C++ gradients for MaxPool3D, AvgPool and AvgPool3D Resolves tensorflow/tensorflow#17195 --- tensorflow/cc/gradients/nn_grad.cc | 64 +++++++++++++++++++++++++ tensorflow/cc/gradients/nn_grad_test.cc | 44 +++++++++++++++-- 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 13a3bba5e6..63a67f09f6 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -196,6 +196,70 @@ Status MaxPoolGradV2Helper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("MaxPoolV2", MaxPoolGradV2Helper); +Status MaxPool3DGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + std::vector ksize; + std::vector 
strides; + string padding; + string data_format; + auto attrs = op.output(0).node()->attrs(); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "ksize", &ksize)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "data_format", &data_format)); + MaxPool3DGrad::Attrs grad_attrs; + grad_attrs.DataFormat(data_format); + auto dx = MaxPool3DGrad(scope, op.input(0), op.output(0), grad_inputs[0], + ksize, strides, padding, grad_attrs); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("MaxPool3D", MaxPool3DGradHelper); + +Status AvgPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + std::vector ksize; + std::vector strides; + string padding; + string data_format; + auto attrs = op.output(0).node()->attrs(); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "ksize", &ksize)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "data_format", &data_format)); + internal::AvgPoolGrad::Attrs grad_attrs; + grad_attrs.DataFormat(data_format); + auto dx = + internal::AvgPoolGrad(scope, Shape(scope, op.input(0)), grad_inputs[0], + ksize, strides, padding, grad_attrs); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("AvgPool", AvgPoolGradHelper); + +Status AvgPool3DGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + std::vector ksize; + std::vector strides; + string padding; + string data_format; + auto attrs = op.output(0).node()->attrs(); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "ksize", &ksize)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "data_format", 
&data_format)); + AvgPool3DGrad::Attrs grad_attrs; + grad_attrs.DataFormat(data_format); + auto dx = AvgPool3DGrad(scope, Shape(scope, op.input(0)), grad_inputs[0], + ksize, strides, padding, grad_attrs); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("AvgPool3D", AvgPool3DGradHelper); + Status LRNGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs){ diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index 0cfe5f6e3c..c4eba7ecb0 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -31,8 +31,11 @@ using ops::Elu; using ops::L2Loss; using ops::LogSoftmax; using ops::LRN; +using ops::AvgPool; +using ops::AvgPool3D; using ops::MaxPool; using ops::MaxPoolV2; +using ops::MaxPool3D; using ops::Placeholder; using ops::Relu; using ops::Relu6; @@ -70,9 +73,9 @@ class NNGradTest : public ::testing::Test { // Sets tensor with random values, ensuring that the max value is largest by // a reasonable amount. - // This is an issue for MaxPool and MaxPoolV2, in which perturbations by the - // numeric gradient computation in the gradient checker can change the max - // value if values are too close together. + // This is an issue for MaxPool, MaxPoolV2 and MaxPool3D, in which + // perturbations by the numeric gradient computation in the gradient checker + // can change the max value if values are too close together. template void SetRandomValuesWithBumpedMax(Tensor* tensor) { auto tensor_flat = tensor->flat(); @@ -203,6 +206,41 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) { RunTest(x, x_init_value, y, y_shape); } +TEST_F(NNGradTest, MaxPool3DGradHelper) { + TensorShape x_shape({1, 3, 3, 3, 1}); + TensorShape y_shape({1, 1, 1, 1, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Setup window and strides so that we only do one MaxPool3D. 
+ const std::vector ksize{1, 3, 3, 3, 1}; + const std::vector strides{1, 3, 3, 3, 1}; + auto y = MaxPool3D(scope_, x, ksize, strides, "VALID"); + Tensor x_init_value = Tensor(DT_FLOAT, x_shape); + SetRandomValuesWithBumpedMax(&x_init_value); + RunTest(x, x_init_value, y, y_shape); +} + +TEST_F(NNGradTest, AvgPoolGradHelper) { + TensorShape x_shape({1, 2, 2, 1}); + TensorShape y_shape({1, 1, 1, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Setup window and strides so that we only do one AvgPool. + const std::vector ksize{1, 2, 2, 1}; + const std::vector strides{1, 2, 2, 1}; + auto y = AvgPool(scope_, x, ksize, strides, "SAME"); + RunTest(x, x_shape, y, y_shape); +} + +TEST_F(NNGradTest, AvgPool3DGradHelper) { + TensorShape x_shape({1, 3, 3, 3, 1}); + TensorShape y_shape({1, 1, 1, 1, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Setup window and strides so that we only do one AvgPool3D. + const std::vector ksize{1, 3, 3, 3, 1}; + const std::vector strides{1, 3, 3, 3, 1}; + auto y = AvgPool3D(scope_, x, ksize, strides, "SAME"); + RunTest(x, x_shape, y, y_shape); +} + TEST_F(NNGradTest, LRN){ TensorShape x_shape({1, 1, 2, 1}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); -- GitLab From 45de35b19a1d1edc8e04ca9603f12df5d7924d26 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 10:29:58 -0800 Subject: [PATCH 034/884] Remove redundant line which is almost a duplicate of one a few lines below in datasets_quickstart document. 
PiperOrigin-RevId: 186788306 --- tensorflow/docs_src/get_started/datasets_quickstart.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/docs_src/get_started/datasets_quickstart.md b/tensorflow/docs_src/get_started/datasets_quickstart.md index bc69773d21..c972e5e555 100644 --- a/tensorflow/docs_src/get_started/datasets_quickstart.md +++ b/tensorflow/docs_src/get_started/datasets_quickstart.md @@ -265,9 +265,6 @@ ds = tf.data.TextLineDataset(train_path).skip(1) ### Build a csv line parser -Ultimately we will need to parse each of the lines in the dataset, to -produce the necessary `(features, label)` pairs. - We will start by building a function to parse a single line. The following `iris_data.parse_line` function accomplishes this task using the -- GitLab From 95fa8b31cc98bac0e9ce84721e4e8535befb1193 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Fri, 23 Feb 2018 11:58:10 -0800 Subject: [PATCH 035/884] [XLA] Internal change. PiperOrigin-RevId: 186802115 --- tensorflow/compiler/xla/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 1958e5abf6..97abf217d7 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1011,6 +1011,7 @@ xla_test( shard_count = 40, tags = [ "enable_for_xla_interpreter", + "optonly", ], deps = [ "//tensorflow/compiler/xla:array2d", -- GitLab From f412e5c71781003d2408c1082220dee6b140f632 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Fri, 23 Feb 2018 12:18:31 -0800 Subject: [PATCH 036/884] Cleanup for graph functions. (1) Define constants for the names of Arg and Retval ops, and use them in various places. (2) Change the signature and documentation for `BuildControlFlow` to reflect the fact that the supplied Graph is not mutated. (3) Expose the FunctionLibraryRuntime's DeviceMgr, in preparation for multi-device functions. 
PiperOrigin-RevId: 186804968 --- tensorflow/core/common_runtime/function.cc | 11 ++----- tensorflow/core/framework/function.cc | 8 ++--- tensorflow/core/framework/function.h | 10 +++++++ tensorflow/core/graph/control_flow.cc | 11 +++---- tensorflow/core/graph/control_flow.h | 16 +++++----- tensorflow/core/kernels/function_ops.cc | 34 ++++++++++++---------- 6 files changed, 49 insertions(+), 41 deletions(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index b941819838..3e937ceb64 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -42,11 +42,8 @@ limitations under the License. namespace tensorflow { // A few string constant used throughout this module. -// -// TODO(zhifengc): Dedup some of these constants into -// framework/function.h -static constexpr const char* const kArgOp = "_Arg"; -static constexpr const char* const kRetOp = "_Retval"; +static constexpr const char* const kArgOp = FunctionLibraryDefinition::kArgOp; +static constexpr const char* const kRetOp = FunctionLibraryDefinition::kRetOp; static constexpr const char* const kGradientOp = FunctionLibraryDefinition::kGradientOp; static constexpr const char* const kNodeLabel = "Func"; @@ -177,6 +174,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { } Device* device() override { return device_; } + const DeviceMgr* device_mgr() const override { return device_mgr_; } Env* env() override { return env_; } int graph_def_version() override { return graph_def_version_; } @@ -1580,9 +1578,6 @@ Status FunctionDefToBodyHelper( // Call BuildControlFlowInfo to validate that this function body has // well-formed control flow. - // NOTE(skyewm): this is usually done in Partition(), but we don't partition - // function bodies. This should be removed if function bodies ever go through - // the Partition() path. 
std::vector dummy; TF_RETURN_IF_ERROR(BuildControlFlowInfo(graph.get(), &dummy)); diff --git a/tensorflow/core/framework/function.cc b/tensorflow/core/framework/function.cc index eae8e6c3c1..3e7b89d4eb 100644 --- a/tensorflow/core/framework/function.cc +++ b/tensorflow/core/framework/function.cc @@ -168,7 +168,7 @@ class FunctionInstantiationHelper { strings::StrAppend(&name, "_", i); } NodeDef* gnode = AddNode(name); - gnode->set_op("_Arg"); + gnode->set_op(FunctionLibraryDefinition::kArgOp); AddAttr("T", dtypes[i], gnode); AddAttr("index", arg_index, gnode); result_.arg_types.push_back(dtypes[i]); @@ -328,7 +328,7 @@ class FunctionInstantiationHelper { strings::StrAppend(&name, "_", i); } NodeDef* gnode = AddNode(name); - gnode->set_op("_Retval"); + gnode->set_op(FunctionLibraryDefinition::kRetOp); AddInput(nodes_.size() - 1, item->nid, item->idx + i); AddAttr("T", dtypes[i], gnode); AddAttr("index", (*ret_index)++, gnode); @@ -558,9 +558,9 @@ string Print(gtl::ArraySlice nodes) { std::vector ret; std::vector body; for (const NodeDef* n : nodes) { - if (n->op() == "_Arg") { + if (n->op() == FunctionLibraryDefinition::kArgOp) { arg.push_back(n); - } else if (n->op() == "_Retval") { + } else if (n->op() == FunctionLibraryDefinition::kRetOp) { ret.push_back(n); } else { body.push_back(n); diff --git a/tensorflow/core/framework/function.h b/tensorflow/core/framework/function.h index e27001133b..e00399f97d 100644 --- a/tensorflow/core/framework/function.h +++ b/tensorflow/core/framework/function.h @@ -344,6 +344,11 @@ class FunctionLibraryDefinition : public OpRegistryInterface { Status LookUp(const string& op_type_name, const OpRegistrationData** op_reg_data) const override; + // Ops created for function arguments bear the name given by `kArgOp`; those + // created for return values bear the name given by `kRetOp`. 
+ static constexpr const char* const kArgOp = "_Arg"; + static constexpr const char* const kRetOp = "_Retval"; + static constexpr const char* const kGradientOp = "SymbolicGradient"; static constexpr const char* const kFuncAttr = "f"; @@ -404,6 +409,8 @@ struct FunctionBody; // Forward declare. Defined in common_runtime/device.h class Device; +// Forward declare. Defined in common_runtime/device_mgr.h +class DeviceMgr; class FunctionLibraryRuntime { public: @@ -518,6 +525,9 @@ class FunctionLibraryRuntime { // Returns the device on which the function executes. virtual Device* device() = 0; + // Get the DeviceMgr from which the device was obtained. + virtual const DeviceMgr* device_mgr() const = 0; + // Returns the function library definition that backs this runtime. // NOTE(mrry): The returned library definition is the default function library // for this runtime. The runtime may instantiate functions from separate diff --git a/tensorflow/core/graph/control_flow.cc b/tensorflow/core/graph/control_flow.cc index db6683d1e7..30ff19cd7e 100644 --- a/tensorflow/core/graph/control_flow.cc +++ b/tensorflow/core/graph/control_flow.cc @@ -24,23 +24,24 @@ limitations under the License. 
namespace tensorflow { -Status BuildControlFlowInfo(Graph* g, std::vector* info) { +Status BuildControlFlowInfo(const Graph* g, + std::vector* info) { info->clear(); info->resize(g->num_node_ids()); std::vector parent_nodes; parent_nodes.resize(g->num_node_ids()); - Node* src_node = g->source_node(); + const Node* src_node = g->source_node(); ControlFlowInfo& src_info = (*info)[src_node->id()]; src_info.frame = src_node; src_info.parent_frame = src_node; string frame_name; - std::deque ready; + std::deque ready; ready.push_back(src_node); while (!ready.empty()) { - Node* curr_node = ready.front(); + const Node* curr_node = ready.front(); ready.pop_front(); const ControlFlowInfo& curr_info = (*info)[curr_node->id()]; const Node* frame = curr_info.frame; @@ -56,7 +57,7 @@ Status BuildControlFlowInfo(Graph* g, std::vector* info) { } for (const Edge* out_edge : curr_node->out_edges()) { - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); int out_id = out->id(); ControlFlowInfo* out_info = &(*info)[out_id]; const Node* out_parent = out_info->parent_frame; diff --git a/tensorflow/core/graph/control_flow.h b/tensorflow/core/graph/control_flow.h index 372044f538..79e2be0d4b 100644 --- a/tensorflow/core/graph/control_flow.h +++ b/tensorflow/core/graph/control_flow.h @@ -30,14 +30,14 @@ struct ControlFlowInfo { string frame_name; // frame name of a node }; -// Assign to each node the name of the frame and the level it belongs to. -// We check the well-formedness of the graph: All inputs to a node must -// come from the same frame and have the same "static" iteration level. -// `info` is cleared and populated by this function. -// NOTE(yuanbyu): For now, we require all sends/recvs have iteration level -// 0. This essentially means there can't be multiple serial Nexts in -// an iteration, which all sane front-ends should satisfy. 
-Status BuildControlFlowInfo(Graph* g, std::vector* info); +// Clear and populate `info` with each node's frame and the level it belongs to. +// We check the well-formedness of the graph: All inputs to a node must come +// from the same frame and have the same "static" iteration level. +// +// NOTE(yuanbyu): For now, we require all sends/recvs have iteration level 0. +// This essentially means there can't be multiple serial Nexts in an iteration, +// which all sane front-ends should satisfy. +Status BuildControlFlowInfo(const Graph* g, std::vector* info); } // namespace tensorflow diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index 9d4bc35ba8..a094ebe5e2 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -32,7 +32,9 @@ limitations under the License. namespace tensorflow { -static const char* const kGradientOp = "SymbolicGradient"; +static const char* const kArgOp = FunctionLibraryDefinition::kArgOp; +static const char* const kRetOp = FunctionLibraryDefinition::kRetOp; +static const char* const kGradientOp = FunctionLibraryDefinition::kGradientOp; class ArgOp : public OpKernel { public: @@ -89,26 +91,25 @@ class RetvalOp : public OpKernel { TF_DISALLOW_COPY_AND_ASSIGN(RetvalOp); }; -REGISTER_SYSTEM_KERNEL_BUILDER(Name("_Arg").Device(DEVICE_CPU), ArgOp); -REGISTER_SYSTEM_KERNEL_BUILDER(Name("_Retval").Device(DEVICE_CPU), RetvalOp); +REGISTER_SYSTEM_KERNEL_BUILDER(Name(kArgOp).Device(DEVICE_CPU), ArgOp); +REGISTER_SYSTEM_KERNEL_BUILDER(Name(kRetOp).Device(DEVICE_CPU), RetvalOp); #if TENSORFLOW_USE_SYCL #define REGISTER(type) \ REGISTER_KERNEL_BUILDER( \ - Name("_Arg").Device(DEVICE_SYCL).TypeConstraint("T"), ArgOp); + Name(kArgOp).Device(DEVICE_SYCL).TypeConstraint("T"), ArgOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) -TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Arg") +TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kArgOp) .Device(DEVICE_SYCL) 
.HostMemory("output") .TypeConstraint("T"), ArgOp); #undef REGISTER -#define REGISTER(type) \ - REGISTER_KERNEL_BUILDER( \ - Name("_Retval").Device(DEVICE_SYCL).TypeConstraint("T"), \ - RetvalOp); +#define REGISTER(type) \ + REGISTER_KERNEL_BUILDER( \ + Name(kRetOp).Device(DEVICE_SYCL).TypeConstraint("T"), RetvalOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) -TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Retval") +TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp) .Device(DEVICE_SYCL) .HostMemory("input") .TypeConstraint("T"), @@ -118,16 +119,16 @@ TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Retval") #define REGISTER(type) \ REGISTER_KERNEL_BUILDER( \ - Name("_Arg").Device(DEVICE_GPU).TypeConstraint("T"), ArgOp); + Name(kArgOp).Device(DEVICE_GPU).TypeConstraint("T"), ArgOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) -TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Arg") +TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kArgOp) .Device(DEVICE_GPU) .HostMemory("output") .TypeConstraint("T"), ArgOp); #undef REGISTER -REGISTER_KERNEL_BUILDER(Name("_Arg") +REGISTER_KERNEL_BUILDER(Name(kArgOp) .Device(DEVICE_GPU) .HostMemory("output") .TypeConstraint("T"), @@ -135,9 +136,9 @@ REGISTER_KERNEL_BUILDER(Name("_Arg") #define REGISTER(type) \ REGISTER_KERNEL_BUILDER( \ - Name("_Retval").Device(DEVICE_GPU).TypeConstraint("T"), RetvalOp); + Name(kRetOp).Device(DEVICE_GPU).TypeConstraint("T"), RetvalOp); TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER) -TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name("_Retval") +TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(kRetOp) .Device(DEVICE_GPU) .HostMemory("input") .TypeConstraint("T"), @@ -287,7 +288,8 @@ REGISTER_KERNEL_BUILDER(Name(kGradientOp).Device(DEVICE_SYCL), class RemoteCallOp : public AsyncOpKernel { public: explicit RemoteCallOp(OpKernelConstruction* ctx) : AsyncOpKernel(ctx) { - OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); + OP_REQUIRES_OK(ctx, + 
ctx->GetAttr(FunctionLibraryDefinition::kFuncAttr, &func_)); } ~RemoteCallOp() override {} -- GitLab From ee333be5d16ae39029f9c58a989a84089ffadb5d Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Fri, 23 Feb 2018 12:24:19 -0800 Subject: [PATCH 037/884] [TF:XLA] Fix a bug where executor's device_ordinal should be passed to AllocateShapedBuffer. Also enable C64 type for interpreter device. PiperOrigin-RevId: 186805709 --- tensorflow/compiler/jit/BUILD | 7 ++++++- tensorflow/compiler/jit/xla_interpreter_device.cc | 4 ++-- tensorflow/compiler/xla/service/interpreter/executable.cc | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index a711319607..af259e0564 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -102,12 +102,17 @@ cc_library( cc_library( name = "xla_interpreter_device", srcs = ["xla_interpreter_device.cc"], + visibility = [":friends"], deps = [ + ":jit_compilation_passes", ":xla_device", "//tensorflow/compiler/jit/kernels:xla_launch_op", "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/tf2xla/kernels:xla_ops", + "//tensorflow/compiler/xla/service:interpreter_plugin", # buildcleaner: keep + "//tensorflow/core:lib", ], - alwayslink = True, + alwayslink = 1, ) cc_library( diff --git a/tensorflow/compiler/jit/xla_interpreter_device.cc b/tensorflow/compiler/jit/xla_interpreter_device.cc index 2614deefd8..a329451b14 100644 --- a/tensorflow/compiler/jit/xla_interpreter_device.cc +++ b/tensorflow/compiler/jit/xla_interpreter_device.cc @@ -25,8 +25,8 @@ namespace tensorflow { const char* const DEVICE_XLA_INTERPRETER = "XLA_INTERPRETER"; const char* const DEVICE_INTERPRETER_XLA_JIT = "XLA_INTERPRETER_JIT"; -constexpr std::array kExecAllTypes = { - {DT_INT32, DT_FLOAT, DT_BOOL, DT_DOUBLE, DT_INT64}}; +constexpr std::array kExecAllTypes = { + {DT_INT32, DT_INT64, DT_FLOAT, DT_DOUBLE, DT_COMPLEX64, DT_BOOL}}; class XlaInterpreterDeviceFactory 
: public DeviceFactory { public: diff --git a/tensorflow/compiler/xla/service/interpreter/executable.cc b/tensorflow/compiler/xla/service/interpreter/executable.cc index 0cb9b5d810..883063d0f0 100644 --- a/tensorflow/compiler/xla/service/interpreter/executable.cc +++ b/tensorflow/compiler/xla/service/interpreter/executable.cc @@ -93,7 +93,7 @@ StatusOr> InterpreterExecutable::ExecuteOnStream( TF_ASSIGN_OR_RETURN(std::unique_ptr result, transfer_manager->AllocateShapedBuffer( result_literal->shape(), run_options->allocator(), - run_options->device_ordinal())); + executor->device_ordinal())); TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice( executor, *result_literal, *result)); -- GitLab From d0aaeae4ce79c0982c8a8894d3f87d3adae06683 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 12:24:53 -0800 Subject: [PATCH 038/884] Add usage example to KMeans Estimator documentation. PiperOrigin-RevId: 186805772 --- .../factorization/python/ops/kmeans.py | 61 +++++++++++++++++-- 1 file changed, 57 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/factorization/python/ops/kmeans.py b/tensorflow/contrib/factorization/python/ops/kmeans.py index c861cfff54..7319eaa7de 100644 --- a/tensorflow/contrib/factorization/python/ops/kmeans.py +++ b/tensorflow/contrib/factorization/python/ops/kmeans.py @@ -61,8 +61,8 @@ class _LossRelativeChangeHook(session_run_hook.SessionRunHook): loss = run_values.results assert loss is not None if self._prev_loss: - relative_change = (abs(loss - self._prev_loss) / - (1 + abs(self._prev_loss))) + relative_change = ( + abs(loss - self._prev_loss) / (1 + abs(self._prev_loss))) if relative_change < self._tolerance: run_context.request_stop() self._prev_loss = loss @@ -233,7 +233,57 @@ class _ModelFn(object): # TODO(agarwal,ands): support sharded input. class KMeansClustering(estimator.Estimator): - """An Estimator for K-Means clustering.""" + """An Estimator for K-Means clustering. 
+ + Example: + ``` + import numpy as np + import tensorflow as tf + + num_points = 100 + dimensions = 2 + points = np.random.uniform(0, 1000, [num_points, dimensions]) + + def input_fn(): + return tf.train.limit_epochs( + tf.convert_to_tensor(points, dtype=tf.float32), num_epochs=1) + + num_clusters = 5 + kmeans = tf.contrib.factorization.KMeansClustering( + num_clusters=num_clusters, use_mini_batch=False) + + # train + num_iterations = 10 + previous_centers = None + for _ in xrange(num_iterations): + kmeans.train(input_fn) + cluster_centers = kmeans.cluster_centers() + if previous_centers is not None: + print 'delta:', cluster_centers - previous_centers + previous_centers = cluster_centers + print 'score:', kmeans.score(input_fn) + print 'cluster centers:', cluster_centers + + # map the input points to their clusters + cluster_indices = list(kmeans.predict_cluster_index(input_fn)) + for i, point in enumerate(points): + cluster_index = cluster_indices[i] + center = cluster_centers[cluster_index] + print 'point:', point, 'is in cluster', cluster_index, 'centered at', center + ``` + + The `SavedModel` saved by the `export_savedmodel` method does not include the + cluster centers. However, the cluster centers may be retrieved by the + latest checkpoint saved during training. Specifically, + ``` + kmeans.cluster_centers() + ``` + is equivalent to + ``` + tf.train.load_variable( + kmeans.model_dir, KMeansClustering.CLUSTER_CENTERS_VAR_NAME) + ``` + """ # Valid values for the distance_metric constructor argument. SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE @@ -253,6 +303,9 @@ class KMeansClustering(estimator.Estimator): CLUSTER_INDEX = 'cluster_index' ALL_DISTANCES = 'all_distances' + # Variable name used by cluster_centers(). 
+ CLUSTER_CENTERS_VAR_NAME = clustering_ops.CLUSTERS_VAR_NAME + def __init__(self, num_clusters, model_dir=None, @@ -406,4 +459,4 @@ class KMeansClustering(estimator.Estimator): def cluster_centers(self): """Returns the cluster centers.""" - return self.get_variable_value(clustering_ops.CLUSTERS_VAR_NAME) + return self.get_variable_value(KMeansClustering.CLUSTER_CENTERS_VAR_NAME) -- GitLab From 75af2e0afeb30325e2e0d37e30054e67fde43707 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 23 Feb 2018 13:13:25 -0800 Subject: [PATCH 039/884] [XLA] Fix BF16 propagation pass to produce matching fusion root and output. Previously, the propagation pass might produce different precision in the fused computation's root than the fusion itself, when the fused root doesn't define a buffer. Add explicit converts at such fusion roots. PiperOrigin-RevId: 186812368 --- tensorflow/compiler/xla/service/BUILD | 2 + .../xla/service/bfloat16_propagation.cc | 207 ++++++++++++++---- .../xla/service/bfloat16_propagation.h | 11 + .../xla/service/bfloat16_propagation_test.cc | 60 ++++- tensorflow/compiler/xla/service/hlo_dce.cc | 2 +- 5 files changed, 233 insertions(+), 49 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 37ca1b893a..e6a6e54927 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -126,7 +126,9 @@ cc_library( ":bfloat16_support", ":hlo", ":hlo_dataflow_analysis", + ":hlo_dce", ":hlo_pass", + ":tuple_simplifier", "//tensorflow/compiler/xla:shape_tree", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index 9246cb25d2..6145c690b9 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -17,8 +17,10 @@ limitations under the License.
#include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_dce.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/tuple_simplifier.h" #include "tensorflow/compiler/xla/shape_tree.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/core/lib/gtl/cleanup.h" @@ -229,55 +231,10 @@ bool BFloat16Propagation::InstructionIsCandidateForBF16Output( return true; } -// The algorithm first does a forward pass (parameters to root) to determine a -// set of instructions to consider using bfloat16, then does a backward pass to -// determine the precisions of those instructions according to the need of -// their users. -StatusOr BFloat16Propagation::Run(HloModule* module) { - TF_ASSIGN_OR_RETURN(dataflow_, HloDataflowAnalysis::Run(*module)); - +Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( + HloModule* module) { std::list computations_topological_order = module->MakeComputationPostOrder(); - // The first step is a forward pass (parameters to root), where we determine - // the potential candidate instructions to use bfloat16 in the outputs that - // are not likely to cause overhead from extra explicit conversions. This is - // done forwardly because we determine whether an HLO is a candidate partially - // based on whether its operands are candidates. - for (auto computation : computations_topological_order) { - for (auto inst : computation->MakeInstructionPostOrder()) { - if (InstructionIsCandidateForBF16Output(inst)) { - consider_using_bfloat16_.insert(inst); - } - } - } - - // The second step is a backward pass (root to parameters), where we modify - // the precisions of the instructions identified in the first step when - // feasible. 
This is done backwardly because we determine the precision of an - // HLO's output based on how it is later used. - // - // The precision of an instruction is determined by its users, so we do the - // propagation in reverse topological order. - for (auto comp_it = computations_topological_order.rbegin(); - comp_it != computations_topological_order.rend(); ++comp_it) { - if ((*comp_it)->IsFusionComputation()) { - // Fusion computations are handled when visiting the fusion instruction. - continue; - } - auto insts = (*comp_it)->MakeInstructionPostOrder(); - for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { - DetermineAndMutateInstructionPrecision(*inst_it, - /*skip_parameters=*/true); - } - } - - if (!changed_) { - return false; - } - - // It's possible that an instruction does not define a buffer, but the - // defining instruction's shape has changed. So we need to adjust the output - // shapes of instructions according to the HLO values they refer to. for (auto comp_it = computations_topological_order.rbegin(); comp_it != computations_topological_order.rend(); ++comp_it) { auto insts = (*comp_it)->MakeInstructionPostOrder(); @@ -328,6 +285,162 @@ StatusOr BFloat16Propagation::Run(HloModule* module) { } } } + + // We could have changed a fusion computation's root shape to have a different + // precision than the fusion node's output, if the fusion root does not + // define a buffer (e.g., a tuple). Now we add conversions after such fusion + // roots to make them match the fusion output. If the fusion output is a + // (possibly nested) tuple, we first create get-tuple-elements, then convert + // the unmatching leaf nodes, and finally create a new tuple as the fusion + // computation's root. If tuples and get-tuple-elements are created, we will + // run tuple simplifier and dead code elimination at the end (dead code is not + // allowed in fusion computation). 
E.g., + // + // (1) (2) (3) + // a b a b a b + // |\ | |\ | |\ | + // \ add -> |add -> | add + // \ | \ | convert | + // tuple tuple \ | + // / \ tuple + // gte gte + // | | + // convert | + // \ / + // tuple + // (1) a is F32 but tuple is BF16 + // (2) after adding conversion + // (3) after tuple simplifier and DCE. + bool needs_tuple_simplifier = false; + for (auto computation : computations_topological_order) { + auto insts = computation->MakeInstructionPostOrder(); + for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { + auto hlo = *inst_it; + if (hlo->opcode() != HloOpcode::kFusion) { + continue; + } + auto fusion_computation = hlo->fused_instructions_computation(); + auto fusion_root = fusion_computation->root_instruction(); + if (ShapeUtil::Compatible(fusion_root->shape(), hlo->shape())) { + continue; + } + ShapeTree converted_outputs(hlo->shape()); + // Iterate through nodes in the shape tree in pre-order and initialize + // each non-root node with a corresponding get-tuple-element. For a leaf + // node, if its shape does not match the fusion output, create a + // conversion node to overwrite the node value. 
+ for (auto it = converted_outputs.begin(); it != converted_outputs.end(); + ++it) { + ShapeIndex output_index = it->first; + HloInstruction*& output = it->second; + const Shape subshape = + ShapeUtil::GetSubshape(hlo->shape(), output_index); + if (output_index.empty()) { + output = fusion_root; + } else { + ShapeIndex parent_index = output_index; + parent_index.pop_back(); + output = fusion_computation->AddInstruction( + HloInstruction::CreateGetTupleElement( + subshape, converted_outputs.element(parent_index), + output_index.back())); + } + if (ShapeUtil::IsTuple(subshape)) { + continue; + } + if (!ShapeUtil::Compatible( + subshape, + ShapeUtil::GetSubshape(fusion_root->shape(), output_index))) { + output = fusion_computation->AddInstruction( + HloInstruction::CreateConvert(subshape, output)); + } + } + // Iterate through nodes in the shape tree in reverse pre-order and create + // a tuple instruction for each non-leaf node where the elements are the + // values of its child nodes. + for (auto it = converted_outputs.rbegin(); it != converted_outputs.rend(); + ++it) { + ShapeIndex output_index = it->first; + HloInstruction*& output = it->second; + const Shape& subshape = + ShapeUtil::GetSubshape(hlo->shape(), output_index); + if (!ShapeUtil::IsTuple(subshape)) { + continue; + } + std::vector elements( + ShapeUtil::TupleElementCount(subshape)); + ShapeIndex child_index = output_index; + for (int64 i = 0; i < elements.size(); ++i) { + child_index.push_back(i); + elements[i] = converted_outputs.element(child_index); + child_index.pop_back(); + } + output = fusion_computation->AddInstruction( + HloInstruction::CreateTuple(elements)); + } + fusion_computation->set_root_instruction(converted_outputs.element({})); + needs_tuple_simplifier |= ShapeUtil::IsTuple(hlo->shape()); + } + } + if (needs_tuple_simplifier) { + TupleSimplifier tuple_simplifier; + TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + HloDCE dce; + TF_RETURN_IF_ERROR(dce.Run(module).status()); 
+ } + return Status::OK(); +} + +// The algorithm first does a forward pass (parameters to root) to determine a +// set of instructions to consider using bfloat16, then does a backward pass to +// determine the precisions of those instructions according to the need of +// their users. +StatusOr BFloat16Propagation::Run(HloModule* module) { + TF_ASSIGN_OR_RETURN(dataflow_, HloDataflowAnalysis::Run(*module)); + + std::list computations_topological_order = + module->MakeComputationPostOrder(); + // The first step is a forward pass (parameters to root), where we determine + // the potential candidate instructions to use bfloat16 in the outputs that + // are not likely to cause overhead from extra explicit conversions. This is + // done forwardly because we determine whether an HLO is a candidate partially + // based on whether its operands are candidates. + for (auto computation : computations_topological_order) { + for (auto inst : computation->MakeInstructionPostOrder()) { + if (InstructionIsCandidateForBF16Output(inst)) { + consider_using_bfloat16_.insert(inst); + } + } + } + + // The second step is a backward pass (root to parameters), where we modify + // the precisions of the instructions identified in the first step when + // feasible. This is done backwardly because we determine the precision of an + // HLO's output based on how it is later used. + // + // The precision of an instruction is determined by its users, so we do the + // propagation in reverse topological order. + for (auto comp_it = computations_topological_order.rbegin(); + comp_it != computations_topological_order.rend(); ++comp_it) { + if ((*comp_it)->IsFusionComputation()) { + // Fusion computations are handled when visiting the fusion instruction. 
+ continue; + } + auto insts = (*comp_it)->MakeInstructionPostOrder(); + for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { + DetermineAndMutateInstructionPrecision(*inst_it, + /*skip_parameters=*/true); + } + } + + if (!changed_) { + return false; + } + + // It's possible that an instruction does not define a buffer, but the + // defining instruction's shape has changed. So we need to adjust the output + // shapes of instructions according to the HLO values they refer to. + TF_RETURN_IF_ERROR(ResolveInconsistencyOfAliasingBuffers(module)); return true; } diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h index aa81dde3b0..ccf77d7b4e 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.h +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h @@ -94,10 +94,21 @@ class BFloat16Propagation : public HloPassInterface { // Special handling in the mutation pass for fusion computations. void DetermineAndMutateFusionComputationPrecision(HloInstruction* fusion); + // *************************** + // Functions called by the final inconsistency resolving pass. + + // Adjusts the output shapes of HloInstructions such that if two + // HloInstructions have aliasing buffers in their outputs, they must have the + // same precision. + Status ResolveInconsistencyOfAliasingBuffers(HloModule* module); + // Makes the fusion parameters match the precision of the actual parameters // passed to the fusion node. void AdjustFusionParameters(HloInstruction* fusion); + // *************************** + // Functions called and state used by two or more passes. + // Returns whether all uses of the given HloInstruction can consume BF16 // input. 
bool AllUsersConsumeBF16(const HloInstruction& hlo, diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc index 4c86c6b26e..2047e2053a 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc @@ -68,7 +68,7 @@ class BFloat16PropagationTest : public HloTestBase { // Returns whether the given HloInstruction's output element type is BF16 or // the only use of it is converting to BF16. - bool OutputsBF16(HloInstruction* inst) { + bool OutputsBF16(const HloInstruction* inst) { if (inst->shape().element_type() == BF16) { return true; } @@ -287,6 +287,64 @@ TEST_F(BFloat16PropagationTest, PropagateThroughFusion) { EXPECT_TRUE(OutputsBF16(b_f1)); } +// Tests that if 1) the root instruction of a fusion is a tuple, 2) the fusion +// outputs are only used by a dot, and 3) one element of the tuple is used by +// an add in the fusion computation, then the propagation pass should create a +// convert in the fusion computation to keep the add's operand in F32 but change +// the fusion output to BF16. 
E.g., the following fusion computation +// (F32, F32) fusion_computation(F32 a, F32 b) +// = tuple(F32 a, F32 add(F32 a, F32 b)) +// will be changed to +// (BF16, BF16) fusion_computation(F32 a, F32 b) +// = tuple(BF16 convert(a), BF16 add(F32 a, F32 b)) +TEST_F(BFloat16PropagationTest, ConvertTupleFusionElementIfUsedByAdd) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + + HloInstruction* param = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param")); + HloInstruction* add = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param, param)); + + auto builder_f = HloComputation::Builder("fusion0"); + HloInstruction* a_f = + builder_f.AddInstruction(HloInstruction::CreateParameter(0, shape, "a")); + HloInstruction* b_f = + builder_f.AddInstruction(HloInstruction::CreateParameter(1, shape, "b")); + HloInstruction* add_f = builder_f.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, a_f, b_f)); + HloInstruction* tuple_f = + builder_f.AddInstruction(HloInstruction::CreateTuple({a_f, add_f})); + auto comp_f = module->AddEmbeddedComputation(builder_f.Build()); + auto fusion = builder.AddInstruction(HloInstruction::CreateFusion( + tuple_f->shape(), HloInstruction::FusionKind::kCustom, {add, add}, + comp_f)); + + HloInstruction* gte0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, fusion, 0)); + HloInstruction* gte1 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, fusion, 1)); + HloInstruction* dot = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, gte0, gte1)); + + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + + EXPECT_EQ(computation->root_instruction(), dot); + EXPECT_TRUE(OutputsBF16(gte0)); + EXPECT_TRUE(OutputsBF16(gte1)); + EXPECT_FALSE(OutputsBF16(a_f)); + 
EXPECT_FALSE(OutputsBF16(b_f)); + EXPECT_TRUE(OutputsBF16(add_f)); + auto new_fusion_root = comp_f->root_instruction(); + EXPECT_EQ(new_fusion_root->opcode(), HloOpcode::kTuple); + EXPECT_EQ(new_fusion_root->operand(1), add_f); + EXPECT_EQ(new_fusion_root->operand(0)->opcode(), HloOpcode::kConvert); + EXPECT_TRUE(OutputsBF16(new_fusion_root->operand(0))); +} + // A select over tuples does not define the leaf buffers, so the types in // on_true and on_false must match, so that as long as one of them is F32, the // other must be F32 as well. diff --git a/tensorflow/compiler/xla/service/hlo_dce.cc b/tensorflow/compiler/xla/service/hlo_dce.cc index 1e5f0f797a..fcd723af14 100644 --- a/tensorflow/compiler/xla/service/hlo_dce.cc +++ b/tensorflow/compiler/xla/service/hlo_dce.cc @@ -40,7 +40,7 @@ StatusOr HloDCE::Run(HloModule* module) { VLOG(2) << "Before dce:"; XLA_VLOG_LINES(2, module->ToString()); - for (auto* computation : module->MakeNonfusionComputations()) { + for (auto* computation : module->MakeComputationPostOrder()) { std::unordered_set live_instructions; TF_RETURN_IF_ERROR(computation->root_instruction()->Accept( [&live_instructions](HloInstruction* instruction) { -- GitLab From 0b4fdf183a020ea3daf9a54501434038082c198b Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 23 Feb 2018 14:06:49 -0800 Subject: [PATCH 040/884] Respects some form of log_device_placement in eager. 
PiperOrigin-RevId: 186820292 --- tensorflow/c/eager/c_api.cc | 4 ++++ tensorflow/c/eager/c_api_internal.h | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 98ef6f0d0a..cc318c3878 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -802,6 +802,10 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } if (kernel == nullptr) { const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); + if (ctx->log_device_placement) { + LOG(INFO) << "Executing op " << ndef.op() << " in device " + << device->name(); + } kernel = new tensorflow::KernelAndDevice(ctx->rendezvous); // Knowledge of the implementation of Init (and in-turn // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 7b9f1db02e..3356054cd0 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -50,7 +50,9 @@ struct TFE_Context { rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( session->device_mgr, opts.session_options.options.env, - TF_GRAPH_DEF_VERSION, &func_lib_def, {})) {} + TF_GRAPH_DEF_VERSION, &func_lib_def, {})), + log_device_placement( + opts.session_options.options.config.log_device_placement()) {} const TFE_ContextDevicePlacementPolicy policy; @@ -88,6 +90,8 @@ struct TFE_Context { std::atomic should_store_metadata{false}; tensorflow::mutex metadata_mu; tensorflow::RunMetadata run_metadata GUARDED_BY(metadata_mu); + + const bool log_device_placement; }; struct TFE_TensorHandle { -- GitLab From eba67dea5a5e83e2bc49a40202233823b7ea9973 Mon Sep 17 00:00:00 2001 From: Noah Eisen Date: Fri, 23 Feb 2018 14:08:57 -0800 Subject: [PATCH 041/884] Automated g4 rollback of changelist 185688704 PiperOrigin-RevId: 186820593 --- 
tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc | 2 +- tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc index 2ed07e3669..bb14e0197b 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_cache.cc @@ -34,7 +34,7 @@ namespace { class GrpcWorkerCache : public WorkerCachePartial { public: // TODO(ncteisen): consider adding a config var or flag for this - static constexpr const size_t kGrpcWorkerCacheThreadCount = 2; + static constexpr const size_t kGrpcWorkerCacheThreadCount = 8; explicit GrpcWorkerCache(GrpcChannelCache* channel_cache, WorkerInterface* local_worker, diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc index 1beb198732..b20e744a97 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_worker_service.cc @@ -52,7 +52,7 @@ namespace { class GrpcWorkerService : public AsyncServiceInterface { // TODO(ncteisen): consider adding a config var or flag for this - static constexpr const size_t kGrpcWorkerServiceThreadCount = 2; + static constexpr const size_t kGrpcWorkerServiceThreadCount = 8; public: GrpcWorkerService(GrpcWorker* worker, ::grpc::ServerBuilder* builder) -- GitLab From a8213e7d032e676b3135f1ac8ec019f86f7fcd18 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 23 Feb 2018 14:22:06 -0800 Subject: [PATCH 042/884] Preserve user placement as much as possible when optimizing the graph PiperOrigin-RevId: 186822511 --- tensorflow/python/grappler/tf_optimizer.i | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/grappler/tf_optimizer.i b/tensorflow/python/grappler/tf_optimizer.i index 
1b657983a4..de9326ccfc 100644 --- a/tensorflow/python/grappler/tf_optimizer.i +++ b/tensorflow/python/grappler/tf_optimizer.i @@ -100,6 +100,7 @@ PyObject* TF_OptimizeGraph( tensorflow::grappler::ItemConfig item_config; item_config.inline_functions = false; item_config.apply_optimizations = false; + item_config.ignore_user_placement = false; std::unique_ptr grappler_item = tensorflow::grappler::GrapplerItemFromMetaGraphDef(graph_id, metagraph, item_config); -- GitLab From bca04a3181d23211b6646021dd971932317bc962 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 14:22:53 -0800 Subject: [PATCH 043/884] * CUB updated to 1.8.0 * updated ShuffleIndex because of API change PiperOrigin-RevId: 186822637 --- .../core/kernels/reduction_gpu_kernels.cu.h | 4 ++-- tensorflow/workspace.bzl | 11 ++++----- third_party/cub/BUILD | 0 .../cub/fix_compilation_in_clang.patch | 23 ------------------- 4 files changed, 6 insertions(+), 32 deletions(-) delete mode 100644 third_party/cub/BUILD delete mode 100644 third_party/cub/fix_compilation_in_clang.patch diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 15ae4c1fc5..9237fa51d8 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -280,8 +280,8 @@ __global__ void ColumnReduceMax16ColumnsKernel( const int rows_in_this_warp = min(rows_per_warp, num_rows - start_row_warp); // not the most efficient way to do this sum for (int i = 1; i < rows_in_this_warp; ++i) { - value_type tmp = - cub::ShuffleIndex(sum, threadIdx.x + i * num_cols, 32, 0xffffffff); + value_type tmp = cub::ShuffleIndex<32, value_type>( + sum, static_cast(threadIdx.x + i * num_cols), 0xffffffff); if (lane < num_cols) sum = op(sum, tmp); } diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 2b370ffbac..d6ac7be8b5 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ 
-666,15 +666,12 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "cub_archive", urls = [ - "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip", - "https://github.com/NVlabs/cub/archive/1.7.4.zip", + "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip", + "https://github.com/NVlabs/cub/archive/1.8.0.zip", ], - sha256 = "20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31", - strip_prefix = "cub-1.7.4", + sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", + strip_prefix = "cub-1.8.0", build_file = str(Label("//third_party:cub.BUILD")), - # TODO: remove the patch when upstream fix is accepted and released. - # PR with a fix: https://github.com/NVlabs/cub/pull/125 - patch_file = str(Label("//third_party/cub:fix_compilation_in_clang.patch")), ) tf_http_archive( diff --git a/third_party/cub/BUILD b/third_party/cub/BUILD deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/third_party/cub/fix_compilation_in_clang.patch b/third_party/cub/fix_compilation_in_clang.patch deleted file mode 100644 index 384e674f20..0000000000 --- a/third_party/cub/fix_compilation_in_clang.patch +++ /dev/null @@ -1,23 +0,0 @@ -From 565b77f7c82048871a4d5e3e506dc663d53cd469 Mon Sep 17 00:00:00 2001 -From: Ilya Biryukov -Date: Fri, 26 Jan 2018 18:46:06 +0100 -Subject: [PATCH] Added missing 'template' keyword. - -To unbreak compilation with clang. 
---- - cub/device/dispatch/dispatch_radix_sort.cuh | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/device/dispatch/dispatch_radix_sort.cuh -index 7fbc621f..f622e212 100644 ---- a/cub/device/dispatch/dispatch_radix_sort.cuh -+++ b/cub/device/dispatch/dispatch_radix_sort.cuh -@@ -104,7 +104,7 @@ __global__ void DeviceRadixSortUpsweepKernel( - CTA_SYNC(); - - // Write out digit counts (striped) -- upsweep.ExtractCounts(d_spine, gridDim.x, blockIdx.x); -+ upsweep.template ExtractCounts(d_spine, gridDim.x, blockIdx.x); - } - - -- GitLab From ab6da3024642429367302d6d2623d57beba9b20b Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 23 Feb 2018 14:29:41 -0800 Subject: [PATCH 044/884] Make it easier to debug @assert_no_garbage_created unit test failures Prints a bunch of information about each object. PiperOrigin-RevId: 186823593 --- tensorflow/python/framework/test_util.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index ad9b1291f0..e1c37a52c6 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -506,6 +506,30 @@ def assert_no_garbage_created(f): previous_garbage = len(gc.garbage) f(self, **kwargs) gc.collect() + if len(gc.garbage) > previous_garbage: + logging.error( + "The decorated test created work for Python's garbage collector, " + "likely due to a reference cycle. 
New objects in cycle(s):") + for i, obj in enumerate(gc.garbage[previous_garbage:]): + try: + logging.error( + "Object %d of %d" % (i, len(gc.garbage) - previous_garbage)) + def _safe_object_str(obj): + return "<%s %d>" % (obj.__class__.__name__, id(obj)) + logging.error(" Object type: %s" % (_safe_object_str(obj),)) + logging.error(" Referrer types: %s" % ( + ', '.join([_safe_object_str(ref) + for ref in gc.get_referrers(obj)]),)) + logging.error(" Referent types: %s" % ( + ', '.join([_safe_object_str(ref) + for ref in gc.get_referents(obj)]),)) + logging.error(" Object attribute names: %s" % (dir(obj),)) + logging.error(" Object __str__:") + logging.error(obj) + logging.error(" Object __repr__:") + logging.error(repr(obj)) + except Exception: + logging.error("(Exception while printing object)") # This will fail if any garbage has been created, typically because of a # reference cycle. self.assertEqual(previous_garbage, len(gc.garbage)) -- GitLab From 7134e84a3dcf2e18e98e4ccc1498e4b4f41de014 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 23 Feb 2018 14:38:37 -0800 Subject: [PATCH 045/884] Make tf.size() with optimize=True encode 0 if any dimension is 0. 
PiperOrigin-RevId: 186824964 --- tensorflow/python/ops/array_ops.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 08db8a17b5..b3020efc9a 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -401,8 +401,11 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32): else: input_tensor = ops.convert_to_tensor(input) input_shape = input_tensor.get_shape() - if optimize and input_shape.is_fully_defined(): - return constant(input_shape.num_elements(), out_type, name=name) + if optimize: + if input_shape.is_fully_defined(): + return constant(input_shape.num_elements(), out_type, name=name) + if input_shape.dims and any(dim == 0 for dim in input_shape.dims): + return constant(0, out_type, name=name) return gen_array_ops.size(input, name=name, out_type=out_type) -- GitLab From 9d2499fd757120a8d23d800b8fcd00a30a3d7420 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 15:06:46 -0800 Subject: [PATCH 046/884] Eager/C: Add a TF_Status argument to a couple of functions. 
PiperOrigin-RevId: 186829318 --- tensorflow/c/eager/c_api.cc | 12 +++- tensorflow/c/eager/c_api.h | 8 ++- tensorflow/c/eager/c_api_test.cc | 6 +- tensorflow/python/eager/pywrap_tensor.cc | 89 +++++++++++++++++------- 4 files changed, 81 insertions(+), 34 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index cc318c3878..f615e3f11d 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -154,16 +154,22 @@ TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h) { return static_cast(h->t.dtype()); } -int TFE_TensorHandleNumDims(TFE_TensorHandle* h) { return h->t.dims(); } +int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status) { + status->status = tensorflow::Status::OK(); + return h->t.dims(); +} -int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index) { +int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, + TF_Status* status) { + status->status = tensorflow::Status::OK(); return h->t.dim_size(dim_index); } -const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h) { +const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) { // TODO(apassos) this will be potentially incorrect in the distributed case as // our local device will have a name which depends on the ClusterSpec and // hence will require the context to resolve. + status->status = tensorflow::Status::OK(); return (h->d == nullptr) ? 
"/job:localhost/replica:0/task:0/device:CPU:0" : h->d->name().c_str(); } diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 7a321b54da..90cfb7500e 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -119,11 +119,13 @@ TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status); TF_CAPI_EXPORT extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); TF_CAPI_EXPORT extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h); -TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h); +TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h, + TF_Status* status); TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, - int dim_index); + int dim_index, + TF_Status* status); TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceName( - TFE_TensorHandle* h); + TFE_TensorHandle* h, TF_Status* status); TF_CAPI_EXPORT extern TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status); diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 4a3ecbc0ab..00fb7e68d0 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -932,7 +932,8 @@ TEST(CAPI, Variables) { ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); ASSERT_EQ(1, num_retvals); EXPECT_EQ(TF_FLOAT, TFE_TensorHandleDataType(value_handle)); - EXPECT_EQ(0, TFE_TensorHandleNumDims(value_handle)); + EXPECT_EQ(0, TFE_TensorHandleNumDims(value_handle, status)); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); float value = 0.0f; TF_Tensor* t = TFE_TensorHandleResolve(value_handle, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); @@ -974,7 +975,8 @@ void BM_ReadVariable(int iters) { CHECK_EQ(1, num_retvals); CHECK(h); CHECK_EQ(TF_FLOAT, TFE_TensorHandleDataType(h)); - CHECK_EQ(0, TFE_TensorHandleNumDims(h)); + CHECK_EQ(0, TFE_TensorHandleNumDims(h, status)); + CHECK_EQ(TF_OK, 
TF_GetCode(status)) << TF_Message(status); h = nullptr; } tensorflow::testing::StopTiming(); diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 6fa076507d..3ec2109d32 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -185,6 +185,12 @@ typedef struct EagerTensor { // This stores `_keras_mask` object and is set by Tensorflow layers. PyObject* keras_mask; + + // We store a status object here as an optimization to avoid allocating a new + // Status objects on different functions that operate on EagerTensor and need + // to use a TF_Status object. However note that accesses to `status` are not + // thread-safe. + TF_Status* status; } EagerTensor; // tp_init for EagerTensor. @@ -195,6 +201,7 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { self->handle_data = Py_None; Py_INCREF(Py_None); self->keras_mask = Py_None; + self->status = TF_NewStatus(); PyObject* value; PyObject* context = nullptr; PyObject* device = nullptr; @@ -269,17 +276,17 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { } TF_DataType handle_dtype = TFE_TensorHandleDataType(handle.get()); if (desired_dtype >= 0 && desired_dtype != handle_dtype) { - auto out_status = tensorflow::make_safe(TF_NewStatus()); handle = tensorflow::make_safe( EagerCast(GetContext(context), handle.get(), handle_dtype, - static_cast(desired_dtype), out_status.get())); - if (TF_GetCode(out_status.get()) != TF_OK) { - PyErr_SetString( - PyExc_ValueError, - tensorflow::strings::StrCat("Error while casting from DataType ", - handle_dtype, " to ", desired_dtype, ". ", - TF_Message(out_status.get())) - .c_str()); + static_cast(desired_dtype), self->status)); + if (TF_GetCode(self->status) != TF_OK) { + PyErr_SetString(PyExc_ValueError, + tensorflow::strings::StrCat( + "Error while casting from DataType ", handle_dtype, + " to ", desired_dtype, ". 
", TF_Message(self->status)) + .c_str()); + // Cleanup self->status before returning. + TF_SetStatus(self->status, TF_OK, ""); return -1; } handle_dtype = TFE_TensorHandleDataType(handle.get()); @@ -323,6 +330,7 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { // tp_dealloc for EagerTensor. void EagerTensor_dealloc(EagerTensor* self) { + TF_DeleteStatus(self->status); Py_DECREF(self->handle_data); Py_DECREF(self->keras_mask); TFE_DeleteTensorHandle(self->handle); @@ -348,12 +356,21 @@ static PyObject* EagerTensor_datatype_enum(EagerTensor* self) { // Getter for `_shape_tuple`. static PyObject* EagerTensor_shape_tuple(EagerTensor* self) { auto handle = self->handle; - int n = TFE_TensorHandleNumDims(handle); + int n = TFE_TensorHandleNumDims(handle, self->status); + if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) { + // Cleanup self->status before returning. + TF_SetStatus(self->status, TF_OK, ""); + return nullptr; + } PyObject* shape = PyTuple_New(n); if (PyErr_Occurred()) return nullptr; for (int i = 0; i < n; ++i) { - PyObject* dim = PyLong_FromLongLong(TFE_TensorHandleDim(handle, i)); - if (dim == nullptr || PyTuple_SetItem(shape, i, dim) != 0) { + PyObject* dim = + PyLong_FromLongLong(TFE_TensorHandleDim(handle, i, self->status)); + if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError) || + dim == nullptr || PyTuple_SetItem(shape, i, dim) != 0) { + // Cleanup self->status before returning. + TF_SetStatus(self->status, TF_OK, ""); Py_DECREF(shape); if (dim != nullptr) Py_DECREF(dim); PyErr_SetString(PyExc_RuntimeError, "Error while creating shape"); @@ -365,10 +382,16 @@ static PyObject* EagerTensor_shape_tuple(EagerTensor* self) { // Getter for `_rank`. static PyObject* EagerTensor_rank(EagerTensor* self) { + int num_dims = TFE_TensorHandleNumDims(self->handle, self->status); + if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) { + // Cleanup self->status before returning. 
+ TF_SetStatus(self->status, TF_OK, ""); + return nullptr; + } #if PY_MAJOR_VERSION < 3 - return PyInt_FromLong(TFE_TensorHandleNumDims(self->handle)); + return PyInt_FromLong(num_dims); #else - return PyLong_FromLong(TFE_TensorHandleNumDims(self->handle)); + return PyLong_FromLong(num_dims); #endif } @@ -437,10 +460,16 @@ static PyObject* EagerTensor_numpy(EagerTensor* self) { // Getter `device`. static PyObject* EagerTensor_device(EagerTensor* self) { + const char* device = TFE_TensorHandleDeviceName(self->handle, self->status); + if (MaybeRaiseExceptionFromTFStatus(self->status, PyExc_ValueError)) { + // Cleanup self->status before returning. + TF_SetStatus(self->status, TF_OK, ""); + return nullptr; + } #if PY_MAJOR_VERSION >= 3 - return PyUnicode_FromString(TFE_TensorHandleDeviceName(self->handle)); + return PyUnicode_FromString(device); #else - return PyBytes_FromString(TFE_TensorHandleDeviceName(self->handle)); + return PyBytes_FromString(device); #endif } @@ -576,6 +605,7 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { Py_INCREF(Py_None); t->keras_mask = Py_None; t->handle = handle; + t->status = TF_NewStatus(); } return reinterpret_cast(t); } @@ -673,6 +703,7 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim) { auto tensor = tensorflow::make_safe(TF_AllocateTensor( TF_INT32, &num_tensors_int, /*num_dims=*/1, /*len=*/4 * num_tensors_int)); int32_t* data = reinterpret_cast(TF_TensorData(tensor.get())); + auto status = tensorflow::make_safe(TF_NewStatus()); for (Py_ssize_t i = 0; i < num_tensors; ++i) { PyObject* tensor_obj = PyList_GET_ITEM(tensor_list, i); if (!EagerTensor_CheckExact(tensor_obj)) { @@ -687,21 +718,27 @@ PyObject* TFE_Py_TensorShapeSlice(PyObject* tensor_list, int slice_dim) { EagerTensor* t = reinterpret_cast(tensor_obj); TFE_TensorHandle* handle = t->handle; - if (slice_dim >= TFE_TensorHandleNumDims(handle)) { - PyErr_SetString(PyExc_IndexError, - tensorflow::strings::StrCat( - "Slice dimension (", 
slice_dim, - ") must be smaller than rank of all " - "tensors, but tensor at index ", - i, " has rank ", TFE_TensorHandleNumDims(handle)) - .c_str()); + int num_dims = TFE_TensorHandleNumDims(handle, status.get()); + if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_ValueError)) { + return nullptr; + } + if (slice_dim >= num_dims) { + PyErr_SetString( + PyExc_IndexError, + tensorflow::strings::StrCat("Slice dimension (", slice_dim, + ") must be smaller than rank of all " + "tensors, but tensor at index ", + i, " has rank ", num_dims) + .c_str()); + return nullptr; + } + int64_t dim = TFE_TensorHandleDim(handle, slice_dim, status.get()); + if (MaybeRaiseExceptionFromTFStatus(status.get(), PyExc_ValueError)) { return nullptr; } - int64_t dim = TFE_TensorHandleDim(handle, slice_dim); data[i] = dim; } - auto status = tensorflow::make_safe(TF_NewStatus()); TFE_TensorHandle* handle = TFE_NewTensorHandle(tensor.get(), status.get()); if (TF_GetCode(status.get()) != TF_OK) { PyErr_SetString( -- GitLab From eb5f3afcb8717ac6bd737ee78997562f67657fd0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 15:33:22 -0800 Subject: [PATCH 047/884] Adds unit tests for mean op with uint8_t input data. 
PiperOrigin-RevId: 186833364 --- .../internal/reference/reference_ops.h | 8 ++- tensorflow/contrib/lite/kernels/mean_test.cc | 72 +++++++++++++++++-- 2 files changed, 73 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index f5290a14d3..53de21697b 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -2899,9 +2899,11 @@ inline void Mean(T* input_data, const int* input_dims, const int input_num_dims, for (int idx = 0; idx < num_resolved_axis; ++idx) { num_elements_in_axis *= static_cast(input_dims[resolved_axis[idx]]); } - for (size_t idx = 0; idx < num_outputs; ++idx) { - output_data[idx] = static_cast(static_cast(output_data[idx]) / - num_elements_in_axis); + if (num_elements_in_axis > 0) { + for (size_t idx = 0; idx < num_outputs; ++idx) { + output_data[idx] = static_cast(static_cast(output_data[idx]) / + num_elements_in_axis); + } } } diff --git a/tensorflow/contrib/lite/kernels/mean_test.cc b/tensorflow/contrib/lite/kernels/mean_test.cc index c4c53c2ded..2d6d4bc2da 100644 --- a/tensorflow/contrib/lite/kernels/mean_test.cc +++ b/tensorflow/contrib/lite/kernels/mean_test.cc @@ -74,7 +74,7 @@ class MeanOpDynamicModel : public BaseMeanOpModel { } }; -TEST(ConstMeanOpTest, NotKeepDims) { +TEST(ConstFloatMeanOpTest, NotKeepDims) { std::initializer_list data = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; @@ -86,7 +86,7 @@ TEST(ConstMeanOpTest, NotKeepDims) { EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({12, 13}))); } -TEST(ConstMeanOpTest, KeepDims) { +TEST(ConstFloatMeanOpTest, KeepDims) { std::initializer_list data = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 
22.0, 23.0, 24.0}; @@ -99,7 +99,7 @@ TEST(ConstMeanOpTest, KeepDims) { ElementsAreArray(ArrayFloatNear({10.5, 12.5, 14.5}))); } -TEST(DynamicMeanOpTest, NotKeepDims) { +TEST(DynamicFloatMeanOpTest, NotKeepDims) { std::initializer_list data = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; @@ -114,7 +114,7 @@ TEST(DynamicMeanOpTest, NotKeepDims) { EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({12, 13}))); } -TEST(DynamicMeanOpTest, KeepDims) { +TEST(DynamicFloatMeanOpTest, KeepDims) { std::initializer_list data = { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0, 18.0, 19.0, 20.0, 21.0, 22.0, 23.0, 24.0}; @@ -130,6 +130,70 @@ TEST(DynamicMeanOpTest, KeepDims) { ElementsAreArray(ArrayFloatNear({10.5, 12.5, 14.5}))); } +TEST(DynamicFloatMeanOpTest, Scale) { + std::initializer_list data = {9.527}; + MeanOpDynamicModel m({TensorType_FLOAT32, {1}}, {TensorType_FLOAT32, {1}}, + {TensorType_INT32, {1}}, true); + std::initializer_list axis = {0}; + m.SetAxis(axis); + m.SetInput(data); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({9.527}))); +} + +TEST(ConstUint8MeanOpTest, NotKeepDims) { + std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}; + MeanOpConstModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {2}}, + {4}, {1, 0, -3, -3}, false); + m.SetInput(data); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({12, 13})); +} + +TEST(ConstUint8MeanOpTest, KeepDims) { + std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}; + MeanOpConstModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {3}}, + {2}, {0, 2}, true); + 
m.SetInput(data); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 3, 1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({10, 12, 14})); +} + +TEST(DynamicUint8MeanOpTest, NotKeepDims) { + std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}; + MeanOpDynamicModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {2}}, + {TensorType_INT32, {4}}, false); + std::initializer_list axis = {1, 0, -3, -3}; + m.SetAxis(axis); + m.SetInput(data); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({2})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({12, 13})); +} + +TEST(DynamicUint8MeanOpTest, KeepDims) { + std::initializer_list data = {1, 2, 3, 4, 5, 6, 7, 8, + 9, 10, 11, 12, 13, 14, 15, 16, + 17, 18, 19, 20, 21, 22, 23, 24}; + MeanOpDynamicModel m({TensorType_UINT8, {4, 3, 2}}, {TensorType_UINT8, {3}}, + {TensorType_INT32, {2}}, true); + std::initializer_list axis = {0, 2}; + m.SetAxis(axis); + m.SetInput(data); + m.Invoke(); + EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 3, 1})); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({10, 12, 14})); +} + } // namespace } // namespace tflite -- GitLab From fedca2059d52d4cb753c46d4e65884877b5b4f38 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 23 Feb 2018 15:35:35 -0800 Subject: [PATCH 048/884] Improvement to the eager device placement heuristic. 
PiperOrigin-RevId: 186833677 --- tensorflow/python/eager/context.py | 3 +-- tensorflow/python/eager/core_test.py | 16 ++++++++++++++-- tensorflow/python/ops/array_ops.py | 5 ++++- tensorflow/python/training/saver.py | 4 ++-- 4 files changed, 21 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 07652d3e02..0e9c21b221 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -60,8 +60,7 @@ class _EagerContext(threading.local): def __init__(self): super(_EagerContext, self).__init__() - self.device_spec = pydev.DeviceSpec.from_string( - "/job:localhost/replica:0/task:0/device:CPU:0") + self.device_spec = pydev.DeviceSpec.from_string("") self.device_name = self.device_spec.to_string() self.mode = _default_mode self.scope_name = "" diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index c68e2f422e..0e40d8a5c0 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -33,6 +33,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import nn_ops @@ -65,8 +66,7 @@ class TFETest(test_util.TensorFlowTestCase): ctx.summary_writer_resource = 'mock' self.assertEqual('mock', ctx.summary_writer_resource) - self.assertEqual('/job:localhost/replica:0/task:0/device:CPU:0', - ctx.device_name) + self.assertEqual('', ctx.device_name) self.assertEqual(ctx.device_name, ctx.device_spec.to_string()) with ctx.device('GPU:0'): self.assertEqual('/job:localhost/replica:0/task:0/device:GPU:0', @@ -100,6 +100,18 @@ class TFETest(test_util.TensorFlowTestCase): self.assertEqual(len(cpu_stats.node_stats), 1) self.assertEqual(cpu_stats.node_stats[0].node_name, 'Add') + def testShouldCopy(self): + if not 
context.context().num_gpus(): + self.skipTest('No devices other than CPUs found') + with ops.device('gpu:0'): + x = constant_op.constant(1.0) + y = array_ops.identity(x) + # The value we're testing y.device against will depend on what the behavior + # of not explicitly specifying a device in the context is. This behavior is + # subject to change (for example, in the future we may want to use GPUs, if + # available, when no device is explicitly provided) + self.assertEqual(y.device, '/job:localhost/replica:0/task:0/device:CPU:0') + def testContextStackContainsEagerMode(self): # Eager execution has been enabled, and no other context # switch has occurred, so `context_stack` should contain diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index b3020efc9a..cdfb955f54 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -134,7 +134,10 @@ def identity(input, name=None): # pylint: disable=redefined-builtin input = ops.convert_to_tensor(input) in_device = input.device # TODO(ashankar): Does 'identity' need to invoke execution callbacks? - if context.context().device_name != in_device: + context_device = context.context().device_name + if not context_device: + context_device = "/job:localhost/replica:0/task:0/device:CPU:0" + if context_device != in_device: return input._copy() # pylint: disable=protected-access return input diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 3888e9bba4..83e848d598 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -196,8 +196,8 @@ class BaseSaverBuilder(object): # Copy the restored tensor to the variable's device. 
with ops.device(self._var_device): restored_tensor = array_ops.identity(restored_tensor) - return resource_variable_ops.shape_safe_assign_variable_handle( - self.handle_op, self._var_shape, restored_tensor) + return resource_variable_ops.shape_safe_assign_variable_handle( + self.handle_op, self._var_shape, restored_tensor) def __init__(self, write_version=saver_pb2.SaverDef.V2): self._write_version = write_version -- GitLab From cc171bb7371590ee45e361b6a50a018d026412f6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 15:43:09 -0800 Subject: [PATCH 049/884] Add test for bug in CUB that caused dynamic partition to fail on the GPU. PiperOrigin-RevId: 186834668 --- .../python/kernel_tests/dynamic_partition_op_test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py index fedbf9e696..5e8937ad2c 100644 --- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py +++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py @@ -326,6 +326,18 @@ class DynamicPartitionTest(test.TestCase): with self.assertRaises(ValueError): data_flow_ops.dynamic_partition(data, indices, num_partitions=4) + # see https://github.com/tensorflow/tensorflow/issues/17106 + def testCUBBug(self): + x = constant_op.constant(np.random.randn(3072)) + inds = [0]*189 + [1]*184 + [2]*184 + [3]*191 + [4]*192 + [5]*195 + [6]*195 + inds += [7]*195 + [8]*188 + [9]*195 + [10]*188 + [11]*202 + [12]*194 + inds += [13]*194 + [14]*194 + [15]*192 + self.assertEqual(len(inds), x.shape[0]) + partitioned = data_flow_ops.dynamic_partition(x, inds, 16) + with self.test_session() as sess: + res = sess.run(partitioned) + self.assertEqual(res[-1].shape[0], 192) + if __name__ == "__main__": test.main() -- GitLab From 9a84277be2cb8233c5c14270db6fcdff31ab4d93 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 23 Feb 2018 15:45:02 -0800 Subject: 
[PATCH 050/884] eager: Change various examples to use tf.keras.Model instead of tfe.Network. PiperOrigin-RevId: 186834891 --- .../eager/python/examples/gan/mnist.py | 99 ++++++------ .../linear_regression/linear_regression.py | 16 +- .../python/examples/resnet50/resnet50.py | 153 ++++++++---------- .../examples/resnet50/resnet50_graph_test.py | 4 +- .../python/examples/resnet50/resnet50_test.py | 6 +- 5 files changed, 122 insertions(+), 156 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/gan/mnist.py b/tensorflow/contrib/eager/python/examples/gan/mnist.py index b9ac79f46c..5f51d52622 100644 --- a/tensorflow/contrib/eager/python/examples/gan/mnist.py +++ b/tensorflow/contrib/eager/python/examples/gan/mnist.py @@ -35,7 +35,7 @@ from tensorflow.examples.tutorials.mnist import input_data FLAGS = None -class Discriminator(tfe.Network): +class Discriminator(tf.keras.Model): """GAN Discriminator. A network to differentiate between generated and real handwritten digits. @@ -56,19 +56,15 @@ class Discriminator(tfe.Network): else: assert data_format == 'channels_last' self._input_shape = [-1, 28, 28, 1] - self.conv1 = self.track_layer(tf.layers.Conv2D(64, 5, padding='SAME', - data_format=data_format, - activation=tf.tanh)) - self.pool1 = self.track_layer( - tf.layers.AveragePooling2D(2, 2, data_format=data_format)) - self.conv2 = self.track_layer(tf.layers.Conv2D(128, 5, - data_format=data_format, - activation=tf.tanh)) - self.pool2 = self.track_layer( - tf.layers.AveragePooling2D(2, 2, data_format=data_format)) - self.flatten = self.track_layer(tf.layers.Flatten()) - self.fc1 = self.track_layer(tf.layers.Dense(1024, activation=tf.tanh)) - self.fc2 = self.track_layer(tf.layers.Dense(1, activation=None)) + self.conv1 = tf.layers.Conv2D( + 64, 5, padding='SAME', data_format=data_format, activation=tf.tanh) + self.pool1 = tf.layers.AveragePooling2D(2, 2, data_format=data_format) + self.conv2 = tf.layers.Conv2D( + 128, 5, data_format=data_format, 
activation=tf.tanh) + self.pool2 = tf.layers.AveragePooling2D(2, 2, data_format=data_format) + self.flatten = tf.layers.Flatten() + self.fc1 = tf.layers.Dense(1024, activation=tf.tanh) + self.fc2 = tf.layers.Dense(1, activation=None) def call(self, inputs): """Return two logits per image estimating input authenticity. @@ -95,7 +91,7 @@ class Discriminator(tfe.Network): return x -class Generator(tfe.Network): +class Generator(tf.keras.Model): """Generator of handwritten digits similar to the ones in the MNIST dataset. """ @@ -116,18 +112,17 @@ class Generator(tfe.Network): else: assert data_format == 'channels_last' self._pre_conv_shape = [-1, 6, 6, 128] - self.fc1 = self.track_layer(tf.layers.Dense(6 * 6 * 128, - activation=tf.tanh)) + self.fc1 = tf.layers.Dense(6 * 6 * 128, activation=tf.tanh) # In call(), we reshape the output of fc1 to _pre_conv_shape # Deconvolution layer. Resulting image shape: (batch, 14, 14, 64) - self.conv1 = self.track_layer(tf.layers.Conv2DTranspose( - 64, 4, strides=2, activation=None, data_format=data_format)) + self.conv1 = tf.layers.Conv2DTranspose( + 64, 4, strides=2, activation=None, data_format=data_format) # Deconvolution layer. Resulting image shape: (batch, 28, 28, 1) - self.conv2 = self.track_layer(tf.layers.Conv2DTranspose( - 1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format)) + self.conv2 = tf.layers.Conv2DTranspose( + 1, 2, strides=2, activation=tf.nn.sigmoid, data_format=data_format) def call(self, inputs): """Return a batch of generated images. 
@@ -168,7 +163,8 @@ def discriminator_loss(discriminator_real_outputs, discriminator_gen_outputs): """ loss_on_real = tf.losses.sigmoid_cross_entropy( - tf.ones_like(discriminator_real_outputs), discriminator_real_outputs, + tf.ones_like(discriminator_real_outputs), + discriminator_real_outputs, label_smoothing=0.25) loss_on_generated = tf.losses.sigmoid_cross_entropy( tf.zeros_like(discriminator_gen_outputs), discriminator_gen_outputs) @@ -198,9 +194,8 @@ def generator_loss(discriminator_gen_outputs): return loss -def train_one_epoch(generator, discriminator, - generator_optimizer, discriminator_optimizer, - dataset, log_interval, noise_dim): +def train_one_epoch(generator, discriminator, generator_optimizer, + discriminator_optimizer, dataset, log_interval, noise_dim): """Trains `generator` and `discriminator` models on `dataset`. Args: @@ -222,14 +217,18 @@ def train_one_epoch(generator, discriminator, with tf.contrib.summary.record_summaries_every_n_global_steps(log_interval): current_batch_size = images.shape[0] - noise = tf.random_uniform(shape=[current_batch_size, noise_dim], - minval=-1., maxval=1., seed=batch_index) + noise = tf.random_uniform( + shape=[current_batch_size, noise_dim], + minval=-1., + maxval=1., + seed=batch_index) with tfe.GradientTape(persistent=True) as g: generated_images = generator(noise) - tf.contrib.summary.image('generated_images', - tf.reshape(generated_images, [-1, 28, 28, 1]), - max_images=10) + tf.contrib.summary.image( + 'generated_images', + tf.reshape(generated_images, [-1, 28, 28, 1]), + max_images=10) discriminator_gen_outputs = discriminator(generated_images) discriminator_real_outputs = discriminator(images) @@ -245,17 +244,17 @@ def train_one_epoch(generator, discriminator, discriminator.variables) with tf.variable_scope('generator'): - generator_optimizer.apply_gradients(zip(generator_grad, - generator.variables)) + generator_optimizer.apply_gradients( + zip(generator_grad, generator.variables)) with 
tf.variable_scope('discriminator'): - discriminator_optimizer.apply_gradients(zip(discriminator_grad, - discriminator.variables)) + discriminator_optimizer.apply_gradients( + zip(discriminator_grad, discriminator.variables)) if log_interval and batch_index > 0 and batch_index % log_interval == 0: print('Batch #%d\tAverage Generator Loss: %.6f\t' - 'Average Discriminator Loss: %.6f' % ( - batch_index, total_generator_loss/batch_index, - total_discriminator_loss/batch_index)) + 'Average Discriminator Loss: %.6f' % + (batch_index, total_generator_loss / batch_index, + total_discriminator_loss / batch_index)) def main(_): @@ -266,10 +265,9 @@ def main(_): # Load the datasets data = input_data.read_data_sets(FLAGS.data_dir) - dataset = (tf.data.Dataset - .from_tensor_slices(data.train.images) - .shuffle(60000) - .batch(FLAGS.batch_size)) + dataset = ( + tf.data.Dataset.from_tensor_slices(data.train.images).shuffle(60000) + .batch(FLAGS.batch_size)) # Create the models and optimizers generator = Generator(data_format) @@ -294,20 +292,17 @@ def main(_): start = time.time() with summary_writer.as_default(): train_one_epoch(generator, discriminator, generator_optimizer, - discriminator_optimizer, - dataset, FLAGS.log_interval, FLAGS.noise) + discriminator_optimizer, dataset, FLAGS.log_interval, + FLAGS.noise) end = time.time() - print('\nTrain time for epoch #%d (global step %d): %f' % ( - epoch, global_step.numpy(), end - start)) + print('\nTrain time for epoch #%d (global step %d): %f' % + (epoch, global_step.numpy(), end - start)) all_variables = ( - generator.variables - + discriminator.variables - + generator_optimizer.variables() - + discriminator_optimizer.variables() - + [global_step]) - tfe.Saver(all_variables).save( - checkpoint_prefix, global_step=global_step) + generator.variables + discriminator.variables + + generator_optimizer.variables() + + discriminator_optimizer.variables() + [global_step]) + tfe.Saver(all_variables).save(checkpoint_prefix, 
global_step=global_step) if __name__ == '__main__': diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py index 6ce4de6ee0..157a6360ea 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py +++ b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py @@ -33,23 +33,13 @@ import tensorflow as tf import tensorflow.contrib.eager as tfe -class LinearModel(tfe.Network): - """A TensorFlow linear regression model. - - Uses TensorFlow's eager execution. - - For those familiar with TensorFlow graphs, notice the absence of - `tf.Session`. The `forward()` method here immediately executes and - returns output values. The `loss()` method immediately compares the - output of `forward()` with the target and returns the MSE loss value. - The `fit()` performs gradient-descent training on the model's weights - and bias. - """ +class LinearModel(tf.keras.Model): + """A TensorFlow linear regression model.""" def __init__(self): """Constructs a LinearModel object.""" super(LinearModel, self).__init__() - self._hidden_layer = self.track_layer(tf.layers.Dense(1)) + self._hidden_layer = tf.layers.Dense(1) def call(self, xs): """Invoke the linear model. diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py index 9982fdb07e..6b59413141 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50.py @@ -27,10 +27,9 @@ from __future__ import print_function import functools import tensorflow as tf -import tensorflow.contrib.eager as tfe -class _IdentityBlock(tfe.Network): +class _IdentityBlock(tf.keras.Model): """_IdentityBlock is the block that has no conv layer at shortcut. 
Args: @@ -50,31 +49,24 @@ class _IdentityBlock(tfe.Network): bn_name_base = 'bn' + str(stage) + block + '_branch' bn_axis = 1 if data_format == 'channels_first' else 3 - self.conv2a = self.track_layer( - tf.layers.Conv2D( - filters1, (1, 1), - name=conv_name_base + '2a', - data_format=data_format)) - self.bn2a = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')) - - self.conv2b = self.track_layer( - tf.layers.Conv2D( - filters2, - kernel_size, - padding='same', - data_format=data_format, - name=conv_name_base + '2b')) - self.bn2b = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')) - - self.conv2c = self.track_layer( - tf.layers.Conv2D( - filters3, (1, 1), - name=conv_name_base + '2c', - data_format=data_format)) - self.bn2c = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')) + self.conv2a = tf.layers.Conv2D( + filters1, (1, 1), name=conv_name_base + '2a', data_format=data_format) + self.bn2a = tf.layers.BatchNormalization( + axis=bn_axis, name=bn_name_base + '2a') + + self.conv2b = tf.layers.Conv2D( + filters2, + kernel_size, + padding='same', + data_format=data_format, + name=conv_name_base + '2b') + self.bn2b = tf.layers.BatchNormalization( + axis=bn_axis, name=bn_name_base + '2b') + + self.conv2c = tf.layers.Conv2D( + filters3, (1, 1), name=conv_name_base + '2c', data_format=data_format) + self.bn2c = tf.layers.BatchNormalization( + axis=bn_axis, name=bn_name_base + '2c') def call(self, input_tensor, training=False): x = self.conv2a(input_tensor) @@ -92,7 +84,7 @@ class _IdentityBlock(tfe.Network): return tf.nn.relu(x) -class _ConvBlock(tfe.Network): +class _ConvBlock(tf.keras.Model): """_ConvBlock is the block that has a conv layer at shortcut. 
Args: @@ -121,41 +113,35 @@ class _ConvBlock(tfe.Network): bn_name_base = 'bn' + str(stage) + block + '_branch' bn_axis = 1 if data_format == 'channels_first' else 3 - self.conv2a = self.track_layer( - tf.layers.Conv2D( - filters1, (1, 1), - strides=strides, - name=conv_name_base + '2a', - data_format=data_format)) - self.bn2a = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')) - - self.conv2b = self.track_layer( - tf.layers.Conv2D( - filters2, - kernel_size, - padding='same', - name=conv_name_base + '2b', - data_format=data_format)) - self.bn2b = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')) - - self.conv2c = self.track_layer( - tf.layers.Conv2D( - filters3, (1, 1), - name=conv_name_base + '2c', - data_format=data_format)) - self.bn2c = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')) - - self.conv_shortcut = self.track_layer( - tf.layers.Conv2D( - filters3, (1, 1), - strides=strides, - name=conv_name_base + '1', - data_format=data_format)) - self.bn_shortcut = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '1')) + self.conv2a = tf.layers.Conv2D( + filters1, (1, 1), + strides=strides, + name=conv_name_base + '2a', + data_format=data_format) + self.bn2a = tf.layers.BatchNormalization( + axis=bn_axis, name=bn_name_base + '2a') + + self.conv2b = tf.layers.Conv2D( + filters2, + kernel_size, + padding='same', + name=conv_name_base + '2b', + data_format=data_format) + self.bn2b = tf.layers.BatchNormalization( + axis=bn_axis, name=bn_name_base + '2b') + + self.conv2c = tf.layers.Conv2D( + filters3, (1, 1), name=conv_name_base + '2c', data_format=data_format) + self.bn2c = tf.layers.BatchNormalization( + axis=bn_axis, name=bn_name_base + '2c') + + self.conv_shortcut = tf.layers.Conv2D( + filters3, (1, 1), + strides=strides, + name=conv_name_base + '1', + data_format=data_format) + self.bn_shortcut = 
tf.layers.BatchNormalization( + axis=bn_axis, name=bn_name_base + '1') def call(self, input_tensor, training=False): x = self.conv2a(input_tensor) @@ -176,7 +162,8 @@ class _ConvBlock(tfe.Network): return tf.nn.relu(x) -class ResNet50(tfe.Network): +# pylint: disable=not-callable +class ResNet50(tf.keras.Model): """Instantiates the ResNet50 architecture. Args: @@ -220,32 +207,28 @@ class ResNet50(tfe.Network): self.include_top = include_top def conv_block(filters, stage, block, strides=(2, 2)): - l = _ConvBlock( + return _ConvBlock( 3, filters, stage=stage, block=block, data_format=data_format, strides=strides) - return self.track_layer(l) def id_block(filters, stage, block): - l = _IdentityBlock( + return _IdentityBlock( 3, filters, stage=stage, block=block, data_format=data_format) - return self.track_layer(l) - - self.conv1 = self.track_layer( - tf.layers.Conv2D( - 64, (7, 7), - strides=(2, 2), - data_format=data_format, - padding='same', - name='conv1')) + + self.conv1 = tf.layers.Conv2D( + 64, (7, 7), + strides=(2, 2), + data_format=data_format, + padding='same', + name='conv1') bn_axis = 1 if data_format == 'channels_first' else 3 - self.bn_conv1 = self.track_layer( - tf.layers.BatchNormalization(axis=bn_axis, name='bn_conv1')) - self.max_pool = self.track_layer( - tf.layers.MaxPooling2D((3, 3), strides=(2, 2), data_format=data_format)) + self.bn_conv1 = tf.layers.BatchNormalization(axis=bn_axis, name='bn_conv1') + self.max_pool = tf.layers.MaxPooling2D( + (3, 3), strides=(2, 2), data_format=data_format) self.l2a = conv_block([64, 64, 256], stage=2, block='a', strides=(1, 1)) self.l2b = id_block([64, 64, 256], stage=2, block='b') @@ -267,13 +250,11 @@ class ResNet50(tfe.Network): self.l5b = id_block([512, 512, 2048], stage=5, block='b') self.l5c = id_block([512, 512, 2048], stage=5, block='c') - self.avg_pool = self.track_layer( - tf.layers.AveragePooling2D( - (7, 7), strides=(7, 7), data_format=data_format)) + self.avg_pool = tf.layers.AveragePooling2D( + 
(7, 7), strides=(7, 7), data_format=data_format) if self.include_top: - self.fc1000 = self.track_layer( - tf.layers.Dense(classes, name='fc1000')) + self.fc1000 = tf.layers.Dense(classes, name='fc1000') else: reduction_indices = [1, 2] if data_format == 'channels_last' else [2, 3] reduction_indices = tf.constant(reduction_indices) @@ -288,7 +269,7 @@ class ResNet50(tfe.Network): else: self.global_pooling = None - def call(self, input_tensor, training=False): + def call(self, input_tensor, training): x = self.conv1(input_tensor) x = self.bn_conv1(x, training=training) x = tf.nn.relu(x) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py index 23317886e7..551c76b0df 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_graph_test.py @@ -55,7 +55,7 @@ class ResNet50GraphTest(tf.test.TestCase): with tf.Graph().as_default(): images = tf.placeholder(tf.float32, image_shape(None)) model = resnet50.ResNet50(data_format()) - predictions = model(images) + predictions = model(images, training=False) init = tf.global_variables_initializer() @@ -114,7 +114,7 @@ class ResNet50Benchmarks(tf.test.Benchmark): with tf.Graph().as_default(): images = tf.placeholder(tf.float32, image_shape(None)) model = resnet50.ResNet50(data_format()) - predictions = model(images) + predictions = model(images, training=False) init = tf.global_variables_initializer() diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index 0ff8746884..c106ab0a06 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -71,7 +71,7 @@ class ResNet50Test(tf.test.TestCase): model.call = tfe.defun(model.call) with 
tf.device(device): images, _ = random_batch(2) - output = model(images) + output = model(images, training=False) self.assertEqual((2, 1000), output.shape) def test_apply(self): @@ -85,7 +85,7 @@ class ResNet50Test(tf.test.TestCase): model = resnet50.ResNet50(data_format, include_top=False) with tf.device(device): images, _ = random_batch(2) - output = model(images) + output = model(images, training=False) output_shape = ((2, 2048, 1, 1) if data_format == 'channels_first' else (2, 1, 1, 2048)) self.assertEqual(output_shape, output.shape) @@ -95,7 +95,7 @@ class ResNet50Test(tf.test.TestCase): model = resnet50.ResNet50(data_format, include_top=False, pooling='avg') with tf.device(device): images, _ = random_batch(2) - output = model(images) + output = model(images, training=False) self.assertEqual((2, 2048), output.shape) def test_train(self): -- GitLab From bd946a5bd7b59be8bb276fdd93e0a97653dedbfd Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 23 Feb 2018 15:51:23 -0800 Subject: [PATCH 051/884] Checkpointable: Utility to gather initialization ops A bit safer, since only variables which will be saved get initialized. Graph building then raises an error when you've used one which won't be saved. Reduces the need for the global collection. Makes it a bit easier to deal with initialization when writing graph/eager agnostic programs. 
PiperOrigin-RevId: 186835744 --- .../eager/python/checkpointable_utils.py | 128 +++++++++++++++++- .../eager/python/checkpointable_utils_test.py | 86 ++++++++---- tensorflow/python/framework/test_util.py | 1 + 3 files changed, 186 insertions(+), 29 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index d9648ffb03..e26ecc774a 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import abc import collections import weakref @@ -278,6 +279,37 @@ def _serialize_object_graph(root_checkpointable): slot_variables=slot_variables) +def gather_initializers(root_checkpointable): + """Traverse the object graph and find initialization ops. + + Looks for `Checkpointable` objects which are dependencies of + `root_checkpointable` and which have an `initializer` property. Includes + initializers for slot variables only if the variable they are slotting for and + the optimizer are dependencies of `root_checkpointable` (i.e. if they would be + saved with a checkpoint). + + Args: + root_checkpointable: A `Checkpointable` object to gather initializers for. + Returns: + A list of initialization ops. + """ + # TODO(allenl): Extract out gathering logic so the naming logic doesn't have + # to run. 
+ checkpointable_objects, path_to_root = ( + _breadth_first_checkpointable_traversal(root_checkpointable)) + object_names = { + obj: _object_prefix_from_path(path) + for obj, path in path_to_root.items()} + node_ids = {node: node_id for node_id, node + in enumerate(checkpointable_objects)} + _serialize_slot_variables( + checkpointable_objects=checkpointable_objects, + node_ids=node_ids, + object_names=object_names) + return [c.initializer for c in checkpointable_objects + if hasattr(c, "initializer") and c.initializer is not None] + + class _NoRestoreSaveable(saver_lib.BaseSaverBuilder.SaveableObject): def __init__(self, tensor, name): @@ -288,7 +320,26 @@ class _NoRestoreSaveable(saver_lib.BaseSaverBuilder.SaveableObject): return control_flow_ops.no_op() -class CheckpointLoadStatus(object): +class _LoadStatus(object): + """Abstract base for load status callbacks.""" + + @abc.abstractmethod + def assert_consumed(self): + """Raises an exception unless a non-trivial restoration has completed.""" + pass + + @abc.abstractmethod + def run_restore_ops(self, session=None): + """Runs restore ops from the checkpoint. Requires a valid checkpoint.""" + pass + + @abc.abstractmethod + def initialize_or_restore(self, session=None): + """Runs restore ops from the checkpoint, or initializes variables.""" + pass + + +class CheckpointLoadStatus(_LoadStatus): """Checks the status of checkpoint loading and manages restore ops. Returned from `Saver.restore`. Since `restore` may defer the loading of values @@ -348,6 +399,70 @@ class CheckpointLoadStatus(object): session = ops.get_default_session() session.run(self._checkpoint.restore_ops, feed_dict=self._feed_dict) + def initialize_or_restore(self, session=None): + """Alias for `run_restore_ops`. + + This method has a sibling in `InitializationOnlyStatus` which instead + initializes variables. That type is returned if no checkpoint is specified + in `Saver.restore`. + + Args: + session: The session to run restore ops in. 
If `None`, uses the default + session. + """ + self.run_restore_ops(session=session) + + +class InitializationOnlyStatus(_LoadStatus): + """Returned from `Saver.restore` when no checkpoint has been specified. + + Objects of this type have the same `assert_consumed` method as + `CheckpointLoadStatus`, but it always fails. However, + `initialize_or_restore` works on objects of both types, and will + initialize variables in `InitializationOnlyStatus` objects or restore them + otherwise. + """ + + def __init__(self, root_checkpointable): + self._root_checkpointable = root_checkpointable + + def assert_consumed(self): + """Assertion for consistency with `CheckpointLoadStatus`. Always fails.""" + raise AssertionError( + "No checkpoint specified (save_path=None); nothing is being restored.") + + def run_restore_ops(self, session=None): + """For consistency with `CheckpointLoadStatus`. + + Use `initialize_or_restore` for initializing if no checkpoint was passed + to `Saver.restore` and restoring otherwise. + + Args: + session: Not used. + """ + raise AssertionError( + "No checkpoint specified, so no restore ops are available " + "(save_path=None to Saver.restore).") + + def initialize_or_restore(self, session=None): + """Runs initialization ops for variables. + + Only objects which would be saved by `Saver.save` will be initialized. See + `gather_initializers` for details. + + This method does nothing when executing eagerly (initializers get run + eagerly). + + Args: + session: The session to run initialization ops in. If `None`, uses the + default session. 
+ """ + if context.in_eager_mode(): + return # run eagerly + if session is None: + session = ops.get_default_session() + session.run(gather_initializers(self._root_checkpointable)) + class _SessionWithFeedDictAdditions(session_lib.SessionInterface): """Pretends to be a session, inserts extra feeds on run().""" @@ -521,17 +636,20 @@ class Saver(object): Args: save_path: The path to the checkpoint, as returned by `save` or `tf.train.latest_checkpoint`. If None (as when there is no latest - checkpoint for `tf.train.latest_checkpoint` to return), does nothing. + checkpoint for `tf.train.latest_checkpoint` to return), returns an + object which may run initializers for objects in the dependency graph. session: The session to retrieve metadata with. Ignored when executing eagerly. If not provided when graph building, the default session is used. Returns: - A `CheckpointLoadStatus` object, which can be used to make assertions - about the status of checkpoint restoration and run restore ops. + A load status object, which can be used to make assertions about the + status of checkpoint restoration and run initialization/restore ops + (of type `CheckpointLoadStatus`, or `InitializationOnlyStatus` if + `save_path` is `None`). 
""" if save_path is None: - return + return InitializationOnlyStatus(self._root_checkpointable) in_graph_mode = context.in_graph_mode() if in_graph_mode: if session is None: diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index b7554defde..6b86d41bdb 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -36,7 +36,6 @@ from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables from tensorflow.python.training import adam from tensorflow.python.training import checkpointable from tensorflow.python.training import saver as core_saver @@ -140,7 +139,7 @@ class Checkpoint(checkpointable.Checkpointable): super(Checkpoint, self).__init__() for k, v in sorted(kwargs.items(), key=lambda item: item[0]): setattr(self, k, v) - self._save_counter = None + self._save_counter = None # Created lazily for restore-on-create. self._saver = checkpointable_utils.Saver(weakref.ref(self)) @property @@ -170,8 +169,12 @@ class Checkpoint(checkpointable.Checkpointable): session=session) def restore(self, save_path): - return self._saver.restore( - save_path=save_path) + status = self._saver.restore(save_path=save_path) + # Create the save counter now so it gets initialized with other variables + # when graph building. Creating it earlier would lead to double + # initialization when executing eagerly. 
+ self.save_counter # pylint: disable=pointless-statement + return status class InterfaceTests(test.TestCase): @@ -206,8 +209,7 @@ class InterfaceTests(test.TestCase): with self.assertRaisesRegexp(ValueError, "'duplicate' already exists"): checkpointable_utils.add_variable(obj, name="duplicate", shape=[]) - if context.in_graph_mode(): - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers(obj)) self.assertEqual("constant_initializer:0", constant_initializer.name) self.assertEqual(1, self.evaluate(constant_initializer)) self.assertEqual("some_variable_scope/ones_initializer:0", @@ -287,7 +289,8 @@ class CheckpointingTests(test.TestCase): optimizer.minimize( other_network(input_value), global_step=optimizer_step) - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers( + root_checkpointable)) self.evaluate(train_op) named_variables, serialized_graph = ( checkpointable_utils._serialize_object_graph(root_checkpointable)) @@ -385,7 +388,8 @@ class CheckpointingTests(test.TestCase): train_op = optimizer.minimize(network(input_value)) # TODO(allenl): Make initialization more pleasant when graph building. root_checkpointable.save_counter # pylint: disable=pointless-statement - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers( + root_checkpointable)) self.evaluate(train_op) prefix = os.path.join(self.get_temp_dir(), "ckpt") self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.])) @@ -429,6 +433,7 @@ class CheckpointingTests(test.TestCase): self.assertAllEqual(optimizer_variables[0], self.evaluate(beta1_power)) self.assertAllEqual(optimizer_variables[1], self.evaluate(beta2_power)) + # TODO(allenl): Debug garbage created by this test in python3. 
def testDeferredRestorationUsageEager(self): """An idiomatic eager execution example.""" num_training_steps = 10 @@ -468,28 +473,57 @@ class CheckpointingTests(test.TestCase): train_op = optimizer.minimize( network(input_value), global_step=root.global_step) - root.save_counter # pylint: disable=pointless-statement - init_op = variables.global_variables_initializer() checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) with self.test_session(graph=ops.get_default_graph()) as session: + status = root.restore(save_path=checkpoint_path) + status.initialize_or_restore(session=session) if checkpoint_path is None: self.assertEqual(0, training_continuation) - session.run(init_op) - # Another alternative would be to run initializers automatically - # if no checkpoint is being loaded. This would make deferred - # loading a bit more useful with graph execution. + with self.assertRaises(AssertionError): + status.assert_consumed() else: - status = root.restore(save_path=checkpoint_path).assert_consumed() - status.run_restore_ops() + status.assert_consumed() for _ in range(num_training_steps): session.run(train_op) - root.save(file_prefix=checkpoint_prefix, - session=session) + root.save(file_prefix=checkpoint_prefix, session=session) self.assertEqual((training_continuation + 1) * num_training_steps, session.run(root.global_step)) self.assertEqual(training_continuation + 1, session.run(root.save_counter)) + @test_util.run_in_graph_and_eager_modes() + def testAgnosticUsage(self): + """Graph/eager agnostic usage.""" + # Does create garbage when executing eagerly due to ops.Graph() creation. 
+ num_training_steps = 10 + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + for training_continuation in range(3): + with ops.Graph().as_default(), self.test_session( + graph=ops.get_default_graph()): + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + root = Checkpoint( + optimizer=optimizer, network=network, + global_step=training_util.get_or_create_global_step()) + checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) + status = root.restore(save_path=checkpoint_path) + input_value = constant_op.constant([[3.]]) + train_fn = functools.partial( + optimizer.minimize, + functools.partial(network, input_value), + global_step=root.global_step) + if context.in_graph_mode(): + train_fn = functools.partial(self.evaluate, train_fn()) + status.initialize_or_restore() + for _ in range(num_training_steps): + train_fn() + root.save(file_prefix=checkpoint_prefix) + self.assertEqual((training_continuation + 1) * num_training_steps, + self.evaluate(root.global_step)) + self.assertEqual(training_continuation + 1, + self.evaluate(root.save_counter)) + def _get_checkpoint_name(self, name): root = checkpointable.Checkpointable() checkpointable_utils.add_variable( @@ -602,7 +636,11 @@ class CheckpointingTests(test.TestCase): optimizer = CheckpointableAdam(0.1) if context.in_graph_mode(): train_op = optimizer.minimize(root.var) - self.evaluate(variables.global_variables_initializer()) + # Note that `optimizer` has not been added as a dependency of + # `root`. Create a one-off grouping so that slot variables for `root.var` + # get initialized too. 
+ self.evaluate(checkpointable_utils.gather_initializers( + Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) else: optimizer.minimize(root.var.read_value) @@ -709,7 +747,7 @@ class CheckpointingTests(test.TestCase): save_root.dep_one.dep_three = dep_three save_root.dep_two.dep_three = dep_three checkpointable_utils.add_variable(dep_three, name="var", initializer=0.) - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers(save_root)) save_path = checkpointable_utils.Saver(save_root).save( os.path.join(checkpoint_directory, "ckpt")) load_root = checkpointable.Checkpointable() @@ -732,7 +770,7 @@ class CheckpointingTests(test.TestCase): save_root.dep_one, name="var1", initializer=32., dtype=dtypes.float64) checkpointable_utils.add_variable( save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64) - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers(save_root)) save_path = checkpointable_utils.Saver(save_root).save( os.path.join(checkpoint_directory, "ckpt")) load_root = checkpointable.Checkpointable() @@ -760,7 +798,7 @@ class CheckpointingTests(test.TestCase): first, "v1", initializer=[3., 1., 4.]) second.v = checkpointable_utils.add_variable( second, "v2", initializer=[1., 1., 2., 3.]) - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers(first)) checkpoint_directory = self.get_temp_dir() save_path = checkpointable_utils.Saver(first).save( os.path.join(checkpoint_directory, "ckpt")) @@ -835,7 +873,7 @@ class CheckpointingTests(test.TestCase): obj.var = variable_scope.get_variable(name="v", initializer=0.) 
obj.opt = CheckpointableAdam(0.1) obj.opt.minimize(obj.var.read_value()) - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers(obj)) saver = checkpointable_utils.Saver(obj) saver.save(checkpoint_prefix) before_ops = graph.get_operations() @@ -853,7 +891,7 @@ class CheckpointingTests(test.TestCase): obj.var = variable_scope.get_variable(name="v", initializer=0.) obj.opt = CheckpointableAdam(0.1) obj.opt.minimize(obj.var.read_value()) - self.evaluate(variables.global_variables_initializer()) + self.evaluate(checkpointable_utils.gather_initializers(obj)) saver = checkpointable_utils.Saver(obj) save_path = saver.save(checkpoint_prefix) saver.restore(save_path) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index e1c37a52c6..aabf89a234 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -588,6 +588,7 @@ def run_in_graph_and_eager_modes(__unused__=None, # This decorator runs the wrapped test twice. # Reset the test environment between runs. self.tearDown() + self._tempdir = None self.setUp() def run_eager_mode(self, **kwargs): -- GitLab From beed05217cf8c3d90784a66cec7c97e042ff5258 Mon Sep 17 00:00:00 2001 From: Patrick Nguyen Date: Fri, 23 Feb 2018 16:04:38 -0800 Subject: [PATCH 052/884] Add custom registered graph optimizers run by MetaOptimizer. 
PiperOrigin-RevId: 186837828 --- tensorflow/core/grappler/optimizers/BUILD | 56 ++++++++++++ .../optimizers/custom_graph_optimizer.h | 35 ++++++++ .../custom_graph_optimizer_registry.cc | 61 +++++++++++++ .../custom_graph_optimizer_registry.h | 65 ++++++++++++++ .../custom_graph_optimizer_registry_test.cc | 87 +++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 21 ++++- .../optimizers/meta_optimizer_test.cc | 77 ++++++++++++++++ .../core/protobuf/rewriter_config.proto | 3 + 8 files changed, 401 insertions(+), 4 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/custom_graph_optimizer.h create mode 100644 tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc create mode 100644 tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h create mode 100644 tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry_test.cc create mode 100644 tensorflow/core/grappler/optimizers/meta_optimizer_test.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index e839630605..50ba48ea7a 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -157,6 +157,18 @@ cc_library( ], ) +cc_library( + name = "custom_graph_optimizer", + hdrs = [ + "custom_graph_optimizer.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_optimizer", + "//tensorflow/core:lib", + ], +) + cc_library( name = "arithmetic_optimizer", srcs = ["arithmetic_optimizer.cc"], @@ -368,6 +380,8 @@ cc_library( ":arithmetic_optimizer", ":auto_parallel", ":constant_folding", + ":custom_graph_optimizer", + ":custom_graph_optimizer_registry", ":dependency_optimizer", ":graph_optimizer", ":layout_optimizer", @@ -382,6 +396,48 @@ cc_library( ], ) +tf_cc_test( + name = "meta_optimizer_test", + srcs = ["meta_optimizer_test.cc"], + deps = [ + ":custom_graph_optimizer", + ":custom_graph_optimizer_registry", + ":meta_optimizer", + 
"//tensorflow/cc:cc_ops", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", + ], +) + +cc_library( + name = "custom_graph_optimizer_registry", + srcs = ["custom_graph_optimizer_registry.cc"], + hdrs = ["custom_graph_optimizer_registry.h"], + visibility = ["//visibility:public"], + deps = [ + ":custom_graph_optimizer", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "custom_graph_optimizer_registry_test", + size = "small", + srcs = ["custom_graph_optimizer_registry_test.cc"], + deps = [ + ":custom_graph_optimizer", + ":custom_graph_optimizer_registry", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "loop_optimizer", srcs = ["loop_optimizer.cc"], diff --git a/tensorflow/core/grappler/optimizers/custom_graph_optimizer.h b/tensorflow/core/grappler/optimizers/custom_graph_optimizer.h new file mode 100644 index 0000000000..a80d46f416 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/custom_graph_optimizer.h @@ -0,0 +1,35 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_OPTIMIZERS_CUSTOM_GRAPH_OPTIMIZER_H_ +#define TENSORFLOW_GRAPPLER_OPTIMIZERS_CUSTOM_GRAPH_OPTIMIZER_H_ + +#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace grappler { + +// A custom optimizer that can be registered. +class CustomGraphOptimizer : public GraphOptimizer { + public: + virtual ~CustomGraphOptimizer() {} + virtual Status Init() = 0; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_OPTIMIZERS_CUSTOM_GRAPH_OPTIMIZER_H_ diff --git a/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc new file mode 100644 index 0000000000..6eed43c2b1 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc @@ -0,0 +1,61 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" + +#include +#include + +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace grappler { + +namespace { +typedef std::unordered_map + RegistrationMap; +RegistrationMap* registered_optimizers = nullptr; +RegistrationMap* GetRegistrationMap() { + if (registered_optimizers == nullptr) + registered_optimizers = new RegistrationMap; + return registered_optimizers; +} +} // namespace + +std::unique_ptr +CustomGraphOptimizerRegistry::CreateByNameOrNull(const string& name) { + const auto it = GetRegistrationMap()->find(name); + if (it == GetRegistrationMap()->end()) return nullptr; + return std::unique_ptr(it->second()); +} + +std::vector CustomGraphOptimizerRegistry::GetRegisteredOptimizers() { + std::vector optimizer_names; + optimizer_names.reserve(GetRegistrationMap()->size()); + for (const auto& opt : *GetRegistrationMap()) + optimizer_names.emplace_back(opt.first); + return optimizer_names; +} + +void CustomGraphOptimizerRegistry::RegisterOptimizerOrDie( + const Creator& optimizer_creator, const string& name) { + const auto it = GetRegistrationMap()->find(name); + if (it != GetRegistrationMap()->end()) { + LOG(FATAL) << "CustomGraphOptimizer is registered twice: " << name; + } + GetRegistrationMap()->insert({name, optimizer_creator}); +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h new file mode 100644 index 0000000000..796da91373 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h @@ -0,0 +1,65 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#ifndef TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_CUSTOM_GRAPH_OPTIMIZER_REGISTRY_H_ +#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_CUSTOM_GRAPH_OPTIMIZER_REGISTRY_H_ + +#include +#include +#include +#include + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +class CustomGraphOptimizerRegistry { + public: + static std::unique_ptr CreateByNameOrNull( + const string& name); + + static std::vector GetRegisteredOptimizers(); + + typedef std::function Creator; + // Register graph optimizer which can be called during program initialization. + // This class is not thread-safe. 
+ static void RegisterOptimizerOrDie(const Creator& optimizer_creator, + const string& name); +}; + +class CustomGraphOptimizerRegistrar { + public: + explicit CustomGraphOptimizerRegistrar( + const CustomGraphOptimizerRegistry::Creator& creator, + const string& name) { + CustomGraphOptimizerRegistry::RegisterOptimizerOrDie(creator, name); + } +}; + +#define REGISTER_GRAPH_OPTIMIZER_AS(MyCustomGraphOptimizerClass, name) \ + namespace { \ + static CustomGraphOptimizerRegistrar \ + MyCustomGraphOptimizerClass##_registrar( \ + []() { return new MyCustomGraphOptimizerClass; }, (name)); \ + } // namespace + +#define REGISTER_GRAPH_OPTIMIZER(MyCustomGraphOptimizerClass) \ + REGISTER_GRAPH_OPTIMIZER_AS(MyCustomGraphOptimizerClass, \ + #MyCustomGraphOptimizerClass) + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_CUSTOM_GRAPH_OPTIMIZER_REGISTRY_H_ diff --git a/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry_test.cc b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry_test.cc new file mode 100644 index 0000000000..629f5e83c1 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry_test.cc @@ -0,0 +1,87 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" + +#include +#include +#include +#include + +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +static const char* kTestOptimizerName = "Test"; + +class TestGraphOptimizer : public CustomGraphOptimizer { + public: + Status Init() override { return Status::OK(); } + string name() const override { return kTestOptimizerName; } + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override { + return Status::OK(); + } + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, double result) override {} +}; + +REGISTER_GRAPH_OPTIMIZER_AS(TestGraphOptimizer, "StaticRegister"); + +TEST(CustomGraphOptimizerRegistryTest, DynamicRegistration) { + std::vector optimizers = + CustomGraphOptimizerRegistry::GetRegisteredOptimizers(); + std::unique_ptr test_optimizer; + ASSERT_EQ( + 0, std::count(optimizers.begin(), optimizers.end(), "DynamicRegister")); + test_optimizer = + CustomGraphOptimizerRegistry::CreateByNameOrNull("DynamicRegister"); + EXPECT_EQ(nullptr, test_optimizer); + CustomGraphOptimizerRegistry::RegisterOptimizerOrDie( + []() { return new TestGraphOptimizer; }, "DynamicRegister"); + optimizers = CustomGraphOptimizerRegistry::GetRegisteredOptimizers(); + ASSERT_EQ( + 1, std::count(optimizers.begin(), optimizers.end(), "DynamicRegister")); + test_optimizer = + CustomGraphOptimizerRegistry::CreateByNameOrNull("DynamicRegister"); + ASSERT_NE(nullptr, test_optimizer); + EXPECT_EQ(kTestOptimizerName, test_optimizer->name()); +} + +TEST(CustomGraphOptimizerRegistryTest, StaticRegistration) { + const std::vector optimizers = + 
CustomGraphOptimizerRegistry::GetRegisteredOptimizers(); + EXPECT_EQ(1, + std::count(optimizers.begin(), optimizers.end(), "StaticRegister")); + std::unique_ptr test_optimizer = + CustomGraphOptimizerRegistry::CreateByNameOrNull("StaticRegister"); + ASSERT_NE(nullptr, test_optimizer); + EXPECT_EQ(kTestOptimizerName, test_optimizer->name()); +} + +TEST(GraphOptimizerRegistryTest, CrashesOnDuplicateRegistration) { + const auto creator = []() { return new TestGraphOptimizer; }; + EXPECT_DEATH(CustomGraphOptimizerRegistry::RegisterOptimizerOrDie( + creator, "StaticRegister"), + "twice"); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index e27b9df620..7ae77207af 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/arithmetic_optimizer.h" #include "tensorflow/core/grappler/optimizers/auto_parallel.h" #include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" @@ -126,14 +127,26 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, new AutoParallel(cfg_.auto_parallel().num_replicas()))); } } else { - std::set available_optimizers = { + const std::set available_optimizers = { "pruning", "constfold", "layout", "memory", "autoparallel", "arithmetic", "dependency", "loop"}; - for (const auto& optimizer : cfg_.optimizers()) { - if (available_optimizers.find(optimizer) != available_optimizers.end()) { - optimizers.push_back(NewOptimizer(optimizer)); + 
std::vector custom_optimizer_names; + for (const auto& optimizer_name : cfg_.optimizers()) { + if (available_optimizers.find(optimizer_name) != + available_optimizers.end()) { + optimizers.push_back(NewOptimizer(optimizer_name)); + } else { + custom_optimizer_names.push_back(optimizer_name); } } + // Now run the custom optimizers. + for (const auto& optimizer_name : custom_optimizer_names) { + std::unique_ptr opt = + CustomGraphOptimizerRegistry::CreateByNameOrNull(optimizer_name); + if (opt == nullptr) continue; + TF_RETURN_IF_ERROR(opt->Init()); + optimizers.push_back(std::move(opt)); + } } if (optimizers.empty()) { diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc new file mode 100644 index 0000000000..536347d834 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/meta_optimizer_test.cc @@ -0,0 +1,77 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/meta_optimizer.h" + +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer.h" +#include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class TestOptimizer : public CustomGraphOptimizer { + public: + static void SetOptimized(const bool flag_value) { optimized_ = flag_value; } + static bool IsOptimized() { return optimized_; } + + TestOptimizer() {} + string name() const override { return "test_optimizer"; } + + Status Init() override { return Status::OK(); } + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override { + optimized_ = true; + *optimized_graph = item.graph; + return Status::OK(); + } + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, double result) override {} + + private: + static bool optimized_; +}; + +bool TestOptimizer::optimized_; + +REGISTER_GRAPH_OPTIMIZER(TestOptimizer); + +TEST(MetaOptimizerTest, RunsCustomOptimizer) { + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, {"CPU:0"}); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + + TestOptimizer::SetOptimized(false); + RewriterConfig rewriter_config; + rewriter_config.add_optimizers("TestOptimizer"); + + MetaOptimizer optimizer(nullptr, rewriter_config); + GraphDef output; + const Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + EXPECT_TRUE(TestOptimizer::IsOptimized()); +} + +} // namespace +} // 
namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index a61eecaa29..504ed5d819 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -87,5 +87,8 @@ message RewriterConfig { // ("autoparallel"). Memory optimization passes ("memory") invoked here are // not configurable (in contrast to memory optimization passes through the // meta-optimizer) and act only on manual op annotations. + // + // Custom registered optimizers will be run after the base optimizers, in + // the order that they are specified. repeated string optimizers = 100; } -- GitLab From 73b14e0c9b9ed70e7b44b5ea95ad2cef9feb7102 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 16:05:57 -0800 Subject: [PATCH 053/884] Add Kumaraswamy Bijector, and let Kumaraswamy distribution depend on it. PiperOrigin-RevId: 186838045 --- tensorflow/contrib/distributions/BUILD | 34 ++++ .../bijectors/kumaraswamy_bijector_test.py | 80 +++++++++ .../python/kernel_tests/kumaraswamy_test.py | 8 +- .../python/ops/bijectors/__init__.py | 2 + .../python/ops/bijectors/kumaraswamy.py | 153 ++++++++++++++++++ .../distributions/python/ops/kumaraswamy.py | 89 ++++------ 6 files changed, 305 insertions(+), 61 deletions(-) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 35dd2ee439..ed79ef70f8 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -251,6 +251,21 @@ cuda_py_test( ], ) +cuda_py_test( + name = "kumaraswamy_test", + srcs = ["python/kernel_tests/kumaraswamy_test.py"], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + 
"//tensorflow/python:client", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:nn_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "moving_stats_test", size = "small", @@ -915,6 +930,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "kumaraswamy_bijector_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/kumaraswamy_bijector_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/contrib/linalg:linalg_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "masked_autoregressive_test", size = "small", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py new file mode 100644 index 0000000000..ad11d9f248 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/kumaraswamy_bijector_test.py @@ -0,0 +1,80 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Kumaraswamy Bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops.bijectors.kumaraswamy import Kumaraswamy +from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite +from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency +from tensorflow.python.platform import test + + +class KumaraswamyBijectorTest(test.TestCase): + """Tests correctness of the Kumaraswamy bijector.""" + + def testBijector(self): + with self.test_session(): + a = 2. + b = 0.3 + bijector = Kumaraswamy( + concentration1=a, concentration0=b, + event_ndims=0, validate_args=True) + self.assertEqual("kumaraswamy", bijector.name) + x = np.array([[[0.1], [0.2], [0.3], [0.4], [0.5]]], dtype=np.float32) + # Kumaraswamy cdf. This is the same as inverse(x). + y = 1. - (1. - x ** a) ** b + self.assertAllClose(y, bijector.inverse(x).eval()) + self.assertAllClose(x, bijector.forward(y).eval()) + kumaraswamy_log_pdf = (np.log(a) + np.log(b) + (a - 1) * np.log(x) + + (b - 1) * np.log1p(-x ** a)) + + self.assertAllClose( + # We should lose a dimension from calculating the determinant of the + # jacobian. + kumaraswamy_log_pdf, + bijector.inverse_log_det_jacobian(x).eval()) + self.assertAllClose( + -bijector.inverse_log_det_jacobian(x).eval(), + bijector.forward_log_det_jacobian(y).eval(), + rtol=1e-4, + atol=0.) + + def testScalarCongruency(self): + with self.test_session(): + assert_scalar_congruency( + Kumaraswamy(concentration1=0.5, concentration0=1.1), + lower_x=0., upper_x=1., n=int(10e3), rtol=0.02) + + def testBijectiveAndFinite(self): + with self.test_session(): + concentration1 = 1.2 + concentration0 = 2. 
+ bijector = Kumaraswamy( + concentration1=concentration1, + concentration0=concentration0, validate_args=True) + # Omitting the endpoints 0 and 1, since ildj will be infinity at these + # endpoints. + y = np.linspace(.01, 0.99, num=10).astype(np.float32) + x = 1 - (1 - y ** concentration1) ** concentration0 + assert_bijective_and_finite(bijector, x, y, rtol=1e-3) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/kumaraswamy_test.py b/tensorflow/contrib/distributions/python/kernel_tests/kumaraswamy_test.py index ea3c86b5c0..2980e2bfe9 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/kumaraswamy_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/kumaraswamy_test.py @@ -130,10 +130,8 @@ class KumaraswamyTest(test.TestCase): dist.prob([.1, .3, .6]).eval() dist.prob([.2, .3, .5]).eval() # Either condition can trigger. - with self.assertRaisesOpError("sample must be positive"): + with self.assertRaisesOpError("sample must be non-negative"): dist.prob([-1., 0.1, 0.5]).eval() - with self.assertRaisesOpError("sample must be positive"): - dist.prob([0., 0.1, 0.5]).eval() with self.assertRaisesOpError("sample must be no larger than `1`"): dist.prob([.1, .2, 1.2]).eval() @@ -249,13 +247,13 @@ class KumaraswamyTest(test.TestCase): a = np.array([1., 2, 3]) b = np.array([2., 4, 1.2]) dist = kumaraswamy_lib.Kumaraswamy(a, b, allow_nan_stats=False) - with self.assertRaisesOpError("Condition x < y.*"): + with self.assertRaisesOpError("Mode undefined for concentration1 <= 1."): dist.mode().eval() a = np.array([2., 2, 3]) b = np.array([1., 4, 1.2]) dist = kumaraswamy_lib.Kumaraswamy(a, b, allow_nan_stats=False) - with self.assertRaisesOpError("Condition x < y.*"): + with self.assertRaisesOpError("Mode undefined for concentration0 <= 1."): dist.mode().eval() def testKumaraswamyModeEnableAllowNanStats(self): diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py 
b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index 93923c3f08..9437f56b1e 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -26,6 +26,7 @@ @@Identity @@Inline @@Invert +@@Kumaraswamy @@MaskedAutoregressiveFlow @@Permute @@PowerTransform @@ -59,6 +60,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.exp import * from tensorflow.contrib.distributions.python.ops.bijectors.gumbel import * from tensorflow.contrib.distributions.python.ops.bijectors.inline import * from tensorflow.contrib.distributions.python.ops.bijectors.invert import * +from tensorflow.contrib.distributions.python.ops.bijectors.kumaraswamy import * from tensorflow.contrib.distributions.python.ops.bijectors.masked_autoregressive import * from tensorflow.contrib.distributions.python.ops.bijectors.permute import * from tensorflow.contrib.distributions.python.ops.bijectors.power_transform import * diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py b/tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py new file mode 100644 index 0000000000..f5de052c9e --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/kumaraswamy.py @@ -0,0 +1,153 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Kumaraswamy bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import bijector + +__all__ = [ + "Kumaraswamy", +] + + +class Kumaraswamy(bijector.Bijector): + """Compute `Y = g(X) = (1 - (1 - X)**(1 / b))**(1 / a), X in [0, 1]`. + + This bijector maps inputs from `[0, 1]` to `[0, 1]`. The inverse of the + bijector applied to a uniform random variable `X ~ U(0, 1)` gives back a + random variable with the [Kumaraswamy distribution]( + https://en.wikipedia.org/wiki/Kumaraswamy_distribution): + + ```none + Y ~ Kumaraswamy(a, b) + pdf(y; a, b, 0 <= y <= 1) = a * b * y ** (a - 1) * (1 - y**a) ** (b - 1) + ``` + """ + + def __init__(self, + concentration1=None, + concentration0=None, + event_ndims=0, + validate_args=False, + name="kumaraswamy"): + """Instantiates the `Kumaraswamy` bijector. + + Args: + concentration1: Python `float` scalar indicating the transform power, + i.e., `Y = g(X) = (1 - (1 - X)**(1 / b))**(1 / a)` where `a` is + `concentration1`. + concentration0: Python `float` scalar indicating the transform power, + i.e., `Y = g(X) = (1 - (1 - X)**(1 / b))**(1 / a)` where `b` is + `concentration0`. + event_ndims: Python scalar indicating the number of dimensions associated + with a particular draw from the distribution. Currently only zero is + supported. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. + + Raises: + ValueError: If `event_ndims` is not zero. 
+ """ + self._graph_parents = [] + self._name = name + self._validate_args = validate_args + + event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") + event_ndims_const = tensor_util.constant_value(event_ndims) + if event_ndims_const is not None and event_ndims_const not in (0,): + raise ValueError("event_ndims(%s) was not 0" % event_ndims_const) + else: + if validate_args: + event_ndims = control_flow_ops.with_dependencies( + [check_ops.assert_equal( + event_ndims, 0, message="event_ndims was not 0")], + event_ndims) + + with self._name_scope("init", values=[concentration1, concentration0]): + concentration1 = self._maybe_assert_valid_concentration( + ops.convert_to_tensor(concentration1, name="concentration1"), + validate_args=validate_args) + concentration0 = self._maybe_assert_valid_concentration( + ops.convert_to_tensor(concentration0, name="concentration0"), + validate_args=validate_args) + + self._concentration1 = concentration1 + self._concentration0 = concentration0 + super(Kumaraswamy, self).__init__( + event_ndims=0, + validate_args=validate_args, + name=name) + + @property + def concentration1(self): + """The `a` in: `Y = g(X) = (1 - (1 - X)**(1 / b))**(1 / a)`.""" + return self._concentration1 + + @property + def concentration0(self): + """The `b` in: `Y = g(X) = (1 - (1 - X)**(1 / b))**(1 / a)`.""" + return self._concentration0 + + def _forward(self, x): + x = self._maybe_assert_valid(x) + return math_ops.exp( + math_ops.log1p(-math_ops.exp(math_ops.log1p(-x) / self.concentration0)) + / self.concentration1) + + def _inverse(self, y): + y = self._maybe_assert_valid(y) + return math_ops.exp(math_ops.log1p( + -(1 - y**self.concentration1)**self.concentration0)) + + def _inverse_log_det_jacobian(self, y): + y = self._maybe_assert_valid(y) + event_dims = self._event_dims_tensor(y) + return math_ops.reduce_sum( + math_ops.log(self.concentration1) + math_ops.log(self.concentration0) + + (self.concentration1 - 1) * math_ops.log(y) + + 
(self.concentration0 - 1) * math_ops.log1p(-y**self.concentration1), + axis=event_dims) + + def _maybe_assert_valid_concentration(self, concentration, validate_args): + """Checks the validity of a concentration parameter.""" + if not validate_args: + return concentration + return control_flow_ops.with_dependencies([ + check_ops.assert_positive( + concentration, + message="Concentration parameter must be positive."), + ], concentration) + + def _maybe_assert_valid(self, x): + if not self.validate_args: + return x + return control_flow_ops.with_dependencies([ + check_ops.assert_non_negative( + x, + message="sample must be non-negative"), + check_ops.assert_less_equal( + x, array_ops.ones([], self.concentration0.dtype), + message="sample must be no larger than `1`."), + ], x) diff --git a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py index 74d5d8773c..120b38db3c 100644 --- a/tensorflow/contrib/distributions/python/ops/kumaraswamy.py +++ b/tensorflow/contrib/distributions/python/ops/kumaraswamy.py @@ -20,15 +20,17 @@ from __future__ import print_function import numpy as np +from tensorflow.contrib.distributions.python.ops import bijectors +from tensorflow.contrib.distributions.python.ops import distribution_util +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops from tensorflow.python.ops import special_math_ops -from tensorflow.python.ops.distributions import beta from tensorflow.python.ops.distributions import distribution -from tensorflow.python.ops.distributions import util as distribution_util +from tensorflow.python.ops.distributions import transformed_distribution +from tensorflow.python.ops.distributions import uniform from tensorflow.python.util.tf_export import 
tf_export __all__ = [ @@ -60,7 +62,7 @@ def _harmonic_number(x): @tf_export("distributions.Kumaraswamy") -class Kumaraswamy(beta.Beta): +class Kumaraswamy(transformed_distribution.TransformedDistribution): """Kumaraswamy distribution. The Kumaraswamy distribution is defined over the `(0, 1)` interval using @@ -151,59 +153,32 @@ class Kumaraswamy(beta.Beta): more of the statistic's batch members are undefined. name: Python `str` name prefixed to Ops created by this class. """ + concentration1 = ops.convert_to_tensor( + concentration1, name="concentration1") + concentration0 = ops.convert_to_tensor( + concentration0, name="concentration0") super(Kumaraswamy, self).__init__( - concentration1=concentration1, - concentration0=concentration0, - validate_args=validate_args, - allow_nan_stats=allow_nan_stats, + distribution=uniform.Uniform( + low=array_ops.zeros([], dtype=concentration1.dtype), + high=array_ops.ones([], dtype=concentration1.dtype), + allow_nan_stats=allow_nan_stats), + bijector=bijectors.Kumaraswamy( + concentration1=concentration1, concentration0=concentration0, + validate_args=validate_args), + batch_shape=distribution_util.get_broadcast_shape( + concentration1, concentration0), name=name) self._reparameterization_type = distribution.FULLY_REPARAMETERIZED - def _sample_n(self, n, seed=None): - expanded_concentration1 = array_ops.ones_like( - self.total_concentration, dtype=self.dtype) * self.concentration1 - expanded_concentration0 = array_ops.ones_like( - self.total_concentration, dtype=self.dtype) * self.concentration0 - shape = array_ops.concat([[n], self.batch_shape_tensor()], 0) - uniform_sample = random_ops.random_uniform( - shape=shape, minval=0.0, maxval=1.0, dtype=self.dtype, seed=seed) - - kumaraswamy_sample = (1 - uniform_sample**(1. / expanded_concentration0))**( - 1. 
/ expanded_concentration1) - return kumaraswamy_sample - - @distribution_util.AppendDocstring(_kumaraswamy_sample_note) - def _log_cdf(self, x): - a = self.concentration1 - b = self.concentration0 - return math_ops.log1p(-(1 - x**a)**b) + @property + def concentration1(self): + """Concentration parameter associated with a `1` outcome.""" + return self.bijector.concentration1 - @distribution_util.AppendDocstring(_kumaraswamy_sample_note) - def _cdf(self, x): - a = self.concentration1 - b = self.concentration0 - return 1 - (1 - x**a)**b - - def _survival_function(self, x): - a = self.concentration1 - b = self.concentration0 - return (1 - x**a)**b - - def _log_survival_function(self, x): - a = self.concentration1 - b = self.concentration0 - return b * math_ops.log1p(-x**a) - - def _log_unnormalized_prob(self, x): - x = self._maybe_assert_valid_sample(x) - a = self.concentration1 - b = self.concentration0 - return (a - 1) * math_ops.log(x) + (b - 1) * math_ops.log1p(-x**a) - - def _log_normalization(self): - a = self.concentration1 - b = self.concentration0 - return -(math_ops.log(a) + math_ops.log(b)) + @property + def concentration0(self): + """Concentration parameter associated with a `0` outcome.""" + return self.bijector.concentration0 def _entropy(self): a = self.concentration1 @@ -213,10 +188,11 @@ class Kumaraswamy(beta.Beta): def _moment(self, n): """Compute the n'th (uncentered) moment.""" + total_concentration = self.concentration1 + self.concentration0 expanded_concentration1 = array_ops.ones_like( - self.total_concentration, dtype=self.dtype) * self.concentration1 + total_concentration, dtype=self.dtype) * self.concentration1 expanded_concentration0 = array_ops.ones_like( - self.total_concentration, dtype=self.dtype) * self.concentration0 + total_concentration, dtype=self.dtype) * self.concentration0 beta_arg0 = 1 + n / expanded_concentration1 beta_arg = array_ops.stack([beta_arg0, expanded_concentration0], -1) log_moment = 
math_ops.log(expanded_concentration0) + special_math_ops.lbeta( @@ -246,13 +222,14 @@ class Kumaraswamy(beta.Beta): name="nan") is_defined = (self.concentration1 > 1.) & (self.concentration0 > 1.) return array_ops.where(is_defined, mode, nan) + return control_flow_ops.with_dependencies([ check_ops.assert_less( - array_ops.ones([], dtype=self.dtype), + array_ops.ones([], dtype=self.concentration1.dtype), self.concentration1, message="Mode undefined for concentration1 <= 1."), check_ops.assert_less( - array_ops.ones([], dtype=self.dtype), + array_ops.ones([], dtype=self.concentration0.dtype), self.concentration0, message="Mode undefined for concentration0 <= 1.") ], mode) -- GitLab From f230f639c53c3e9b54ba4b2c3f7650ba2daae307 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Fri, 23 Feb 2018 16:48:56 -0800 Subject: [PATCH 054/884] Internal change. PiperOrigin-RevId: 186843326 --- tensorflow/contrib/bayesflow/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index d7beb26e1b..08b29fb6bc 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -39,7 +39,7 @@ py_library( cuda_py_test( name = "metropolis_hastings_test", - size = "medium", + size = "large", srcs = ["python/kernel_tests/metropolis_hastings_test.py"], additional_deps = [ ":bayesflow_py", -- GitLab From f3d2c3dc6f32d63309b683a258bd9a3f19004ac2 Mon Sep 17 00:00:00 2001 From: Zhixian Yan Date: Fri, 23 Feb 2018 16:51:24 -0800 Subject: [PATCH 055/884] Internal change. 
PiperOrigin-RevId: 186843632 --- tensorflow/contrib/lite/testing/generated_examples_zip_test.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc index 976363fd44..86606d1239 100644 --- a/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc +++ b/tensorflow/contrib/lite/testing/generated_examples_zip_test.cc @@ -92,6 +92,9 @@ std::map kBrokenTests = { // Transpose only supports 1D-4D input tensors. {R"(^\/transpose.*input_shape=\[.,.,.,.,.\])", "71545879"}, + + // Lstm kernel gets different results on tsan, asan, msan. + {R"(^\/lstmdtype=tf.float32.*)", "73830845"}, }; // Allows test data to be unzipped into a temporary directory and makes -- GitLab From 18bab99ac33f31192d400aebcfb7670a121655bd Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 23 Feb 2018 16:51:50 -0800 Subject: [PATCH 056/884] Registers None gradients for ArgMax PiperOrigin-RevId: 186843686 --- tensorflow/python/ops/math_grad.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index 9d5289f23d..bf28f74153 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -35,6 +35,12 @@ def _safe_shape_div(x, y): return x // math_ops.maximum(y, 1) +@ops.RegisterGradient("ArgMax") +def _ArgMaxGrad(op, grad): + del op, grad + return [None, None] + + @ops.RegisterGradient("Sum") def _SumGrad(op, grad): """Gradient for Sum.""" -- GitLab From 4f983f23e05da691868a1e20c56e900bb4afbadd Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 23 Feb 2018 16:53:21 -0800 Subject: [PATCH 057/884] Checkpointable: allow using Checkpointable objects in a tf.train.Saver() Checkpointable objects in a Saver's var_list will be unpacked into their SaveableObjects, possibly running some Python logic along the way. 
This should help keep the transition from name-based saving smooth: to save either way, just override CheckpointableBase._gather_saveables_for_checkpoint. PiperOrigin-RevId: 186843857 --- tensorflow/python/training/saver.py | 6 ++ tensorflow/python/training/saver_test.py | 88 ++++++++++++++++++++++++ 2 files changed, 94 insertions(+) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 83e848d598..9afd1e6643 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -50,6 +50,7 @@ from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.training import training_util from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState from tensorflow.python.util import compat @@ -577,6 +578,11 @@ class BaseSaverBuilder(object): names_to_saveables[name].append(var) else: names_to_saveables[name] = [var] + elif (isinstance(var, checkpointable.CheckpointableBase) + and not isinstance(var, variables.Variable)): + names_to_saveables.update( + BaseSaverBuilder.OpListToDict( + list(var._gather_saveables_for_checkpoint().values()))) else: if context.in_graph_mode(): if convert_variable_to_tensor: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index c5a6f49df5..f00f98db00 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -66,6 +66,7 @@ from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary import summary from tensorflow.python.training import adam +from tensorflow.python.training import checkpointable from tensorflow.python.training import gradient_descent from tensorflow.python.training import queue_runner_impl from 
tensorflow.python.training import saver as saver_module @@ -2660,5 +2661,92 @@ class ScopedGraphTest(test.TestCase): self.assertEqual(2.0, var_dict2["variable2:0"].eval()) +class _OwnsAVariableSimple(checkpointable.CheckpointableBase): + """A Checkpointable object which can be saved using a tf.train.Saver.""" + + def __init__(self): + self.non_dep_variable = variable_scope.get_variable( + name="non_dep_variable", initializer=6., use_resource=True) + + def _gather_saveables_for_checkpoint(self): + return {checkpointable.VARIABLE_VALUE_KEY: self.non_dep_variable} + + # The Saver sorts by name before parsing, so we need a name property. + @property + def name(self): + return self.non_dep_variable.name + + +class _MirroringSaveable( + saver_module.BaseSaverBuilder.ResourceVariableSaveable): + + def __init__(self, primary_variable, mirrored_variable): + self._primary_variable = primary_variable + self._mirrored_variable = mirrored_variable + super(_MirroringSaveable, self).__init__( + self._primary_variable, "", self._primary_variable.name) + + def restore(self, restored_tensors, restored_shapes): + """Restore the same value into both variables.""" + tensor, = restored_tensors + return control_flow_ops.group( + self._primary_variable.assign(tensor), + self._mirrored_variable.assign(tensor)) + + +class _OwnsMirroredVariables(checkpointable.CheckpointableBase): + """A Checkpointable object which returns a more complex SaveableObject.""" + + def __init__(self): + self.non_dep_variable = variable_scope.get_variable( + name="non_dep_variable", initializer=6., use_resource=True) + self.mirrored = variable_scope.get_variable( + name="mirrored", initializer=15., use_resource=True) + + def _gather_saveables_for_checkpoint(self): + saveable = _MirroringSaveable( + primary_variable=self.non_dep_variable, + mirrored_variable=self.mirrored) + return {checkpointable.VARIABLE_VALUE_KEY: saveable} + + # The Saver sorts by name before parsing, so we need a name property. 
+ @property + def name(self): + return self.non_dep_variable.name + + +@test_util.with_c_api +class CheckpointableCompatibilityTests(test.TestCase): + + # TODO(allenl): Track down python3 reference cycles in these tests. + @test_util.run_in_graph_and_eager_modes() + def testNotSaveableButIsCheckpointable(self): + v = _OwnsAVariableSimple() + saver = saver_module.Saver(var_list=[v]) + test_dir = self.get_temp_dir() + prefix = os.path.join(test_dir, "ckpt") + self.evaluate(v.non_dep_variable.assign(42.)) + with self.test_session() as sess: + save_path = saver.save(sess, prefix) + self.evaluate(v.non_dep_variable.assign(43.)) + saver.restore(sess, save_path) + self.assertEqual(42., self.evaluate(v.non_dep_variable)) + + @test_util.run_in_graph_and_eager_modes() + def testMoreComplexSaveableReturned(self): + v = _OwnsMirroredVariables() + saver = saver_module.Saver(var_list=[v]) + test_dir = self.get_temp_dir() + prefix = os.path.join(test_dir, "ckpt") + self.evaluate(v.non_dep_variable.assign(42.)) + with self.test_session() as sess: + save_path = saver.save(sess, prefix) + self.evaluate(v.non_dep_variable.assign(43.)) + self.evaluate(v.mirrored.assign(44.)) + saver.restore(sess, save_path) + self.assertEqual(42., self.evaluate(v.non_dep_variable)) + self.assertEqual(42., self.evaluate(v.mirrored)) + + if __name__ == "__main__": test.main() -- GitLab From ce4ae5bed9b47f49b085d9d8287cee2fcc5d42ac Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 23 Feb 2018 16:59:01 -0800 Subject: [PATCH 058/884] Checkpointable: compatibility mode with name-based saving Allows loading a name-based checkpoint using the object-based API. When graph building it's quite seamless. There's no restore-on-create for eager, so it would require program changes to do much useful there (i.e. is not seamless). 
Adds several tests for checkpoint compatibility (name->object in eager/graph, and eager->graph/graph->eager for object-based saving) PiperOrigin-RevId: 186844431 --- .../eager/python/checkpointable_utils.py | 98 +++++++++++++--- .../eager/python/checkpointable_utils_test.py | 110 ++++++++++++++++++ 2 files changed, 192 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index e26ecc774a..e57093bdbc 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -27,6 +27,7 @@ from tensorflow.python.client import session as session_lib from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import control_flow_ops @@ -38,6 +39,7 @@ from tensorflow.python.training import checkpointable as core_checkpointable from tensorflow.python.training import checkpointable_utils as core_checkpointable_utils from tensorflow.python.training import optimizer as optimizer_lib from tensorflow.python.training import saver as saver_lib +from tensorflow.python.util import deprecation _ESCAPE_CHAR = "." # For avoiding conflicts with user-specified names. @@ -464,6 +466,41 @@ class InitializationOnlyStatus(_LoadStatus): session.run(gather_initializers(self._root_checkpointable)) +_DEPRECATED_RESTORE_INSTRUCTIONS = ( + "Restoring a name-based tf.train.Saver checkpoint using the object-based " + "restore API. This mode uses global names to match variables, and so is " + "somewhat fragile. It also adds new restore ops to the graph each time it " + "is called. 
Prefer re-encoding training checkpoints in the object-based " + "format: run save() on the object-based saver (the same one this message " + "is coming from) and use that checkpoint in the future.") + + +class NameBasedSaverStatus(_LoadStatus): + """Status for loading a name-based training checkpoint.""" + + def __init__(self, object_saver, save_path): + self._object_saver = object_saver + self._save_path = save_path + + def assert_consumed(self): + """Assertion for consistency with `CheckpointLoadStatus`. Always fails.""" + raise AssertionError( + "Restoring a name-based checkpoint. No load status is available.") + + @deprecation.deprecated( + date=None, instructions=_DEPRECATED_RESTORE_INSTRUCTIONS) + def run_restore_ops(self, session=None): + """Load the name-based training checkpoint using a new `tf.train.Saver`.""" + if session is None and context.in_graph_mode(): + session = ops.get_default_session() + saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access + sess=session, save_path=self._save_path) + + def initialize_or_restore(self, session=None): + """Alias for `run_restore_ops`.""" + self.run_restore_ops(session=session) + + class _SessionWithFeedDictAdditions(session_lib.SessionInterface): """Pretends to be a session, inserts extra feeds on run().""" @@ -544,7 +581,7 @@ class Saver(object): Args: file_prefix: A prefix to use for the checkpoint filenames (/path/to/directory/and_a_prefix). Names are generated based on this - prefix and the global step, if provided. + prefix and `checkpoint_number`, if provided. checkpoint_number: An integer variable or Tensor, used to number checkpoints. 
Typically this value is saved along with other variables in training checkpoints, which will happen automatically if it was created @@ -598,6 +635,17 @@ class Saver(object): global_step=checkpoint_number) return save_path + def _global_variable_names(self): + """Generate a `tf.train.Saver`-style `var_list` using `variable.name`s.""" + named_saveables, graph_proto = _serialize_object_graph( + self._root_checkpointable) + saver_names = {} + for object_proto in graph_proto.nodes: + for attribute_proto in object_proto.attributes: + saver_names[attribute_proto.full_name] = named_saveables[ + attribute_proto.checkpoint_key] + return saver_names + def restore(self, save_path, session=None): """Restore a training checkpoint. @@ -633,11 +681,20 @@ class Saver(object): If the checkpoint has not been consumed completely, then the list of restore ops will grow as more objects are added to the dependency graph. + Name-based `tf.train.Saver` checkpoints can be loaded using this + method. There is no deferred loading, and names are used to match + variables. No restore ops are created/run until `run_restore_ops()` or + `initialize_or_restore()` are called on the returned status object, even + when executing eagerly. Re-encode name-based checkpoints using this + object-based `Saver.save` as soon as possible. + Args: save_path: The path to the checkpoint, as returned by `save` or `tf.train.latest_checkpoint`. If None (as when there is no latest checkpoint for `tf.train.latest_checkpoint` to return), returns an - object which may run initializers for objects in the dependency graph. + object which may run initializers for objects in the dependency + graph. If the checkpoint was written by the name-based `tf.train.Saver`, + names are used to match variables. session: The session to retrieve metadata with. Ignored when executing eagerly. If not provided when graph building, the default session is used. 
@@ -647,6 +704,9 @@ class Saver(object): status of checkpoint restoration and run initialization/restore ops (of type `CheckpointLoadStatus`, or `InitializationOnlyStatus` if `save_path` is `None`). + + If `save_path` points to a name-based checkpoint, a `NameBasedSaverStatus` + object is returned which runs restore ops from a name-based saver. """ if save_path is None: return InitializationOnlyStatus(self._root_checkpointable) @@ -660,21 +720,27 @@ class Saver(object): session = None file_prefix_tensor = constant_op.constant(save_path) file_prefix_feed_dict = None - if not in_graph_mode or self._object_graph_restore_tensor is None: - object_graph_string, = io_ops.restore_v2( - prefix=file_prefix_tensor, - tensor_names=[_OBJECT_GRAPH_PROTO_KEY], - shape_and_slices=[""], - dtypes=[dtypes.string], - name="object_graph_proto_read") + try: + if not in_graph_mode or self._object_graph_restore_tensor is None: + object_graph_string, = io_ops.restore_v2( + prefix=file_prefix_tensor, + tensor_names=[_OBJECT_GRAPH_PROTO_KEY], + shape_and_slices=[""], + dtypes=[dtypes.string], + name="object_graph_proto_read") + if in_graph_mode: + self._object_graph_restore_tensor = object_graph_string if in_graph_mode: - self._object_graph_restore_tensor = object_graph_string - if in_graph_mode: - object_graph_string = session.run( - self._object_graph_restore_tensor, - feed_dict=file_prefix_feed_dict) - else: - object_graph_string = object_graph_string.numpy() + object_graph_string = session.run( + self._object_graph_restore_tensor, + feed_dict=file_prefix_feed_dict) + else: + object_graph_string = object_graph_string.numpy() + except errors_impl.NotFoundError: + # The object graph proto does not exist in this checkpoint. Try again with + # name-based saving. 
+ return NameBasedSaverStatus(self, save_path) + object_graph_proto = ( checkpointable_object_graph_pb2.CheckpointableObjectGraph()) object_graph_proto.ParseFromString(object_graph_string) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 6b86d41bdb..3d6a200276 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -899,5 +899,115 @@ class CheckpointingTests(test.TestCase): saver.restore(save_path) self.assertEqual(before_ops, graph.get_operations()) + +class CheckpointCompatibilityTests(test.TestCase): + + def _initialized_model(self): + input_value = constant_op.constant([[3.]]) + network = MyNetwork() + optimizer = CheckpointableAdam(0.001) + optimizer_step = training_util.get_or_create_global_step() + root_checkpointable = Checkpoint( + optimizer=optimizer, network=network, optimizer_step=optimizer_step) + train_op = optimizer.minimize( + functools.partial(network, input_value), + global_step=optimizer_step) + self.evaluate(checkpointable_utils.gather_initializers( + root_checkpointable)) + self.evaluate(train_op) + # A regular variable, a slot variable, and a non-slot Optimizer variable + # with known values to check when loading. 
+ self.evaluate(network._named_dense.bias.assign([1.])) + self.evaluate(optimizer.get_slot( + var=network._named_dense.bias, name="m").assign([2.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.)) + return root_checkpointable + + def _set_sentinels(self, root_checkpointable): + self.evaluate(root_checkpointable.network._named_dense.bias.assign([101.])) + self.evaluate( + root_checkpointable.optimizer.get_slot( + var=root_checkpointable.network._named_dense.bias, name="m") + .assign([102.])) + beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(103.)) + + def _check_sentinels(self, root_checkpointable): + self.assertAllEqual( + [1.], self.evaluate(root_checkpointable.network._named_dense.bias)) + self.assertAllEqual([2.], self.evaluate( + root_checkpointable.optimizer.get_slot( + var=root_checkpointable.network._named_dense.bias, name="m"))) + beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() + self.assertAllEqual(3., self.evaluate(beta1_power)) + + def _write_name_based_checkpoint(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session( + graph=save_graph) as session: + root = self._initialized_model() + name_saver = core_saver.Saver() + return name_saver.save( + sess=session, save_path=checkpoint_prefix, + global_step=root.optimizer_step) + + @test_util.run_in_graph_and_eager_modes() + def testLoadFromNameBasedSaver(self): + """Save a name-based checkpoint, load it using the object-based API.""" + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = checkpointable_utils.Saver(root) + status = object_saver.restore(save_path) + with 
self.assertRaises(AssertionError): + status.assert_consumed() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status.initialize_or_restore() + self._check_sentinels(root) + + # TODO(allenl): Test for the core name-based saver loading object-based + # checkpoints once object-based checkpointing is in core. + + def testSaveGraphLoadEager(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.graph_mode(): + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session( + graph=save_graph) as session: + root = self._initialized_model() + object_saver = checkpointable_utils.Saver(root) + save_path = object_saver.save( + session=session, file_prefix=checkpoint_prefix) + with context.eager_mode(): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed() + self._check_sentinels(root) + + def testSaveEagerLoadGraph(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + with context.eager_mode(): + root = self._initialized_model() + object_saver = checkpointable_utils.Saver(root) + save_path = object_saver.save(file_prefix=checkpoint_prefix) + with context.graph_mode(): + save_graph = ops.Graph() + with save_graph.as_default(), self.test_session( + graph=save_graph): + root = self._initialized_model() + self._set_sentinels(root) + root.restore(save_path).assert_consumed().run_restore_ops() + self._check_sentinels(root) + if __name__ == "__main__": test.main() -- GitLab From b1cc57604cadb4251efeb764074c9138d4e24521 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 23 Feb 2018 17:19:00 -0800 Subject: [PATCH 059/884] Dropped from previous change. 
PiperOrigin-RevId: 186846681 --- tensorflow/c/eager/c_api.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index f615e3f11d..c27a7129fa 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -303,11 +303,9 @@ void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) { void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { // Questionable heuristic ... - // - // Motivation: After an 'op' is placed on GPU because some of its earlier - // inputs are on GPU, we want to keep the 'op' there, even if some later - // inputs of it are not on GPU. - if (IsCPU(op->device) && !IsCPU(h->d)) { + // - If a device was explicitly set on the op, always use that. + // - If not, place on the first non-host device seen. + if (op->device == nullptr && !IsCPU(h->d)) { op->device = h->d; } if (!status->status.ok()) return; -- GitLab From 44bec5d15f656d054df5c61e3eb70d5fbe8bb77a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 17:22:37 -0800 Subject: [PATCH 060/884] Add another utility that captures a function's namespace as a mapping from symbol names to actual values. Update getmethodclass with a hopefully more robust method. 
PiperOrigin-RevId: 186847003 --- .../contrib/py2tf/pyct/inspect_utils.py | 128 +++++++++++++----- .../contrib/py2tf/pyct/inspect_utils_test.py | 91 +++++++++---- 2 files changed, 158 insertions(+), 61 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/inspect_utils.py b/tensorflow/contrib/py2tf/pyct/inspect_utils.py index 86cf52afd5..c1af95e2ab 100644 --- a/tensorflow/contrib/py2tf/pyct/inspect_utils.py +++ b/tensorflow/contrib/py2tf/pyct/inspect_utils.py @@ -21,22 +21,53 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import itertools + import six from tensorflow.python.util import tf_inspect +def getnamespace(f): + """Returns the complete namespace of a function. + + Namespace is defined here as the mapping of all non-local variables to values. + This includes the globals and the closure variables. Note that this captures + the entire globals collection of the function, and may contain extra symbols + that it does not actually use. + + Args: + f: User defined function. + Returns: + A dict mapping symbol names to values. + """ + namespace = dict(six.get_function_globals(f)) + closure = six.get_function_closure(f) + freevars = six.get_function_code(f).co_freevars + if freevars and closure: + for name, cell in zip(freevars, closure): + namespace[name] = cell.cell_contents + return namespace + + def getcallargs(c, *args, **kwargs): """Extension of getcallargs to non-function callables.""" - if tf_inspect.isfunction(c): + if tf_inspect.isfunction(c) or tf_inspect.ismethod(c): # The traditional getcallargs return tf_inspect.getcallargs(c, *args, **kwargs) if tf_inspect.isclass(c): - # Constructors: pass a fake None for self, then remove it. - arg_map = tf_inspect.getcallargs(c.__init__, None, *args, **kwargs) - assert 'self' in arg_map, 'no "self" argument, is this not a constructor?' - del arg_map['self'] + # Constructors: use a sentinel to remove the self argument. 
+ self_sentinel = object() + arg_map = tf_inspect.getcallargs( + c.__init__, self_sentinel, *args, **kwargs) + # Find and remove the self arg. We cannot assume it's called 'self'. + self_arg_name = None + for name, value in arg_map.items(): + if value is self_sentinel: + self_arg_name = name + break + del arg_map[self_arg_name] return arg_map if hasattr(c, '__call__'): @@ -46,8 +77,29 @@ def getcallargs(c, *args, **kwargs): raise NotImplementedError('unknown callable "%s"' % type(c)) -def getmethodclass(m, namespace): - """Resolves a function's owner, e.g. a method's class.""" +def getmethodclass(m): + """Resolves a function's owner, e.g. a method's class. + + Note that this returns the object that the function was retrieved from, not + necessarily the class where it was defined. + + This function relies on Python stack frame support in the interpreter, and + has the same limitations that inspect.currentframe. + + Limitations. This function will only work correctly if the owned class is + visible in the caller's global or local variables. + + Args: + m: A user defined function + + Returns: + The class that this function was retrieved from, or None if the function + is not an object or class method, or the class that owns the object or + method is not visible to m. + + Raises: + ValueError: if the class could not be resolved for any unexpected reason. + """ # Instance method and class methods: should be bound to a non-null "self". # If self is a class, then it's a class method. @@ -57,34 +109,38 @@ def getmethodclass(m, namespace): return m.__self__ return type(m.__self__) - # Class and static methods: platform specific. - if hasattr(m, 'im_class'): # Python 2 - return m.im_class - - if hasattr(m, '__qualname__'): # Python 3 - qn = m.__qualname__.split('.') - if len(qn) < 2: - return None - owner_name, func_name = qn[-2:] - assert func_name == m.__name__, ( - 'inconsistent names detected ' - '(__qualname__[1] = "%s", __name__ = "%s") for %s.' 
% (func_name, - m.__name__, m)) - if owner_name == '': - return None - if owner_name not in namespace: - raise ValueError( - 'Could not resolve name "%s" while analyzing %s. Namespace:\n%s' % - (owner_name, m, namespace)) - return namespace[owner_name] - - if six.PY2: - # In Python 2 it's impossible, to our knowledge, to detect the class of a - # static function. So we're forced to walk all the objects in the - # namespace and see if they own it. If any reader finds a better solution, - # please let us know. - for _, v in namespace.items(): - if hasattr(v, m.__name__) and getattr(v, m.__name__) is m: - return v + # Class, static and unbound methods: search all defined classes in any + # namespace. This is inefficient but more robust method. + owners = [] + caller_frame = tf_inspect.currentframe().f_back + try: + # TODO(mdan): This doesn't consider cell variables. + # TODO(mdan): This won't work if the owner is hidden inside a container. + # Cell variables may be pulled using co_freevars and the closure. + for v in itertools.chain(caller_frame.f_locals.values(), + caller_frame.f_globals.values()): + if hasattr(v, m.__name__): + candidate = getattr(v, m.__name__) + # Py2 methods may be bound or unbound, extract im_func to get the + # underlying function. + if hasattr(candidate, 'im_func'): + candidate = candidate.im_func + if hasattr(m, 'im_func'): + m = m.im_func + if candidate is m: + owners.append(v) + finally: + del caller_frame + + if owners: + if len(owners) == 1: + return owners[0] + + # If multiple owners are found, and are not subclasses, raise an error. 
+ owner_types = tuple(o if tf_inspect.isclass(o) else type(o) for o in owners) + for o in owner_types: + if tf_inspect.isclass(o) and issubclass(o, tuple(owner_types)): + return o + raise ValueError('Found too many owners of %s: %s' % (m, owners)) return None diff --git a/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py b/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py index 5d92e75b18..d96c3df547 100644 --- a/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py +++ b/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py @@ -20,6 +20,8 @@ from __future__ import print_function from functools import wraps +import six + from tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.python.platform import test @@ -76,6 +78,10 @@ def free_function(): pass +def factory(): + return free_function + + def free_factory(): def local_function(): pass @@ -84,6 +90,43 @@ def free_factory(): class InspectUtilsTest(test.TestCase): + def test_getnamespace_globals(self): + ns = inspect_utils.getnamespace(factory) + self.assertEqual(ns['free_function'], free_function) + + def test_getnamespace_hermetic(self): + + # Intentionally hiding the global function to make sure we don't overwrite + # it in the global namespace. 
+ free_function = object() # pylint:disable=redefined-outer-name + + def test_fn(): + return free_function + + ns = inspect_utils.getnamespace(test_fn) + globs = six.get_function_globals(test_fn) + self.assertTrue(ns['free_function'] is free_function) + self.assertFalse(globs['free_function'] is free_function) + + def test_getnamespace_locals(self): + + def called_fn(): + return 0 + + closed_over_list = [] + closed_over_primitive = 1 + + def local_fn(): + closed_over_list.append(1) + local_var = 1 + return called_fn() + local_var + closed_over_primitive + + ns = inspect_utils.getnamespace(local_fn) + self.assertEqual(ns['called_fn'], called_fn) + self.assertEqual(ns['closed_over_list'], closed_over_list) + self.assertEqual(ns['closed_over_primitive'], closed_over_primitive) + self.assertTrue('local_var' not in ns) + def test_getcallargs_constructor(self): class TestSuperclass(object): @@ -123,48 +166,47 @@ class InspectUtilsTest(test.TestCase): def test_getmethodclass(self): self.assertEqual( - inspect_utils.getmethodclass(free_function, {}), None) + inspect_utils.getmethodclass(free_function), None) self.assertEqual( - inspect_utils.getmethodclass(free_factory(), {}), None) + inspect_utils.getmethodclass(free_factory()), None) - ns = {'TestClass': TestClass} self.assertEqual( - inspect_utils.getmethodclass(TestClass.member_function, ns), + inspect_utils.getmethodclass(TestClass.member_function), TestClass) self.assertEqual( - inspect_utils.getmethodclass(TestClass.decorated_member, ns), + inspect_utils.getmethodclass(TestClass.decorated_member), TestClass) self.assertEqual( - inspect_utils.getmethodclass(TestClass.fn_decorated_member, ns), + inspect_utils.getmethodclass(TestClass.fn_decorated_member), TestClass) self.assertEqual( - inspect_utils.getmethodclass(TestClass.wrap_decorated_member, ns), + inspect_utils.getmethodclass(TestClass.wrap_decorated_member), TestClass) self.assertEqual( - inspect_utils.getmethodclass(TestClass.static_method, ns), + 
inspect_utils.getmethodclass(TestClass.static_method), TestClass) self.assertEqual( - inspect_utils.getmethodclass(TestClass.class_method, ns), + inspect_utils.getmethodclass(TestClass.class_method), TestClass) test_obj = TestClass() self.assertEqual( - inspect_utils.getmethodclass(test_obj.member_function, ns), + inspect_utils.getmethodclass(test_obj.member_function), TestClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.decorated_member, ns), + inspect_utils.getmethodclass(test_obj.decorated_member), TestClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.fn_decorated_member, ns), + inspect_utils.getmethodclass(test_obj.fn_decorated_member), TestClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.wrap_decorated_member, ns), + inspect_utils.getmethodclass(test_obj.wrap_decorated_member), TestClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.static_method, ns), + inspect_utils.getmethodclass(test_obj.static_method), TestClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.class_method, ns), + inspect_utils.getmethodclass(test_obj.class_method), TestClass) def test_getmethodclass_locals(self): @@ -190,34 +232,33 @@ class InspectUtilsTest(test.TestCase): pass self.assertEqual( - inspect_utils.getmethodclass(local_function, {}), None) + inspect_utils.getmethodclass(local_function), None) - ns = {'LocalClass': LocalClass} self.assertEqual( - inspect_utils.getmethodclass(LocalClass.member_function, ns), + inspect_utils.getmethodclass(LocalClass.member_function), LocalClass) self.assertEqual( - inspect_utils.getmethodclass(LocalClass.decorated_member, ns), + inspect_utils.getmethodclass(LocalClass.decorated_member), LocalClass) self.assertEqual( - inspect_utils.getmethodclass(LocalClass.fn_decorated_member, ns), + inspect_utils.getmethodclass(LocalClass.fn_decorated_member), LocalClass) self.assertEqual( - inspect_utils.getmethodclass(LocalClass.wrap_decorated_member, ns), + 
inspect_utils.getmethodclass(LocalClass.wrap_decorated_member), LocalClass) test_obj = LocalClass() self.assertEqual( - inspect_utils.getmethodclass(test_obj.member_function, ns), + inspect_utils.getmethodclass(test_obj.member_function), LocalClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.decorated_member, ns), + inspect_utils.getmethodclass(test_obj.decorated_member), LocalClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.fn_decorated_member, ns), + inspect_utils.getmethodclass(test_obj.fn_decorated_member), LocalClass) self.assertEqual( - inspect_utils.getmethodclass(test_obj.wrap_decorated_member, ns), + inspect_utils.getmethodclass(test_obj.wrap_decorated_member), LocalClass) -- GitLab From ca8cb9e928b622d202008c12046a4fb0b7ba9c09 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 23 Feb 2018 17:32:14 -0800 Subject: [PATCH 061/884] Refactor Keras engine by splitting it into short, specialized files. The purpose of this change is to make the codebase more maintainable and readable. Before: engine/topology.py models.py After: engine/base_layer.py engine/input_layer.py engine/network.py engine/sequential.py engine/saving.py This is a large change but it only moves code around with no change in logic or API. New files are all under 1000 lines of logic (network.py is 1500 lines, but under 1000 if you remove imports and docstrings), and often under 500. 
PiperOrigin-RevId: 186847895 --- tensorflow/python/keras/BUILD | 32 +- .../_impl/keras/applications/densenet.py | 2 +- .../keras/applications/inception_resnet_v2.py | 2 +- .../_impl/keras/applications/inception_v3.py | 2 +- .../_impl/keras/applications/mobilenet.py | 4 +- .../keras/_impl/keras/applications/nasnet.py | 2 +- .../_impl/keras/applications/resnet50.py | 2 +- .../keras/_impl/keras/applications/vgg16.py | 2 +- .../keras/_impl/keras/applications/vgg19.py | 2 +- .../_impl/keras/applications/xception.py | 2 +- .../keras/_impl/keras/engine/__init__.py | 15 +- .../keras/_impl/keras/engine/base_layer.py | 504 +++++++ .../keras/_impl/keras/engine/input_layer.py | 230 +++ .../keras/engine/{topology.py => network.py} | 1059 +------------ .../python/keras/_impl/keras/engine/saving.py | 671 +++++++++ .../keras/_impl/keras/engine/saving_test.py | 375 +++++ .../keras/_impl/keras/engine/sequential.py | 997 +++++++++++++ .../_impl/keras/engine/sequential_test.py | 152 ++ .../keras/_impl/keras/engine/topology_test.py | 169 +-- .../keras/_impl/keras/engine/training.py | 4 +- .../keras/layers/advanced_activations.py | 2 +- .../keras/layers/convolutional_recurrent.py | 2 +- .../keras/_impl/keras/layers/embeddings.py | 2 +- .../python/keras/_impl/keras/layers/local.py | 2 +- .../python/keras/_impl/keras/layers/merge.py | 4 +- .../python/keras/_impl/keras/layers/noise.py | 2 +- .../keras/_impl/keras/layers/recurrent.py | 2 +- .../keras/_impl/keras/layers/wrappers.py | 2 +- tensorflow/python/keras/_impl/keras/models.py | 1325 +---------------- .../python/keras/_impl/keras/models_test.py | 348 +---- .../keras/_impl/keras/utils/generic_utils.py | 17 + .../api/golden/tensorflow.keras.-model.pbtxt | 4 +- .../golden/tensorflow.keras.-sequential.pbtxt | 6 +- .../tensorflow.keras.layers.-activation.pbtxt | 2 +- ...eras.layers.-activity-regularization.pbtxt | 2 +- .../golden/tensorflow.keras.layers.-add.pbtxt | 2 +- ...nsorflow.keras.layers.-alpha-dropout.pbtxt | 2 +- 
...low.keras.layers.-average-pooling1-d.pbtxt | 2 +- ...low.keras.layers.-average-pooling2-d.pbtxt | 2 +- ...low.keras.layers.-average-pooling3-d.pbtxt | 2 +- .../tensorflow.keras.layers.-average.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-avg-pool3-d.pbtxt | 2 +- ...ow.keras.layers.-batch-normalization.pbtxt | 2 +- ...nsorflow.keras.layers.-bidirectional.pbtxt | 2 +- ...tensorflow.keras.layers.-concatenate.pbtxt | 2 +- ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt | 2 +- .../tensorflow.keras.layers.-conv1-d.pbtxt | 2 +- ...flow.keras.layers.-conv2-d-transpose.pbtxt | 2 +- .../tensorflow.keras.layers.-conv2-d.pbtxt | 2 +- ...flow.keras.layers.-conv3-d-transpose.pbtxt | 2 +- .../tensorflow.keras.layers.-conv3-d.pbtxt | 2 +- ...sorflow.keras.layers.-convolution1-d.pbtxt | 2 +- ...ras.layers.-convolution2-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution2-d.pbtxt | 2 +- ...ras.layers.-convolution3-d-transpose.pbtxt | 2 +- ...sorflow.keras.layers.-convolution3-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-cropping3-d.pbtxt | 2 +- .../tensorflow.keras.layers.-dense.pbtxt | 2 +- .../golden/tensorflow.keras.layers.-dot.pbtxt | 2 +- .../tensorflow.keras.layers.-dropout.pbtxt | 2 +- .../tensorflow.keras.layers.-e-l-u.pbtxt | 2 +- .../tensorflow.keras.layers.-embedding.pbtxt | 2 +- .../tensorflow.keras.layers.-flatten.pbtxt | 2 +- .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 2 +- .../tensorflow.keras.layers.-g-r-u.pbtxt | 2 +- ...rflow.keras.layers.-gaussian-dropout.pbtxt | 2 +- ...sorflow.keras.layers.-gaussian-noise.pbtxt | 2 +- ...as.layers.-global-average-pooling1-d.pbtxt | 2 +- ...as.layers.-global-average-pooling2-d.pbtxt | 2 +- ...as.layers.-global-average-pooling3-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool1-d.pbtxt | 2 +- 
...low.keras.layers.-global-avg-pool2-d.pbtxt | 2 +- ...low.keras.layers.-global-avg-pool3-d.pbtxt | 2 +- ...low.keras.layers.-global-max-pool1-d.pbtxt | 2 +- ...low.keras.layers.-global-max-pool2-d.pbtxt | 2 +- ...low.keras.layers.-global-max-pool3-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling1-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling2-d.pbtxt | 2 +- ....keras.layers.-global-max-pooling3-d.pbtxt | 2 +- ...tensorflow.keras.layers.-input-layer.pbtxt | 4 +- ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 2 +- .../tensorflow.keras.layers.-l-s-t-m.pbtxt | 2 +- .../tensorflow.keras.layers.-lambda.pbtxt | 2 +- .../tensorflow.keras.layers.-layer.pbtxt | 2 +- ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt | 2 +- ...w.keras.layers.-locally-connected1-d.pbtxt | 2 +- ...w.keras.layers.-locally-connected2-d.pbtxt | 2 +- .../tensorflow.keras.layers.-masking.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool2-d.pbtxt | 2 +- ...tensorflow.keras.layers.-max-pool3-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling2-d.pbtxt | 2 +- ...sorflow.keras.layers.-max-pooling3-d.pbtxt | 2 +- .../tensorflow.keras.layers.-maximum.pbtxt | 2 +- .../tensorflow.keras.layers.-multiply.pbtxt | 2 +- .../tensorflow.keras.layers.-p-re-l-u.pbtxt | 2 +- .../tensorflow.keras.layers.-permute.pbtxt | 2 +- .../tensorflow.keras.layers.-r-n-n.pbtxt | 2 +- ...nsorflow.keras.layers.-repeat-vector.pbtxt | 2 +- .../tensorflow.keras.layers.-reshape.pbtxt | 2 +- ...flow.keras.layers.-separable-conv1-d.pbtxt | 2 +- ...flow.keras.layers.-separable-conv2-d.pbtxt | 2 +- ...ras.layers.-separable-convolution1-d.pbtxt | 2 +- ...ras.layers.-separable-convolution2-d.pbtxt | 2 +- ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 2 +- ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 2 +- .../tensorflow.keras.layers.-softmax.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout1-d.pbtxt | 2 +- 
...low.keras.layers.-spatial-dropout2-d.pbtxt | 2 +- ...low.keras.layers.-spatial-dropout3-d.pbtxt | 2 +- ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 2 +- ...low.keras.layers.-thresholded-re-l-u.pbtxt | 2 +- ...rflow.keras.layers.-time-distributed.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling1-d.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 2 +- ...sorflow.keras.layers.-up-sampling3-d.pbtxt | 2 +- .../tensorflow.keras.layers.-wrapper.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding1-d.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding2-d.pbtxt | 2 +- ...orflow.keras.layers.-zero-padding3-d.pbtxt | 2 +- .../tensorflow.keras.models.-model.pbtxt | 4 +- .../tensorflow.keras.models.-sequential.pbtxt | 6 +- 127 files changed, 3162 insertions(+), 2980 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/engine/base_layer.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/input_layer.py rename tensorflow/python/keras/_impl/keras/engine/{topology.py => network.py} (59%) create mode 100644 tensorflow/python/keras/_impl/keras/engine/saving.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/saving_test.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/sequential.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/sequential_test.py diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index 16738066ce..a98d08f928 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -39,7 +39,11 @@ py_library( "_impl/keras/datasets/mnist.py", "_impl/keras/datasets/reuters.py", "_impl/keras/engine/__init__.py", - "_impl/keras/engine/topology.py", + "_impl/keras/engine/base_layer.py", + "_impl/keras/engine/input_layer.py", + "_impl/keras/engine/network.py", + "_impl/keras/engine/saving.py", + "_impl/keras/engine/sequential.py", "_impl/keras/engine/training.py", "_impl/keras/engine/training_eager.py", "_impl/keras/estimator.py", @@ -761,9 +765,31 @@ 
py_test( srcs_version = "PY2AND3", deps = [ ":keras", - "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", + "//third_party/py/numpy", + ], +) + +py_test( + name = "saving_test", + size = "small", + srcs = ["_impl/keras/engine/saving_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":keras", + "//tensorflow/python:client_testlib", + "//third_party/py/numpy", + ], +) + +py_test( + name = "sequential_test", + size = "small", + srcs = ["_impl/keras/engine/sequential_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":keras", + "//tensorflow/python:client_testlib", "//third_party/py/numpy", ], ) diff --git a/tensorflow/python/keras/_impl/keras/applications/densenet.py b/tensorflow/python/keras/_impl/keras/applications/densenet.py index 6521f84104..ca83e86912 100644 --- a/tensorflow/python/keras/_impl/keras/applications/densenet.py +++ b/tensorflow/python/keras/_impl/keras/applications/densenet.py @@ -31,7 +31,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.applications import imagenet_utils from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import AveragePooling2D from tensorflow.python.keras._impl.keras.layers import BatchNormalization diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py index bf3901fc54..17e407dd58 100644 --- a/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py +++ 
b/tensorflow/python/keras/_impl/keras/applications/inception_resnet_v2.py @@ -31,7 +31,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.applications import imagenet_utils from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import AveragePooling2D from tensorflow.python.keras._impl.keras.layers import BatchNormalization diff --git a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py index e268e97bc6..2897c6058e 100644 --- a/tensorflow/python/keras/_impl/keras/applications/inception_v3.py +++ b/tensorflow/python/keras/_impl/keras/applications/inception_v3.py @@ -37,7 +37,7 @@ from tensorflow.python.keras._impl.keras import layers from tensorflow.python.keras._impl.keras.applications import imagenet_utils from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import AveragePooling2D from tensorflow.python.keras._impl.keras.layers import BatchNormalization diff --git a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py index 
1bbbedb85e..ad96b53a45 100644 --- a/tensorflow/python/keras/_impl/keras/applications/mobilenet.py +++ b/tensorflow/python/keras/_impl/keras/applications/mobilenet.py @@ -79,8 +79,8 @@ from tensorflow.python.keras._impl.keras.applications import imagenet_utils from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions from tensorflow.python.keras._impl.keras.engine import InputSpec -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import BatchNormalization from tensorflow.python.keras._impl.keras.layers import Conv2D diff --git a/tensorflow/python/keras/_impl/keras/applications/nasnet.py b/tensorflow/python/keras/_impl/keras/applications/nasnet.py index 08dae57f00..dd33230a7e 100644 --- a/tensorflow/python/keras/_impl/keras/applications/nasnet.py +++ b/tensorflow/python/keras/_impl/keras/applications/nasnet.py @@ -49,7 +49,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions from tensorflow.python.keras._impl.keras.applications.inception_v3 import preprocess_input -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers 
import add from tensorflow.python.keras._impl.keras.layers import AveragePooling2D diff --git a/tensorflow/python/keras/_impl/keras/applications/resnet50.py b/tensorflow/python/keras/_impl/keras/applications/resnet50.py index a47dd657bb..46c0e63557 100644 --- a/tensorflow/python/keras/_impl/keras/applications/resnet50.py +++ b/tensorflow/python/keras/_impl/keras/applications/resnet50.py @@ -34,7 +34,7 @@ from tensorflow.python.keras._impl.keras import layers from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions from tensorflow.python.keras._impl.keras.applications.imagenet_utils import preprocess_input -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import AveragePooling2D from tensorflow.python.keras._impl.keras.layers import BatchNormalization diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg16.py b/tensorflow/python/keras/_impl/keras/applications/vgg16.py index 9da74253ab..cefb25063e 100644 --- a/tensorflow/python/keras/_impl/keras/applications/vgg16.py +++ b/tensorflow/python/keras/_impl/keras/applications/vgg16.py @@ -32,7 +32,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions from tensorflow.python.keras._impl.keras.applications.imagenet_utils import preprocess_input -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers 
import Conv2D from tensorflow.python.keras._impl.keras.layers import Dense from tensorflow.python.keras._impl.keras.layers import Flatten diff --git a/tensorflow/python/keras/_impl/keras/applications/vgg19.py b/tensorflow/python/keras/_impl/keras/applications/vgg19.py index 961c1f9918..dadaf4fdf0 100644 --- a/tensorflow/python/keras/_impl/keras/applications/vgg19.py +++ b/tensorflow/python/keras/_impl/keras/applications/vgg19.py @@ -32,7 +32,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions from tensorflow.python.keras._impl.keras.applications.imagenet_utils import preprocess_input -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from tensorflow.python.keras._impl.keras.layers import Conv2D from tensorflow.python.keras._impl.keras.layers import Dense from tensorflow.python.keras._impl.keras.layers import Flatten diff --git a/tensorflow/python/keras/_impl/keras/applications/xception.py b/tensorflow/python/keras/_impl/keras/applications/xception.py index 7e7ca5a18a..971063a16d 100644 --- a/tensorflow/python/keras/_impl/keras/applications/xception.py +++ b/tensorflow/python/keras/_impl/keras/applications/xception.py @@ -44,7 +44,7 @@ from tensorflow.python.keras._impl.keras import layers from tensorflow.python.keras._impl.keras.applications import imagenet_utils from tensorflow.python.keras._impl.keras.applications.imagenet_utils import _obtain_input_shape from tensorflow.python.keras._impl.keras.applications.imagenet_utils import decode_predictions -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs from 
tensorflow.python.keras._impl.keras.layers import Activation from tensorflow.python.keras._impl.keras.layers import BatchNormalization from tensorflow.python.keras._impl.keras.layers import Conv2D diff --git a/tensorflow/python/keras/_impl/keras/engine/__init__.py b/tensorflow/python/keras/_impl/keras/engine/__init__.py index 31f624f9af..1bc533ab8f 100644 --- a/tensorflow/python/keras/_impl/keras/engine/__init__.py +++ b/tensorflow/python/keras/_impl/keras/engine/__init__.py @@ -18,13 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.keras._impl.keras.engine.topology import get_source_inputs -from tensorflow.python.keras._impl.keras.engine.topology import Input -from tensorflow.python.keras._impl.keras.engine.topology import InputLayer -from tensorflow.python.keras._impl.keras.engine.topology import InputSpec -from tensorflow.python.keras._impl.keras.engine.topology import Layer +from tensorflow.python.keras._impl.keras.engine.base_layer import InputSpec +from tensorflow.python.keras._impl.keras.engine.base_layer import Layer +from tensorflow.python.keras._impl.keras.engine.input_layer import Input +from tensorflow.python.keras._impl.keras.engine.input_layer import InputLayer +from tensorflow.python.keras._impl.keras.engine.network import get_source_inputs +from tensorflow.python.keras._impl.keras.engine.network import Network from tensorflow.python.keras._impl.keras.engine.training import Model - - -# Note: topology.Node is an internal class, -# it isn't meant to be used by Keras users. diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py new file mode 100644 index 0000000000..142325041b --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -0,0 +1,504 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# pylint: disable=protected-access +"""Base layer code (`Layer`). +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from six.moves import zip # pylint: disable=redefined-builtin + +from tensorflow.python.eager import context +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import constraints +from tensorflow.python.keras._impl.keras import initializers +from tensorflow.python.keras._impl.keras import regularizers +from tensorflow.python.keras._impl.keras.utils import generic_utils +from tensorflow.python.layers import base as tf_base_layers +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.tf_export import tf_export + + +# pylint: disable=invalid-name +InputSpec = tf_base_layers.InputSpec +Node = tf_base_layers.Node +TFBaseLayer = tf_base_layers.Layer +# pylint: enable=invalid-name + + +@tf_export('keras.layers.Layer') +class Layer(tf_base_layers.Layer): + """Abstract base layer class. + + # Properties + name: String, must be unique within a model. 
+ input_spec: List of InputSpec class instances + each entry describes one required input: + - ndim + - dtype + A layer with `n` input tensors must have + an `input_spec` of length `n`. + trainable: Boolean, whether the layer weights + will be updated during training. + uses_learning_phase: Whether any operation + of the layer uses `K.in_training_phase()` + or `K.in_test_phase()`. + input_shape: Shape tuple. Provided for convenience, + but note that there may be cases in which this + attribute is ill-defined (e.g. a shared layer + with multiple input shapes), in which case + requesting `input_shape` will raise an Exception. + Prefer using `layer.get_input_shape_for(input_shape)`, + or `layer.get_input_shape_at(node_index)`. + output_shape: Shape tuple. See above. + inbound_nodes: List of nodes. + outbound_nodes: List of nodes. + input, output: Input/output tensor(s). Note that if the layer is used + more than once (shared layer), this is ill-defined + and will raise an exception. In such cases, use + `layer.get_input_at(node_index)`. + input_mask, output_mask: Same as above, for masks. + trainable_weights: List of variables. + non_trainable_weights: List of variables. + weights: The concatenation of the lists trainable_weights and + non_trainable_weights (in this order). + + # Methods + call(x, mask=None): Where the layer's logic lives. + __call__(x, mask=None): Wrapper around the layer logic (`call`). 
+ If x is a Keras tensor: + - Connect current layer with last layer from tensor: + `self._add_inbound_node(last_layer)` + - Add layer to tensor history + If layer is not built: + - Build from inputs shape + get_weights() + set_weights(weights) + get_config() + count_params() + compute_output_shape(input_shape) + compute_mask(x, mask) + get_input_at(node_index) + get_output_at(node_index) + get_input_shape_at(node_index) + get_output_shape_at(node_index) + get_input_mask_at(node_index) + get_output_mask_at(node_index) + + # Class Methods + from_config(config) + + # Internal methods: + build(input_shape) + _add_inbound_node(layer, index=0) + """ + + def __init__(self, **kwargs): + # These properties should be set by the user via keyword arguments. + # note that 'dtype', 'input_shape' and 'batch_input_shape' + # are only applicable to input layers: do not pass these keywords + # to non-input layers. + allowed_kwargs = { + 'activity_regularizer', + 'input_shape', + 'batch_input_shape', + 'batch_size', + 'dtype', + 'name', + 'trainable', + 'weights', + } + # Validate optional keyword arguments. + for kwarg in kwargs: + if kwarg not in allowed_kwargs: + raise TypeError('Keyword argument not understood:', kwarg) + + # Get layer name. + name = kwargs.get('name') + + # Get `trainable` status. + trainable = kwargs.get('trainable', True) + + # Get `dtype`. + dtype = kwargs.get('dtype') + if dtype is None: + dtype = K.floatx() + + # Call super, which will set all properties common to Keras layers + # and core TF layers. + super(Layer, self).__init__( + name=name, dtype=dtype, trainable=trainable, + activity_regularizer=kwargs.get('activity_regularizer')) + + # Add properties that are Keras-only for now. + self.supports_masking = False + + # Manage input shape information if passed. 
+ if 'input_shape' in kwargs or 'batch_input_shape' in kwargs: + # In this case we will later create an input layer + # to insert before the current layer + if 'batch_input_shape' in kwargs: + batch_input_shape = tuple(kwargs['batch_input_shape']) + elif 'input_shape' in kwargs: + if 'batch_size' in kwargs: + batch_size = kwargs['batch_size'] + else: + batch_size = None + batch_input_shape = (batch_size,) + tuple(kwargs['input_shape']) + self._batch_input_shape = batch_input_shape + + # Manage initial weight values if passed. + if 'weights' in kwargs: + self._initial_weights = kwargs['weights'] + else: + self._initial_weights = None + + def add_weight(self, + name, + shape, + dtype=None, + initializer=None, + regularizer=None, + trainable=True, + constraint=None): + """Adds a weight variable to the layer. + + Arguments: + name: String, the name for the weight variable. + shape: The shape tuple of the weight. + dtype: The dtype of the weight. + initializer: An Initializer instance (callable). + regularizer: An optional Regularizer instance. + trainable: A boolean, whether the weight should + be trained via backprop or not (assuming + that the layer itself is also trainable). + constraint: An optional Constraint instance. + + Returns: + The created weight variable. + """ + if dtype is None: + dtype = K.floatx() + weight = self.add_variable(name, shape, + dtype=dtype, + initializer=initializers.get(initializer), + regularizer=regularizers.get(regularizer), + constraint=constraints.get(constraint), + trainable=trainable) + return weight + + def call(self, inputs, **kwargs): # pylint: disable=unused-argument + """This is where the layer's logic lives. + + Arguments: + inputs: Input tensor, or list/tuple of input tensors. + **kwargs: Additional keyword arguments. + + Returns: + A tensor or list/tuple of tensors. + """ + return inputs + + def __call__(self, inputs, **kwargs): + """Wrapper around self.call(), for handling internal references. 
+ + If a Keras tensor is passed: + - We call self._add_inbound_node(). + - If necessary, we `build` the layer to match + the shape of the input(s). + - We update the _keras_history of the output tensor(s) + with the current layer. + This is done as part of _add_inbound_node(). + + Arguments: + inputs: Can be a tensor or list/tuple of tensors. + **kwargs: Additional keyword arguments to be passed to `call()`. + + Returns: + Output of the layer's `call` method. + + Raises: + ValueError: in case the layer is missing shape information + for its `build` call. + """ + # Actually call the layer (optionally building it). + output = super(Layer, self).__call__(inputs, **kwargs) + if context.in_eager_mode(): + return output + + # Un-built subclassed network: build it + if hasattr(self, '_set_inputs') and not self.inputs: + self._set_inputs(inputs, training=kwargs.get('training')) + + # Update learning phase info. + output_tensors = generic_utils.to_list(output) + uses_lp = any( + [getattr(x, '_uses_learning_phase', False) + for x in generic_utils.to_list(inputs)]) + uses_lp = getattr(self, 'uses_learning_phase', False) or uses_lp + for i in range(len(output_tensors)): + output_tensors[i]._uses_learning_phase = getattr( + output_tensors[i], '_uses_learning_phase', False) or uses_lp + + # Optionally load weight values that were specified at layer instantiation. + if hasattr(self, '_initial_weights') and self._initial_weights is not None: + self.set_weights(self._initial_weights) + del self._initial_weights + return output + + def compute_output_shape(self, input_shape): + """Computes the output shape of the layer. + + Assumes that the layer will be built + to match that input shape provided. + + Arguments: + input_shape: Shape tuple (tuple of integers) + or list of shape tuples (one per output tensor of the layer). + Shape tuples can include None for free dimensions, + instead of an integer. + + Returns: + An input shape tuple. 
+ """ + logging.warning( + 'All custom layers should implement the ' + '`compute_output_shape` method. This layer (' + self.name + ') ' + 'is relying on the base `Layer.compute_output_shape` implementation, ' + 'which will start raising a `NotImplementedError` ' + 'as of July 1st, 2018.') + return input_shape + + def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument + """Computes an output mask tensor. + + Arguments: + inputs: Tensor or list of tensors. + mask: Tensor or list of tensors. + + Returns: + None or a tensor (or list of tensors, + one per output tensor of the layer). + """ + if not self.supports_masking: + if mask is not None: + if isinstance(mask, list): + if any(m is not None for m in mask): + raise TypeError('Layer ' + self.name + ' does not support masking, ' + 'but was passed an input_mask: ' + str(mask)) + else: + raise TypeError('Layer ' + self.name + ' does not support masking, ' + 'but was passed an input_mask: ' + str(mask)) + # masking not explicitly supported: return None as mask + return None + # if masking is explicitly supported, by default + # carry over the input mask + return mask + + def get_input_mask_at(self, node_index): + """Retrieves the input mask tensor(s) of a layer at a given node. + + Arguments: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. `node_index=0` will correspond to the + first time the layer was called. + + Returns: + A mask tensor + (or list of tensors if the layer has multiple inputs). + """ + inputs = self.get_input_at(node_index) + if isinstance(inputs, list): + return [getattr(x, '_keras_mask', None) for x in inputs] + else: + return getattr(inputs, '_keras_mask', None) + + def get_output_mask_at(self, node_index): + """Retrieves the output mask tensor(s) of a layer at a given node. + + Arguments: + node_index: Integer, index of the node + from which to retrieve the attribute. + E.g. 
`node_index=0` will correspond to the + first time the layer was called. + + Returns: + A mask tensor + (or list of tensors if the layer has multiple outputs). + """ + output = self.get_output_at(node_index) + if isinstance(output, list): + return [getattr(x, '_keras_mask', None) for x in output] + else: + return getattr(output, '_keras_mask', None) + + @property + def input_mask(self): + """Retrieves the input mask tensor(s) of a layer. + + Only applicable if the layer has exactly one inbound node, + i.e. if it is connected to one incoming layer. + + Returns: + Input mask tensor (potentially None) or list of input + mask tensors. + + Raises: + AttributeError: if the layer is connected to + more than one incoming layers. + """ + inputs = self.input + if isinstance(inputs, list): + return [getattr(x, '_keras_mask', None) for x in inputs] + else: + return getattr(inputs, '_keras_mask', None) + + @property + def output_mask(self): + """Retrieves the output mask tensor(s) of a layer. + + Only applicable if the layer has exactly one inbound node, + i.e. if it is connected to one incoming layer. + + Returns: + Output mask tensor (potentially None) or list of output + mask tensors. + + Raises: + AttributeError: if the layer is connected to + more than one incoming layers. + """ + output = self.output + if isinstance(output, list): + return [getattr(x, '_keras_mask', None) for x in output] + else: + return getattr(output, '_keras_mask', None) + + def set_weights(self, weights): + """Sets the weights of the layer, from Numpy arrays. + + Arguments: + weights: a list of Numpy arrays. The number + of arrays and their shape must match + number of the dimensions of the weights + of the layer (i.e. it should match the + output of `get_weights`). + + Raises: + ValueError: If the provided weights list does not match the + layer's specifications. 
+ """ + params = self.weights + if len(params) != len(weights): + raise ValueError('You called `set_weights(weights)` on layer "' + + self.name + '" with a weight list of length ' + + str(len(weights)) + ', but the layer was expecting ' + + str(len(params)) + ' weights. Provided weights: ' + + str(weights)[:50] + '...') + if not params: + return + weight_value_tuples = [] + param_values = K.batch_get_value(params) + for pv, p, w in zip(param_values, params, weights): + if pv.shape != w.shape: + raise ValueError('Layer weight shape ' + str(pv.shape) + + ' not compatible with ' + 'provided weight shape ' + str(w.shape)) + weight_value_tuples.append((p, w)) + K.batch_set_value(weight_value_tuples) + + def get_weights(self): + """Returns the current weights of the layer. + + Returns: + Weights values as a list of numpy arrays. + """ + params = self.weights + return K.batch_get_value(params) + + def get_config(self): + """Returns the config of the layer. + + A layer config is a Python dictionary (serializable) + containing the configuration of a layer. + The same layer can be reinstantiated later + (without its trained weights) from this configuration. + + The config of a layer does not include connectivity + information, nor the layer class name. These are handled + by `Network` (one layer of abstraction above). + + Returns: + Python dictionary. + """ + config = {'name': self.name, 'trainable': self.trainable} + if hasattr(self, '_batch_input_shape'): + config['batch_input_shape'] = self._batch_input_shape + if hasattr(self, 'dtype'): + config['dtype'] = self.dtype + return config + + @classmethod + def from_config(cls, config): + """Creates a layer from its config. + + This method is the reverse of `get_config`, + capable of instantiating the same layer from the config + dictionary. It does not handle layer connectivity + (handled by Network), nor weights (handled by `set_weights`). + + Arguments: + config: A Python dictionary, typically the + output of get_config. 
+ + Returns: + A layer instance. + """ + return cls(**config) + + @tf_base_layers.Layer.activity_regularizer.setter + def activity_regularizer(self, activity_regularizer): + self._activity_regularizer = activity_regularizer + + +def shape_type_conversion(fn): + """Decorator that handles tuple/TensorShape conversion. + + Used in `compute_output_shape` and `build`. + + Arguments: + fn: function to wrap. + + Returns: + Wrapped function. + """ + + def wrapper(instance, input_shape): + if input_shape is not None: + if isinstance(input_shape, list): + input_shape = [ + tuple(tensor_shape.TensorShape(x).as_list()) for x in input_shape] + else: + input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) + output_shape = fn(instance, input_shape) + if output_shape is not None: + if isinstance(output_shape, list): + return [tensor_shape.TensorShape(x) for x in output_shape] + return tensor_shape.TensorShape(output_shape) + + return wrapper diff --git a/tensorflow/python/keras/_impl/keras/engine/input_layer.py b/tensorflow/python/keras/_impl/keras/engine/input_layer.py new file mode 100644 index 0000000000..8f9ea6f7a4 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/input_layer.py @@ -0,0 +1,230 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +# pylint: disable=protected-access +"""Input layer code (`Input` and `InputLayer`). +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.framework import tensor_shape +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras.engine import base_layer +from tensorflow.python.layers import base as tf_base_layers +from tensorflow.python.ops import array_ops +from tensorflow.python.util.tf_export import tf_export + + +class InputLayer(base_layer.Layer): + """Layer to be used as an entry point into a Network (a graph of layers). + + It can either wrap an existing tensor (pass an `input_tensor` argument) + or create a placeholder tensor (pass arguments `input_shape`, and + optionally, `dtype`). + + It is generally recommended to use the functional layer API via `Input`, + (which creates an `InputLayer`) without directly using `InputLayer`. + + Arguments: + input_shape: Shape tuple (not including the batch axis), or `TensorShape` + instance (not including the batch axis). + batch_size: Optional input batch size (integer or None). + dtype: Datatype of the input. + input_tensor: Optional tensor to use as layer input + instead of creating a placeholder. + sparse: Boolean, whether the placeholder created + is meant to be sparse. + name: Name of the layer (string). 
+ """ + + def __init__(self, + input_shape=None, + batch_size=None, + dtype=None, + input_tensor=None, + sparse=False, + name=None, + **kwargs): + if 'batch_input_shape' in kwargs: + batch_input_shape = kwargs.pop('batch_input_shape') + if input_shape and batch_input_shape: + raise ValueError('Only provide the input_shape OR ' + 'batch_input_shape argument to ' + 'InputLayer, not both at the same time.') + batch_size = batch_input_shape[0] + input_shape = batch_input_shape[1:] + if kwargs: + raise ValueError('Unrecognized keyword arguments:', kwargs.keys()) + + if not name: + prefix = 'input' + name = prefix + '_' + str(K.get_uid(prefix)) + + if not dtype: + if input_tensor is None: + dtype = K.floatx() + else: + dtype = K.dtype(input_tensor) + super(InputLayer, self).__init__(dtype=dtype, name=name) + self.built = True + self.sparse = sparse + self.batch_size = batch_size + + if isinstance(input_shape, tensor_shape.TensorShape): + input_shape = tuple(input_shape.as_list()) + + if input_tensor is None: + if input_shape is not None: + batch_input_shape = (batch_size,) + tuple(input_shape) + else: + batch_input_shape = None + + if context.in_eager_mode(): + # In eager mode, create a temporary placeholder to call the layer on. + input_tensor = tf_base_layers._DeferredTensor( # pylint: disable=protected-access + shape=batch_input_shape, + dtype=dtype, + name=self.name) + else: + # In graph mode, create a graph placeholder to call the layer on. + if sparse: + input_tensor = array_ops.sparse_placeholder( + shape=batch_input_shape, + dtype=dtype, + name=self.name) + else: + input_tensor = array_ops.placeholder( + shape=batch_input_shape, + dtype=dtype, + name=self.name) + + # For compatibility with Keras API. + self.is_placeholder = True + self._batch_input_shape = batch_input_shape + else: + # For compatibility with Keras API. 
+ self.is_placeholder = False + self._batch_input_shape = tuple(input_tensor.get_shape().as_list()) + + # Create an input node to add to self.outbound_node + # and set output_tensors' _keras_history. + input_tensor._keras_history = (self, 0, 0) # pylint: disable=protected-access + tf_base_layers.Node( + self, + inbound_layers=[], + node_indices=[], + tensor_indices=[], + input_tensors=[input_tensor], + output_tensors=[input_tensor]) + + def get_config(self): + config = { + 'batch_input_shape': self._batch_input_shape, + 'dtype': self.dtype, + 'sparse': self.sparse, + 'name': self.name + } + return config + + +@tf_export('keras.layers.Input', 'keras.Input') +def Input( # pylint: disable=invalid-name + shape=None, + batch_size=None, + name=None, + dtype=None, + sparse=False, + tensor=None, + **kwargs): + """`Input()` is used to instantiate a Keras tensor. + + A Keras tensor is a tensor object from the underlying backend + (Theano or TensorFlow), which we augment with certain + attributes that allow us to build a Keras model + just by knowing the inputs and outputs of the model. + + For instance, if a, b and c are Keras tensors, + it becomes possible to do: + `model = Model(input=[a, b], output=c)` + + The added Keras attribute is: + `_keras_history`: Last layer applied to the tensor. + the entire layer graph is retrievable from that layer, + recursively. + + Arguments: + shape: A shape tuple (integers), not including the batch size. + For instance, `shape=(32,)` indicates that the expected input + will be batches of 32-dimensional vectors. + batch_size: optional static batch size (integer). + name: An optional name string for the layer. + Should be unique in a model (do not reuse the same name twice). + It will be autogenerated if it isn't provided. + dtype: The data type expected by the input, as a string + (`float32`, `float64`, `int32`...) + sparse: A boolean specifying whether the placeholder + to be created is sparse. 
+ tensor: Optional existing tensor to wrap into the `Input` layer. + If set, the layer will not create a placeholder tensor. + **kwargs: deprecated arguments support. + + Returns: + A tensor. + + Example: + + ```python + # this is a logistic regression in Keras + x = Input(shape=(32,)) + y = Dense(16, activation='softmax')(x) + model = Model(x, y) + ``` + + Raises: + ValueError: in case of invalid arguments. + """ + if 'batch_shape' in kwargs: + batch_shape = kwargs.pop('batch_shape') + if shape and batch_shape: + raise ValueError('Only provide the shape OR ' + 'batch_shape argument to ' + 'Input, not both at the same time.') + batch_size = batch_shape[0] + shape = batch_shape[1:] + if kwargs: + raise ValueError('Unrecognized keyword arguments:', kwargs.keys()) + + if dtype is None: + dtype = K.floatx() + if not shape and tensor is None: + raise ValueError('Please provide to Input either a `shape`' + ' or a `tensor` argument. Note that ' + '`shape` does not include the batch ' + 'dimension.') + input_layer = InputLayer( + input_shape=shape, + batch_size=batch_size, + name=name, + dtype=dtype, + sparse=sparse, + input_tensor=tensor) + # Return tensor including `_keras_history`. + # Note that in this case train_output and test_output are the same pointer. + outputs = input_layer._inbound_nodes[0].output_tensors + if len(outputs) == 1: + return outputs[0] + else: + return outputs diff --git a/tensorflow/python/keras/_impl/keras/engine/topology.py b/tensorflow/python/keras/_impl/keras/engine/network.py similarity index 59% rename from tensorflow/python/keras/_impl/keras/engine/topology.py rename to tensorflow/python/keras/_impl/keras/engine/network.py index f562a19cf5..453cc8f8b7 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -13,7 +13,7 @@ # limitations under the License. 
# ============================================================================== # pylint: disable=protected-access -"""Base layer code and base model (Network) code. +"""A `Network` is a way to compose layers: the topological form of a `Model`. """ from __future__ import absolute_import from __future__ import division @@ -30,19 +30,16 @@ from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras import constraints -from tensorflow.python.keras._impl.keras import initializers -from tensorflow.python.keras._impl.keras import regularizers -from tensorflow.python.keras._impl.keras.utils import conv_utils +from tensorflow.python.keras._impl.keras.engine import base_layer +from tensorflow.python.keras._impl.keras.engine import saving +from tensorflow.python.keras._impl.keras.utils import generic_utils from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary as print_layer_summary from tensorflow.python.layers import base as tf_base_layers from tensorflow.python.layers import utils as tf_layers_util -from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect -from tensorflow.python.util.tf_export import tf_export # pylint: disable=g-import-not-at-top @@ -57,684 +54,12 @@ except ImportError: yaml = None # pylint: enable=g-import-not-at-top -# pylint: disable=invalid-name -InputSpec = tf_base_layers.InputSpec -Node = tf_base_layers.Node -TFBaseLayer = tf_base_layers.Layer -# pylint: enable=invalid-name - - -@tf_export('keras.layers.Layer') -class Layer(tf_base_layers.Layer): - """Abstract base layer class. 
- - # Properties - name: String, must be unique within a model. - input_spec: List of InputSpec class instances - each entry describes one required input: - - ndim - - dtype - A layer with `n` input tensors must have - an `input_spec` of length `n`. - trainable: Boolean, whether the layer weights - will be updated during training. - uses_learning_phase: Whether any operation - of the layer uses `K.in_training_phase()` - or `K.in_test_phase()`. - input_shape: Shape tuple. Provided for convenience, - but note that there may be cases in which this - attribute is ill-defined (e.g. a shared layer - with multiple input shapes), in which case - requesting `input_shape` will raise an Exception. - Prefer using `layer.get_input_shape_for(input_shape)`, - or `layer.get_input_shape_at(node_index)`. - output_shape: Shape tuple. See above. - inbound_nodes: List of nodes. - outbound_nodes: List of nodes. - input, output: Input/output tensor(s). Note that if the layer is used - more than once (shared layer), this is ill-defined - and will raise an exception. In such cases, use - `layer.get_input_at(node_index)`. - input_mask, output_mask: Same as above, for masks. - trainable_weights: List of variables. - non_trainable_weights: List of variables. - weights: The concatenation of the lists trainable_weights and - non_trainable_weights (in this order). - - # Methods - call(x, mask=None): Where the layer's logic lives. - __call__(x, mask=None): Wrapper around the layer logic (`call`). 
- If x is a Keras tensor: - - Connect current layer with last layer from tensor: - `self._add_inbound_node(last_layer)` - - Add layer to tensor history - If layer is not built: - - Build from inputs shape - get_weights() - set_weights(weights) - get_config() - count_params() - compute_output_shape(input_shape) - compute_mask(x, mask) - get_input_at(node_index) - get_output_at(node_index) - get_input_shape_at(node_index) - get_output_shape_at(node_index) - get_input_mask_at(node_index) - get_output_mask_at(node_index) - - # Class Methods - from_config(config) - - # Internal methods: - build(input_shape) - _add_inbound_node(layer, index=0) - """ - - def __init__(self, **kwargs): - # These properties should be set by the user via keyword arguments. - # note that 'dtype', 'input_shape' and 'batch_input_shape' - # are only applicable to input layers: do not pass these keywords - # to non-input layers. - allowed_kwargs = { - 'activity_regularizer', - 'input_shape', - 'batch_input_shape', - 'batch_size', - 'dtype', - 'name', - 'trainable', - 'weights', - } - # Validate optional keyword arguments. - for kwarg in kwargs: - if kwarg not in allowed_kwargs: - raise TypeError('Keyword argument not understood:', kwarg) - - # Get layer name. - name = kwargs.get('name') - - # Get `trainable` status. - trainable = kwargs.get('trainable', True) - - # Get `dtype`. - dtype = kwargs.get('dtype') - if dtype is None: - dtype = K.floatx() - - # Call super, which will set all properties common to Keras layers - # and core TF layers. - super(Layer, self).__init__( - name=name, dtype=dtype, trainable=trainable, - activity_regularizer=kwargs.get('activity_regularizer')) - - # Add properties that are Keras-only for now. - self.supports_masking = False - - # Manage input shape information if passed. 
- if 'input_shape' in kwargs or 'batch_input_shape' in kwargs: - # In this case we will later create an input layer - # to insert before the current layer - if 'batch_input_shape' in kwargs: - batch_input_shape = tuple(kwargs['batch_input_shape']) - elif 'input_shape' in kwargs: - if 'batch_size' in kwargs: - batch_size = kwargs['batch_size'] - else: - batch_size = None - batch_input_shape = (batch_size,) + tuple(kwargs['input_shape']) - self._batch_input_shape = batch_input_shape - - # Manage initial weight values if passed. - if 'weights' in kwargs: - self._initial_weights = kwargs['weights'] - else: - self._initial_weights = None - - def add_weight(self, - name, - shape, - dtype=None, - initializer=None, - regularizer=None, - trainable=True, - constraint=None): - """Adds a weight variable to the layer. - - Arguments: - name: String, the name for the weight variable. - shape: The shape tuple of the weight. - dtype: The dtype of the weight. - initializer: An Initializer instance (callable). - regularizer: An optional Regularizer instance. - trainable: A boolean, whether the weight should - be trained via backprop or not (assuming - that the layer itself is also trainable). - constraint: An optional Constraint instance. - - Returns: - The created weight variable. - """ - if dtype is None: - dtype = K.floatx() - weight = self.add_variable(name, shape, - dtype=dtype, - initializer=initializers.get(initializer), - regularizer=regularizers.get(regularizer), - constraint=constraints.get(constraint), - trainable=trainable) - return weight - - def call(self, inputs, **kwargs): # pylint: disable=unused-argument - """This is where the layer's logic lives. - - Arguments: - inputs: Input tensor, or list/tuple of input tensors. - **kwargs: Additional keyword arguments. - - Returns: - A tensor or list/tuple of tensors. - """ - return inputs - - def __call__(self, inputs, **kwargs): - """Wrapper around self.call(), for handling internal references. 
- - If a Keras tensor is passed: - - We call self._add_inbound_node(). - - If necessary, we `build` the layer to match - the shape of the input(s). - - We update the _keras_history of the output tensor(s) - with the current layer. - This is done as part of _add_inbound_node(). - - Arguments: - inputs: Can be a tensor or list/tuple of tensors. - **kwargs: Additional keyword arguments to be passed to `call()`. - - Returns: - Output of the layer's `call` method. - - Raises: - ValueError: in case the layer is missing shape information - for its `build` call. - """ - # Actually call the layer (optionally building it). - output = super(Layer, self).__call__(inputs, **kwargs) - if context.in_eager_mode(): - return output - - # Un-built subclassed network: build it - if isinstance(self, Network) and not self.inputs: - self._set_inputs(inputs, training=kwargs.get('training')) - - # Update learning phase info. - output_tensors = to_list(output) - uses_lp = any( - [getattr(x, '_uses_learning_phase', False) for x in to_list(inputs)]) - uses_lp = getattr(self, 'uses_learning_phase', False) or uses_lp - for i in range(len(output_tensors)): - output_tensors[i]._uses_learning_phase = getattr( - output_tensors[i], '_uses_learning_phase', False) or uses_lp - - # Optionally load weight values that were specified at layer instantiation. - if hasattr(self, '_initial_weights') and self._initial_weights is not None: - self.set_weights(self._initial_weights) - del self._initial_weights - return output - - def compute_output_shape(self, input_shape): - """Computes the output shape of the layer. - - Assumes that the layer will be built - to match that input shape provided. - - Arguments: - input_shape: Shape tuple (tuple of integers) - or list of shape tuples (one per output tensor of the layer). - Shape tuples can include None for free dimensions, - instead of an integer. - - Returns: - An input shape tuple. 
- """ - logging.warning( - 'All custom layers should implement the ' - '`compute_output_shape` method. This layer (' + self.name + ') ' - 'is relying on the base `Layer.compute_output_shape` implementation, ' - 'which will start raising a `NotImplementedError` ' - 'as of July 1st, 2018.') - return input_shape - - def compute_mask(self, inputs, mask=None): # pylint: disable=unused-argument - """Computes an output mask tensor. - - Arguments: - inputs: Tensor or list of tensors. - mask: Tensor or list of tensors. - - Returns: - None or a tensor (or list of tensors, - one per output tensor of the layer). - """ - if not self.supports_masking: - if mask is not None: - if isinstance(mask, list): - if any(m is not None for m in mask): - raise TypeError('Layer ' + self.name + ' does not support masking, ' - 'but was passed an input_mask: ' + str(mask)) - else: - raise TypeError('Layer ' + self.name + ' does not support masking, ' - 'but was passed an input_mask: ' + str(mask)) - # masking not explicitly supported: return None as mask - return None - # if masking is explicitly supported, by default - # carry over the input mask - return mask - - def get_input_mask_at(self, node_index): - """Retrieves the input mask tensor(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple inputs). - """ - inputs = self.get_input_at(node_index) - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - def get_output_mask_at(self, node_index): - """Retrieves the output mask tensor(s) of a layer at a given node. - - Arguments: - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. 
`node_index=0` will correspond to the - first time the layer was called. - - Returns: - A mask tensor - (or list of tensors if the layer has multiple outputs). - """ - output = self.get_output_at(node_index) - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - @property - def input_mask(self): - """Retrieves the input mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Input mask tensor (potentially None) or list of input - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - inputs = self.input - if isinstance(inputs, list): - return [getattr(x, '_keras_mask', None) for x in inputs] - else: - return getattr(inputs, '_keras_mask', None) - - @property - def output_mask(self): - """Retrieves the output mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - Returns: - Output mask tensor (potentially None) or list of output - mask tensors. - - Raises: - AttributeError: if the layer is connected to - more than one incoming layers. - """ - output = self.output - if isinstance(output, list): - return [getattr(x, '_keras_mask', None) for x in output] - else: - return getattr(output, '_keras_mask', None) - - def set_weights(self, weights): - """Sets the weights of the layer, from Numpy arrays. - - Arguments: - weights: a list of Numpy arrays. The number - of arrays and their shape must match - number of the dimensions of the weights - of the layer (i.e. it should match the - output of `get_weights`). - - Raises: - ValueError: If the provided weights list does not match the - layer's specifications. 
- """ - params = self.weights - if len(params) != len(weights): - raise ValueError('You called `set_weights(weights)` on layer "' + - self.name + '" with a weight list of length ' + - str(len(weights)) + ', but the layer was expecting ' + - str(len(params)) + ' weights. Provided weights: ' + - str(weights)[:50] + '...') - if not params: - return - weight_value_tuples = [] - param_values = K.batch_get_value(params) - for pv, p, w in zip(param_values, params, weights): - if pv.shape != w.shape: - raise ValueError('Layer weight shape ' + str(pv.shape) + - ' not compatible with ' - 'provided weight shape ' + str(w.shape)) - weight_value_tuples.append((p, w)) - K.batch_set_value(weight_value_tuples) - - def get_weights(self): - """Returns the current weights of the layer. - - Returns: - Weights values as a list of numpy arrays. - """ - params = self.weights - return K.batch_get_value(params) - - def get_config(self): - """Returns the config of the layer. - - A layer config is a Python dictionary (serializable) - containing the configuration of a layer. - The same layer can be reinstantiated later - (without its trained weights) from this configuration. - - The config of a layer does not include connectivity - information, nor the layer class name. These are handled - by `Network` (one layer of abstraction above). - - Returns: - Python dictionary. - """ - config = {'name': self.name, 'trainable': self.trainable} - if hasattr(self, '_batch_input_shape'): - config['batch_input_shape'] = self._batch_input_shape - if hasattr(self, 'dtype'): - config['dtype'] = self.dtype - return config - - @classmethod - def from_config(cls, config): - """Creates a layer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same layer from the config - dictionary. It does not handle layer connectivity - (handled by Network), nor weights (handled by `set_weights`). - - Arguments: - config: A Python dictionary, typically the - output of get_config. 
- - Returns: - A layer instance. - """ - return cls(**config) - - @tf_base_layers.Layer.activity_regularizer.setter - def activity_regularizer(self, activity_regularizer): - self._activity_regularizer = activity_regularizer +class Network(base_layer.Layer): + """A `Network` is a composition of layers. -class InputLayer(Layer): - """Layer to be used as an entry point into a Network (a graph of layers). - - It can either wrap an existing tensor (pass an `input_tensor` argument) - or create its a placeholder tensor (pass arguments `input_shape`, and - optionally, `dtype`). - - It is generally recommend to use the functional layer API via `Input`, - (which creates an `InputLayer`) without directly using `InputLayer`. - - Arguments: - input_shape: Shape tuple (not including the batch axis), or `TensorShape` - instance (not including the batch axis). - batch_size: Optional input batch size (integer or None). - dtype: Datatype of the input. - input_tensor: Optional tensor to use as layer input - instead of creating a placeholder. - sparse: Boolean, whether the placeholder created - is meant to be sparse. - name: Name of the layer (string). 
- """ - - def __init__(self, - input_shape=None, - batch_size=None, - dtype=None, - input_tensor=None, - sparse=False, - name=None, - **kwargs): - if 'batch_input_shape' in kwargs: - batch_input_shape = kwargs.pop('batch_input_shape') - if input_shape and batch_input_shape: - raise ValueError('Only provide the input_shape OR ' - 'batch_input_shape argument to ' - 'InputLayer, not both at the same time.') - batch_size = batch_input_shape[0] - input_shape = batch_input_shape[1:] - if kwargs: - raise ValueError('Unrecognized keyword arguments:', kwargs.keys()) - - if not name: - prefix = 'input' - name = prefix + '_' + str(K.get_uid(prefix)) - - if not dtype: - if input_tensor is None: - dtype = K.floatx() - else: - dtype = K.dtype(input_tensor) - super(InputLayer, self).__init__(dtype=dtype, name=name) - self.built = True - self.sparse = sparse - self.batch_size = batch_size - - if isinstance(input_shape, tensor_shape.TensorShape): - input_shape = tuple(input_shape.as_list()) - - if input_tensor is None: - if input_shape is not None: - batch_input_shape = (batch_size,) + tuple(input_shape) - else: - batch_input_shape = None - - if context.in_eager_mode(): - # In eager mode, create a temporary placeholder to call the layer on. - input_tensor = tf_base_layers._DeferredTensor( # pylint: disable=protected-access - shape=batch_input_shape, - dtype=dtype, - name=self.name) - else: - # In graph mode, create a graph placeholder to call the layer on. - if sparse: - input_tensor = array_ops.sparse_placeholder( - shape=batch_input_shape, - dtype=dtype, - name=self.name) - else: - input_tensor = array_ops.placeholder( - shape=batch_input_shape, - dtype=dtype, - name=self.name) - - # For compatibility with Keras API. - self.is_placeholder = True - self._batch_input_shape = batch_input_shape - else: - # For compatibility with Keras API. 
- self.is_placeholder = False - self._batch_input_shape = tuple(input_tensor.get_shape().as_list()) - - # Create an input node to add to self.outbound_node - # and set output_tensors' _keras_history. - input_tensor._keras_history = (self, 0, 0) # pylint: disable=protected-access - tf_base_layers.Node( - self, - inbound_layers=[], - node_indices=[], - tensor_indices=[], - input_tensors=[input_tensor], - output_tensors=[input_tensor]) - - def get_config(self): - config = { - 'batch_input_shape': self._batch_input_shape, - 'dtype': self.dtype, - 'sparse': self.sparse, - 'name': self.name - } - return config - - -@tf_export('keras.layers.Input', 'keras.Input') -def Input( # pylint: disable=invalid-name - shape=None, - batch_size=None, - name=None, - dtype=None, - sparse=False, - tensor=None, - **kwargs): - """`Input()` is used to instantiate a Keras tensor. - - A Keras tensor is a tensor object from the underlying backend - (Theano or TensorFlow), which we augment with certain - attributes that allow us to build a Keras model - just by knowing the inputs and outputs of the model. - - For instance, if a, b and c are Keras tensors, - it becomes possible to do: - `model = Model(input=[a, b], output=c)` - - The added Keras attribute is: - `_keras_history`: Last layer applied to the tensor. - the entire layer graph is retrievable from that layer, - recursively. - - Arguments: - shape: A shape tuple (integers), not including the batch size. - For instance, `shape=(32,)` indicates that the expected input - will be batches of 32-dimensional vectors. - batch_size: optional static batch size (integer). - name: An optional name string for the layer. - Should be unique in a model (do not reuse the same name twice). - It will be autogenerated if it isn't provided. - dtype: The data type expected by the input, as a string - (`float32`, `float64`, `int32`...) - sparse: A boolean specifying whether the placeholder - to be created is sparse. 
- tensor: Optional existing tensor to wrap into the `Input` layer. - If set, the layer will not create a placeholder tensor. - **kwargs: deprecated arguments support. - - Returns: - A tensor. - - Example: - - ```python - # this is a logistic regression in Keras - x = Input(shape=(32,)) - y = Dense(16, activation='softmax')(x) - model = Model(x, y) - ``` - - Raises: - ValueError: in case of invalid arguments. - """ - if 'batch_shape' in kwargs: - batch_shape = kwargs.pop('batch_shape') - if shape and batch_shape: - raise ValueError('Only provide the shape OR ' - 'batch_shape argument to ' - 'Input, not both at the same time.') - batch_size = batch_shape[0] - shape = batch_shape[1:] - if kwargs: - raise ValueError('Unrecognized keyword arguments:', kwargs.keys()) - - if dtype is None: - dtype = K.floatx() - if not shape and tensor is None: - raise ValueError('Please provide to Input either a `shape`' - ' or a `tensor` argument. Note that ' - '`shape` does not include the batch ' - 'dimension.') - input_layer = InputLayer( - input_shape=shape, - batch_size=batch_size, - name=name, - dtype=dtype, - sparse=sparse, - input_tensor=tensor) - # Return tensor including `_keras_history`. - # Note that in this case train_output and test_output are the same pointer. - outputs = input_layer._inbound_nodes[0].output_tensors - if len(outputs) == 1: - return outputs[0] - else: - return outputs - - -class Network(Layer): - """A Network is a directed acyclic graph of layers. - - It is the topological form of a "model". A Model - is simply a Network with added training routines. 
- - # Properties - name - inputs - outputs - input_layers - output_layers - input_spec (list of class instances) - each entry describes one required input: - - ndim - - dtype - trainable (boolean) - input_shape - output_shape - inbound_nodes: list of nodes - outbound_nodes: list of nodes - trainable_weights (list of variables) - non_trainable_weights (list of variables) - - # Methods - summary - get_layer - get_weights - set_weights - get_config - compute_output_shape - - # Class Methods - from_config + It is the topological form of a "model". A `Model` + is simply a `Network` with added training routines. """ def __init__(self, *args, **kwargs): # pylint: disable=super-init-not-called @@ -1053,11 +378,11 @@ class Network(Layer): if not self._is_graph_network: return None - inputs = to_list(inputs) + inputs = generic_utils.to_list(inputs) if mask is None: masks = [None for _ in range(len(inputs))] else: - masks = to_list(mask) + masks = generic_utils.to_list(mask) cache_key = (tf_layers_util.object_list_uid(inputs) + '_' + tf_layers_util.object_list_uid(masks)) if cache_key in self._output_mask_cache: @@ -1818,7 +1143,7 @@ class Network(Layer): if not proceed: return with h5py.File(filepath, 'w') as f: - save_weights_to_hdf5_group(f, self.layers) + saving.save_weights_to_hdf5_group(f, self.layers) def load_weights(self, filepath, by_name=False): """Loads all layer weights from a HDF5 save file. @@ -1849,9 +1174,9 @@ class Network(Layer): if 'layer_names' not in f.attrs and 'model_weights' in f: f = f['model_weights'] if by_name: - load_weights_from_hdf5_group_by_name(f, self.layers) + saving.load_weights_from_hdf5_group_by_name(f, self.layers) else: - load_weights_from_hdf5_group(f, self.layers) + saving.load_weights_from_hdf5_group(f, self.layers) def _updated_config(self): """Util hared between different serialization methods. 
@@ -1989,364 +1314,6 @@ def get_source_inputs(tensor, layer=None, node_index=None): return source_tensors -def to_list(x): - """Normalizes a list/tensor into a list. - - If a tensor is passed, we return - a list of size 1 containing the tensor. - - Arguments: - x: target object to be normalized. - - Returns: - A list. - """ - if isinstance(x, list): - return x - return [x] - - -def save_weights_to_hdf5_group(f, layers): - from tensorflow.python.keras._impl.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top - - f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in layers] - f.attrs['backend'] = K.backend().encode('utf8') - f.attrs['keras_version'] = str(keras_version).encode('utf8') - - for layer in layers: - g = f.create_group(layer.name) - symbolic_weights = layer.weights - weight_values = K.batch_get_value(symbolic_weights) - weight_names = [] - for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)): - if hasattr(w, 'name') and w.name: - name = str(w.name) - else: - name = 'param_' + str(i) - weight_names.append(name.encode('utf8')) - g.attrs['weight_names'] = weight_names - for name, val in zip(weight_names, weight_values): - param_dset = g.create_dataset(name, val.shape, dtype=val.dtype) - if not val.shape: - # scalar - param_dset[()] = val - else: - param_dset[:] = val - - -def preprocess_weights_for_loading(layer, - weights, - original_keras_version=None, - original_backend=None): - """Converts layers weights from Keras 1 format to Keras 2. - - Arguments: - layer: Layer instance. - weights: List of weights values (Numpy arrays). - original_keras_version: Keras version for the weights, as a string. - original_backend: Keras backend the weights were trained with, - as a string. - - Returns: - A list of weights values (Numpy arrays). 
- """ - if layer.__class__.__name__ == 'Bidirectional': - num_weights_per_layer = len(weights) // 2 - forward_weights = preprocess_weights_for_loading( - layer.forward_layer, weights[:num_weights_per_layer], - original_keras_version, original_backend) - backward_weights = preprocess_weights_for_loading( - layer.backward_layer, weights[num_weights_per_layer:], - original_keras_version, original_backend) - weights = forward_weights + backward_weights - - if original_keras_version == '1': - if layer.__class__.__name__ == 'TimeDistributed': - weights = preprocess_weights_for_loading( - layer.layer, weights, original_keras_version, original_backend) - - if layer.__class__.__name__ == 'Conv1D': - shape = weights[0].shape - # Handle Keras 1.1 format - if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters: - # Legacy shape: - # (filters, input_dim, filter_length, 1) - assert shape[0] == layer.filters and shape[2:] == (layer.kernel_size[0], - 1) - weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) - weights[0] = weights[0][:, 0, :, :] - - if layer.__class__.__name__ == 'Conv2D': - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, stack_size, filters) - weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) - - if layer.__class__.__name__ == 'Conv2DTranspose': - if layer.data_format == 'channels_last': - # old: (kernel_rows, kernel_cols, stack_size, filters) - # new: (kernel_rows, kernel_cols, filters, stack_size) - weights[0] = np.transpose(weights[0], (0, 1, 3, 2)) - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, filters, stack_size) - weights[0] = np.transpose(weights[0], (2, 3, 0, 1)) - - if layer.__class__.__name__ == 'Conv3D': - if layer.data_format == 'channels_first': - # old: (filters, stack_size, ...) 
- # new: (..., stack_size, filters) - weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0)) - - if layer.__class__.__name__ == 'GRU': - if len(weights) == 9: - kernel = np.concatenate([weights[0], weights[3], weights[6]], axis=-1) - recurrent_kernel = np.concatenate( - [weights[1], weights[4], weights[7]], axis=-1) - bias = np.concatenate([weights[2], weights[5], weights[8]], axis=-1) - weights = [kernel, recurrent_kernel, bias] - - if layer.__class__.__name__ == 'LSTM': - if len(weights) == 12: - # old: i, c, f, o - # new: i, f, c, o - kernel = np.concatenate( - [weights[0], weights[6], weights[3], weights[9]], axis=-1) - recurrent_kernel = np.concatenate( - [weights[1], weights[7], weights[4], weights[10]], axis=-1) - bias = np.concatenate( - [weights[2], weights[8], weights[5], weights[11]], axis=-1) - weights = [kernel, recurrent_kernel, bias] - - if layer.__class__.__name__ == 'ConvLSTM2D': - if len(weights) == 12: - kernel = np.concatenate( - [weights[0], weights[6], weights[3], weights[9]], axis=-1) - recurrent_kernel = np.concatenate( - [weights[1], weights[7], weights[4], weights[10]], axis=-1) - bias = np.concatenate( - [weights[2], weights[8], weights[5], weights[11]], axis=-1) - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, stack_size, filters) - kernel = np.transpose(kernel, (2, 3, 1, 0)) - recurrent_kernel = np.transpose(recurrent_kernel, (2, 3, 1, 0)) - weights = [kernel, recurrent_kernel, bias] - - if layer.__class__.__name__ in ['Model', 'Sequential']: - new_weights = [] - # trainable weights - for sublayer in layer.layers: - num_weights = len(sublayer.trainable_weights) - if num_weights > 0: - new_weights.extend( - preprocess_weights_for_loading( - layer=sublayer, - weights=weights[:num_weights], - original_keras_version=original_keras_version, - original_backend=original_backend)) - weights = weights[num_weights:] - - # non-trainable weights - for sublayer 
in layer.layers: - num_weights = len([ - l for l in sublayer.weights if l not in sublayer.trainable_weights - ]) - if num_weights > 0: - new_weights.extend( - preprocess_weights_for_loading( - layer=sublayer, - weights=weights[:num_weights], - original_keras_version=original_keras_version, - original_backend=original_backend)) - weights = weights[num_weights:] - weights = new_weights - - conv_layers = ['Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D'] - if layer.__class__.__name__ in conv_layers: - if original_backend == 'theano': - weights[0] = conv_utils.convert_kernel(weights[0]) - if layer.__class__.__name__ == 'ConvLSTM2D': - weights[1] = conv_utils.convert_kernel(weights[1]) - if K.int_shape(layer.weights[0]) != weights[0].shape: - weights[0] = np.transpose(weights[0], (3, 2, 0, 1)) - if layer.__class__.__name__ == 'ConvLSTM2D': - weights[1] = np.transpose(weights[1], (3, 2, 0, 1)) - - # Convert the weights of CuDNNLSTM so that they could be loaded into LSTM - if layer.__class__.__name__ == 'LSTM' and len(weights) == 3: - # Determine if loading a CuDNNLSTM layer from the number of bias weights: - # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) - # if there's no bias weight in the file, skip this conversion - units = weights[1].shape[0] - bias = weights[2] - if len(bias) == units * 8: - # reshape the kernels - kernels = np.split(weights[0], 4, axis=1) - kernels = [ - kernel.reshape(-1).reshape(kernel.shape, order='F') - for kernel in kernels - ] - weights[0] = np.concatenate(kernels, axis=1) - - # transpose the recurrent kernels - recurrent_kernels = np.split(weights[1], 4, axis=1) - recurrent_kernels = [kernel.T for kernel in recurrent_kernels] - weights[1] = np.concatenate(recurrent_kernels, axis=1) - - # split the bias into half and merge - weights[2] = bias[:units * 4] + bias[units * 4:] - - return weights - - -def load_weights_from_hdf5_group(f, layers): - """Implements topological (order-based) weight loading. 
- - Arguments: - f: A pointer to a HDF5 group. - layers: a list of target layers. - - Raises: - ValueError: in case of mismatch between provided layers - and weights file. - """ - if 'keras_version' in f.attrs: - original_keras_version = f.attrs['keras_version'].decode('utf8') - else: - original_keras_version = '1' - if 'backend' in f.attrs: - original_backend = f.attrs['backend'].decode('utf8') - else: - original_backend = None - - filtered_layers = [] - for layer in layers: - weights = layer.weights - if weights: - filtered_layers.append(layer) - - layer_names = [n.decode('utf8') for n in f.attrs['layer_names']] - filtered_layer_names = [] - for name in layer_names: - g = f[name] - weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] - if weight_names: - filtered_layer_names.append(name) - layer_names = filtered_layer_names - if len(layer_names) != len(filtered_layers): - raise ValueError('You are trying to load a weight file ' - 'containing ' + str(len(layer_names)) + - ' layers into a model with ' + str(len(filtered_layers)) + - ' layers.') - - # We batch weight value assignments in a single backend call - # which provides a speedup in TensorFlow. - weight_value_tuples = [] - for k, name in enumerate(layer_names): - g = f[name] - weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] - weight_values = [g[weight_name] for weight_name in weight_names] - layer = filtered_layers[k] - symbolic_weights = layer.weights - weight_values = preprocess_weights_for_loading( - layer, weight_values, original_keras_version, original_backend) - if len(weight_values) != len(symbolic_weights): - raise ValueError('Layer #' + str(k) + ' (named "' + layer.name + - '" in the current model) was found to ' - 'correspond to layer ' + name + ' in the save file. 
' - 'However the new layer ' + layer.name + ' expects ' + - str(len(symbolic_weights)) + - ' weights, but the saved weights have ' + - str(len(weight_values)) + ' elements.') - weight_value_tuples += zip(symbolic_weights, weight_values) - K.batch_set_value(weight_value_tuples) - - -def load_weights_from_hdf5_group_by_name(f, layers): - """Implements name-based weight loading. - - (instead of topological weight loading). - - Layers that have no matching name are skipped. - - Arguments: - f: A pointer to a HDF5 group. - layers: a list of target layers. - - Raises: - ValueError: in case of mismatch between provided layers - and weights file. - """ - if 'keras_version' in f.attrs: - original_keras_version = f.attrs['keras_version'].decode('utf8') - else: - original_keras_version = '1' - if 'backend' in f.attrs: - original_backend = f.attrs['backend'].decode('utf8') - else: - original_backend = None - - # New file format. - layer_names = [n.decode('utf8') for n in f.attrs['layer_names']] - - # Reverse index of layer name to list of layers with name. - index = {} - for layer in layers: - if layer.name: - index.setdefault(layer.name, []).append(layer) - - # We batch weight value assignments in a single backend call - # which provides a speedup in TensorFlow. - weight_value_tuples = [] - for k, name in enumerate(layer_names): - g = f[name] - weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] - weight_values = [g[weight_name] for weight_name in weight_names] - - for layer in index.get(name, []): - symbolic_weights = layer.weights - weight_values = preprocess_weights_for_loading( - layer, weight_values, original_keras_version, original_backend) - if len(weight_values) != len(symbolic_weights): - raise ValueError('Layer #' + str(k) + ' (named "' + layer.name + - '") expects ' + str(len(symbolic_weights)) + - ' weight(s), but the saved weights' + ' have ' + - str(len(weight_values)) + ' element(s).') - # Set values. 
- for i in range(len(weight_values)): - weight_value_tuples.append((symbolic_weights[i], weight_values[i])) - K.batch_set_value(weight_value_tuples) - - -def shape_type_conversion(fn): - """Decorator that handles tuple/TensorShape conversion. - - Used in `compute_output_shape` and `build`. - - Arguments: - fn: function to wrap. - - Returns: - Wrapped function. - """ - - def wrapper(instance, input_shape): - if input_shape is not None: - if isinstance(input_shape, list): - input_shape = [ - tuple(tensor_shape.TensorShape(x).as_list()) for x in input_shape] - else: - input_shape = tuple(tensor_shape.TensorShape(input_shape).as_list()) - output_shape = fn(instance, input_shape) - if output_shape is not None: - if isinstance(output_shape, list): - return [tensor_shape.TensorShape(x) for x in output_shape] - return tensor_shape.TensorShape(output_shape) - - return wrapper - - def _make_node_key(layer_name, node_index): return layer_name + '_ib-' + str(node_index) diff --git a/tensorflow/python/keras/_impl/keras/engine/saving.py b/tensorflow/python/keras/_impl/keras/engine/saving.py new file mode 100644 index 0000000000..52522e6935 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/saving.py @@ -0,0 +1,671 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# pylint: disable=protected-access +"""Model saving utilities. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import json +import os + +import numpy as np +from six.moves import zip # pylint: disable=redefined-builtin + +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import optimizers +from tensorflow.python.keras._impl.keras.utils import conv_utils +from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.tf_export import tf_export + +# pylint: disable=g-import-not-at-top +try: + import h5py +except ImportError: + h5py = None + +try: + import yaml +except ImportError: + yaml = None +# pylint: enable=g-import-not-at-top + + +@tf_export('keras.models.save_model') +def save_model(model, filepath, overwrite=True, include_optimizer=True): + """Save a model to a HDF5 file. + + The saved model contains: + - the model's configuration (topology) + - the model's weights + - the model's optimizer's state (if any) + + Thus the saved model can be reinstantiated in + the exact same state, without any of the code + used for model definition or training. + + Arguments: + model: Keras model instance to be saved. + filepath: String, path where to save the model. + overwrite: Whether we should overwrite any existing + model at the target location, or instead + ask the user with a manual prompt. + include_optimizer: If True, save optimizer's state together. + + Raises: + ImportError: if h5py is not available. + """ + + if h5py is None: + raise ImportError('`save_model` requires h5py.') + + def get_json_type(obj): + """Serialize any object to a JSON-serializable structure. + + Arguments: + obj: the object to serialize + + Returns: + JSON-serializable structure representing `obj`. + + Raises: + TypeError: if `obj` cannot be serialized. 
+ """ + # if obj is a serializable Keras class instance + # e.g. optimizer, layer + if hasattr(obj, 'get_config'): + return {'class_name': obj.__class__.__name__, 'config': obj.get_config()} + + # if obj is any numpy type + if type(obj).__module__ == np.__name__: + if isinstance(obj, np.ndarray): + return {'type': type(obj), 'value': obj.tolist()} + else: + return obj.item() + + # misc functions (e.g. loss function) + if callable(obj): + return obj.__name__ + + # if obj is a python 'type' + if type(obj).__name__ == type.__name__: + return obj.__name__ + + raise TypeError('Not JSON Serializable:', obj) + + from tensorflow.python.keras._impl.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top + + # If file exists and should not be overwritten. + if not overwrite and os.path.isfile(filepath): + proceed = ask_to_proceed_with_overwrite(filepath) + if not proceed: + return + + with h5py.File(filepath, mode='w') as f: + f.attrs['keras_version'] = str(keras_version).encode('utf8') + f.attrs['backend'] = K.backend().encode('utf8') + f.attrs['model_config'] = json.dumps( + { + 'class_name': model.__class__.__name__, + 'config': model.get_config() + }, + default=get_json_type).encode('utf8') + + model_weights_group = f.create_group('model_weights') + model_layers = model.layers + save_weights_to_hdf5_group(model_weights_group, model_layers) + + if include_optimizer and hasattr(model, 'optimizer'): + if isinstance(model.optimizer, optimizers.TFOptimizer): + logging.warning( + 'TensorFlow optimizers do not ' + 'make it possible to access ' + 'optimizer attributes or optimizer state ' + 'after instantiation. ' + 'As a result, we cannot save the optimizer ' + 'as part of the model save file.' + 'You will have to compile your model again after loading it. 
' + 'Prefer using a Keras optimizer instead ' + '(see keras.io/optimizers).') + else: + f.attrs['training_config'] = json.dumps( + { + 'optimizer_config': { + 'class_name': model.optimizer.__class__.__name__, + 'config': model.optimizer.get_config() + }, + 'loss': model.loss, + 'metrics': model.metrics, + 'sample_weight_mode': model.sample_weight_mode, + 'loss_weights': model.loss_weights, + }, + default=get_json_type).encode('utf8') + + # Save optimizer weights. + symbolic_weights = getattr(model.optimizer, 'weights') + if symbolic_weights: + optimizer_weights_group = f.create_group('optimizer_weights') + weight_values = K.batch_get_value(symbolic_weights) + weight_names = [] + for w, val in zip(symbolic_weights, weight_values): + name = str(w.name) + weight_names.append(name.encode('utf8')) + optimizer_weights_group.attrs['weight_names'] = weight_names + for name, val in zip(weight_names, weight_values): + param_dset = optimizer_weights_group.create_dataset( + name, val.shape, dtype=val.dtype) + if not val.shape: + # scalar + param_dset[()] = val + else: + param_dset[:] = val + f.flush() + + +@tf_export('keras.models.load_model') +def load_model(filepath, custom_objects=None, compile=True): # pylint: disable=redefined-builtin + """Loads a model saved via `save_model`. + + Arguments: + filepath: String, path to the saved model. + custom_objects: Optional dictionary mapping names + (strings) to custom classes or functions to be + considered during deserialization. + compile: Boolean, whether to compile the model + after loading. + + Returns: + A Keras model instance. If an optimizer was found + as part of the saved model, the model is already + compiled. Otherwise, the model is uncompiled and + a warning will be displayed. When `compile` is set + to False, the compilation is omitted without any + warning. + + Raises: + ImportError: if h5py is not available. + ValueError: In case of an invalid savefile. 
+ """ + if h5py is None: + raise ImportError('`load_model` requires h5py.') + + if not custom_objects: + custom_objects = {} + + def convert_custom_objects(obj): + """Handles custom object lookup. + + Arguments: + obj: object, dict, or list. + + Returns: + The same structure, where occurrences + of a custom object name have been replaced + with the custom object. + """ + if isinstance(obj, list): + deserialized = [] + for value in obj: + deserialized.append(convert_custom_objects(value)) + return deserialized + if isinstance(obj, dict): + deserialized = {} + for key, value in obj.items(): + deserialized[key] = convert_custom_objects(value) + return deserialized + if obj in custom_objects: + return custom_objects[obj] + return obj + + with h5py.File(filepath, mode='r') as f: + # instantiate model + model_config = f.attrs.get('model_config') + if model_config is None: + raise ValueError('No model found in config file.') + model_config = json.loads(model_config.decode('utf-8')) + model = model_from_config(model_config, custom_objects=custom_objects) + + # set weights + load_weights_from_hdf5_group(f['model_weights'], model.layers) + + # Early return if compilation is not required. + if not compile: + return model + + # instantiate optimizer + training_config = f.attrs.get('training_config') + if training_config is None: + logging.warning('No training configuration found in save file: ' + 'the model was *not* compiled. Compile it manually.') + return model + training_config = json.loads(training_config.decode('utf-8')) + optimizer_config = training_config['optimizer_config'] + optimizer = optimizers.deserialize( + optimizer_config, custom_objects=custom_objects) + + # Recover loss functions and metrics. + loss = convert_custom_objects(training_config['loss']) + metrics = convert_custom_objects(training_config['metrics']) + sample_weight_mode = training_config['sample_weight_mode'] + loss_weights = training_config['loss_weights'] + + # Compile model. 
+ model.compile( + optimizer=optimizer, + loss=loss, + metrics=metrics, + loss_weights=loss_weights, + sample_weight_mode=sample_weight_mode) + + # Set optimizer weights. + if 'optimizer_weights' in f: + # Build train function (to get weight updates). + model._make_train_function() + optimizer_weights_group = f['optimizer_weights'] + optimizer_weight_names = [ + n.decode('utf8') + for n in optimizer_weights_group.attrs['weight_names'] + ] + optimizer_weight_values = [ + optimizer_weights_group[n] for n in optimizer_weight_names + ] + try: + model.optimizer.set_weights(optimizer_weight_values) + except ValueError: + logging.warning('Error in loading the saved optimizer ' + 'state. As a result, your model is ' + 'starting with a freshly initialized ' + 'optimizer.') + return model + + +@tf_export('keras.models.model_from_config') +def model_from_config(config, custom_objects=None): + """Instantiates a Keras model from its config. + + Arguments: + config: Configuration dictionary. + custom_objects: Optional dictionary mapping names + (strings) to custom classes or functions to be + considered during deserialization. + + Returns: + A Keras model instance (uncompiled). + + Raises: + TypeError: if `config` is not a dictionary. + """ + if isinstance(config, list): + raise TypeError('`model_from_config` expects a dictionary, not a list. ' + 'Maybe you meant to use ' + '`Sequential.from_config(config)`?') + from tensorflow.python.keras._impl.keras.layers import deserialize # pylint: disable=g-import-not-at-top + return deserialize(config, custom_objects=custom_objects) + + +@tf_export('keras.models.model_from_yaml') +def model_from_yaml(yaml_string, custom_objects=None): + """Parses a yaml model configuration file and returns a model instance. + + Arguments: + yaml_string: YAML string encoding a model configuration. + custom_objects: Optional dictionary mapping names + (strings) to custom classes or functions to be + considered during deserialization. 
+ + Returns: + A Keras model instance (uncompiled). + + Raises: + ImportError: if yaml module is not found. + """ + if yaml is None: + raise ImportError('Requires yaml module installed.') + config = yaml.load(yaml_string) + from tensorflow.python.keras._impl.keras.layers import deserialize # pylint: disable=g-import-not-at-top + return deserialize(config, custom_objects=custom_objects) + + +@tf_export('keras.models.model_from_json') +def model_from_json(json_string, custom_objects=None): + """Parses a JSON model configuration file and returns a model instance. + + Arguments: + json_string: JSON string encoding a model configuration. + custom_objects: Optional dictionary mapping names + (strings) to custom classes or functions to be + considered during deserialization. + + Returns: + A Keras model instance (uncompiled). + """ + config = json.loads(json_string) + from tensorflow.python.keras._impl.keras.layers import deserialize # pylint: disable=g-import-not-at-top + return deserialize(config, custom_objects=custom_objects) + + +def save_weights_to_hdf5_group(f, layers): + from tensorflow.python.keras._impl.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top + + f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in layers] + f.attrs['backend'] = K.backend().encode('utf8') + f.attrs['keras_version'] = str(keras_version).encode('utf8') + + for layer in layers: + g = f.create_group(layer.name) + symbolic_weights = layer.weights + weight_values = K.batch_get_value(symbolic_weights) + weight_names = [] + for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)): + if hasattr(w, 'name') and w.name: + name = str(w.name) + else: + name = 'param_' + str(i) + weight_names.append(name.encode('utf8')) + g.attrs['weight_names'] = weight_names + for name, val in zip(weight_names, weight_values): + param_dset = g.create_dataset(name, val.shape, dtype=val.dtype) + if not val.shape: + # scalar + param_dset[()] = val + else: + 
param_dset[:] = val + + +def preprocess_weights_for_loading(layer, + weights, + original_keras_version=None, + original_backend=None): + """Converts layers weights from Keras 1 format to Keras 2. + + Arguments: + layer: Layer instance. + weights: List of weights values (Numpy arrays). + original_keras_version: Keras version for the weights, as a string. + original_backend: Keras backend the weights were trained with, + as a string. + + Returns: + A list of weights values (Numpy arrays). + """ + if layer.__class__.__name__ == 'Bidirectional': + num_weights_per_layer = len(weights) // 2 + forward_weights = preprocess_weights_for_loading( + layer.forward_layer, weights[:num_weights_per_layer], + original_keras_version, original_backend) + backward_weights = preprocess_weights_for_loading( + layer.backward_layer, weights[num_weights_per_layer:], + original_keras_version, original_backend) + weights = forward_weights + backward_weights + + if original_keras_version == '1': + if layer.__class__.__name__ == 'TimeDistributed': + weights = preprocess_weights_for_loading( + layer.layer, weights, original_keras_version, original_backend) + + if layer.__class__.__name__ == 'Conv1D': + shape = weights[0].shape + # Handle Keras 1.1 format + if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters: + # Legacy shape: + # (filters, input_dim, filter_length, 1) + assert shape[0] == layer.filters and shape[2:] == (layer.kernel_size[0], + 1) + weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) + weights[0] = weights[0][:, 0, :, :] + + if layer.__class__.__name__ == 'Conv2D': + if layer.data_format == 'channels_first': + # old: (filters, stack_size, kernel_rows, kernel_cols) + # new: (kernel_rows, kernel_cols, stack_size, filters) + weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) + + if layer.__class__.__name__ == 'Conv2DTranspose': + if layer.data_format == 'channels_last': + # old: (kernel_rows, kernel_cols, stack_size, filters) + # new: (kernel_rows, kernel_cols, 
filters, stack_size) + weights[0] = np.transpose(weights[0], (0, 1, 3, 2)) + if layer.data_format == 'channels_first': + # old: (filters, stack_size, kernel_rows, kernel_cols) + # new: (kernel_rows, kernel_cols, filters, stack_size) + weights[0] = np.transpose(weights[0], (2, 3, 0, 1)) + + if layer.__class__.__name__ == 'Conv3D': + if layer.data_format == 'channels_first': + # old: (filters, stack_size, ...) + # new: (..., stack_size, filters) + weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0)) + + if layer.__class__.__name__ == 'GRU': + if len(weights) == 9: + kernel = np.concatenate([weights[0], weights[3], weights[6]], axis=-1) + recurrent_kernel = np.concatenate( + [weights[1], weights[4], weights[7]], axis=-1) + bias = np.concatenate([weights[2], weights[5], weights[8]], axis=-1) + weights = [kernel, recurrent_kernel, bias] + + if layer.__class__.__name__ == 'LSTM': + if len(weights) == 12: + # old: i, c, f, o + # new: i, f, c, o + kernel = np.concatenate( + [weights[0], weights[6], weights[3], weights[9]], axis=-1) + recurrent_kernel = np.concatenate( + [weights[1], weights[7], weights[4], weights[10]], axis=-1) + bias = np.concatenate( + [weights[2], weights[8], weights[5], weights[11]], axis=-1) + weights = [kernel, recurrent_kernel, bias] + + if layer.__class__.__name__ == 'ConvLSTM2D': + if len(weights) == 12: + kernel = np.concatenate( + [weights[0], weights[6], weights[3], weights[9]], axis=-1) + recurrent_kernel = np.concatenate( + [weights[1], weights[7], weights[4], weights[10]], axis=-1) + bias = np.concatenate( + [weights[2], weights[8], weights[5], weights[11]], axis=-1) + if layer.data_format == 'channels_first': + # old: (filters, stack_size, kernel_rows, kernel_cols) + # new: (kernel_rows, kernel_cols, stack_size, filters) + kernel = np.transpose(kernel, (2, 3, 1, 0)) + recurrent_kernel = np.transpose(recurrent_kernel, (2, 3, 1, 0)) + weights = [kernel, recurrent_kernel, bias] + + if layer.__class__.__name__ in ['Model', 'Sequential']: + 
new_weights = [] + # trainable weights + for sublayer in layer.layers: + num_weights = len(sublayer.trainable_weights) + if num_weights > 0: + new_weights.extend( + preprocess_weights_for_loading( + layer=sublayer, + weights=weights[:num_weights], + original_keras_version=original_keras_version, + original_backend=original_backend)) + weights = weights[num_weights:] + + # non-trainable weights + for sublayer in layer.layers: + num_weights = len([ + l for l in sublayer.weights if l not in sublayer.trainable_weights + ]) + if num_weights > 0: + new_weights.extend( + preprocess_weights_for_loading( + layer=sublayer, + weights=weights[:num_weights], + original_keras_version=original_keras_version, + original_backend=original_backend)) + weights = weights[num_weights:] + weights = new_weights + + conv_layers = ['Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'ConvLSTM2D'] + if layer.__class__.__name__ in conv_layers: + if original_backend == 'theano': + weights[0] = conv_utils.convert_kernel(weights[0]) + if layer.__class__.__name__ == 'ConvLSTM2D': + weights[1] = conv_utils.convert_kernel(weights[1]) + if K.int_shape(layer.weights[0]) != weights[0].shape: + weights[0] = np.transpose(weights[0], (3, 2, 0, 1)) + if layer.__class__.__name__ == 'ConvLSTM2D': + weights[1] = np.transpose(weights[1], (3, 2, 0, 1)) + + # Convert the weights of CuDNNLSTM so that they could be loaded into LSTM + if layer.__class__.__name__ == 'LSTM' and len(weights) == 3: + # Determine if loading a CuDNNLSTM layer from the number of bias weights: + # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) + # if there's no bias weight in the file, skip this conversion + units = weights[1].shape[0] + bias = weights[2] + if len(bias) == units * 8: + # reshape the kernels + kernels = np.split(weights[0], 4, axis=1) + kernels = [ + kernel.reshape(-1).reshape(kernel.shape, order='F') + for kernel in kernels + ] + weights[0] = np.concatenate(kernels, axis=1) + + # transpose the recurrent 
kernels + recurrent_kernels = np.split(weights[1], 4, axis=1) + recurrent_kernels = [kernel.T for kernel in recurrent_kernels] + weights[1] = np.concatenate(recurrent_kernels, axis=1) + + # split the bias into half and merge + weights[2] = bias[:units * 4] + bias[units * 4:] + + return weights + + +def load_weights_from_hdf5_group(f, layers): + """Implements topological (order-based) weight loading. + + Arguments: + f: A pointer to a HDF5 group. + layers: a list of target layers. + + Raises: + ValueError: in case of mismatch between provided layers + and weights file. + """ + if 'keras_version' in f.attrs: + original_keras_version = f.attrs['keras_version'].decode('utf8') + else: + original_keras_version = '1' + if 'backend' in f.attrs: + original_backend = f.attrs['backend'].decode('utf8') + else: + original_backend = None + + filtered_layers = [] + for layer in layers: + weights = layer.weights + if weights: + filtered_layers.append(layer) + + layer_names = [n.decode('utf8') for n in f.attrs['layer_names']] + filtered_layer_names = [] + for name in layer_names: + g = f[name] + weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] + if weight_names: + filtered_layer_names.append(name) + layer_names = filtered_layer_names + if len(layer_names) != len(filtered_layers): + raise ValueError('You are trying to load a weight file ' + 'containing ' + str(len(layer_names)) + + ' layers into a model with ' + str(len(filtered_layers)) + + ' layers.') + + # We batch weight value assignments in a single backend call + # which provides a speedup in TensorFlow. 
+ weight_value_tuples = [] + for k, name in enumerate(layer_names): + g = f[name] + weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] + weight_values = [g[weight_name] for weight_name in weight_names] + layer = filtered_layers[k] + symbolic_weights = layer.weights + weight_values = preprocess_weights_for_loading( + layer, weight_values, original_keras_version, original_backend) + if len(weight_values) != len(symbolic_weights): + raise ValueError('Layer #' + str(k) + ' (named "' + layer.name + + '" in the current model) was found to ' + 'correspond to layer ' + name + ' in the save file. ' + 'However the new layer ' + layer.name + ' expects ' + + str(len(symbolic_weights)) + + ' weights, but the saved weights have ' + + str(len(weight_values)) + ' elements.') + weight_value_tuples += zip(symbolic_weights, weight_values) + K.batch_set_value(weight_value_tuples) + + +def load_weights_from_hdf5_group_by_name(f, layers): + """Implements name-based weight loading. + + (instead of topological weight loading). + + Layers that have no matching name are skipped. + + Arguments: + f: A pointer to a HDF5 group. + layers: a list of target layers. + + Raises: + ValueError: in case of mismatch between provided layers + and weights file. + """ + if 'keras_version' in f.attrs: + original_keras_version = f.attrs['keras_version'].decode('utf8') + else: + original_keras_version = '1' + if 'backend' in f.attrs: + original_backend = f.attrs['backend'].decode('utf8') + else: + original_backend = None + + # New file format. + layer_names = [n.decode('utf8') for n in f.attrs['layer_names']] + + # Reverse index of layer name to list of layers with name. + index = {} + for layer in layers: + if layer.name: + index.setdefault(layer.name, []).append(layer) + + # We batch weight value assignments in a single backend call + # which provides a speedup in TensorFlow. 
+ weight_value_tuples = [] + for k, name in enumerate(layer_names): + g = f[name] + weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] + weight_values = [g[weight_name] for weight_name in weight_names] + + for layer in index.get(name, []): + symbolic_weights = layer.weights + weight_values = preprocess_weights_for_loading( + layer, weight_values, original_keras_version, original_backend) + if len(weight_values) != len(symbolic_weights): + raise ValueError('Layer #' + str(k) + ' (named "' + layer.name + + '") expects ' + str(len(symbolic_weights)) + + ' weight(s), but the saved weights' + ' have ' + + str(len(weight_values)) + ' element(s).') + # Set values. + for i in range(len(weight_values)): + weight_value_tuples.append((symbolic_weights[i], weight_values[i])) + K.batch_set_value(weight_value_tuples) diff --git a/tensorflow/python/keras/_impl/keras/engine/saving_test.py b/tensorflow/python/keras/_impl/keras/engine/saving_test.py new file mode 100644 index 0000000000..bdb17641b0 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/saving_test.py @@ -0,0 +1,375 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#,============================================================================ +"""Tests for model saving.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os +import shutil +import tempfile + +import numpy as np + +from tensorflow.python.keras._impl import keras +from tensorflow.python.platform import test +from tensorflow.python.training import training as training_module + +try: + import h5py # pylint:disable=g-import-not-at-top +except ImportError: + h5py = None + + +class TestWeightSavingAndLoading(test.TestCase): + + def test_weight_loading(self): + with self.test_session(): + a = keras.layers.Input(shape=(2,)) + x = keras.layers.Dense(3)(a) + b = keras.layers.Dense(1)(x) + model = keras.models.Model(a, b) + + x = np.random.random((3, 2)) + ref_y = model.predict(x) + weights = model.get_weights() + model.set_weights(weights) + y = model.predict(x) + self.assertAllClose(ref_y, y) + + with self.assertRaises(ValueError): + model.set_weights(weights[1:]) + with self.assertRaises(ValueError): + model.set_weights(weights[::-1]) + + if h5py is None: + return # Skip rest of test if H5py isn't available. 
+ + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir) + + h5_path = os.path.join(temp_dir, 'test.h5') + model.save_weights(h5_path) + model.load_weights(h5_path) + y = model.predict(x) + self.assertAllClose(ref_y, y) + + model.load_weights(h5_path, by_name=True) + y = model.predict(x) + self.assertAllClose(ref_y, y) + + def test_weight_preprocessing(self): + input_dim = 3 + output_dim = 3 + size = 2 + cases = [ + [ + (keras.layers.Bidirectional(keras.layers.SimpleRNN(2))), + [np.random.random((2, 1)), np.random.random((2, 1))], + (None, 3, 2), + ], + [ + (keras.layers.TimeDistributed(keras.layers.Dense(1))), + [np.random.random((2, 1)), np.random.random((1,))], + (None, 3, 2), + ], + [ + (keras.layers.Conv1D(output_dim, size, use_bias=False)), + [np.random.random((output_dim, input_dim, size, 1))], + (None, 4, input_dim), + ], + [ + (keras.layers.Conv2D(output_dim, size, + use_bias=False, data_format='channels_first')), + [np.random.random((output_dim, input_dim, size, size))], + (None, input_dim, 4, 4), + ], + [ + (keras.layers.Conv2DTranspose(output_dim, size, + use_bias=False, + data_format='channels_first')), + [np.random.random((output_dim, input_dim, size, size))], + (None, input_dim, 4, 4), + ], + [ + (keras.layers.Conv2DTranspose(output_dim, size, + use_bias=False, + data_format='channels_last')), + [np.random.random((size, size, input_dim, output_dim))], + (None, 4, 4, input_dim), + ], + [ + (keras.layers.Conv3D(output_dim, size, + use_bias=False, data_format='channels_first')), + [np.random.random((output_dim, input_dim, size, size, size))], + (None, input_dim, 4, 4, 4), + ], + [ + (keras.layers.GRU(output_dim)), + [np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + 
np.random.random((output_dim, output_dim)), + np.random.random((output_dim,))], + (None, 4, input_dim), + ], + [ + (keras.layers.LSTM(output_dim)), + [np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,)), + np.random.random((input_dim, output_dim)), + np.random.random((output_dim, output_dim)), + np.random.random((output_dim,))], + (None, 4, input_dim), + ], + ] + for layer, weights, input_shape in cases: + layer.build(input_shape) + _ = keras.engine.saving.preprocess_weights_for_loading( + layer, weights, original_keras_version='1') + + model = keras.models.Sequential([keras.layers.Dense(2, input_dim=2)]) + _ = keras.engine.saving.preprocess_weights_for_loading( + model, model.weights, original_keras_version='1') + + x = keras.Input((2,)) + y = keras.layers.Dense(2)(x) + model = keras.models.Model(x, y) + _ = keras.engine.saving.preprocess_weights_for_loading( + model, model.weights, original_keras_version='1') + + def test_sequential_weight_loading(self): + if h5py is None: + return + + temp_dir = self.get_temp_dir() + self.addCleanup(shutil.rmtree, temp_dir) + h5_path = os.path.join(temp_dir, 'test.h5') + + num_hidden = 5 + input_dim = 3 + batch_size = 5 + num_classes = 2 + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) + model.add(keras.layers.Dense(num_classes)) + + x = np.random.random((batch_size, input_dim)) + ref_y = model.predict(x) + + model.save_weights(h5_path) + + model = keras.models.Sequential() + model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) + model.add(keras.layers.Dense(num_classes)) + model.load_weights(h5_path) + y = 
model.predict(x) + + self.assertAllClose(y, ref_y) + + +class TestWholeModelSaving(test.TestCase): + + def test_sequential_model_saving(self): + if h5py is None: + return # Skip test if models cannot be saved. + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.RepeatVector(3)) + model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) + model.compile(loss=keras.losses.MSE, + optimizer=keras.optimizers.RMSprop(lr=0.0001), + metrics=[keras.metrics.categorical_accuracy], + sample_weight_mode='temporal') + x = np.random.random((1, 3)) + y = np.random.random((1, 3, 3)) + model.train_on_batch(x, y) + + out = model.predict(x) + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + + new_model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) + + out2 = new_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + # test that new updates are the same with both models + x = np.random.random((1, 3)) + y = np.random.random((1, 3, 3)) + model.train_on_batch(x, y) + new_model.train_on_batch(x, y) + out = model.predict(x) + out2 = new_model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + def test_sequential_model_saving_2(self): + if h5py is None: + return # Skip test if models cannot be saved. 
+ + with self.test_session(): + # test with custom optimizer, loss + + class CustomOp(keras.optimizers.RMSprop): + pass + + def custom_loss(y_true, y_pred): + return keras.losses.mse(y_true, y_pred) + + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss=custom_loss, optimizer=CustomOp(), metrics=['acc']) + + x = np.random.random((1, 3)) + y = np.random.random((1, 3)) + model.train_on_batch(x, y) + + out = model.predict(x) + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + + model = keras.models.load_model( + fname, + custom_objects={'CustomOp': CustomOp, + 'custom_loss': custom_loss}) + os.close(fd) + os.remove(fname) + + out2 = model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + def test_functional_model_saving(self): + if h5py is None: + return # Skip test if models cannot be saved. + + with self.test_session(): + inputs = keras.layers.Input(shape=(3,)) + x = keras.layers.Dense(2)(inputs) + output = keras.layers.Dense(3)(x) + + model = keras.models.Model(inputs, output) + model.compile(loss=keras.losses.MSE, + optimizer=keras.optimizers.RMSprop(lr=0.0001), + metrics=[keras.metrics.categorical_accuracy]) + x = np.random.random((1, 3)) + y = np.random.random((1, 3)) + model.train_on_batch(x, y) + + out = model.predict(x) + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + + model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) + + out2 = model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + def test_saving_without_compilation(self): + if h5py is None: + return # Skip test if models cannot be saved. 
+ + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss='mse', optimizer='sgd', metrics=['acc']) + + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) + + def test_saving_with_tf_optimizer(self): + if h5py is None: + return # Skip test if models cannot be saved. + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss='mse', + optimizer=training_module.AdadeltaOptimizer(0.1), + metrics=['acc']) + + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) + + def test_saving_right_after_compilation(self): + if h5py is None: + return # Skip test if models cannot be saved. + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(2, input_shape=(3,))) + model.add(keras.layers.Dense(3)) + model.compile(loss='mse', optimizer='sgd', metrics=['acc']) + model.model._make_train_function() + + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) + + def test_saving_lambda_numpy_array_arguments(self): + if h5py is None: + return # Skip test if models cannot be saved. 
+ + mean = np.random.random((4, 2, 3)) + std = np.abs(np.random.random((4, 2, 3))) + 1e-5 + inputs = keras.layers.Input(shape=(4, 2, 3)) + output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std, + arguments={'mu': mean, 'std': std})(inputs) + model = keras.models.Model(inputs, output) + model.compile(loss='mse', optimizer='sgd', metrics=['acc']) + + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + + model = keras.models.load_model(fname) + os.close(fd) + os.remove(fname) + + self.assertAllClose(mean, model.layers[1].arguments['mu']) + self.assertAllClose(std, model.layers[1].arguments['std']) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/python/keras/_impl/keras/engine/sequential.py b/tensorflow/python/keras/_impl/keras/engine/sequential.py new file mode 100644 index 0000000000..db5e7754bc --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/sequential.py @@ -0,0 +1,997 @@ +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +# pylint: disable=protected-access +"""Home of the `Sequential` model. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy +import os + +from tensorflow.python.framework import ops +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import layers as layer_module +from tensorflow.python.keras._impl.keras.engine import base_layer +from tensorflow.python.keras._impl.keras.engine import network +from tensorflow.python.keras._impl.keras.engine import saving +from tensorflow.python.keras._impl.keras.engine.input_layer import Input +from tensorflow.python.keras._impl.keras.engine.input_layer import InputLayer +from tensorflow.python.keras._impl.keras.engine.training import Model +from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.tf_export import tf_export + +try: + import h5py # pylint: disable=g-import-not-at-top +except ImportError: + h5py = None + + +@tf_export('keras.models.Sequential', 'keras.Sequential') +class Sequential(Model): + """Linear stack of layers. + + Arguments: + layers: list of layers to add to the model. + + # Note + The first layer passed to a Sequential model + should have a defined input shape. What that + means is that it should have received an `input_shape` + or `batch_input_shape` argument, + or for some type of layers (recurrent, Dense...) + an `input_dim` argument. 
+ + Example: + + ```python + model = Sequential() + # first layer must have a defined input shape + model.add(Dense(32, input_dim=500)) + # afterwards, Keras does automatic shape inference + model.add(Dense(32)) + + # also possible (equivalent to the above): + model = Sequential() + model.add(Dense(32, input_shape=(500,))) + model.add(Dense(32)) + + # also possible (equivalent to the above): + model = Sequential() + # here the batch dimension is None, + # which means any batch size will be accepted by the model. + model.add(Dense(32, batch_input_shape=(None, 500))) + model.add(Dense(32)) + ``` + """ + + def __init__(self, layers=None, name=None): + self._is_graph_network = True + self._is_compiled = False + self._layers = [] # Stack of layers. + self.model = None # Internal Model instance. + self.inputs = [] # List of input tensors + self.outputs = [] # List of length 1: the output tensor (unique). + self._trainable = True + self._initial_weights = None + self._input_layers = [] + + # Model attributes. + self._inbound_nodes = [] + self._outbound_nodes = [] + self.built = False + + # Set model name. + if not name: + prefix = 'sequential_' + name = prefix + str(K.get_uid(prefix)) + self._name = name + + # Used by Layer base class. + self._dtype = None + self._activity_regularizer = None + + # The following properties are not actually used by Keras; + # they exist for compatibility with TF's variable scoping mechanism. + self._updates = [] + self._losses = [] + self._scope = None + self._reuse = None + self._base_name = name + self._graph = ops.get_default_graph() + + # Add to the model any layers passed to the constructor. + if layers: + for layer in layers: + self.add(layer) + + def add(self, layer): + """Adds a layer instance on top of the layer stack. + + Arguments: + layer: layer instance. + + Raises: + TypeError: If `layer` is not a layer instance. + ValueError: In case the `layer` argument does not + know its input shape. 
+ ValueError: In case the `layer` argument has + multiple output tensors, or is already connected + somewhere else (forbidden in `Sequential` models). + """ + if not isinstance(layer, (base_layer.Layer, base_layer.TFBaseLayer)): + raise TypeError('The added layer must be ' + 'an instance of class Layer. ' + 'Found: ' + str(layer)) + if not self.outputs: + # First layer in model: check that it is an input layer. + if not isinstance(layer, InputLayer): + # Create an input layer. + # First, we need to infer its expected input shape and dtype. + if isinstance(layer, (Model, Sequential)): + # We were passed a model as first layer. + # This requires a specific way to figure out the + # input shape and dtype. + if not layer.layers: + raise ValueError('Cannot add an empty model ' + 'to a `Sequential` model.') + # In case of nested models: recover the first layer + # of the deepest model to infer input shape and dtype. + first_layer = layer.layers[0] + while isinstance(first_layer, (Model, Sequential)): + first_layer = first_layer.layers[0] + batch_shape = first_layer._batch_input_shape + dtype = first_layer.dtype + else: + # We were passed a regular layer, and it should + # know about its input shape. Otherwise, that's an error. + if not hasattr(layer, '_batch_input_shape'): + raise ValueError('The first layer in a ' + 'Sequential model must ' + 'get an `input_shape` argument.') + batch_shape = layer._batch_input_shape + dtype = layer.dtype + # Instantiate the input layer. + x = Input( + batch_shape=batch_shape, dtype=dtype, name=layer.name + '_input') + # This will build the current layer + # and create the node connecting the current layer + # to the input layer we just created. + layer(x) + + if len(layer._inbound_nodes[-1].output_tensors) != 1: + raise ValueError('All layers in a Sequential model ' + 'should have a single output tensor. 
' + 'For multi-output layers, ' + 'use the functional API.') + + self.outputs = [layer._inbound_nodes[-1].output_tensors[0]] + self.inputs = network.get_source_inputs(self.outputs[0]) + + # We create an input node, which we will keep updated + # as we add more layers + base_layer.Node( + outbound_layer=self, + inbound_layers=[], + node_indices=[], + tensor_indices=[], + input_tensors=self.inputs, + output_tensors=self.outputs) + else: + output_tensor = layer(self.outputs[0]) + if isinstance(output_tensor, list): + raise TypeError('All layers in a Sequential model ' + 'should have a single output tensor. ' + 'For multi-output layers, ' + 'use the functional API.') + self.outputs = [output_tensor] + # update self._inbound_nodes + self._inbound_nodes[0].output_tensors = self.outputs + self._inbound_nodes[0].output_shapes = [K.int_shape(self.outputs[0])] + + self._layers.append(layer) + self.built = False + + def pop(self): + """Removes the last layer in the model. + + Raises: + TypeError: if there are no layers in the model. + """ + if not self.layers: + raise TypeError('There are no layers in the model.') + + self.layers.pop() + if not self.layers: + self.outputs = [] + self._inbound_nodes = [] + self._outbound_nodes = [] + else: + self.layers[-1]._outbound_nodes = [] + self.outputs = [self.layers[-1].output] + # update self._inbound_nodes + self._inbound_nodes[0].output_tensors = self.outputs + self._inbound_nodes[0].output_shapes = [K.int_shape(self.outputs[0])] + self.built = False + + def get_layer(self, name=None, index=None): + """Retrieve a layer that is part of the model. + + Returns a layer based on either its name (unique) + or its index in the graph. Indices are based on + order of horizontal graph traversal (bottom-up). + + Arguments: + name: string, name of layer. + index: integer, index of layer. + + Returns: + A layer instance. 
+ """ + if not self.built: + self.build() + return self.model.get_layer(name, index) + + def call(self, inputs, **kwargs): + if not self.built: + self.build() + return self.model.call(inputs, **kwargs) + + def build(self, input_shape=None): + if not self.inputs or not self.outputs: + raise TypeError('Sequential model cannot be built: model is empty.' + ' Add some layers first.') + # actually create the model + self.model = Model(self.inputs, self.outputs[0], name=self.name + '_model') + self.model.trainable = self.trainable + + # mirror model attributes + self.supports_masking = self.model.supports_masking + self._output_mask_cache = self.model._output_mask_cache + self._output_tensor_cache = self.model._output_tensor_cache + self._output_shape_cache = self.model._output_shape_cache + self._input_layers = self.model._input_layers + self._output_layers = self.model._output_layers + self._input_coordinates = self.model._input_coordinates + self._output_coordinates = self.model._output_coordinates + self._nodes_by_depth = self.model._nodes_by_depth + self._network_nodes = self.model._network_nodes + self.output_names = self.model.output_names + self.input_names = self.model.input_names + self._feed_input_names = self.model._feed_input_names + self._feed_inputs = self.model._feed_inputs + + # Make sure child model callbacks + # will call the parent Sequential model. 
+ self.model.callback_model = self + + self.built = True + + @property + def uses_learning_phase(self): + if not self.built: + self.build() + return self.model.uses_learning_phase + + def _gather_list_attr(self, attr): + all_attrs = [] + for layer in self.layers: + all_attrs += getattr(layer, attr, []) + return all_attrs + + def _make_train_function(self): + self.model._make_train_function() + + def _make_test_function(self): + self.model._make_test_function() + + def _make_predict_function(self): + self.model._make_predict_function() + + @property + def trainable(self): + return self._trainable + + @trainable.setter + def trainable(self, value): + if self.model: + self.model.trainable = value + self._trainable = value + + @property + def trainable_weights(self): + if not self.trainable: + return [] + return self._gather_list_attr('trainable_weights') + + @property + def non_trainable_weights(self): + weights = self._gather_list_attr('non_trainable_weights') + if not self.trainable: + trainable_weights = self._gather_list_attr('trainable_weights') + return trainable_weights + weights + return weights + + @property + def regularizers(self): + if not self.built: + self.build() + return self.model.regularizers + + def get_weights(self): + """Retrieves the weights of the model. + + Returns: + A flat list of Numpy arrays + (one array per model weight). + """ + if not self.built: + self.build() + return self.model.get_weights() + + def set_weights(self, weights): + """Sets the weights of the model. + + Arguments: + weights: Should be a list + of Numpy arrays with shapes and types matching + the output of `model.get_weights()`. 
+ """ + if not self.built: + self.build() + self.model.set_weights(weights) + + def load_weights(self, filepath, by_name=False): + if h5py is None: + raise ImportError('`load_weights` requires h5py.') + f = h5py.File(filepath, mode='r') + if 'layer_names' not in f.attrs and 'model_weights' in f: + f = f['model_weights'] + layers = self.layers + if by_name: + saving.load_weights_from_hdf5_group_by_name(f, layers) + else: + saving.load_weights_from_hdf5_group(f, layers) + if hasattr(f, 'close'): + f.close() + + def save_weights(self, filepath, overwrite=True): + if h5py is None: + raise ImportError('`save_weights` requires h5py.') + # If file exists and should not be overwritten: + if not overwrite and os.path.isfile(filepath): + proceed = ask_to_proceed_with_overwrite(filepath) + if not proceed: + return + layers = self.layers + f = h5py.File(filepath, 'w') + saving.save_weights_to_hdf5_group(f, layers) + f.flush() + f.close() + + def compile(self, + optimizer, + loss, + metrics=None, + sample_weight_mode=None, + weighted_metrics=None, + target_tensors=None, + **kwargs): + """Configures the model for training. + + Arguments: + optimizer: String (name of optimizer) or optimizer object. + See [optimizers](/optimizers). + loss: String (name of objective function) or objective function. + See [losses](/losses). + If the model has multiple outputs, you can use a different loss + on each output by passing a dictionary or a list of losses. + The loss value that will be minimized by the model + will then be the sum of all individual losses. + metrics: List of metrics to be evaluated by the model + during training and testing. + Typically you will use `metrics=['accuracy']`. + To specify different metrics for different outputs of a + multi-output model, you could also pass a dictionary, + such as `metrics={'output_a': 'accuracy'}`. + sample_weight_mode: If you need to do timestep-wise + sample weighting (2D weights), set this to `"temporal"`. 
+ `None` defaults to sample-wise weights (1D). + If the model has multiple outputs, you can use a different + `sample_weight_mode` on each output by passing a + dictionary or a list of modes. + weighted_metrics: list of metrics to be evaluated and weighted + by `sample_weight` or `class_weight` during training and testing. + target_tensors: By default, Keras will create a placeholder for the + model's target, which will be fed with the target data during + training. If instead you would like to use your own + target tensor (in turn, Keras will not expect external + Numpy data for these targets at training time), you + can specify them via the `target_tensors` argument. + It should be a single tensor + (for a single-output `Sequential` model). + **kwargs: These arguments are passed into `tf.Session.run`. + + Example: + ```python + model = Sequential() + model.add(Dense(32, input_shape=(500,))) + model.add(Dense(10, activation='softmax')) + model.compile(optimizer='rmsprop', + loss='categorical_crossentropy', + metrics=['accuracy']) + ``` + """ + # create the underlying model + self.build() + # call compile method of Model class + self.model.compile( + optimizer, + loss, + metrics=metrics, + sample_weight_mode=sample_weight_mode, + weighted_metrics=weighted_metrics, + target_tensors=target_tensors, + **kwargs) + self.optimizer = self.model.optimizer + self.loss = self.model.loss + self.metrics = self.model.metrics + self.loss_weights = self.model.loss_weights + self.sample_weight_mode = self.model.sample_weight_mode + self.weighted_metrics = self.model.weighted_metrics + self.targets = self.model.targets + self.metrics_tensors = self.model.metrics_tensors + self.metrics_names = self.model.metrics_names + self.sample_weights = self.model.sample_weights + self.total_loss = self.model.total_loss + + def fit(self, + x=None, + y=None, + batch_size=None, + epochs=1, + verbose=1, + callbacks=None, + validation_split=0., + validation_data=None, + shuffle=True, + 
class_weight=None, + sample_weight=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None, + **kwargs): + """Trains the model for a fixed number of epochs. + + Arguments: + x: Numpy array of training data. + If the input layer in the model is named, you can also pass a + dictionary mapping the input name to a Numpy array. + `x` can be `None` (default) if feeding from + TensorFlow data tensors. + y: Numpy array of target (label) data. + If the output layer in the model is named, you can also pass a + dictionary mapping the output name to a Numpy array. + `y` can be `None` (default) if feeding from + TensorFlow data tensors. + batch_size: Integer or `None`. + Number of samples per gradient update. + If unspecified, it will default to 32. + epochs: Integer. Number of epochs to train the model. + An epoch is an iteration over the entire `x` and `y` + data provided. + Note that in conjunction with `initial_epoch`, + `epochs` is to be understood as "final epoch". + The model is not trained for a number of iterations + given by `epochs`, but merely until the epoch + of index `epochs` is reached. + verbose: 0, 1, or 2. Verbosity mode. + 0 = silent, 1 = progress bar, 2 = one line per epoch. + callbacks: List of `keras.callbacks.Callback` instances. + List of callbacks to apply during training. + See [callbacks](/callbacks). + validation_split: Float between 0 and 1: + Fraction of the training data to be used as validation data. + The model will set apart this fraction of the training data, + will not train on it, and will evaluate + the loss and any model metrics + on this data at the end of each epoch. + The validation data is selected from the last samples + in the `x` and `y` data provided, before shuffling. + validation_data: tuple `(x_val, y_val)` or tuple + `(x_val, y_val, val_sample_weights)` on which to evaluate + the loss and any model metrics at the end of each epoch. + The model will not be trained on this data. 
+ This will override `validation_split`. + shuffle: Boolean (whether to shuffle the training data + before each epoch) or str (for 'batch'). + 'batch' is a special option for dealing with the + limitations of HDF5 data; it shuffles in batch-sized chunks. + Has no effect when `steps_per_epoch` is not `None`. + class_weight: Optional dictionary mapping class indices (integers) + to a weight (float) value, used for weighting the loss function + (during training only). + This can be useful to tell the model to + "pay more attention" to samples from + an under-represented class. + sample_weight: Optional Numpy array of weights for + the training samples, used for weighting the loss function + (during training only). You can either pass a flat (1D) + Numpy array with the same length as the input samples + (1:1 mapping between weights and samples), + or in the case of temporal data, + you can pass a 2D array with shape + `(samples, sequence_length)`, + to apply a different weight to every timestep of every sample. + In this case you should make sure to specify + `sample_weight_mode="temporal"` in `compile()`. + initial_epoch: Epoch at which to start training + (useful for resuming a previous training run). + steps_per_epoch: Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of unique samples in your dataset divided by + the batch size, or 1 if that cannot be determined. + validation_steps: Only relevant if `steps_per_epoch` + is specified. Total number of steps (batches of samples) + to validate before stopping. + **kwargs: Used for backwards compatibility support. + + Returns: + A `History` object. Its `History.history` attribute is + a record of training loss values and metrics values + at successive epochs, as well as validation loss values + and validation metrics values (if applicable). 
+ + Raises: + RuntimeError: If the model was never compiled. + ValueError: In case of mismatch between the provided input data + and what the model expects. + """ + if not self.built: + raise RuntimeError('The model needs to be compiled before being used.') + return self.model.fit( + x, + y, + batch_size=batch_size, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_split=validation_split, + validation_data=validation_data, + shuffle=shuffle, + class_weight=class_weight, + sample_weight=sample_weight, + initial_epoch=initial_epoch, + steps_per_epoch=steps_per_epoch, + validation_steps=validation_steps) + + def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None): + """Computes the loss on some input data, batch by batch. + + Arguments: + x: input data, as a Numpy array or list of Numpy arrays + (if the model has multiple inputs). + y: labels, as a Numpy array. + batch_size: integer. Number of samples per gradient update. + verbose: verbosity mode, 0 or 1. + sample_weight: sample weights, as a Numpy array. + + Returns: + Scalar test loss (if the model has no metrics) + or list of scalars (if the model computes other metrics). + The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + RuntimeError: if the model was never compiled. + """ + if not self.built: + raise RuntimeError('The model needs to be compiled before being used.') + return self.model.evaluate( + x, + y, + batch_size=batch_size, + verbose=verbose, + sample_weight=sample_weight) + + def predict(self, x, batch_size=32, verbose=0): + """Generates output predictions for the input samples. + + The input samples are processed batch by batch. + + Arguments: + x: the input data, as a Numpy array. + batch_size: integer. + verbose: verbosity mode, 0 or 1. + + Returns: + A Numpy array of predictions. 
+ """ + if not self.built: + self.build() + return self.model.predict(x, batch_size=batch_size, verbose=verbose) + + def predict_on_batch(self, x): + """Returns predictions for a single batch of samples. + + Arguments: + x: input data, as a Numpy array or list of Numpy arrays + (if the model has multiple inputs). + + Returns: + A Numpy array of predictions. + """ + if not self.built: + self.build() + return self.model.predict_on_batch(x) + + def train_on_batch(self, x, y, class_weight=None, sample_weight=None): + """Single gradient update over one batch of samples. + + Arguments: + x: input data, as a Numpy array or list of Numpy arrays + (if the model has multiple inputs). + y: labels, as a Numpy array. + class_weight: dictionary mapping classes to a weight value, + used for scaling the loss function (during training only). + sample_weight: sample weights, as a Numpy array. + + Returns: + Scalar training loss (if the model has no metrics) + or list of scalars (if the model computes other metrics). + The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + RuntimeError: if the model was never compiled. + """ + if not self.built: + raise RuntimeError('The model needs to be compiled before being used.') + return self.model.train_on_batch( + x, y, sample_weight=sample_weight, class_weight=class_weight) + + def test_on_batch(self, x, y, sample_weight=None): + """Evaluates the model over a single batch of samples. + + Arguments: + x: input data, as a Numpy array or list of Numpy arrays + (if the model has multiple inputs). + y: labels, as a Numpy array. + sample_weight: sample weights, as a Numpy array. + + Returns: + Scalar test loss (if the model has no metrics) + or list of scalars (if the model computes other metrics). + The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + RuntimeError: if the model was never compiled. 
+ """ + if not self.built: + raise RuntimeError('The model needs to be compiled before being used.') + return self.model.test_on_batch(x, y, sample_weight=sample_weight) + + def predict_proba(self, x, batch_size=32, verbose=0): + """Generates class probability predictions for the input samples. + + The input samples are processed batch by batch. + + Arguments: + x: input data, as a Numpy array or list of Numpy arrays + (if the model has multiple inputs). + batch_size: integer. + verbose: verbosity mode, 0 or 1. + + Returns: + A Numpy array of probability predictions. + """ + preds = self.predict(x, batch_size, verbose) + if preds.min() < 0. or preds.max() > 1.: + logging.warning('Network returning invalid probability values. ' + 'The last layer might not normalize predictions ' + 'into probabilities ' + '(like softmax or sigmoid would).') + return preds + + def predict_classes(self, x, batch_size=32, verbose=0): + """Generate class predictions for the input samples. + + The input samples are processed batch by batch. + + Arguments: + x: input data, as a Numpy array or list of Numpy arrays + (if the model has multiple inputs). + batch_size: integer. + verbose: verbosity mode, 0 or 1. + + Returns: + A numpy array of class predictions. + """ + proba = self.predict(x, batch_size=batch_size, verbose=verbose) + if proba.shape[-1] > 1: + return proba.argmax(axis=-1) + else: + return (proba > 0.5).astype('int32') + + def fit_generator(self, + generator, + steps_per_epoch=None, + epochs=1, + verbose=1, + callbacks=None, + validation_data=None, + validation_steps=None, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + shuffle=True, + initial_epoch=0, + **kwargs): + """Fits the model on data generated batch-by-batch by a Python generator. + + The generator is run in parallel to the model, for efficiency. + For instance, this allows you to do real-time data augmentation + on images on CPU in parallel to training your model on GPU. 
+ + Arguments: + generator: A generator. + The output of the generator must be either + - a tuple (inputs, targets) + - a tuple (inputs, targets, sample_weights). + All arrays should contain the same number of samples. + The generator is expected to loop over its data + indefinitely. An epoch finishes when `steps_per_epoch` + batches have been seen by the model. + steps_per_epoch: Total number of steps (batches of samples) + to yield from `generator` before declaring one epoch + finished and starting the next epoch. It should typically + be equal to the number of samples of your dataset + divided by the batch size. + Optional for `Sequence`: if unspecified, will use + the `len(generator)` as a number of steps. + epochs: Integer, total number of iterations on the data. + Note that in conjunction with initial_epoch, the parameter + epochs is to be understood as "final epoch". The model is + not trained for n steps given by epochs, but until the + epoch epochs is reached. + verbose: Verbosity mode, 0, 1, or 2. + callbacks: List of callbacks to be called during training. + validation_data: This can be either + - A generator for the validation data + - A tuple (inputs, targets) + - A tuple (inputs, targets, sample_weights). + validation_steps: Only relevant if `validation_data` + is a generator. + Number of steps to yield from validation generator + at the end of every epoch. It should typically + be equal to the number of samples of your + validation dataset divided by the batch size. + Optional for `Sequence`: if unspecified, will use + the `len(validation_data)` as a number of steps. + class_weight: Dictionary mapping class indices to a weight + for the class. + max_queue_size: Maximum size for the generator queue + workers: Maximum number of processes to spin up + use_multiprocessing: If True, use process based threading. 
+ Note that because + this implementation relies on multiprocessing, + you should not pass + non picklable arguments to the generator + as they can't be passed + easily to children processes. + shuffle: Whether to shuffle the order of the batches at + the beginning of each epoch. Only used with instances + of `Sequence` (keras.utils.Sequence). + initial_epoch: Epoch at which to start training + (useful for resuming a previous training run) + **kwargs: support for legacy arguments. + + Returns: + A `History` object. + + Raises: + RuntimeError: if the model was never compiled. + ValueError: In case the generator yields + data in an invalid format. + + Example: + + ```python + def generate_arrays_from_file(path): + while 1: + f = open(path) + for line in f: + # create Numpy arrays of input data + # and labels, from each line in the file + x, y = process_line(line) + yield (x, y) + f.close() + + model.fit_generator(generate_arrays_from_file('/my_file.txt'), + steps_per_epoch=1000, epochs=10) + ``` + """ + # Legacy support + if 'max_q_size' in kwargs: + max_queue_size = kwargs.pop('max_q_size') + logging.warning('The argument `max_q_size` has been renamed ' + '`max_queue_size`. Update your method calls accordingly.') + if 'pickle_safe' in kwargs: + use_multiprocessing = kwargs.pop('pickle_safe') + logging.warning('The argument `pickle_safe` has been renamed ' + '`use_multiprocessing`. 
' + 'Update your method calls accordingly.') + if kwargs: + raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) + + if not self.built: + raise RuntimeError('The model needs to be compiled before being used.') + return self.model.fit_generator( + generator, + steps_per_epoch, + epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch) + + def evaluate_generator(self, + generator, + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + **kwargs): + """Evaluates the model on a data generator. + + The generator should return the same kind of data + as accepted by `test_on_batch`. + + Arguments: + generator: Generator yielding tuples (inputs, targets) + or (inputs, targets, sample_weights) + steps: Total number of steps (batches of samples) + to yield from `generator` before stopping. + Optional for `Sequence`: if unspecified, will use + the `len(generator)` as a number of steps. + max_queue_size: maximum size for the generator queue + workers: maximum number of processes to spin up + use_multiprocessing: if True, use process based threading. + Note that because this implementation + relies on multiprocessing, you should not pass + non picklable arguments to the generator + as they can't be passed easily to children processes. + **kwargs: support for legacy arguments. + + Returns: + Scalar test loss (if the model has no metrics) + or list of scalars (if the model computes other metrics). + The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. + + Raises: + RuntimeError: if the model was never compiled. + ValueError: In case the generator yields + data in an invalid format. 
+ """ + # Legacy support + if 'max_q_size' in kwargs: + max_queue_size = kwargs.pop('max_q_size') + logging.warning('The argument `max_q_size` has been renamed ' + '`max_queue_size`. Update your method calls accordingly.') + if 'pickle_safe' in kwargs: + use_multiprocessing = kwargs.pop('pickle_safe') + logging.warning('The argument `pickle_safe` has been renamed ' + '`use_multiprocessing`. ' + 'Update your method calls accordingly.') + if kwargs: + raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) + + if not self.built: + raise RuntimeError('The model needs to be compiled before being used.') + return self.model.evaluate_generator( + generator, + steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing) + + def predict_generator(self, + generator, + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + verbose=0, + **kwargs): + """Generates predictions for the input samples from a data generator. + + The generator should return the same kind of data as accepted by + `predict_on_batch`. + + Arguments: + generator: generator yielding batches of input samples. + steps: Total number of steps (batches of samples) + to yield from `generator` before stopping. + Optional for `Sequence`: if unspecified, will use + the `len(generator)` as a number of steps. + max_queue_size: maximum size for the generator queue + workers: maximum number of processes to spin up + use_multiprocessing: if True, use process based threading. + Note that because this implementation + relies on multiprocessing, you should not pass + non picklable arguments to the generator + as they can't be passed easily to children processes. + verbose: verbosity mode, 0 or 1. + **kwargs: support for legacy arguments. + + Returns: + A Numpy array of predictions. + + Raises: + ValueError: In case the generator yields + data in an invalid format. 
+ """ + # Legacy support + if 'max_q_size' in kwargs: + max_queue_size = kwargs.pop('max_q_size') + logging.warning('The argument `max_q_size` has been renamed ' + '`max_queue_size`. Update your method calls accordingly.') + if 'pickle_safe' in kwargs: + use_multiprocessing = kwargs.pop('pickle_safe') + logging.warning('The argument `pickle_safe` has been renamed ' + '`use_multiprocessing`. ' + 'Update your method calls accordingly.') + if kwargs: + raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) + + if not self.built: + self.build() + return self.model.predict_generator( + generator, + steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + verbose=verbose) + + def get_config(self): + config = [] + for layer in self.layers: + config.append({ + 'class_name': layer.__class__.__name__, + 'config': layer.get_config() + }) + return copy.deepcopy(config) + + @classmethod + def from_config(cls, config, custom_objects=None): + model = cls() + for conf in config: + layer = layer_module.deserialize(conf, custom_objects=custom_objects) + model.add(layer) + return model diff --git a/tensorflow/python/keras/_impl/keras/engine/sequential_test.py b/tensorflow/python/keras/_impl/keras/engine/sequential_test.py new file mode 100644 index 0000000000..166634bd82 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/sequential_test.py @@ -0,0 +1,152 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests specific to `Sequential` model.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras._impl import keras +from tensorflow.python.platform import test + + +class TestSequential(test.TestCase): + """Most Sequential model API tests are covered in `training_test.py`. + """ + + def test_basic_methods(self): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_dim=2)) + model.add(keras.layers.Dropout(0.3, name='dp')) + model.add(keras.layers.Dense(2, kernel_regularizer='l2', + kernel_constraint='max_norm')) + model.build() + self.assertEqual(model.state_updates, model.model.state_updates) + self.assertEqual(model.get_layer(name='dp').name, 'dp') + + def test_sequential_pop(self): + num_hidden = 5 + input_dim = 3 + batch_size = 5 + num_classes = 2 + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) + model.add(keras.layers.Dense(num_classes)) + model.compile(loss='mse', optimizer='sgd') + x = np.random.random((batch_size, input_dim)) + y = np.random.random((batch_size, num_classes)) + model.fit(x, y, epochs=1) + model.pop() + self.assertEqual(len(model.layers), 1) + self.assertEqual(model.output_shape, (None, num_hidden)) + model.compile(loss='mse', optimizer='sgd') + y = np.random.random((batch_size, num_hidden)) + model.fit(x, y, epochs=1) + + # Test popping single-layer model + model = keras.models.Sequential() + model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) + model.pop() + self.assertEqual(len(model.layers), 0) + self.assertEqual(len(model.outputs), 0) + + # Invalid use case + model = keras.models.Sequential() + with 
self.assertRaises(TypeError): + model.pop() + + def test_invalid_use_cases(self): + with self.test_session(): + # Added objects must be layer instances + with self.assertRaises(TypeError): + model = keras.models.Sequential() + model.add(None) + + # Added layers must have an inputs shape + with self.assertRaises(ValueError): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1)) + + # Added layers cannot have multiple outputs + class MyLayer(keras.layers.Layer): + + def call(self, inputs): + return [3 * inputs, 2 * inputs] + + def compute_output_shape(self, input_shape): + return [input_shape, input_shape] + + with self.assertRaises(ValueError): + model = keras.models.Sequential() + model.add(MyLayer(input_shape=(3,))) + with self.assertRaises(TypeError): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_dim=1)) + model.add(MyLayer()) + + # Building empty model + model = keras.models.Sequential() + with self.assertRaises(TypeError): + model.build() + + def test_nested_sequential_trainability(self): + input_dim = 20 + num_units = 10 + num_classes = 2 + + inner_model = keras.models.Sequential() + inner_model.add(keras.layers.Dense(num_units, input_shape=(input_dim,))) + + model = keras.models.Sequential() + model.add(inner_model) + model.add(keras.layers.Dense(num_classes)) + + self.assertEqual(len(model.trainable_weights), 4) + inner_model.trainable = False + self.assertEqual(len(model.trainable_weights), 2) + inner_model.trainable = True + self.assertEqual(len(model.trainable_weights), 4) + + def test_sequential_update_disabling(self): + val_a = np.random.random((10, 4)) + val_out = np.random.random((10, 4)) + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.BatchNormalization(input_shape=(4,))) + + model.trainable = False + assert not model.updates + + model.compile('sgd', 'mse') + assert not model.updates + assert not model.model.updates + + x1 = model.predict(val_a) + 
model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + self.assertAllClose(x1, x2, atol=1e-7) + + model.trainable = True + model.compile('sgd', 'mse') + assert model.updates + assert model.model.updates + + model.train_on_batch(val_a, val_out) + x2 = model.predict(val_a) + assert np.abs(np.sum(x1 - x2)) > 1e-5 diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index 139621db6d..04434323d6 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -18,9 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os -import shutil - import numpy as np from tensorflow.python.eager import context @@ -28,7 +25,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import test_util from tensorflow.python.keras._impl import keras -from tensorflow.python.layers import base as base_layers +from tensorflow.python.layers import base as tf_base_layers from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops @@ -39,11 +36,6 @@ try: except ImportError: yaml = None -try: - import h5py # pylint:disable=g-import-not-at-top -except ImportError: - h5py = None - class TopologyConstructionTest(test.TestCase): @@ -84,7 +76,7 @@ class TopologyConstructionTest(test.TestCase): self.assertEqual(len(layer.get_updates_for(x2)), 1) self.assertEqual(len(layer.get_updates_for(None)), 1) - network = keras.engine.topology.Network(x2, y2) + network = keras.engine.Network(x2, y2) self.assertEqual(len(network.updates), 2) self.assertEqual(len(network.get_updates_for(x1)), 0) self.assertEqual(len(network.get_updates_for(x2)), 1) @@ -146,7 +138,7 @@ class TopologyConstructionTest(test.TestCase): 
self.assertEqual(len(layer.get_losses_for(x2)), 1) self.assertEqual(len(layer.get_losses_for(None)), 1) - network = keras.engine.topology.Network(x2, y2) + network = keras.engine.Network(x2, y2) self.assertEqual(len(network.losses), 2) self.assertEqual(len(network.get_losses_for(x1)), 0) self.assertEqual(len(network.get_losses_for(x2)), 1) @@ -267,7 +259,7 @@ class TopologyConstructionTest(test.TestCase): x = keras.Input(shape=(32,)) dense = keras.layers.Dense(2) y = dense(x) - network = keras.engine.topology.Network(x, y, name='dense_network') + network = keras.engine.Network(x, y, name='dense_network') # test basic attributes self.assertEqual(network.name, 'dense_network') @@ -502,7 +494,7 @@ class TopologyConstructionTest(test.TestCase): self.assertListEqual([x.shape for x in fn_outputs], [(10, 64), (10, 5)]) # test get_source_inputs - self.assertListEqual(keras.engine.topology.get_source_inputs(c), [a, b]) + self.assertListEqual(keras.engine.network.get_source_inputs(c), [a, b]) # serialization / deserialization json_config = model.to_json() @@ -762,7 +754,7 @@ class TopologyConstructionTest(test.TestCase): if context.in_graph_mode(): x = keras.Input(shape=(32,)) y = MaskedLayer()(x) # pylint: disable=not-callable - network = keras.engine.topology.Network(x, y) + network = keras.engine.Network(x, y) # test callability on Input x_2 = keras.Input(shape=(32,)) @@ -875,139 +867,12 @@ class TopologyConstructionTest(test.TestCase): self.assertEqual(np.min(preds), 0.) # At least one unit was dropped. 
-class TestSaving(test.TestCase): - - def test_weight_loading(self): - with self.test_session(): - a = keras.layers.Input(shape=(2,)) - x = keras.layers.Dense(3)(a) - b = keras.layers.Dense(1)(x) - model = keras.models.Model(a, b) - - x = np.random.random((3, 2)) - ref_y = model.predict(x) - weights = model.get_weights() - model.set_weights(weights) - y = model.predict(x) - self.assertAllClose(ref_y, y) - - with self.assertRaises(ValueError): - model.set_weights(weights[1:]) - with self.assertRaises(ValueError): - model.set_weights(weights[::-1]) - - if h5py is None: - return # Skip rest of test if H5py isn't available. - - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - - h5_path = os.path.join(temp_dir, 'test.h5') - model.save_weights(h5_path) - model.load_weights(h5_path) - y = model.predict(x) - self.assertAllClose(ref_y, y) - - model.load_weights(h5_path, by_name=True) - y = model.predict(x) - self.assertAllClose(ref_y, y) - - def test_weight_preprocessing(self): - input_dim = 3 - output_dim = 3 - size = 2 - cases = [ - [ - (keras.layers.Bidirectional(keras.layers.SimpleRNN(2))), - [np.random.random((2, 1)), np.random.random((2, 1))], - (None, 3, 2), - ], - [ - (keras.layers.TimeDistributed(keras.layers.Dense(1))), - [np.random.random((2, 1)), np.random.random((1,))], - (None, 3, 2), - ], - [ - (keras.layers.Conv1D(output_dim, size, use_bias=False)), - [np.random.random((output_dim, input_dim, size, 1))], - (None, 4, input_dim), - ], - [ - (keras.layers.Conv2D(output_dim, size, - use_bias=False, data_format='channels_first')), - [np.random.random((output_dim, input_dim, size, size))], - (None, input_dim, 4, 4), - ], - [ - (keras.layers.Conv2DTranspose(output_dim, size, - use_bias=False, - data_format='channels_first')), - [np.random.random((output_dim, input_dim, size, size))], - (None, input_dim, 4, 4), - ], - [ - (keras.layers.Conv2DTranspose(output_dim, size, - use_bias=False, - data_format='channels_last')), - 
[np.random.random((size, size, input_dim, output_dim))], - (None, 4, 4, input_dim), - ], - [ - (keras.layers.Conv3D(output_dim, size, - use_bias=False, data_format='channels_first')), - [np.random.random((output_dim, input_dim, size, size, size))], - (None, input_dim, 4, 4, 4), - ], - [ - (keras.layers.GRU(output_dim)), - [np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,))], - (None, 4, input_dim), - ], - [ - (keras.layers.LSTM(output_dim)), - [np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,)), - np.random.random((input_dim, output_dim)), - np.random.random((output_dim, output_dim)), - np.random.random((output_dim,))], - (None, 4, input_dim), - ], - ] - for layer, weights, input_shape in cases: - layer.build(input_shape) - _ = keras.engine.topology.preprocess_weights_for_loading( - layer, weights, original_keras_version='1') - - model = keras.models.Sequential([keras.layers.Dense(2, input_dim=2)]) - _ = keras.engine.topology.preprocess_weights_for_loading( - model, model.weights, original_keras_version='1') - - x = keras.Input((2,)) - y = keras.layers.Dense(2)(x) - model = keras.models.Model(x, y) - _ = keras.engine.topology.preprocess_weights_for_loading( - model, model.weights, original_keras_version='1') - - class DeferredModeTest(test.TestCase): def testDeferredTensorAttributes(self): - x = 
base_layers._DeferredTensor(shape=(None, 2), dtype='float32', name='x') + x = tf_base_layers._DeferredTensor(shape=(None, 2), + dtype='float32', + name='x') self.assertEqual(str(x), 'DeferredTensor(\'x\', shape=(?, 2), dtype=float32)') self.assertEqual(repr(x), @@ -1015,21 +880,21 @@ class DeferredModeTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testSimpleNetworkBuilding(self): - inputs = keras.engine.topology.Input(shape=(32,)) + inputs = keras.engine.Input(shape=(32,)) if context.in_eager_mode(): - self.assertIsInstance(inputs, base_layers._DeferredTensor) + self.assertIsInstance(inputs, tf_base_layers._DeferredTensor) self.assertEqual(inputs.dtype.name, 'float32') self.assertEqual(inputs.shape.as_list(), [None, 32]) x = keras.layers.Dense(2)(inputs) if context.in_eager_mode(): - self.assertIsInstance(x, base_layers._DeferredTensor) + self.assertIsInstance(x, tf_base_layers._DeferredTensor) self.assertEqual(x.dtype.name, 'float32') self.assertEqual(x.shape.as_list(), [None, 2]) outputs = keras.layers.Dense(4)(x) - network = keras.engine.topology.Network(inputs, outputs) - self.assertIsInstance(network, keras.engine.topology.Network) + network = keras.engine.Network(inputs, outputs) + self.assertIsInstance(network, keras.engine.Network) if context.in_eager_mode(): # It should be possible to call such a network on EagerTensors. 
@@ -1040,8 +905,8 @@ class DeferredModeTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testMultiIONetworkbuilding(self): - input_a = keras.engine.topology.Input(shape=(32,)) - input_b = keras.engine.topology.Input(shape=(16,)) + input_a = keras.engine.Input(shape=(32,)) + input_b = keras.engine.Input(shape=(16,)) a = keras.layers.Dense(16)(input_a) class AddLayer(keras.layers.Layer): @@ -1055,7 +920,7 @@ class DeferredModeTest(test.TestCase): c = AddLayer()([a, input_b]) # pylint: disable=not-callable c = keras.layers.Dense(2)(c) - network = keras.engine.topology.Network([input_a, input_b], [a, c]) + network = keras.engine.Network([input_a, input_b], [a, c]) if context.in_eager_mode(): a_val = constant_op.constant( np.random.random((10, 32)).astype('float32')) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index d8ea2fe3db..57451ad470 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -31,8 +31,8 @@ from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module from tensorflow.python.keras._impl.keras import optimizers from tensorflow.python.keras._impl.keras.engine import training_eager -from tensorflow.python.keras._impl.keras.engine.topology import Layer -from tensorflow.python.keras._impl.keras.engine.topology import Network +from tensorflow.python.keras._impl.keras.engine.base_layer import Layer +from tensorflow.python.keras._impl.keras.engine.network import Network from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence diff --git a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py 
b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py index 7cac17c51a..c40ee109aa 100644 --- a/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py +++ b/tensorflow/python/keras/_impl/keras/layers/advanced_activations.py @@ -25,7 +25,7 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py index d2792b9636..d95a094245 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_recurrent.py @@ -26,7 +26,7 @@ from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.layers.recurrent import Recurrent from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/layers/embeddings.py b/tensorflow/python/keras/_impl/keras/layers/embeddings.py index ca92899a45..006ecd3135 100644 --- a/tensorflow/python/keras/_impl/keras/layers/embeddings.py +++ 
b/tensorflow/python/keras/_impl/keras/layers/embeddings.py @@ -23,7 +23,7 @@ from tensorflow.python.keras._impl.keras import constraints from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/layers/local.py b/tensorflow/python/keras/_impl/keras/layers/local.py index df0efe6b8b..13d96e9392 100644 --- a/tensorflow/python/keras/_impl/keras/layers/local.py +++ b/tensorflow/python/keras/_impl/keras/layers/local.py @@ -25,7 +25,7 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.utils import conv_utils from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/layers/merge.py b/tensorflow/python/keras/_impl/keras/layers/merge.py index cdf2878e83..c660cbd449 100644 --- a/tensorflow/python/keras/_impl/keras/layers/merge.py +++ b/tensorflow/python/keras/_impl/keras/layers/merge.py @@ -21,8 +21,8 @@ from __future__ import division from __future__ import print_function from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras.engine.topology import Layer -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from 
tensorflow.python.keras._impl.keras.engine.base_layer import Layer +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/layers/noise.py b/tensorflow/python/keras/_impl/keras/layers/noise.py index 9010f49615..e309d160e5 100644 --- a/tensorflow/python/keras/_impl/keras/layers/noise.py +++ b/tensorflow/python/keras/_impl/keras/layers/noise.py @@ -22,7 +22,7 @@ import numpy as np from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index a81971d9ee..0264c7ae01 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -31,7 +31,7 @@ from tensorflow.python.keras._impl.keras import initializers from tensorflow.python.keras._impl.keras import regularizers from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/layers/wrappers.py b/tensorflow/python/keras/_impl/keras/layers/wrappers.py index 61f1a758e4..76ddd9299d 100644 --- 
a/tensorflow/python/keras/_impl/keras/layers/wrappers.py +++ b/tensorflow/python/keras/_impl/keras/layers/wrappers.py @@ -25,7 +25,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras.engine import InputSpec from tensorflow.python.keras._impl.keras.engine import Layer -from tensorflow.python.keras._impl.keras.engine.topology import shape_type_conversion +from tensorflow.python.keras._impl.keras.engine.base_layer import shape_type_conversion from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg from tensorflow.python.layers import utils as tf_layers_util from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/keras/_impl/keras/models.py b/tensorflow/python/keras/_impl/keras/models.py index 8000eaabab..9602e7ba39 100644 --- a/tensorflow/python/keras/_impl/keras/models.py +++ b/tensorflow/python/keras/_impl/keras/models.py @@ -13,1305 +13,30 @@ # limitations under the License. # ============================================================================== # pylint: disable=protected-access -"""Home of the Sequential model, and the `save_model`/`load_model` functions. +"""Code for model cloning, plus model-related API entries. 
""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import copy -import json -import os - -import numpy as np - -from tensorflow.python.framework import ops from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras import layers as layer_module -from tensorflow.python.keras._impl.keras import optimizers -from tensorflow.python.keras._impl.keras.engine import topology -from tensorflow.python.keras._impl.keras.engine.topology import Input -from tensorflow.python.keras._impl.keras.engine.topology import InputLayer -from tensorflow.python.keras._impl.keras.engine.topology import Layer -from tensorflow.python.keras._impl.keras.engine.topology import TFBaseLayer -from tensorflow.python.keras._impl.keras.engine.training import Model +from tensorflow.python.keras._impl.keras.engine import saving +from tensorflow.python.keras._impl.keras.engine import sequential +from tensorflow.python.keras._impl.keras.engine import training +from tensorflow.python.keras._impl.keras.engine.input_layer import Input +from tensorflow.python.keras._impl.keras.engine.input_layer import InputLayer +from tensorflow.python.keras._impl.keras.utils import generic_utils from tensorflow.python.keras._impl.keras.utils.generic_utils import has_arg -from tensorflow.python.keras._impl.keras.utils.io_utils import ask_to_proceed_with_overwrite -from tensorflow.python.platform import tf_logging as logging -from tensorflow.python.util.tf_export import tf_export - - -# pylint: disable=g-import-not-at-top -try: - import h5py -except ImportError: - h5py = None - -try: - import yaml -except ImportError: - yaml = None -# pylint: enable=g-import-not-at-top - - -@tf_export('keras.models.save_model') -def save_model(model, filepath, overwrite=True, include_optimizer=True): - """Save a model to a HDF5 file. 
- - The saved model contains: - - the model's configuration (topology) - - the model's weights - - the model's optimizer's state (if any) - - Thus the saved model can be reinstantiated in - the exact same state, without any of the code - used for model definition or training. - - Arguments: - model: Keras model instance to be saved. - filepath: String, path where to save the model. - overwrite: Whether we should overwrite any existing - model at the target location, or instead - ask the user with a manual prompt. - include_optimizer: If True, save optimizer's state together. - - Raises: - ImportError: if h5py is not available. - """ - - if h5py is None: - raise ImportError('`save_model` requires h5py.') - - def get_json_type(obj): - """Serialize any object to a JSON-serializable structure. - - Arguments: - obj: the object to serialize - - Returns: - JSON-serializable structure representing `obj`. - - Raises: - TypeError: if `obj` cannot be serialized. - """ - # if obj is a serializable Keras class instance - # e.g. optimizer, layer - if hasattr(obj, 'get_config'): - return {'class_name': obj.__class__.__name__, 'config': obj.get_config()} - - # if obj is any numpy type - if type(obj).__module__ == np.__name__: - if isinstance(obj, np.ndarray): - return {'type': type(obj), 'value': obj.tolist()} - else: - return obj.item() - - # misc functions (e.g. loss function) - if callable(obj): - return obj.__name__ - - # if obj is a python 'type' - if type(obj).__name__ == type.__name__: - return obj.__name__ - - raise TypeError('Not JSON Serializable:', obj) - - from tensorflow.python.keras._impl.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top - - # If file exists and should not be overwritten. 
- if not overwrite and os.path.isfile(filepath): - proceed = ask_to_proceed_with_overwrite(filepath) - if not proceed: - return - - with h5py.File(filepath, mode='w') as f: - f.attrs['keras_version'] = str(keras_version).encode('utf8') - f.attrs['backend'] = K.backend().encode('utf8') - f.attrs['model_config'] = json.dumps( - { - 'class_name': model.__class__.__name__, - 'config': model.get_config() - }, - default=get_json_type).encode('utf8') - - model_weights_group = f.create_group('model_weights') - model_layers = model.layers - topology.save_weights_to_hdf5_group(model_weights_group, model_layers) - - if include_optimizer and hasattr(model, 'optimizer'): - if isinstance(model.optimizer, optimizers.TFOptimizer): - logging.warning( - 'TensorFlow optimizers do not ' - 'make it possible to access ' - 'optimizer attributes or optimizer state ' - 'after instantiation. ' - 'As a result, we cannot save the optimizer ' - 'as part of the model save file.' - 'You will have to compile your model again after loading it. ' - 'Prefer using a Keras optimizer instead ' - '(see keras.io/optimizers).') - else: - f.attrs['training_config'] = json.dumps( - { - 'optimizer_config': { - 'class_name': model.optimizer.__class__.__name__, - 'config': model.optimizer.get_config() - }, - 'loss': model.loss, - 'metrics': model.metrics, - 'sample_weight_mode': model.sample_weight_mode, - 'loss_weights': model.loss_weights, - }, - default=get_json_type).encode('utf8') - - # Save optimizer weights. 
- symbolic_weights = getattr(model.optimizer, 'weights') - if symbolic_weights: - optimizer_weights_group = f.create_group('optimizer_weights') - weight_values = K.batch_get_value(symbolic_weights) - weight_names = [] - for w, val in zip(symbolic_weights, weight_values): - name = str(w.name) - weight_names.append(name.encode('utf8')) - optimizer_weights_group.attrs['weight_names'] = weight_names - for name, val in zip(weight_names, weight_values): - param_dset = optimizer_weights_group.create_dataset( - name, val.shape, dtype=val.dtype) - if not val.shape: - # scalar - param_dset[()] = val - else: - param_dset[:] = val - f.flush() - - -@tf_export('keras.models.load_model') -def load_model(filepath, custom_objects=None, compile=True): # pylint: disable=redefined-builtin - """Loads a model saved via `save_model`. - - Arguments: - filepath: String, path to the saved model. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - compile: Boolean, whether to compile the model - after loading. - - Returns: - A Keras model instance. If an optimizer was found - as part of the saved model, the model is already - compiled. Otherwise, the model is uncompiled and - a warning will be displayed. When `compile` is set - to False, the compilation is omitted without any - warning. - - Raises: - ImportError: if h5py is not available. - ValueError: In case of an invalid savefile. - """ - if h5py is None: - raise ImportError('`load_model` requires h5py.') - - if not custom_objects: - custom_objects = {} - - def convert_custom_objects(obj): - """Handles custom object lookup. - - Arguments: - obj: object, dict, or list. - - Returns: - The same structure, where occurrences - of a custom object name have been replaced - with the custom object. 
- """ - if isinstance(obj, list): - deserialized = [] - for value in obj: - deserialized.append(convert_custom_objects(value)) - return deserialized - if isinstance(obj, dict): - deserialized = {} - for key, value in obj.items(): - deserialized[key] = convert_custom_objects(value) - return deserialized - if obj in custom_objects: - return custom_objects[obj] - return obj - - with h5py.File(filepath, mode='r') as f: - # instantiate model - model_config = f.attrs.get('model_config') - if model_config is None: - raise ValueError('No model found in config file.') - model_config = json.loads(model_config.decode('utf-8')) - model = model_from_config(model_config, custom_objects=custom_objects) - - # set weights - topology.load_weights_from_hdf5_group(f['model_weights'], model.layers) - - # Early return if compilation is not required. - if not compile: - return model - - # instantiate optimizer - training_config = f.attrs.get('training_config') - if training_config is None: - logging.warning('No training configuration found in save file: ' - 'the model was *not* compiled. Compile it manually.') - return model - training_config = json.loads(training_config.decode('utf-8')) - optimizer_config = training_config['optimizer_config'] - optimizer = optimizers.deserialize( - optimizer_config, custom_objects=custom_objects) - - # Recover loss functions and metrics. - loss = convert_custom_objects(training_config['loss']) - metrics = convert_custom_objects(training_config['metrics']) - sample_weight_mode = training_config['sample_weight_mode'] - loss_weights = training_config['loss_weights'] - - # Compile model. - model.compile( - optimizer=optimizer, - loss=loss, - metrics=metrics, - loss_weights=loss_weights, - sample_weight_mode=sample_weight_mode) - - # Set optimizer weights. - if 'optimizer_weights' in f: - # Build train function (to get weight updates). 
- if isinstance(model, Sequential): - model.model._make_train_function() - else: - model._make_train_function() - optimizer_weights_group = f['optimizer_weights'] - optimizer_weight_names = [ - n.decode('utf8') - for n in optimizer_weights_group.attrs['weight_names'] - ] - optimizer_weight_values = [ - optimizer_weights_group[n] for n in optimizer_weight_names - ] - try: - model.optimizer.set_weights(optimizer_weight_values) - except ValueError: - logging.warning('Error in loading the saved optimizer ' - 'state. As a result, your model is ' - 'starting with a freshly initialized ' - 'optimizer.') - return model - - -@tf_export('keras.models.model_from_config') -def model_from_config(config, custom_objects=None): - """Instantiates a Keras model from its config. - - Arguments: - config: Configuration dictionary. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - Returns: - A Keras model instance (uncompiled). - - Raises: - TypeError: if `config` is not a dictionary. - """ - if isinstance(config, list): - raise TypeError('`model_from_config` expects a dictionary, not a list. ' - 'Maybe you meant to use ' - '`Sequential.from_config(config)`?') - return layer_module.deserialize(config, custom_objects=custom_objects) - - -@tf_export('keras.models.model_from_yaml') -def model_from_yaml(yaml_string, custom_objects=None): - """Parses a yaml model configuration file and returns a model instance. - - Arguments: - yaml_string: YAML string encoding a model configuration. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - Returns: - A Keras model instance (uncompiled). - - Raises: - ImportError: if yaml module is not found. 
- """ - if yaml is None: - raise ImportError('Requires yaml module installed.') - config = yaml.load(yaml_string) - return layer_module.deserialize(config, custom_objects=custom_objects) - - -@tf_export('keras.models.model_from_json') -def model_from_json(json_string, custom_objects=None): - """Parses a JSON model configuration file and returns a model instance. - - Arguments: - json_string: JSON string encoding a model configuration. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - Returns: - A Keras model instance (uncompiled). - """ - config = json.loads(json_string) - return layer_module.deserialize(config, custom_objects=custom_objects) - - -@tf_export('keras.models.Sequential', 'keras.Sequential') -class Sequential(Model): - """Linear stack of layers. - - Arguments: - layers: list of layers to add to the model. - - # Note - The first layer passed to a Sequential model - should have a defined input shape. What that - means is that it should have received an `input_shape` - or `batch_input_shape` argument, - or for some type of layers (recurrent, Dense...) - an `input_dim` argument. - - Example: - - ```python - model = Sequential() - # first layer must have a defined input shape - model.add(Dense(32, input_dim=500)) - # afterwards, Keras does automatic shape inference - model.add(Dense(32)) - - # also possible (equivalent to the above): - model = Sequential() - model.add(Dense(32, input_shape=(500,))) - model.add(Dense(32)) - - # also possible (equivalent to the above): - model = Sequential() - # here the batch dimension is None, - # which means any batch size will be accepted by the model. - model.add(Dense(32, batch_input_shape=(None, 500))) - model.add(Dense(32)) - ``` - """ - - def __init__(self, layers=None, name=None): - self._is_graph_network = True - self._is_compiled = False - self._layers = [] # Stack of layers. - self.model = None # Internal Model instance. 
- self.inputs = [] # List of input tensors - self.outputs = [] # List of length 1: the output tensor (unique). - self._trainable = True - self._initial_weights = None - self._input_layers = [] - - # Model attributes. - self._inbound_nodes = [] - self._outbound_nodes = [] - self.built = False - - # Set model name. - if not name: - prefix = 'sequential_' - name = prefix + str(K.get_uid(prefix)) - self._name = name - - # Used by Layer base class. - self._dtype = None - self._activity_regularizer = None - - # The following properties are not actually used by Keras; - # they exist for compatibility with TF's variable scoping mechanism. - self._updates = [] - self._losses = [] - self._scope = None - self._reuse = None - self._base_name = name - self._graph = ops.get_default_graph() - - # Add to the model any layers passed to the constructor. - if layers: - for layer in layers: - self.add(layer) - - def add(self, layer): - """Adds a layer instance on top of the layer stack. - - Arguments: - layer: layer instance. - - Raises: - TypeError: If `layer` is not a layer instance. - ValueError: In case the `layer` argument does not - know its input shape. - ValueError: In case the `layer` argument has - multiple output tensors, or is already connected - somewhere else (forbidden in `Sequential` models). - """ - if not isinstance(layer, (Layer, TFBaseLayer)): - raise TypeError('The added layer must be ' - 'an instance of class Layer. ' - 'Found: ' + str(layer)) - if not self.outputs: - # First layer in model: check that it is an input layer. - if not isinstance(layer, InputLayer): - # Create an input layer. - # First, we need to infer its expected input shape and dtype. - if isinstance(layer, (Model, Sequential)): - # We were passed a model as first layer. - # This requires a specific way to figure out the - # input shape and dtype. 
- if not layer.layers: - raise ValueError('Cannot add an empty model ' - 'to a `Sequential` model.') - # In case of nested models: recover the first layer - # of the deepest model to infer input shape and dtype. - first_layer = layer.layers[0] - while isinstance(first_layer, (Model, Sequential)): - first_layer = first_layer.layers[0] - batch_shape = first_layer._batch_input_shape - dtype = first_layer.dtype - else: - # We were passed a regular layer, and it should - # know about its input shape. Otherwise, that's an error. - if not hasattr(layer, '_batch_input_shape'): - raise ValueError('The first layer in a ' - 'Sequential model must ' - 'get an `input_shape` argument.') - batch_shape = layer._batch_input_shape - dtype = layer.dtype - # Instantiate the input layer. - x = Input( - batch_shape=batch_shape, dtype=dtype, name=layer.name + '_input') - # This will build the current layer - # and create the node connecting the current layer - # to the input layer we just created. - layer(x) - - if len(layer._inbound_nodes[-1].output_tensors) != 1: - raise ValueError('All layers in a Sequential model ' - 'should have a single output tensor. ' - 'For multi-output layers, ' - 'use the functional API.') - - self.outputs = [layer._inbound_nodes[-1].output_tensors[0]] - self.inputs = topology.get_source_inputs(self.outputs[0]) - - # We create an input node, which we will keep updated - # as we add more layers - topology.Node( - outbound_layer=self, - inbound_layers=[], - node_indices=[], - tensor_indices=[], - input_tensors=self.inputs, - output_tensors=self.outputs) - else: - output_tensor = layer(self.outputs[0]) - if isinstance(output_tensor, list): - raise TypeError('All layers in a Sequential model ' - 'should have a single output tensor. 
' - 'For multi-output layers, ' - 'use the functional API.') - self.outputs = [output_tensor] - # update self._inbound_nodes - self._inbound_nodes[0].output_tensors = self.outputs - self._inbound_nodes[0].output_shapes = [K.int_shape(self.outputs[0])] - - self._layers.append(layer) - self.built = False - - def pop(self): - """Removes the last layer in the model. - - Raises: - TypeError: if there are no layers in the model. - """ - if not self.layers: - raise TypeError('There are no layers in the model.') - - self.layers.pop() - if not self.layers: - self.outputs = [] - self._inbound_nodes = [] - self._outbound_nodes = [] - else: - self.layers[-1]._outbound_nodes = [] - self.outputs = [self.layers[-1].output] - # update self._inbound_nodes - self._inbound_nodes[0].output_tensors = self.outputs - self._inbound_nodes[0].output_shapes = [K.int_shape(self.outputs[0])] - self.built = False - - def get_layer(self, name=None, index=None): - """Retrieve a layer that is part of the model. - - Returns a layer based on either its name (unique) - or its index in the graph. Indices are based on - order of horizontal graph traversal (bottom-up). - - Arguments: - name: string, name of layer. - index: integer, index of layer. - - Returns: - A layer instance. - """ - if not self.built: - self.build() - return self.model.get_layer(name, index) - - def call(self, inputs, **kwargs): - if not self.built: - self.build() - return self.model.call(inputs, **kwargs) - - def build(self, input_shape=None): - if not self.inputs or not self.outputs: - raise TypeError('Sequential model cannot be built: model is empty.' 
- ' Add some layers first.') - # actually create the model - self.model = Model(self.inputs, self.outputs[0], name=self.name + '_model') - self.model.trainable = self.trainable - - # mirror model attributes - self.supports_masking = self.model.supports_masking - self._output_mask_cache = self.model._output_mask_cache - self._output_tensor_cache = self.model._output_tensor_cache - self._output_shape_cache = self.model._output_shape_cache - self._input_layers = self.model._input_layers - self._output_layers = self.model._output_layers - self._input_coordinates = self.model._input_coordinates - self._output_coordinates = self.model._output_coordinates - self._nodes_by_depth = self.model._nodes_by_depth - self._network_nodes = self.model._network_nodes - self.output_names = self.model.output_names - self.input_names = self.model.input_names - self._feed_input_names = self.model._feed_input_names - self._feed_inputs = self.model._feed_inputs - - # Make sure child model callbacks - # will call the parent Sequential model. 
- self.model.callback_model = self - - self.built = True - - @property - def uses_learning_phase(self): - if not self.built: - self.build() - return self.model.uses_learning_phase - - def _gather_list_attr(self, attr): - all_attrs = [] - for layer in self.layers: - all_attrs += getattr(layer, attr, []) - return all_attrs - - @property - def trainable(self): - return self._trainable - - @trainable.setter - def trainable(self, value): - if self.model: - self.model.trainable = value - self._trainable = value - - @property - def trainable_weights(self): - if not self.trainable: - return [] - return self._gather_list_attr('trainable_weights') - - @property - def non_trainable_weights(self): - weights = self._gather_list_attr('non_trainable_weights') - if not self.trainable: - trainable_weights = self._gather_list_attr('trainable_weights') - return trainable_weights + weights - return weights - - @property - def regularizers(self): - if not self.built: - self.build() - return self.model.regularizers - - def get_weights(self): - """Retrieves the weights of the model. - - Returns: - A flat list of Numpy arrays - (one array per model weight). - """ - if not self.built: - self.build() - return self.model.get_weights() - - def set_weights(self, weights): - """Sets the weights of the model. - - Arguments: - weights: Should be a list - of Numpy arrays with shapes and types matching - the output of `model.get_weights()`. 
- """ - if not self.built: - self.build() - self.model.set_weights(weights) - - def load_weights(self, filepath, by_name=False): - if h5py is None: - raise ImportError('`load_weights` requires h5py.') - f = h5py.File(filepath, mode='r') - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - layers = self.layers - if by_name: - topology.load_weights_from_hdf5_group_by_name(f, layers) - else: - topology.load_weights_from_hdf5_group(f, layers) - if hasattr(f, 'close'): - f.close() - - def save_weights(self, filepath, overwrite=True): - if h5py is None: - raise ImportError('`save_weights` requires h5py.') - # If file exists and should not be overwritten: - if not overwrite and os.path.isfile(filepath): - proceed = ask_to_proceed_with_overwrite(filepath) - if not proceed: - return - layers = self.layers - f = h5py.File(filepath, 'w') - topology.save_weights_to_hdf5_group(f, layers) - f.flush() - f.close() - - def compile(self, - optimizer, - loss, - metrics=None, - sample_weight_mode=None, - weighted_metrics=None, - target_tensors=None, - **kwargs): - """Configures the model for training. - - Arguments: - optimizer: String (name of optimizer) or optimizer object. - See [optimizers](/optimizers). - loss: String (name of objective function) or objective function. - See [losses](/losses). - If the model has multiple outputs, you can use a different loss - on each output by passing a dictionary or a list of losses. - The loss value that will be minimized by the model - will then be the sum of all individual losses. - metrics: List of metrics to be evaluated by the model - during training and testing. - Typically you will use `metrics=['accuracy']`. - To specify different metrics for different outputs of a - multi-output model, you could also pass a dictionary, - such as `metrics={'output_a': 'accuracy'}`. - sample_weight_mode: If you need to do timestep-wise - sample weighting (2D weights), set this to `"temporal"`. 
- `None` defaults to sample-wise weights (1D). - If the model has multiple outputs, you can use a different - `sample_weight_mode` on each output by passing a - dictionary or a list of modes. - weighted_metrics: list of metrics to be evaluated and weighted - by `sample_weight` or `class_weight` during training and testing. - target_tensors: By default, Keras will create a placeholder for the - model's target, which will be fed with the target data during - training. If instead you would like to use your own - target tensor (in turn, Keras will not expect external - Numpy data for these targets at training time), you - can specify them via the `target_tensors` argument. - It should be a single tensor - (for a single-output `Sequential` model). - **kwargs: These arguments are passed into `tf.Session.run`. - - Example: - ```python - model = Sequential() - model.add(Dense(32, input_shape=(500,))) - model.add(Dense(10, activation='softmax')) - model.compile(optimizer='rmsprop', - loss='categorical_crossentropy', - metrics=['accuracy']) - ``` - """ - # create the underlying model - self.build() - # call compile method of Model class - self.model.compile( - optimizer, - loss, - metrics=metrics, - sample_weight_mode=sample_weight_mode, - weighted_metrics=weighted_metrics, - target_tensors=target_tensors, - **kwargs) - self.optimizer = self.model.optimizer - self.loss = self.model.loss - self.metrics = self.model.metrics - self.loss_weights = self.model.loss_weights - self.sample_weight_mode = self.model.sample_weight_mode - self.weighted_metrics = self.model.weighted_metrics - self.targets = self.model.targets - self.metrics_tensors = self.model.metrics_tensors - self.metrics_names = self.model.metrics_names - self.sample_weights = self.model.sample_weights - self.total_loss = self.model.total_loss - - def fit(self, - x=None, - y=None, - batch_size=None, - epochs=1, - verbose=1, - callbacks=None, - validation_split=0., - validation_data=None, - shuffle=True, - 
class_weight=None, - sample_weight=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None, - **kwargs): - """Trains the model for a fixed number of epochs. - - Arguments: - x: Numpy array of training data. - If the input layer in the model is named, you can also pass a - dictionary mapping the input name to a Numpy array. - `x` can be `None` (default) if feeding from - TensorFlow data tensors. - y: Numpy array of target (label) data. - If the output layer in the model is named, you can also pass a - dictionary mapping the output name to a Numpy array. - `y` can be `None` (default) if feeding from - TensorFlow data tensors. - batch_size: Integer or `None`. - Number of samples per gradient update. - If unspecified, it will default to 32. - epochs: Integer. Number of epochs to train the model. - An epoch is an iteration over the entire `x` and `y` - data provided. - Note that in conjunction with `initial_epoch`, - `epochs` is to be understood as "final epoch". - The model is not trained for a number of iterations - given by `epochs`, but merely until the epoch - of index `epochs` is reached. - verbose: 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = one line per epoch. - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during training. - See [callbacks](/callbacks). - validation_split: Float between 0 and 1: - Fraction of the training data to be used as validation data. - The model will set apart this fraction of the training data, - will not train on it, and will evaluate - the loss and any model metrics - on this data at the end of each epoch. - The validation data is selected from the last samples - in the `x` and `y` data provided, before shuffling. - validation_data: tuple `(x_val, y_val)` or tuple - `(x_val, y_val, val_sample_weights)` on which to evaluate - the loss and any model metrics at the end of each epoch. - The model will not be trained on this data. 
- This will override `validation_split`. - shuffle: Boolean (whether to shuffle the training data - before each epoch) or str (for 'batch'). - 'batch' is a special option for dealing with the - limitations of HDF5 data; it shuffles in batch-sized chunks. - Has no effect when `steps_per_epoch` is not `None`. - class_weight: Optional dictionary mapping class indices (integers) - to a weight (float) value, used for weighting the loss function - (during training only). - This can be useful to tell the model to - "pay more attention" to samples from - an under-represented class. - sample_weight: Optional Numpy array of weights for - the training samples, used for weighting the loss function - (during training only). You can either pass a flat (1D) - Numpy array with the same length as the input samples - (1:1 mapping between weights and samples), - or in the case of temporal data, - you can pass a 2D array with shape - `(samples, sequence_length)`, - to apply a different weight to every timestep of every sample. - In this case you should make sure to specify - `sample_weight_mode="temporal"` in `compile()`. - initial_epoch: Epoch at which to start training - (useful for resuming a previous training run). - steps_per_epoch: Total number of steps (batches of samples) - before declaring one epoch finished and starting the - next epoch. When training with input tensors such as - TensorFlow data tensors, the default `None` is equal to - the number of unique samples in your dataset divided by - the batch size, or 1 if that cannot be determined. - validation_steps: Only relevant if `steps_per_epoch` - is specified. Total number of steps (batches of samples) - to validate before stopping. - **kwargs: Used for backwards compatibility support. - - Returns: - A `History` object. Its `History.history` attribute is - a record of training loss values and metrics values - at successive epochs, as well as validation loss values - and validation metrics values (if applicable). 
- - Raises: - RuntimeError: If the model was never compiled. - ValueError: In case of mismatch between the provided input data - and what the model expects. - """ - if not self.built: - raise RuntimeError('The model needs to be compiled before being used.') - return self.model.fit( - x, - y, - batch_size=batch_size, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_split=validation_split, - validation_data=validation_data, - shuffle=shuffle, - class_weight=class_weight, - sample_weight=sample_weight, - initial_epoch=initial_epoch, - steps_per_epoch=steps_per_epoch, - validation_steps=validation_steps) - - def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None): - """Computes the loss on some input data, batch by batch. - - Arguments: - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - y: labels, as a Numpy array. - batch_size: integer. Number of samples per gradient update. - verbose: verbosity mode, 0 or 1. - sample_weight: sample weights, as a Numpy array. - - Returns: - Scalar test loss (if the model has no metrics) - or list of scalars (if the model computes other metrics). - The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - Raises: - RuntimeError: if the model was never compiled. - """ - if not self.built: - raise RuntimeError('The model needs to be compiled before being used.') - return self.model.evaluate( - x, - y, - batch_size=batch_size, - verbose=verbose, - sample_weight=sample_weight) - - def predict(self, x, batch_size=32, verbose=0): - """Generates output predictions for the input samples. - - The input samples are processed batch by batch. - - Arguments: - x: the input data, as a Numpy array. - batch_size: integer. - verbose: verbosity mode, 0 or 1. - - Returns: - A Numpy array of predictions. 
- """ - if not self.built: - self.build() - return self.model.predict(x, batch_size=batch_size, verbose=verbose) - - def predict_on_batch(self, x): - """Returns predictions for a single batch of samples. - - Arguments: - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - - Returns: - A Numpy array of predictions. - """ - if not self.built: - self.build() - return self.model.predict_on_batch(x) - - def train_on_batch(self, x, y, class_weight=None, sample_weight=None): - """Single gradient update over one batch of samples. - - Arguments: - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - y: labels, as a Numpy array. - class_weight: dictionary mapping classes to a weight value, - used for scaling the loss function (during training only). - sample_weight: sample weights, as a Numpy array. - - Returns: - Scalar training loss (if the model has no metrics) - or list of scalars (if the model computes other metrics). - The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - Raises: - RuntimeError: if the model was never compiled. - """ - if not self.built: - raise RuntimeError('The model needs to be compiled before being used.') - return self.model.train_on_batch( - x, y, sample_weight=sample_weight, class_weight=class_weight) - - def test_on_batch(self, x, y, sample_weight=None): - """Evaluates the model over a single batch of samples. - - Arguments: - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - y: labels, as a Numpy array. - sample_weight: sample weights, as a Numpy array. - - Returns: - Scalar test loss (if the model has no metrics) - or list of scalars (if the model computes other metrics). - The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - Raises: - RuntimeError: if the model was never compiled. 
- """ - if not self.built: - raise RuntimeError('The model needs to be compiled before being used.') - return self.model.test_on_batch(x, y, sample_weight=sample_weight) - - def predict_proba(self, x, batch_size=32, verbose=0): - """Generates class probability predictions for the input samples. - - The input samples are processed batch by batch. - - Arguments: - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - batch_size: integer. - verbose: verbosity mode, 0 or 1. - - Returns: - A Numpy array of probability predictions. - """ - preds = self.predict(x, batch_size, verbose) - if preds.min() < 0. or preds.max() > 1.: - logging.warning('Network returning invalid probability values. ' - 'The last layer might not normalize predictions ' - 'into probabilities ' - '(like softmax or sigmoid would).') - return preds - - def predict_classes(self, x, batch_size=32, verbose=0): - """Generate class predictions for the input samples. - - The input samples are processed batch by batch. - - Arguments: - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - batch_size: integer. - verbose: verbosity mode, 0 or 1. - - Returns: - A numpy array of class predictions. - """ - proba = self.predict(x, batch_size=batch_size, verbose=verbose) - if proba.shape[-1] > 1: - return proba.argmax(axis=-1) - else: - return (proba > 0.5).astype('int32') - - def fit_generator(self, - generator, - steps_per_epoch=None, - epochs=1, - verbose=1, - callbacks=None, - validation_data=None, - validation_steps=None, - class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - shuffle=True, - initial_epoch=0, - **kwargs): - """Fits the model on data generated batch-by-batch by a Python generator. - - The generator is run in parallel to the model, for efficiency. - For instance, this allows you to do real-time data augmentation - on images on CPU in parallel to training your model on GPU. 
- - Arguments: - generator: A generator. - The output of the generator must be either - - a tuple (inputs, targets) - - a tuple (inputs, targets, sample_weights). - All arrays should contain the same number of samples. - The generator is expected to loop over its data - indefinitely. An epoch finishes when `steps_per_epoch` - batches have been seen by the model. - steps_per_epoch: Total number of steps (batches of samples) - to yield from `generator` before declaring one epoch - finished and starting the next epoch. It should typically - be equal to the number of samples of your dataset - divided by the batch size. - Optional for `Sequence`: if unspecified, will use - the `len(generator)` as a number of steps. - epochs: Integer, total number of iterations on the data. - Note that in conjunction with initial_epoch, the parameter - epochs is to be understood as "final epoch". The model is - not trained for n steps given by epochs, but until the - epoch epochs is reached. - verbose: Verbosity mode, 0, 1, or 2. - callbacks: List of callbacks to be called during training. - validation_data: This can be either - - A generator for the validation data - - A tuple (inputs, targets) - - A tuple (inputs, targets, sample_weights). - validation_steps: Only relevant if `validation_data` - is a generator. - Number of steps to yield from validation generator - at the end of every epoch. It should typically - be equal to the number of samples of your - validation dataset divided by the batch size. - Optional for `Sequence`: if unspecified, will use - the `len(validation_data)` as a number of steps. - class_weight: Dictionary mapping class indices to a weight - for the class. - max_queue_size: Maximum size for the generator queue - workers: Maximum number of processes to spin up - use_multiprocessing: If True, use process based threading. 
- Note that because - this implementation relies on multiprocessing, - you should not pass - non picklable arguments to the generator - as they can't be passed - easily to children processes. - shuffle: Whether to shuffle the order of the batches at - the beginning of each epoch. Only used with instances - of `Sequence` (keras.utils.Sequence). - initial_epoch: Epoch at which to start training - (useful for resuming a previous training run) - **kwargs: support for legacy arguments. - - Returns: - A `History` object. - - Raises: - RuntimeError: if the model was never compiled. - ValueError: In case the generator yields - data in an invalid format. - - Example: - - ```python - def generate_arrays_from_file(path): - while 1: - f = open(path) - for line in f: - # create Numpy arrays of input data - # and labels, from each line in the file - x, y = process_line(line) - yield (x, y) - f.close() - - model.fit_generator(generate_arrays_from_file('/my_file.txt'), - steps_per_epoch=1000, epochs=10) - ``` - """ - # Legacy support - if 'max_q_size' in kwargs: - max_queue_size = kwargs.pop('max_q_size') - logging.warning('The argument `max_q_size` has been renamed ' - '`max_queue_size`. Update your method calls accordingly.') - if 'pickle_safe' in kwargs: - use_multiprocessing = kwargs.pop('pickle_safe') - logging.warning('The argument `pickle_safe` has been renamed ' - '`use_multiprocessing`. 
' - 'Update your method calls accordingly.') - if kwargs: - raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) - - if not self.built: - raise RuntimeError('The model needs to be compiled before being used.') - return self.model.fit_generator( - generator, - steps_per_epoch, - epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle, - initial_epoch=initial_epoch) - - def evaluate_generator(self, - generator, - steps=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - **kwargs): - """Evaluates the model on a data generator. - - The generator should return the same kind of data - as accepted by `test_on_batch`. - - Arguments: - generator: Generator yielding tuples (inputs, targets) - or (inputs, targets, sample_weights) - steps: Total number of steps (batches of samples) - to yield from `generator` before stopping. - Optional for `Sequence`: if unspecified, will use - the `len(generator)` as a number of steps. - max_queue_size: maximum size for the generator queue - workers: maximum number of processes to spin up - use_multiprocessing: if True, use process based threading. - Note that because this implementation - relies on multiprocessing, you should not pass - non picklable arguments to the generator - as they can't be passed easily to children processes. - **kwargs: support for legacy arguments. - - Returns: - Scalar test loss (if the model has no metrics) - or list of scalars (if the model computes other metrics). - The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - Raises: - RuntimeError: if the model was never compiled. - ValueError: In case the generator yields - data in an invalid format. 
- """ - # Legacy support - if 'max_q_size' in kwargs: - max_queue_size = kwargs.pop('max_q_size') - logging.warning('The argument `max_q_size` has been renamed ' - '`max_queue_size`. Update your method calls accordingly.') - if 'pickle_safe' in kwargs: - use_multiprocessing = kwargs.pop('pickle_safe') - logging.warning('The argument `pickle_safe` has been renamed ' - '`use_multiprocessing`. ' - 'Update your method calls accordingly.') - if kwargs: - raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) - - if not self.built: - raise RuntimeError('The model needs to be compiled before being used.') - return self.model.evaluate_generator( - generator, - steps, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing) - - def predict_generator(self, - generator, - steps=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0, - **kwargs): - """Generates predictions for the input samples from a data generator. - - The generator should return the same kind of data as accepted by - `predict_on_batch`. - - Arguments: - generator: generator yielding batches of input samples. - steps: Total number of steps (batches of samples) - to yield from `generator` before stopping. - Optional for `Sequence`: if unspecified, will use - the `len(generator)` as a number of steps. - max_queue_size: maximum size for the generator queue - workers: maximum number of processes to spin up - use_multiprocessing: if True, use process based threading. - Note that because this implementation - relies on multiprocessing, you should not pass - non picklable arguments to the generator - as they can't be passed easily to children processes. - verbose: verbosity mode, 0 or 1. - **kwargs: support for legacy arguments. - - Returns: - A Numpy array of predictions. - - Raises: - ValueError: In case the generator yields - data in an invalid format. 
- """ - # Legacy support - if 'max_q_size' in kwargs: - max_queue_size = kwargs.pop('max_q_size') - logging.warning('The argument `max_q_size` has been renamed ' - '`max_queue_size`. Update your method calls accordingly.') - if 'pickle_safe' in kwargs: - use_multiprocessing = kwargs.pop('pickle_safe') - logging.warning('The argument `pickle_safe` has been renamed ' - '`use_multiprocessing`. ' - 'Update your method calls accordingly.') - if kwargs: - raise ValueError('Unrecognized keyword arguments: ' + str(kwargs)) - - if not self.built: - self.build() - return self.model.predict_generator( - generator, - steps, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - verbose=verbose) - def get_config(self): - config = [] - for layer in self.layers: - config.append({ - 'class_name': layer.__class__.__name__, - 'config': layer.get_config() - }) - return copy.deepcopy(config) - @classmethod - def from_config(cls, config, custom_objects=None): - model = cls() - for conf in config: - layer = layer_module.deserialize(conf, custom_objects=custom_objects) - model.add(layer) - return model +# API entries importable from `keras.models`: +Model = training.Model # pylint: disable=invalid-name +Sequential = sequential.Sequential # pylint: disable=invalid-name +save_model = saving.save_model +load_model = saving.load_model +model_from_config = saving.model_from_config +model_from_yaml = saving.model_from_yaml +model_from_json = saving.model_from_json def _clone_functional_model(model, input_tensors=None): @@ -1365,7 +90,7 @@ def _clone_functional_model(model, input_tensors=None): else: # Make sure that all input tensors come from a Keras layer. # If tensor comes from an input layer: cache the input layer. 
- input_tensors = topology.to_list(input_tensors) + input_tensors = generic_utils.to_list(input_tensors) input_tensors_ = [] for i, x in enumerate(input_tensors): if not K.is_keras_tensor(x): @@ -1402,7 +127,7 @@ def _clone_functional_model(model, input_tensors=None): # Reuse previously cloned layer. layer = layer_map[layer] # Don't call InputLayer multiple times. - if isinstance(layer, topology.InputLayer): + if isinstance(layer, InputLayer): continue # Gather inputs to call the new layer. @@ -1427,8 +152,9 @@ def _clone_functional_model(model, input_tensors=None): if has_arg(layer.call, 'mask'): if 'mask' not in kwargs: kwargs['mask'] = computed_mask - output_tensors = topology.to_list(layer(computed_tensor, **kwargs)) - output_masks = topology.to_list( + output_tensors = generic_utils.to_list(layer(computed_tensor, + **kwargs)) + output_masks = generic_utils.to_list( layer.compute_mask(computed_tensor, computed_mask)) computed_tensors = [computed_tensor] computed_masks = [computed_mask] @@ -1438,8 +164,9 @@ def _clone_functional_model(model, input_tensors=None): if has_arg(layer.call, 'mask'): if 'mask' not in kwargs: kwargs['mask'] = computed_masks - output_tensors = topology.to_list(layer(computed_tensors, **kwargs)) - output_masks = topology.to_list( + output_tensors = generic_utils.to_list(layer(computed_tensors, + **kwargs)) + output_masks = generic_utils.to_list( layer.compute_mask(computed_tensors, computed_masks)) # Update tensor_map. 
for x, y, mask in zip(reference_output_tensors, output_tensors, @@ -1489,14 +216,14 @@ def _clone_sequential_model(model, input_tensors=None): if input_tensors is None: return Sequential(layers=layers, name=model.name) else: - if len(topology.to_list(input_tensors)) != 1: + if len(generic_utils.to_list(input_tensors)) != 1: raise ValueError('To clone a `Sequential` model, we expect ' ' at most one tensor ' 'as part of `input_tensors`.') - x = topology.to_list(input_tensors)[0] + x = generic_utils.to_list(input_tensors)[0] if K.is_keras_tensor(x): origin_layer = x._keras_history[0] - if isinstance(origin_layer, topology.InputLayer): + if isinstance(origin_layer, InputLayer): return Sequential(layers=[origin_layer] + layers, name=model.name) else: raise ValueError('Cannot clone a `Sequential` model on top ' diff --git a/tensorflow/python/keras/_impl/keras/models_test.py b/tensorflow/python/keras/_impl/keras/models_test.py index 04017e4b28..5978ddd987 100644 --- a/tensorflow/python/keras/_impl/keras/models_test.py +++ b/tensorflow/python/keras/_impl/keras/models_test.py @@ -12,362 +12,16 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for training routines.""" +"""Tests for `models.py` (model cloning, mainly).""" from __future__ import absolute_import from __future__ import division from __future__ import print_function -import os -import shutil -import tempfile - import numpy as np from tensorflow.python.keras._impl import keras from tensorflow.python.platform import test -from tensorflow.python.training import training as training_module - -try: - import h5py # pylint:disable=g-import-not-at-top -except ImportError: - h5py = None - - -class TestModelSaving(test.TestCase): - - def test_sequential_model_saving(self): - if h5py is None: - return # Skip test if models cannot be saved. 
- - with self.test_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.RepeatVector(3)) - model.add(keras.layers.TimeDistributed(keras.layers.Dense(3))) - model.compile(loss=keras.losses.MSE, - optimizer=keras.optimizers.RMSprop(lr=0.0001), - metrics=[keras.metrics.categorical_accuracy], - sample_weight_mode='temporal') - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) - - new_model = keras.models.load_model(fname) - os.close(fd) - os.remove(fname) - - out2 = new_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - # test that new updates are the same with both models - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - new_model.train_on_batch(x, y) - out = model.predict(x) - out2 = new_model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_sequential_model_saving_2(self): - if h5py is None: - return # Skip test if models cannot be saved. 
- - with self.test_session(): - # test with custom optimizer, loss - - class CustomOp(keras.optimizers.RMSprop): - pass - - def custom_loss(y_true, y_pred): - return keras.losses.mse(y_true, y_pred) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss=custom_loss, optimizer=CustomOp(), metrics=['acc']) - - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) - - model = keras.models.load_model( - fname, - custom_objects={'CustomOp': CustomOp, - 'custom_loss': custom_loss}) - os.close(fd) - os.remove(fname) - - out2 = model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_functional_model_saving(self): - if h5py is None: - return # Skip test if models cannot be saved. - - with self.test_session(): - inputs = keras.layers.Input(shape=(3,)) - x = keras.layers.Dense(2)(inputs) - output = keras.layers.Dense(3)(x) - - model = keras.models.Model(inputs, output) - model.compile(loss=keras.losses.MSE, - optimizer=keras.optimizers.RMSprop(lr=0.0001), - metrics=[keras.metrics.categorical_accuracy]) - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) - - model = keras.models.load_model(fname) - os.close(fd) - os.remove(fname) - - out2 = model.predict(x) - self.assertAllClose(out, out2, atol=1e-05) - - def test_saving_without_compilation(self): - if h5py is None: - return # Skip test if models cannot be saved. 
- - with self.test_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) - model = keras.models.load_model(fname) - os.close(fd) - os.remove(fname) - - def test_saving_with_tf_optimizer(self): - if h5py is None: - return # Skip test if models cannot be saved. - - with self.test_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', - optimizer=training_module.AdadeltaOptimizer(0.1), - metrics=['acc']) - - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) - model = keras.models.load_model(fname) - os.close(fd) - os.remove(fname) - - def test_saving_right_after_compilation(self): - if h5py is None: - return # Skip test if models cannot be saved. - - with self.test_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_shape=(3,))) - model.add(keras.layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - model.model._make_train_function() - - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) - model = keras.models.load_model(fname) - os.close(fd) - os.remove(fname) - - def test_saving_lambda_numpy_array_arguments(self): - if h5py is None: - return # Skip test if models cannot be saved. 
- - mean = np.random.random((4, 2, 3)) - std = np.abs(np.random.random((4, 2, 3))) + 1e-5 - inputs = keras.layers.Input(shape=(4, 2, 3)) - output = keras.layers.Lambda(lambda image, mu, std: (image - mu) / std, - arguments={'mu': mean, 'std': std})(inputs) - model = keras.models.Model(inputs, output) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - fd, fname = tempfile.mkstemp('.h5') - keras.models.save_model(model, fname) - - model = keras.models.load_model(fname) - os.close(fd) - os.remove(fname) - - self.assertAllClose(mean, model.layers[1].arguments['mu']) - self.assertAllClose(std, model.layers[1].arguments['std']) - - -class TestSequential(test.TestCase): - """Most Sequential model API tests are covered in `training_test.py`. - """ - - def test_basic_methods(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_dim=2)) - model.add(keras.layers.Dropout(0.3, name='dp')) - model.add(keras.layers.Dense(2, kernel_regularizer='l2', - kernel_constraint='max_norm')) - model.build() - self.assertEqual(model.state_updates, model.model.state_updates) - self.assertEqual(model.get_layer(name='dp').name, 'dp') - - def test_sequential_pop(self): - num_hidden = 5 - input_dim = 3 - batch_size = 5 - num_classes = 2 - with self.test_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) - model.add(keras.layers.Dense(num_classes)) - model.compile(loss='mse', optimizer='sgd') - x = np.random.random((batch_size, input_dim)) - y = np.random.random((batch_size, num_classes)) - model.fit(x, y, epochs=1) - model.pop() - self.assertEqual(len(model.layers), 1) - self.assertEqual(model.output_shape, (None, num_hidden)) - model.compile(loss='mse', optimizer='sgd') - y = np.random.random((batch_size, num_hidden)) - model.fit(x, y, epochs=1) - - # Test popping single-layer model - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) - 
model.pop() - self.assertEqual(len(model.layers), 0) - self.assertEqual(len(model.outputs), 0) - - # Invalid use case - model = keras.models.Sequential() - with self.assertRaises(TypeError): - model.pop() - - def test_sequential_weight_loading(self): - if h5py is None: - return - - temp_dir = self.get_temp_dir() - self.addCleanup(shutil.rmtree, temp_dir) - h5_path = os.path.join(temp_dir, 'test.h5') - - num_hidden = 5 - input_dim = 3 - batch_size = 5 - num_classes = 2 - - with self.test_session(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) - model.add(keras.layers.Dense(num_classes)) - - x = np.random.random((batch_size, input_dim)) - ref_y = model.predict(x) - - model.save_weights(h5_path) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(num_hidden, input_dim=input_dim)) - model.add(keras.layers.Dense(num_classes)) - model.load_weights(h5_path) - y = model.predict(x) - - self.assertAllClose(y, ref_y) - - def test_invalid_use_cases(self): - with self.test_session(): - # Added objects must be layer instances - with self.assertRaises(TypeError): - model = keras.models.Sequential() - model.add(None) - - # Added layers must have an inputs shape - with self.assertRaises(ValueError): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1)) - - # Added layers cannot have multiple outputs - class MyLayer(keras.layers.Layer): - - def call(self, inputs): - return [3 * inputs, 2 * inputs] - - def compute_output_shape(self, input_shape): - return [input_shape, input_shape] - - with self.assertRaises(ValueError): - model = keras.models.Sequential() - model.add(MyLayer(input_shape=(3,))) - with self.assertRaises(TypeError): - model = keras.models.Sequential() - model.add(keras.layers.Dense(1, input_dim=1)) - model.add(MyLayer()) - - # Building empty model - model = keras.models.Sequential() - with self.assertRaises(TypeError): - model.build() - - def 
test_nested_sequential_trainability(self): - input_dim = 20 - num_units = 10 - num_classes = 2 - - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(num_units, input_shape=(input_dim,))) - - model = keras.models.Sequential() - model.add(inner_model) - model.add(keras.layers.Dense(num_classes)) - - self.assertEqual(len(model.trainable_weights), 4) - inner_model.trainable = False - self.assertEqual(len(model.trainable_weights), 2) - inner_model.trainable = True - self.assertEqual(len(model.trainable_weights), 4) - - def test_sequential_update_disabling(self): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - with self.test_session(): - model = keras.models.Sequential() - model.add(keras.layers.BatchNormalization(input_shape=(4,))) - - model.trainable = False - assert not model.updates - - model.compile('sgd', 'mse') - assert not model.updates - assert not model.model.updates - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - self.assertAllClose(x1, x2, atol=1e-7) - - model.trainable = True - model.compile('sgd', 'mse') - assert model.updates - assert model.model.updates - - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert np.abs(np.sum(x1 - x2)) > 1e-5 class TestModelCloning(test.TestCase): diff --git a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py index 462d600bf8..5196bf1740 100644 --- a/tensorflow/python/keras/_impl/keras/utils/generic_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/generic_utils.py @@ -509,3 +509,20 @@ def slice_arrays(arrays, start=None, stop=None): return arrays[start:stop] else: return [None] + + +def to_list(x): + """Normalizes a list/tensor into a list. + + If a tensor is passed, we return + a list of size 1 containing the tensor. + + Arguments: + x: target object to be normalized. + + Returns: + A list. 
+ """ + if isinstance(x, list): + return x + return [x] diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 04724e3a1a..241db8956a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -1,8 +1,8 @@ path: "tensorflow.keras.Model" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index c94bd2faa4..9673a508d6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -1,9 +1,9 @@ path: "tensorflow.keras.Sequential" tf_class { - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt index f4ab075959..041acf29ff 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Activation" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt index eb558cddaf..48143b2cd6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -1,7 +1,7 @@ 
path: "tensorflow.keras.layers.ActivityRegularization" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt index 770a107b66..11f78fed97 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Add" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt index 0ce42b706e..84eb825632 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.AlphaDropout" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt index d6c98fa225..ab377a248f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt index 754fd310c6..c2edd79f52 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt index 9b62880c79..f3f37eed99 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt index b371ad148c..31d1d1c049 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Average" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 3e2aba55fd..6582e1b18e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt index fb37308cce..12f66095d2 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 813470ffc7..3a45fa180e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt index e251ac18e5..a0f272c178 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.BatchNormalization" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt index 699208a0b9..9c7d3154ad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Bidirectional" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt index ff08def0a0..949b225e54 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Concatenate" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index 6db22ca032..a736c84a10 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt index 577f206e35..95f9afed28 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 72924c32b4..38ba15400a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt index 16be08d9b2..bc84e2a97e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index 11e05f884d..0802578c22 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt index 72b72d6b3b..8ad4646c74 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt index ee93247f63..110e267b75 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index e5023287e5..24cfc83af6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt index ba38cb7121..c56e89187f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index 58724a1e16..3674f2746c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt index 98d52c430c..5a8f9d7702 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -3,7 +3,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt index 33b6ebe1af..caa748be81 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Cropping1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt index 4b241ebb0f..97bd4a265a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Cropping2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt index 1856a9ee21..20c43eeed1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Cropping3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt index a8c37af31f..256f0e4bdf 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt @@ -2,7 +2,7 @@ path: 
"tensorflow.keras.layers.Dense" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt index 07d3f023e5..d1e53f900c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Dot" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt index e2e21b5f12..b010ff6805 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Dropout" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt index 92b9760d53..fffd3854bb 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.ELU" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt index 83c528b401..1155fe03fc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt @@ -1,7 +1,7 @@ path: 
"tensorflow.keras.layers.Embedding" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt index 7360975288..5e4bebb15b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Flatten" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt index b329f1c46b..cb9bb3d821 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.GRUCell" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt index c741d4d6e6..9a36e80649 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GRU" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt index 57596badf1..eb32238e15 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.GaussianDropout" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt index 3829353cc3..37fc8e29ae 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.GaussianNoise" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index e53e78a977..490816458b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalAveragePooling1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt index 48fcd1044e..ab49f67f33 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalAveragePooling2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt index 66c06ed472..3d7cb3ba49 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalAveragePooling3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index 4f2420f74a..c99ddab4f3 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalAvgPool1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index 7912a6d933..290d2eaebe 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalAvgPool2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index d5b2d2c274..cf63069641 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalAvgPool3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index d88ff17eb6..2dadc67c09 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalMaxPool1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index c8cc5a0ddf..1a1a1dcf64 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalMaxPool2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index 7956c5a340..44898e23ad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalMaxPool3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 0a7e16413d..941d867d24 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalMaxPooling1D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index 6c8a58a996..9a5a6325f8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalMaxPooling2D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index 7678ce8aab..7a0c1932f6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.GlobalMaxPooling3D" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt index 1e9370b02f..f679c1d006 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.InputLayer" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index 3b171b137a..ad1e7f2cad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.LSTMCell" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt index 29d9cf78ab..6dad4b4897 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.LSTM" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt index ca01449299..fa45d8c902 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Lambda" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt index c52ad72754..023d6c0d69 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt @@ -1,6 +1,6 @@ path: "tensorflow.keras.layers.Layer" tf_class { - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index 8134fb7386..e429fced77 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.LeakyReLU" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt index c5d4523009..462568124f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.LocallyConnected1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt index bcbed9241b..11bf6a2b42 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.LocallyConnected2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt index 244e79b4ff..a932448891 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Masking" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt index 56cbf5df78..6ff2adddac 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt index 33c2d30e86..2957673d4d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt index 94f91059b7..2191c10b73 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt index 247230a6d6..af750ac1b6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 8d61b67e7c..9046061510 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt index ad2e308020..a40666807b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt index ff0db15f19..65378cef42 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Maximum" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt index 1d3f33f045..b037559e02 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.Multiply" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt index c86bc49b22..b3a7f47fa5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.PReLU" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt index 2043e1a126..b2f22f7da3 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Permute" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt index ad539a7c4c..792eacf90d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.RNN" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt index 4b0e98520a..5b79a021ca 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.RepeatVector" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt index 34bc71af8a..99c64505ee 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Reshape" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt index dd67b76523..d5873ccf76 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt index 5d898fb2bd..76b4c10a46 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + 
is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index bf62c095e7..40cd87de5f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index c758d87993..c44c0da148 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -4,7 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index 6e3cde3e3e..bd70c31c38 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.SimpleRNNCell" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt index 6fafc77b94..de717976cf 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.SimpleRNN" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt index ee4b2fa39e..a93b7b8f6e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Softmax" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index e4727072e3..4dc24b195e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index c5ff704311..a3bb1cc414 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index 476a7f362c..f9a78106fa 
100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -3,7 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 3dde1e5769..5aa21f4022 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.StackedRNNCells" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index ef31c5443e..88e8a46572 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.ThresholdedReLU" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt index 1e176d8d4b..f2a7673998 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt @@ -2,7 +2,7 @@ path: "tensorflow.keras.layers.TimeDistributed" tf_class { is_instance: "" is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt index a81b83be49..4db82ddfa9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.UpSampling1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt index 5403279d45..61e65ad56d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.UpSampling2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt index 96c337caf2..3d9402db4e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.UpSampling3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt index ea3bb2f8f5..0223799ed4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.Wrapper" tf_class { is_instance: "" - is_instance: "" + 
is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt index b81a4b1c50..2e4429833a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.ZeroPadding1D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt index 1a26f2f3c9..26cf7b9e49 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.ZeroPadding2D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt index 310277fe67..64d35d9447 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.keras.layers.ZeroPadding3D" tf_class { is_instance: "" - is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index 88eb237cec..18be9c9701 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -1,8 +1,8 @@ path: 
"tensorflow.keras.models.Model" tf_class { is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index 34f10f01ad..b934632922 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -1,9 +1,9 @@ path: "tensorflow.keras.models.Sequential" tf_class { - is_instance: "" + is_instance: "" is_instance: "" - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" is_instance: "" member { -- GitLab From 917136b3bb7d83a1674bb24d3c0b0892ad77e056 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Fri, 23 Feb 2018 18:18:15 -0800 Subject: [PATCH 062/884] Exclude more tests for cuda_on_cpu. PiperOrigin-RevId: 186851831 --- tensorflow/contrib/lite/testing/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index b5960d6f8d..83b9e21427 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -317,7 +317,10 @@ tf_cc_test( "//tensorflow/contrib/lite:testdata/multi_add.bin", "//tensorflow/contrib/lite:testdata/multi_add.pb", ], - tags = ["no_oss"], + tags = [ + "no_cuda_on_cpu_tap", + "no_oss", + ], deps = [ ":tflite_diff_flags", ":tflite_diff_util", -- GitLab From 0220d128c78f4061595a13d40037aebc865239cb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 24 Feb 2018 06:35:12 -0800 Subject: [PATCH 063/884] Use the new inspect_utils API to to get the function's namespace. 
PiperOrigin-RevId: 186884307 --- tensorflow/contrib/py2tf/impl/conversion.py | 22 +++++++-------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index 4bf698f207..044de33568 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import gast -import six from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.converters import asserts @@ -36,6 +35,7 @@ from tensorflow.contrib.py2tf.converters import side_effect_guards from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.impl import naming from tensorflow.contrib.py2tf.pyct import context +from tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct.static_analysis import activity @@ -155,7 +155,7 @@ def class_to_graph(c, conversion_map): if not members: raise ValueError('Cannot convert %s: it has no member methods.') - class_globals = None + class_namespace = None for _, m in members: node, _ = function_to_graph( m, @@ -164,10 +164,10 @@ def class_to_graph(c, conversion_map): arg_types={'self': (c.__name__, c)}, owner_type=c) # TODO(mdan): Do not assume all members have the same view of globals. 
- if class_globals is None: - class_globals = six.get_function_globals(m) + if class_namespace is None: + class_namespace = inspect_utils.getnamespace(m) converted_members[m] = node - namer = conversion_map.new_namer(class_globals) + namer = conversion_map.new_namer(class_namespace) class_name = namer.compiled_class_name(c.__name__, c) node = gast.ClassDef( class_name, @@ -202,19 +202,11 @@ def function_to_graph(f, conversion_map, arg_values, arg_types, """Specialization of `entity_to_graph` for callable functions.""" node, source = parser.parse_entity(f) node = node.body[0] - namespace = six.get_function_globals(f) - - # This is needed for non-global functions. - closure = six.get_function_closure(f) - if closure: - for e in closure: - if callable(e.cell_contents): - fn = e.cell_contents - namespace[fn.__name__] = fn + namespace = inspect_utils.getnamespace(f) _add_self_references(namespace, conversion_map.api_module) - namer = conversion_map.new_namer(namespace) + ctx = context.EntityContext( namer=namer, source_code=source, -- GitLab From eb0792340efaca19e75adcb73b6f3250dfd36ca0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 24 Feb 2018 23:51:54 -0800 Subject: [PATCH 064/884] Re-enables moving_average_optimizer_test. Resource variable bug fixed by apassos@ PiperOrigin-RevId: 186921623 --- tensorflow/contrib/opt/BUILD | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/contrib/opt/BUILD b/tensorflow/contrib/opt/BUILD index 52e88348c1..827279bd47 100644 --- a/tensorflow/contrib/opt/BUILD +++ b/tensorflow/contrib/opt/BUILD @@ -70,9 +70,6 @@ py_test( srcs = ["python/training/moving_average_optimizer_test.py"], srcs_version = "PY2AND3", tags = [ - "manual", - "no_oss", - "notap", "notsan", # b/31055119 ], deps = [ -- GitLab From 020408675695bce8133076d2dd6cc7188adde534 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Sun, 25 Feb 2018 02:55:29 -0800 Subject: [PATCH 065/884] [XLA] Remove bitcast-converts between same shape. 
PiperOrigin-RevId: 186929931 --- tensorflow/compiler/xla/service/algebraic_simplifier.cc | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 4391462c1c..5ddd8ec377 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -122,6 +122,8 @@ class AlgebraicSimplifierVisitor : public DfsHloVisitorWithDefault { Status HandleBitcast(HloInstruction* bitcast) override; + Status HandleBitcastConvert(HloInstruction* bitcast) override; + Status HandleBroadcast(HloInstruction* broadcast) override; Status HandleConcatenate(HloInstruction* concatenate) override; @@ -411,6 +413,13 @@ Status AlgebraicSimplifierVisitor::HandleBitcast(HloInstruction* bitcast) { return Status::OK(); } +Status AlgebraicSimplifierVisitor::HandleBitcastConvert( + HloInstruction* bitcast) { + // Eliminate bitcast converts between same shape. + ReplaceInstructionIfSameShape(bitcast, bitcast->mutable_operand(0)); + return Status::OK(); +} + Status AlgebraicSimplifierVisitor::HandleCopy(HloInstruction* copy) { // If a copy feeds a copy, make it a single copy. if (copy->operand(0)->opcode() == HloOpcode::kCopy) { -- GitLab From 27adc952de9aa38d75fa513d972f2e7012da1d0f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 07:21:08 -0800 Subject: [PATCH 066/884] Annotate attribute nodes with the value or type of their parent. This helps with resolving function owners, since using reflection to do it is unreliable. 
PiperOrigin-RevId: 187017742 --- tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py | 2 ++ .../contrib/py2tf/pyct/static_analysis/live_values_test.py | 1 + 2 files changed, 3 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py index 9c0a9a9e74..0388be5d25 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values.py @@ -86,6 +86,7 @@ class LiveValueResolver(transformer.Base): if not hasattr(parent_object, node.attr): raise AttributeError('%s has no attribute %s' % (parent_object, node.attr)) + anno.setanno(node, 'parent_type', type(parent_object)) anno.setanno(node, 'live_val', getattr(parent_object, node.attr)) anno.setanno(node, 'fqn', anno.getanno(node.value, 'fqn') + (node.attr,)) # TODO(mdan): Investigate the role built-in annotations can play here. @@ -96,6 +97,7 @@ class LiveValueResolver(transformer.Base): # This would not hold for dynamic members like function attributes. # For the dynamic case, we simply leave the node without an annotation, # and let downstream consumers figure out what to do. 
+ anno.setanno(node, 'parent_type', parent_type) anno.setanno(node, 'live_val', getattr(parent_type, node.attr)) anno.setanno(node, 'fqn', anno.getanno(node.value, 'type_fqn') + (node.attr,)) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py index 1e81bc70a8..c133a455b3 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/live_values_test.py @@ -103,6 +103,7 @@ class LiveValuesResolverTest(test.TestCase): arg_types={'self': (TestClass.__name__, TestClass)}) func_node = node.body[0].body[0].value.func self.assertEquals(TestClass.member, anno.getanno(func_node, 'live_val')) + self.assertEquals(TestClass, anno.getanno(func_node, 'parent_type')) self.assertEquals(('TestClass', 'member'), anno.getanno(func_node, 'fqn')) -- GitLab From 546d30232a07c790de55ea75795f24614312c12a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 08:04:09 -0800 Subject: [PATCH 067/884] Drop the getcallargs extension as its logic had to be moved to a higher level into api.py. 
PiperOrigin-RevId: 187022717 --- .../contrib/py2tf/pyct/inspect_utils.py | 27 -------------- .../contrib/py2tf/pyct/inspect_utils_test.py | 36 ------------------- 2 files changed, 63 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/inspect_utils.py b/tensorflow/contrib/py2tf/pyct/inspect_utils.py index c1af95e2ab..d19c6ed75e 100644 --- a/tensorflow/contrib/py2tf/pyct/inspect_utils.py +++ b/tensorflow/contrib/py2tf/pyct/inspect_utils.py @@ -50,33 +50,6 @@ def getnamespace(f): return namespace -def getcallargs(c, *args, **kwargs): - """Extension of getcallargs to non-function callables.""" - if tf_inspect.isfunction(c) or tf_inspect.ismethod(c): - # The traditional getcallargs - return tf_inspect.getcallargs(c, *args, **kwargs) - - if tf_inspect.isclass(c): - # Constructors: use a sentinel to remove the self argument. - self_sentinel = object() - arg_map = tf_inspect.getcallargs( - c.__init__, self_sentinel, *args, **kwargs) - # Find and remove the self arg. We cannot assume it's called 'self'. - self_arg_name = None - for name, value in arg_map.items(): - if value is self_sentinel: - self_arg_name = name - break - del arg_map[self_arg_name] - return arg_map - - if hasattr(c, '__call__'): - # Callable objects: map self to the object itself - return tf_inspect.getcallargs(c.__call__, *args, **kwargs) - - raise NotImplementedError('unknown callable "%s"' % type(c)) - - def getmethodclass(m): """Resolves a function's owner, e.g. a method's class. 
diff --git a/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py b/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py index d96c3df547..5528ac851f 100644 --- a/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py +++ b/tensorflow/contrib/py2tf/pyct/inspect_utils_test.py @@ -127,42 +127,6 @@ class InspectUtilsTest(test.TestCase): self.assertEqual(ns['closed_over_primitive'], closed_over_primitive) self.assertTrue('local_var' not in ns) - def test_getcallargs_constructor(self): - - class TestSuperclass(object): - - def __init__(self, x): - pass - - class TestCallable(TestSuperclass): - pass - - self.assertDictEqual({ - 'x': 1 - }, inspect_utils.getcallargs(TestCallable, 1)) - - def test_getcallargs_object(self): - - class TestCallable(object): - - def __call__(self, x): - pass - - obj = TestCallable() - self.assertDictEqual({ - 'self': obj, - 'x': 1 - }, inspect_utils.getcallargs(obj, 1)) - - def test_getcallargs_function(self): - - def test_fn(x): - return x + 1 - - self.assertDictEqual({ - 'x': 1 - }, inspect_utils.getcallargs(test_fn, 1)) - def test_getmethodclass(self): self.assertEqual( -- GitLab From d2ecfc5ab0a22be088e4385c2d601c2ba8ad8816 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 08:58:48 -0800 Subject: [PATCH 068/884] Add __str__ method to _RefVariableProcessor. 
PiperOrigin-RevId: 187029027 --- tensorflow/python/training/optimizer.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 678d6322aa..454cc3add5 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -98,6 +98,9 @@ class _RefVariableProcessor(_OptimizableVariable): def __init__(self, v): self._v = v + def __str__(self): + return "<_RefVariableProcessor(%s)>" % self._v + def target(self): return self._v._ref() # pylint: disable=protected-access -- GitLab From c76dd17b2086b760ac38e1e12ec3d4df6268d0b3 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 26 Feb 2018 09:24:38 -0800 Subject: [PATCH 069/884] [XLA:GPU] Fix HLO profiling when multiple streams are involved. We were enqueueing the timer on the main stream, but not blocking the substreams, so the results were nonsensical. PiperOrigin-RevId: 187032412 --- .../xla/service/gpu/gpu_executable.cc | 21 ++++++++++++------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc index 623d6714de..04b37d913e 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc @@ -46,12 +46,14 @@ namespace { class HloExecutionProfiler { public: // If profiling is enabled, start an execution timer running. 
- explicit HloExecutionProfiler(bool do_profile, HloExecutionProfile* profile, - se::Stream* stream, - const HloComputation* computation) + explicit HloExecutionProfiler( + bool do_profile, HloExecutionProfile* profile, se::Stream* stream, + const std::vector::SmartPtr>& sub_streams, + const HloComputation* computation) : do_profile_(do_profile), profile_(profile), stream_(stream), + sub_streams_(sub_streams), computation_(computation) { if (do_profile_) { clock_rate_ghz_ = @@ -70,6 +72,7 @@ class HloExecutionProfiler { CHECK(!finished_execution_) << "Call FinishExecution only once!"; finished_execution_ = true; if (do_profile_) { + stream_->ThenWaitFor(&sub_streams_); stream_->ThenStopTimer(execution_timer_.get()); stream_->BlockHostUntilDone().IgnoreError(); profile_->set_total_cycles_executed( @@ -88,6 +91,7 @@ class HloExecutionProfiler { // that the hlo_instruction took to execute in the profile. void FinishOperation(const HloInstruction* hlo_instruction) { if (do_profile_) { + stream_->ThenWaitFor(&sub_streams_); stream_->ThenStopTimer(per_op_timer_.get()); stream_->BlockHostUntilDone().IgnoreError(); profile_->SetCyclesTakenBy( @@ -100,6 +104,7 @@ class HloExecutionProfiler { double clock_rate_ghz_; HloExecutionProfile* profile_; se::Stream* stream_; + const std::vector::SmartPtr>& sub_streams_; const HloComputation* computation_; std::unique_ptr execution_timer_; std::unique_ptr per_op_timer_; @@ -147,13 +152,9 @@ Status GpuExecutable::ExecuteThunks( LOG(WARNING) << "PROFILING: profiling is enabled"; } - HloExecutionProfiler profiler(do_profile, hlo_execution_profile, main_stream, - hlo_module_->entry_computation()); - - uint64 start_micros = tensorflow::Env::Default()->NowMicros(); - // Stream 0 indicates `main_stream` and substreams start from stream 1. 
std::vector::SmartPtr> sub_streams; + sub_streams.reserve(thunk_schedule_->StreamCount() - 1); while (sub_streams.size() + 1 < thunk_schedule_->StreamCount()) { sub_streams.emplace_back(); TF_ASSIGN_OR_RETURN( @@ -161,6 +162,10 @@ Status GpuExecutable::ExecuteThunks( run_options->BorrowStream(main_stream->parent()->device_ordinal())); } + HloExecutionProfiler profiler(do_profile, hlo_execution_profile, main_stream, + sub_streams, hlo_module_->entry_computation()); + uint64 start_micros = tensorflow::Env::Default()->NowMicros(); + // The next event enqueued on stream N must not run until the thunk at // last_blocking_thunk_for_stream[N] completes. std::map last_blocking_thunk_for_stream; -- GitLab From 9e823230c42b9e2ba08726ef711ebaff7e1de7af Mon Sep 17 00:00:00 2001 From: Rui Zhao Date: Mon, 26 Feb 2018 09:32:47 -0800 Subject: [PATCH 070/884] Fix print format error. PiperOrigin-RevId: 187033623 --- .../python/grappler/hierarchical_controller.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/grappler/hierarchical_controller.py b/tensorflow/python/grappler/hierarchical_controller.py index 655e43e78f..b06fb3c6d0 100644 --- a/tensorflow/python/grappler/hierarchical_controller.py +++ b/tensorflow/python/grappler/hierarchical_controller.py @@ -612,10 +612,10 @@ class HierarchicalController(Controller): num_inter_group_connections = num_connections - num_intra_group_connections if verbose: print("grouping evaluation metric") - print("num_connections={} num_intra_group_connections={} " - "num_inter_group_connections={}").format( - num_connections, num_intra_group_connections, - num_inter_group_connections) + print(("num_connections={} num_intra_group_connections={} " + "num_inter_group_connections={}").format( + num_connections, num_intra_group_connections, + num_inter_group_connections)) self.dag_matrix = dag_matrix # output_shape @@ -972,8 +972,8 @@ class HierarchicalController(Controller): 
controller_ops["reward"]["ph"][child_id]: reward, }) if verbose: - print("run_time={:<.5f} reward={:<.5f} " - "best_reward={:<.5f}").format(run_time, reward, best_reward) + print(("run_time={:<.5f} reward={:<.5f} " + "best_reward={:<.5f}").format(run_time, reward, best_reward)) # Reward is a double, best_reward a float: allow for some slack in the # comparison. -- GitLab From 109004b00ad515fbf44d2df7718a2e9638d4b611 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 26 Feb 2018 10:11:43 -0800 Subject: [PATCH 071/884] Update version string to 1.6.0 (#17251) --- tensorflow/core/public/version.h | 2 +- tensorflow/docs_src/install/install_c.md | 2 +- tensorflow/docs_src/install/install_go.md | 2 +- tensorflow/docs_src/install/install_java.md | 22 +++++++++---------- tensorflow/docs_src/install/install_linux.md | 22 +++++++++---------- tensorflow/docs_src/install/install_mac.md | 10 ++++----- .../docs_src/install/install_sources.md | 14 ++++++------ tensorflow/tools/pip_package/setup.py | 2 +- 8 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/core/public/version.h b/tensorflow/core/public/version.h index 7405e01e14..22f2c02b78 100644 --- a/tensorflow/core/public/version.h +++ b/tensorflow/core/public/version.h @@ -24,7 +24,7 @@ limitations under the License. // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. 
"-alpha", "-alpha.1", // "-beta", "-rc", "-rc.1") -#define TF_VERSION_SUFFIX "-rc1" +#define TF_VERSION_SUFFIX "" #define TF_STR_HELPER(x) #x #define TF_STR(x) TF_STR_HELPER(x) diff --git a/tensorflow/docs_src/install/install_c.md b/tensorflow/docs_src/install/install_c.md index f3620cf687..1a151ec758 100644 --- a/tensorflow/docs_src/install/install_c.md +++ b/tensorflow/docs_src/install/install_c.md @@ -38,7 +38,7 @@ enable TensorFlow for C: OS="linux" # Change to "darwin" for macOS TARGET_DIRECTORY="/usr/local" curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_go.md b/tensorflow/docs_src/install/install_go.md index 4bf4bacaec..bc874c034d 100644 --- a/tensorflow/docs_src/install/install_go.md +++ b/tensorflow/docs_src/install/install_go.md @@ -38,7 +38,7 @@ steps to install this library and enable TensorFlow for Go: TF_TYPE="cpu" # Change to "gpu" for GPU support TARGET_DIRECTORY='/usr/local' curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-${TF_TYPE}-$(go env GOOS)-x86_64-1.6.0.tar.gz" | sudo tar -C $TARGET_DIRECTORY -xz The `tar` command extracts the TensorFlow C library into the `lib` diff --git a/tensorflow/docs_src/install/install_java.md b/tensorflow/docs_src/install/install_java.md index 1905f9729e..313de2049a 100644 --- a/tensorflow/docs_src/install/install_java.md +++ b/tensorflow/docs_src/install/install_java.md @@ -36,7 +36,7 @@ following to the project's `pom.xml` to use the TensorFlow Java APIs: org.tensorflow tensorflow - 1.6.0-rc1 + 1.6.0 ``` @@ -65,7 +65,7 
@@ As an example, these steps will create a Maven project that uses TensorFlow: org.tensorflow tensorflow - 1.6.0-rc1 + 1.6.0 @@ -123,12 +123,12 @@ instead: org.tensorflow libtensorflow - 1.6.0-rc1 + 1.6.0 org.tensorflow libtensorflow_jni_gpu - 1.6.0-rc1 + 1.6.0 ``` @@ -147,7 +147,7 @@ refer to the simpler instructions above instead. Take the following steps to install TensorFlow for Java on Linux or macOS: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Decide whether you will run TensorFlow for Java on CPU(s) only or with @@ -166,7 +166,7 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: OS=$(uname -s | tr '[:upper:]' '[:lower:]') mkdir -p ./jni curl -L \ - "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0-rc1.tar.gz" | + "https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-${TF_TYPE}-${OS}-x86_64-1.6.0.tar.gz" | tar -xz -C ./jni ### Install on Windows @@ -174,10 +174,10 @@ Take the following steps to install TensorFlow for Java on Linux or macOS: Take the following steps to install TensorFlow for Java on Windows: 1. Download - [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0-rc1.jar), + [libtensorflow.jar](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow-1.6.0.jar), which is the TensorFlow Java Archive (JAR). 2. Download the following Java Native Interface (JNI) file appropriate for - [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0-rc1.zip). + [TensorFlow for Java on Windows](https://storage.googleapis.com/tensorflow/libtensorflow/libtensorflow_jni-cpu-windows-x86_64-1.6.0.zip). 3. 
Extract this .zip file. @@ -225,7 +225,7 @@ must be part of your `classpath`. For example, you can include the downloaded `.jar` in your `classpath` by using the `-cp` compilation flag as follows: -
javac -cp libtensorflow-1.6.0-rc1.jar HelloTF.java
+
javac -cp libtensorflow-1.6.0.jar HelloTF.java
### Running @@ -239,11 +239,11 @@ two files are available to the JVM: For example, the following command line executes the `HelloTF` program on Linux and macOS X: -
java -cp libtensorflow-1.6.0-rc1.jar:. -Djava.library.path=./jni HelloTF
+
java -cp libtensorflow-1.6.0.jar:. -Djava.library.path=./jni HelloTF
And the following command line executes the `HelloTF` program on Windows: -
java -cp libtensorflow-1.6.0-rc1.jar;. -Djava.library.path=jni HelloTF
+
java -cp libtensorflow-1.6.0.jar;. -Djava.library.path=jni HelloTF
If the program prints Hello from version, you've successfully installed TensorFlow for Java and are ready to use the API. If the program diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 62bd45650a..5382c9db31 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -188,7 +188,7 @@ Take the following steps to install TensorFlow with Virtualenv: Virtualenv environment:
(tensorflow)$ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl If you encounter installation problems, see [Common Installation Problems](#common_installation_problems). @@ -293,7 +293,7 @@ take the following steps:
      $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
      
If this step fails, see @@ -480,7 +480,7 @@ Take the following steps to install TensorFlow in an Anaconda environment:
      (tensorflow)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+ https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl @@ -648,14 +648,14 @@ This section documents the relevant values for Linux installations. CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp27-none-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp27-none-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp27-none-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -667,14 +667,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp34-cp34m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp34-cp34m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp34-cp34m-linux_x86_64.whl
 
Note that GPU support requires the NVIDIA hardware and software described in @@ -686,14 +686,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp35-cp35m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp35-cp35m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp35-cp35m-linux_x86_64.whl
 
@@ -705,14 +705,14 @@ Note that GPU support requires the NVIDIA hardware and software described in CPU only:
-https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.6.0-cp36-cp36m-linux_x86_64.whl
 
GPU support:
-https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0rc1-cp36-cp36m-linux_x86_64.whl
+https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.6.0-cp36-cp36m-linux_x86_64.whl
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index e3832a7a2a..62f896375f 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -115,7 +115,7 @@ Take the following steps to install TensorFlow with Virtualenv: TensorFlow in the active Virtualenv is as follows:
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -238,7 +238,7 @@ take the following steps: issue the following command:
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl 
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl If the preceding command fails, see [installation problems](#common-installation-problems). @@ -347,7 +347,7 @@ Take the following steps to install TensorFlow in an Anaconda environment: TensorFlow for Python 2.7:
 (targetDirectory)$ pip install --ignore-installed --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl @@ -520,7 +520,7 @@ This section documents the relevant values for Mac OS installations.
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
 
@@ -528,5 +528,5 @@ https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-a
-https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
+https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
 
diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 051da692d3..638a64cc15 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -359,10 +359,10 @@ Invoke `pip install` to install that pip package. The filename of the `.whl` file depends on your platform. For example, the following command will install the pip package -for TensorFlow 1.6.0rc1 on Linux: +for TensorFlow 1.6.0 on Linux:
-$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0rc1-py2-none-any.whl
+$ sudo pip install /tmp/tensorflow_pkg/tensorflow-1.6.0-py2-none-any.whl
 
## Validate your installation @@ -460,8 +460,8 @@ Stack Overflow and specify the `tensorflow` tag. **Linux** - - + + @@ -479,7 +479,7 @@ Stack Overflow and specify the `tensorflow` tag. **Mac**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc1CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0rc1GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.6.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.0N/AN/A
tensorflow_gpu-1.6.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.9.079
tensorflow-1.5.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.0N/AN/A
tensorflow_gpu-1.5.0GPU2.7, 3.3-3.6GCC 4.8Bazel 0.8.079
tensorflow-1.4.0CPU2.7, 3.3-3.6GCC 4.8Bazel 0.5.4N/AN/A
- + @@ -493,8 +493,8 @@ Stack Overflow and specify the `tensorflow` tag. **Windows**
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc1CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.6.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.5.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.8.1N/AN/A
tensorflow-1.4.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.5.4N/AN/A
tensorflow-1.3.0CPU2.7, 3.3-3.6Clang from xcodeBazel 0.4.5N/AN/A
- - + + diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index a835275dae..8510a4260e 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -29,7 +29,7 @@ from setuptools.dist import Distribution # This version string is semver compatible, but incompatible with pip. # For pip, we will remove all '-' characters from this string, and use the # result for pip. -_VERSION = '1.6.0-rc1' +_VERSION = '1.6.0' REQUIRED_PACKAGES = [ 'absl-py >= 0.1.6', -- GitLab From f4e70be18b104fbb2efeefeb83bea190aec12727 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 10:07:09 -0800 Subject: [PATCH 072/884] Fix pip install examples to match text: Use pip and point to Py2 packages PiperOrigin-RevId: 187038889 --- tensorflow/docs_src/install/install_mac.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 5be38ae1ef..623ca6bb79 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -118,8 +118,8 @@ Take the following steps to install TensorFlow with Virtualenv: Python 2.7, the command to install TensorFlow in the active Virtualenv is as follows: -
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl
+
 $ pip install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl
If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -241,8 +241,8 @@ take the following steps: you are installing TensorFlow for Mac OS and Python 2.7 issue the following command: -
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py3-none-any.whl 
+
 $ sudo pip install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0rc1-py2-none-any.whl 
If the preceding command fails, see [installation problems](#common-installation-problems). -- GitLab From 3b08cd35bc108f48b4f63d73af7a53eb8a1169f9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 10:17:15 -0800 Subject: [PATCH 073/884] Generalize the gather_indices dimension that stores indices This is now exposed as a index_vector_dim dimension number. Also fixed an off-by-one error in ValidateGatherDimensionNumbers in the expression computing output_shape_rank. PiperOrigin-RevId: 187040748 --- .../compiler/xla/service/hlo_instruction.cc | 9 +- .../compiler/xla/service/hlo_instruction.h | 3 +- .../xla/service/hlo_instruction_test.cc | 43 +++- .../compiler/xla/service/shape_inference.cc | 42 ++-- .../xla/service/shape_inference_test.cc | 191 ++++++++++++++---- tensorflow/compiler/xla/xla_data.proto | 4 + .../performance/xla/operation_semantics.md | 61 ++++-- 7 files changed, 274 insertions(+), 79 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index b7dd055d7c..a534d8ff06 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1172,7 +1172,8 @@ bool HloInstruction::HasSideEffect() const { /* static */ GatherDimensionNumbers HloInstruction::MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims) { + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim) { GatherDimensionNumbers gather_dim_numbers; for (int64 output_window_dim : output_window_dims) { gather_dim_numbers.add_output_window_dims(output_window_dim); @@ -1184,6 +1185,7 @@ bool HloInstruction::HasSideEffect() const { gather_dim_numbers.add_gather_dims_to_operand_dims(gather_dim_to_input_dim); } + gather_dim_numbers.set_index_vector_dim(index_vector_dim); return gather_dim_numbers; } @@ -3369,9 
+3371,12 @@ string HloInstruction::GatherDimensionNumbersToString() const { string gather_dims_to_operand_dims = StrCat( "gather_dims_to_operand_dims={", Join(gather_dimension_numbers_->gather_dims_to_operand_dims(), ","), "}"); + string index_vector_dim = StrCat( + "index_vector_dim=", gather_dimension_numbers_->index_vector_dim()); return Join>( - {output_window_dims, elided_window_dims, gather_dims_to_operand_dims}, + {output_window_dims, elided_window_dims, gather_dims_to_operand_dims, + index_vector_dim}, ", "); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index e4d22e5703..e4c86214c2 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -502,7 +502,8 @@ class HloInstruction { static GatherDimensionNumbers MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims); + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim); // Returns the opcode for this instruction. 
HloOpcode opcode() const { return opcode_; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 32d3ed272b..f2980d309d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1271,7 +1271,7 @@ TEST_F(HloInstructionTest, Stringification) { "true_computation=%TransposeDot, false_computation=%TransposeDot"); } -TEST_F(HloInstructionTest, StringifyGather) { +TEST_F(HloInstructionTest, StringifyGather_0) { Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); Shape gather_indices_tensor_shape = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); @@ -1291,7 +1291,8 @@ TEST_F(HloInstructionTest, StringifyGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); HloModule module(TestName()); @@ -1303,7 +1304,43 @@ TEST_F(HloInstructionTest, StringifyGather) { "s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), " "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " "gather_dims_to_operand_dims={0,1,2,3,4}, " - "window_bounds={30,29,28,27,26}"); + "index_vector_dim=4, window_bounds={30,29,28,27,26}"); +} + +TEST_F(HloInstructionTest, StringifyGather_1) { + Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); + Shape gather_indices_tensor_shape = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); + Shape gather_result_shape = + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}); + + HloComputation::Builder builder("Gather"); + HloInstruction* input = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_tensor_shape, "input_tensor")); + HloInstruction* gather_indices = + builder.AddInstruction(HloInstruction::CreateParameter( + 1, 
gather_indices_tensor_shape, "gather_indices")); + + HloInstruction* gather_instruction = + builder.AddInstruction(HloInstruction::CreateGather( + gather_result_shape, input, gather_indices, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + HloModule module(TestName()); + module.AddEntryComputation(builder.Build()); + + EXPECT_EQ(gather_instruction->ToString(), + "%gather = f32[10,9,7,6,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} " + "gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, " + "s64[10,9,5,7,6]{4,3,2,1,0} %gather_indices), " + "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " + "gather_dims_to_operand_dims={0,1,2,3,4}, " + "index_vector_dim=2, window_bounds={30,29,28,27,26}"); } } // namespace diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index c9692757b2..607a672025 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -2467,27 +2467,27 @@ static Status ValidateGatherDimensionNumbers( const int64 output_window_dim_count = dim_numbers.output_window_dims_size(); const int64 output_shape_rank = - output_window_dim_count + gather_indices_shape.size(); + output_window_dim_count + gather_indices_shape.size() - 1; for (int i = 0; i < dim_numbers.output_window_dims_size(); ++i) { int64 window_index = dim_numbers.output_window_dims(i); if (window_index < 0 || window_index >= output_shape_rank) { return InvalidArgument( "Window index %d in gather op is out of bounds; got %lld, but should " - "have been in" - "[0,%lld)", + "have been in [0,%lld)", i, window_index, output_shape_rank); } } if (dim_numbers.gather_dims_to_operand_dims_size() != - gather_indices_shape.back()) { + gather_indices_shape[dim_numbers.index_vector_dim()]) { return 
InvalidArgument( - "There must be exactly as many elements in gather_dims_to_operand_dims " - "as there are elements in the last dimension of %%gather_indices; got: " - "%d, expected %lld", + "Gather op has %d elements in gather_dims_to_operand_dims and the " + "bound of dimension index_vector_dim=%lld of gather_indices is " + "%lld. These two numbers must be equal.", dim_numbers.gather_dims_to_operand_dims_size(), - gather_indices_shape.back()); + dim_numbers.index_vector_dim(), + gather_indices_shape[dim_numbers.index_vector_dim()]); } for (int i = 0; i < dim_numbers.gather_dims_to_operand_dims_size(); i++) { @@ -2550,24 +2550,33 @@ static Status ValidateGatherDimensionNumbers( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( gather_indices_shape, "gather indices operand of gather op")); - if (gather_indices_shape.dimensions_size() < 1) { + if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { return InvalidArgument( - "Gather indices parameter must at least of rank 1; got %s", + "Gather indices parameter must be an integral tensor; got %s", ShapeUtil::HumanString(gather_indices_shape).c_str()); } - if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { + // We implicitly reshape gather indices of shape P[A,B,C] to P[A,B,C,1] if + // index_vector_dim is rank(P). The bounds of this expanded shape is + // stored in expanded_gather_indices_shape. + + if (gather_indices_shape.dimensions_size() < + gather_dim_numbers.index_vector_dim() || + gather_dim_numbers.index_vector_dim() < 0) { return InvalidArgument( - "Gather indices parameter must be an integral tensor; got %s", - ShapeUtil::HumanString(gather_indices_shape).c_str()); + "Gather index leaf dimension must be within [0, rank(gather_indices) + " + "1). 
rank(gather_indices) is %d and gather index leaf dimension is " + "%lld.", + gather_indices_shape.dimensions_size(), + gather_dim_numbers.index_vector_dim()); } std::vector expanded_gather_indices_shape; - // We implicitly reshape gather indices of shape P[N] to P[N,1]. expanded_gather_indices_shape.reserve(gather_indices_shape.dimensions_size()); c_copy(gather_indices_shape.dimensions(), std::back_inserter(expanded_gather_indices_shape)); - if (expanded_gather_indices_shape.size() == 1) { + if (expanded_gather_indices_shape.size() == + gather_dim_numbers.index_vector_dim()) { expanded_gather_indices_shape.push_back(1); } @@ -2632,6 +2641,9 @@ static Status ValidateGatherDimensionNumbers( } current_bound = window_bounds[window_dims_seen++]; } else { + if (gather_dims_seen == gather_dim_numbers.index_vector_dim()) { + gather_dims_seen++; + } current_bound = expanded_gather_indices_shape[gather_dims_seen++]; } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 7eb120843f..029d2b3b86 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1530,11 +1530,17 @@ TEST_F(ShapeInferenceTest, BadSlice) { class GatherShapeInferenceTest : public ShapeInferenceTest { protected: + const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {}); + const Shape s64_vector_5_ = ShapeUtil::MakeShape(S64, {5}); const Shape s64_vector_32_ = ShapeUtil::MakeShape(S64, {32}); const Shape s64_4d_tensor_10_9_8_7_1_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 1}); const Shape s64_4d_tensor_10_9_8_7_5_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); + const Shape s64_4d_tensor_5_10_9_7_6_ = + ShapeUtil::MakeShape(S64, {5, 10, 9, 7, 6}); + const Shape s64_4d_tensor_10_9_5_7_6_ = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); const Shape f32_5d_tensor_50_49_48_47_46_ = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); const Shape tuple_shape_ = 
ShapeUtil::MakeTupleShape( @@ -1548,7 +1554,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {64, 32}))) @@ -1562,7 +1569,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherV2) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{1}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/1), /*window_bounds=*/{1, 48})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {32, 48}))) @@ -1576,7 +1584,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherNd) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 48})); EXPECT_TRUE(ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {10, 9, 8, 7, 48}))) @@ -1591,7 +1600,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); EXPECT_TRUE(ShapeUtil::Equal( gather_shape, @@ -1599,12 +1609,85 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { << ShapeUtil::HumanString(gather_shape); } +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_A) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + 
/*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_B) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_5_10_9_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NoOutputGatherDims) { + // This is equivalent to a dynamic slice. + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_vector_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3, 4}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, ScalarGatherIndices) { + // The gather indices "tensor" is a scalar S here that's used to slice out + // [S,0,0,0,0]..[S,30,29,28,27] into a [30,29,28,27] shaped result. 
+ TF_ASSERT_OK_AND_ASSIGN(Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_scalar_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3}, + /*elided_window_dims=*/{0}, + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/0), + /*window_bounds=*/{1, 30, 29, 28, 27})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27}))) + << ShapeUtil::HumanString(gather_shape); +} + TEST_F(GatherShapeInferenceTest, TupleShapedTensorInput) { StatusOr statusor = ShapeInference::InferGatherShape( tuple_shape_, s64_vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1617,7 +1700,8 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { s64_vector_32_, tuple_shape_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1625,25 +1709,13 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { << statusor.status(); } -TEST_F(GatherShapeInferenceTest, ScalarGatherIndicesInput) { - StatusOr statusor = ShapeInference::InferGatherShape( - s64_vector_32_, s32_, - HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, - /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), - /*window_bounds=*/{64, 1}); - ASSERT_FALSE(statusor.ok()); - EXPECT_THAT(statusor.status().error_message(), - HasSubstr("Gather indices parameter must at least of rank 1")) - << statusor.status(); -} - TEST_F(GatherShapeInferenceTest, 
FloatingPointGatherIndicesInput) { StatusOr statusor = ShapeInference::InferGatherShape( s64_vector_32_, vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1658,7 +1730,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 8, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1674,7 +1747,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1690,7 +1764,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 99, 100, 101}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1698,6 +1773,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, + InvalidGatherDimNumbers_WindowIndexBarelyOutOfBounds) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_8_7_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 9}, + /*elided_window_dims=*/{}, + 
/*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), + /*window_bounds=*/{30, 29, 28, 27, 26}); + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Window index 4 in gather op is out of bounds")) + << statusor.status(); +} + TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_MismatchingElidedWindowDims) { StatusOr statusor = ShapeInference::InferGatherShape( @@ -1705,7 +1796,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{4}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1722,7 +1814,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 19}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1738,7 +1831,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 3}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1755,15 +1849,15 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( 
statusor.status().error_message(), - HasSubstr( - "There must be exactly as many elements in " - "gather_dims_to_operand_dims " - "as there are elements in the last dimension of %gather_indices")) + HasSubstr("Gather op has 4 elements in gather_dims_to_operand_dims and " + "the bound of dimension index_vector_dim=4 of " + "gather_indices is 5. These two numbers must be equal.")) << statusor.status(); } @@ -1774,7 +1868,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1791,7 +1886,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1808,7 +1904,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{2, 1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 1, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1822,7 +1919,8 @@ TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_WindowBoundsTooLarge) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{2}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 1, 300, 26}); ASSERT_FALSE(statusor.ok()); 
EXPECT_THAT(statusor.status().error_message(), @@ -1838,7 +1936,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1855,7 +1954,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26, 20}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1864,5 +1964,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, OutOfBoundsGatherIndicesLeafDim) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/32), + /*window_bounds=*/{30, 29, 28, 27, 26}); + + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Gather index leaf dimension must be within [0, " + "rank(gather_indices) + 1)")) + << statusor.status(); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 28620c3b86..1f16e6d251 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -418,6 +418,10 @@ message GatherDimensionNumbers { // transforms the gather index looked up from the gather_indices tensor into // the starting index in the input space. 
repeated int64 gather_dims_to_operand_dims = 3; + + // The dimension in the gather_indices input that contains the starting + // indices. + int64 index_vector_dim = 4; } // Operation requests that are all collected as a tagged union with a oneof diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 1f7a3a1e2c..eaf6aeba3d 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -1050,6 +1050,9 @@ For a more intuitive description, see the "Informal Description" section below. : : : indices of the slices we're : : : : we're stitching together into : : : : the output tensor. : +|`index_vector_dim` | `int64` | The dimension in | +: : : `gather_indices` that contains : +: : : the starting indices. : |`output_window_dims` | `ArraySlice` | The set of dimensions in the | : : : output shape that are _window : : : : dimensions_ (defined below). : @@ -1066,22 +1069,20 @@ For a more intuitive description, see the "Informal Description" section below. : : : `output_window_dims`) and the window : : : : dimensions that are elided (via : : : : `elided_window_dims`). : -|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | +|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | : : : array is interpreted as mapping `i` to : : : : `gather_dims_to_operand_dims[i]`) from : : : : the gather indices in `gather_indices` to : : : : the operand index space. It has to be : : : : one-to-one and total. : -If `gather_indices` is a vector with `N` elements then we implicitly reshape it -to a tensor of shape `[N,1]` before proceeding. 
- For every index `Out` in the output tensor, we compute two things (more precisely described later): - - An index into the first `gather_indices.rank` - `1` dimensions of - `gather_indices`, which gives us a starting index of a slice, _operand - slice_, in the operand tensor. + - An index into `gather_indices.rank` - `1` dimensions of `gather_indices`, + which gives us a starting index of a slice, _operand slice_, in the operand + tensor. These `gather_indices.rank` - `1` dimensions are all the dimensions + in `gather_indices` except `index_vector_dim`. - A _window index_ that has the same rank as the operand. This index is composed of the values in `Out` at dimensions `output_window_dims`, embedded @@ -1093,29 +1094,42 @@ should be present in the output at index `Out`. The output is a tensor of rank `output_window_dims.size` + `gather_indices.rank` - `1`. Additionally, as a shorthand, we define `output_gather_dims` of type `ArraySlice` as the set of dimensions in the output shape but not in -`output_window_dims`, in ascending order. E.g. if the output tensor has rank 5, -`output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, `3`} +`output_window_dims`, in ascending order. E.g. if the output tensor has rank +`5`, `output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, +`3`} + +If `index_vector_dim` is equal to `gather_indices.rank` we implicitly +consider `gather_indices` to have a trailing `1` dimension (i.e. if +`gather_indices` was of shape `[6,7]` and `index_vector_dim` is `2` then +we implicitly consider the shape of `gather_indices` to be `[6,7,1]`). The bounds for the output tensor along dimension `i` is computed as follows: 1. If `i` is present in `output_gather_dims` (i.e. is equal to - `output_gather_dims[k]` for some `k`) then we pick the corresponding - dimension bounds out of `gather_indices.shape` (i.e. pick - `gather_indices.shape.dims[k]`). 
+ `output_gather_dims[k]` for some `k`) then we pick the corresponding + dimension bounds out of `gather_indices.shape`, skipping + `index_vector_dim` (i.e. pick `gather_indices.shape.dims`[`k`] if `k` + < `index_vector_dim` and `gather_indices.shape.dims`[`k`+`1`] + otherwise). 2. If `i` is present in `output_window_dims` (i.e. equal to - `output_window_dims[k]` for some `k`) then we pick the corresponding bound - out of `window_bounds` after accounting for `elided_window_dims` (i.e. we - pick `adjusted_window_bounds[k]` where `adjusted_window_bounds` is - `window_bounds` with the bounds at indices `elided_window_dims` removed). + `output_window_dims`[`k`] for some `k`) then we pick the corresponding + bound out of `window_bounds` after accounting for `elided_window_dims` + (i.e. we pick `adjusted_window_bounds`[`k`] where `adjusted_window_bounds` + is `window_bounds` with the bounds at indices `elided_window_dims` + removed). The operand index `In` corresponding to an output index `Out` is computed as follows: 1. Let `G` = { `Out`[`k`] for `k` in `output_gather_dims` }. Use `G` to slice - out vector `S` such that `S`[`i`] = `gather_indices`[`G`, `i`]. - 2. Create an index, `S``in`, into `operand` using `S` by scattering - `S` using the `gather_dims_to_operand_dims` map (`S``in` is the - starting indices for _operand slice_ mentioned above.). More precisely: + out vector `S` such that `S`[`i`] = `gather_indices`[Combine(`G`, `i`)] + where Combine(A, b) inserts b at position `index_vector_dim` into A. + Note that this is well defined even if `G` is empty -- if `G` is empty then + `S` = `gather_indices`. + 2. Create an index, `S``in`, into `operand` using `S` by + scattering `S` using the `gather_dims_to_operand_dims` map + (`S``in` is the starting indices for _operand slice_ mentioned + above). More precisely: 1. `S``in`[`gather_dims_to_operand_dims`[`k`]] = `S`[`k`] if `k` < `gather_dims_to_operand_dims.size`. 2. `S``in`[`_`] = `0` otherwise. 
@@ -1136,7 +1150,12 @@ follows: `operand.rank` is `6` and `elided_window_dims` is {`0`, `2`} then `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}. -### Informal Description +### Informal Description and Examples + +`index_vector_dim` is set to `gather_indices.rank` - `1` in all of the +examples that follow. More interesting values for `index_vector_dim` +do not change the operation fundamentally, but make the visual representation +more cumbersome. To get an intuition on how all of the above fits together, let's look at an example that gathers 5 slices of shape `[8,6]` from a `[16,11]` tensor. The -- GitLab From c6807e0c7c998f0e38e6930fca4a8cf667f791c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 10:24:08 -0800 Subject: [PATCH 074/884] Arithmetic optimization: Rewrite Sub(0, y) => Neg(y) PiperOrigin-RevId: 187041872 --- .../grappler/optimizers/constant_folding.cc | 18 +++++++++++++++++- .../grappler/optimizers/constant_folding.h | 1 + .../optimizers/constant_folding_test.cc | 7 +++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 182e03f04e..10ca7dcce0 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1434,6 +1434,17 @@ void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node, graph_modified_ = true; } +void ConstantFolding::ReplaceSubtractionFromZeroByNegation(NodeDef* node, + GraphDef* graph) { + node->set_op("Neg"); + node->mutable_input()->SwapElements(0, 1); + const string ctrl_dep = + AddControlDependency(node->input(1), graph, node_map_.get()); + node_map_->UpdateInput(node->name(), node->input(1), ctrl_dep); + node->set_input(1, ctrl_dep); + graph_modified_ = true; +} + Status ConstantFolding::ReplaceOperationWithConstant( double value, const TensorShapeProto& shape, 
NodeDef* node, GraphDef* graph) { @@ -1636,12 +1647,17 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); if (y_matches_output_shape && ((is_mul && x_is_one) || (is_add && x_is_zero))) { - // TODO(rmlarsen): Handle subtraction 0 - y. // 1 * y = y or 0 + y = y. ReplaceOperationWithSnapshot(1, node, output); continue; } + if (y_matches_output_shape && (is_sub && x_is_zero)) { + // Replace 0 - y with Neg(y). + ReplaceSubtractionFromZeroByNegation(node, output); + continue; + } + // Replace 1 / y with Reciprocal op. if (y_matches_output_shape && is_any_div && x_is_one) { DataType type = node->attr().at("T").type(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 232b2f9fa0..2fd59c7f9c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -82,6 +82,7 @@ class ConstantFolding : public GraphOptimizer { GraphDef* graph); void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, GraphDef* graph); + void ReplaceSubtractionFromZeroByNegation(NodeDef* node, GraphDef* graph); Status ReplaceOperationWithConstant(double value, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 219f3bd5ec..c6540192d7 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -286,10 +286,9 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "sub2") { - // We don't handle this case yet. 
- EXPECT_EQ("Sub", node.op()); - EXPECT_EQ("zeros", node.input(0)); - EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("Neg", node.op()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("^zeros", node.input(1)); } const std::set square_zero_const{"mul1", "mul2", "mul5", "mul6", "matmul1", "matmul2"}; -- GitLab From 3ce1adbdf7b1f9a4a53d5438985d12b6526dbd14 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 10:24:56 -0800 Subject: [PATCH 075/884] Move accumulate_n_v2 to core. PiperOrigin-RevId: 187042001 --- tensorflow/contrib/framework/BUILD | 38 ------ .../framework/python/ops/accumulate_n_v2.py | 111 ------------------ tensorflow/python/kernel_tests/BUILD | 34 ++++++ .../kernel_tests/accumulate_n_eager_test.py} | 27 ++--- .../kernel_tests/accumulate_n_test.py} | 34 +++--- tensorflow/python/ops/math_ops.py | 81 ++++++------- 6 files changed, 99 insertions(+), 226 deletions(-) delete mode 100644 tensorflow/contrib/framework/python/ops/accumulate_n_v2.py rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_eager_test.py => python/kernel_tests/accumulate_n_eager_test.py} (72%) rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_test.py => python/kernel_tests/accumulate_n_test.py} (79%) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index dbdb5cfaac..1accb319d2 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -28,7 +28,6 @@ tf_custom_op_py_library( "python/framework/graph_util.py", "python/framework/tensor_util.py", "python/ops/__init__.py", - "python/ops/accumulate_n_v2.py", "python/ops/arg_scope.py", "python/ops/audio_ops.py", "python/ops/checkpoint_ops.py", @@ -161,23 +160,6 @@ py_test( ], ) -py_test( - name = "accumulate_n_v2_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", 
- "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:platform_test", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - cuda_py_test( name = "critical_section_test", size = "medium", @@ -196,26 +178,6 @@ cuda_py_test( ], ) -py_test( - name = "accumulate_n_v2_eager_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_eager_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python/eager:backprop", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:tape", - "//third_party/py/numpy", - ], -) - py_test( name = "ops_test", size = "small", diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py deleted file mode 100644 index 476528b0dd..0000000000 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Ops that will eventually be folded into tensorflow/python/ops/math_ops.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from tensorflow.python.eager import context -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops import math_ops - - - -def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): - """Returns the element-wise sum of a list of tensors. - - Optionally, pass `shape` and `tensor_dtype` for shape and type checking, - otherwise, these are inferred. - - `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not - wait for all of its inputs to be ready before beginning to sum. This can - save memory if inputs are ready at different times, since minimum temporary - storage is proportional to the output size rather than the inputs size. - - Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. - - For example: - - ```python - a = tf.constant([[1, 2], [3, 4]]) - b = tf.constant([[5, 0], [0, 6]]) - tf.accumulate_n_v2([a, b, a]) # [[7, 4], [6, 14]] - - # Explicitly pass shape and type - tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) - # [[7, 4], - # [6, 14]] - ``` - - Args: - inputs: A list of `Tensor` objects, each with same shape and type. - shape: Shape of elements of `inputs`. - tensor_dtype: The type of `inputs`. - name: A name for the operation (optional). - - Returns: - A `Tensor` of same shape and type as the elements of `inputs`. - - Raises: - ValueError: If `inputs` don't all have same shape and dtype or the shape - cannot be inferred. 
- """ - _INPUTS_ERR_MSG = ValueError("inputs must be a list of at least one Tensor" - "with the same dtype and shape") - if not inputs or not isinstance(inputs, (list, tuple)): - raise _INPUTS_ERR_MSG - inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) - if not all(isinstance(x, ops.Tensor) for x in inputs): - raise _INPUTS_ERR_MSG - if not all(x.dtype == inputs[0].dtype for x in inputs): - raise _INPUTS_ERR_MSG - if shape is not None: - shape = tensor_shape.as_shape(shape) - else: - shape = tensor_shape.unknown_shape() - for input_tensor in inputs: - if isinstance(input_tensor, ops.Tensor): - shape = shape.merge_with(input_tensor.get_shape()) - - # tensor_dtype is for safety only; operator's output type computed in C++ - if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}" - .format(tensor_dtype, inputs[0].dtype)) - - if len(inputs) == 1 and name is None: - return inputs[0] - elif len(inputs) == 1 and name is not None: - return array_ops.identity(inputs[0], name=name) - elif context.in_eager_mode(): - # TemporaryVariable not currently supported in eager mode; fall back - # onto AddN for now. - # TODO(frreiss) remove this once the lifetime of eager variables gets - # addressed - return math_ops.add_n(inputs, name=name) - else: - return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) - -# The following code should eventually be merged into -# tensorflow/python/ops/math_grad.py -@ops.RegisterGradient("AccumulateNV2") -def _AddNGrad(op, grad): - """Same as gradient for AddN. Copies the gradient to all inputs.""" - # Not broadcasting. 
- return [grad] * len(op.inputs) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d4ceb2e489..c9aa4a252d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2892,6 +2892,40 @@ tf_py_test( ], ) +tf_py_test( + name = "accumulate_n_test", + size = "small", + srcs = ["accumulate_n_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + +tf_py_test( + name = "accumulate_n_eager_test", + size = "small", + srcs = ["accumulate_n_eager_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:tape", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py similarity index 72% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py rename to tensorflow/python/kernel_tests/accumulate_n_eager_test.py index 35974b9e21..dc11b7dece 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py @@ -12,48 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`. - -These test cases spefically exercise the `eager` APIs. They need to be in a -separate file from the remaining tests because eager mode is currently something -you can turn on but can't turn off for the lifetime of the current process.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test - class AccumulateNV2EagerTest(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testMinimalEagerMode(self): forty = constant_op.constant(40) two = constant_op.constant(2) - answer = av2.accumulate_n_v2([forty, two]) + answer = math_ops.accumulate_n([forty, two]) self.assertEqual(42, answer.numpy()) - def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).numpy()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).numpy()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).numpy()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).numpy()) def testGrad(self): np.random.seed(42) @@ -65,16 +58,14 @@ class 
AccumulateNV2EagerTest(test_util.TensorFlowTestCase): ] def fn(first, second, third): - return av2.accumulate_n_v2([first, second, third]) + return math_ops.accumulate_n([first, second, third]) grad_fn = backprop.gradients_function(fn) grad = grad_fn(input_vars[0], input_vars[1], input_vars[2]) - self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 + self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 [elem.numpy() for elem in grad]) - if __name__ == "__main__": ops.enable_eager_execution() test.main() - diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py similarity index 79% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py rename to tensorflow/python/kernel_tests/accumulate_n_test.py index 45962098e9..0a6d4aea37 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_test.py @@ -12,42 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import googletest class AccumulateNV2Test(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).eval()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).eval()) def testInt(self): np.random.seed(54321) x = [np.random.randint(-128, 128, (5, 4, 3, 2, 1)) for _ in range(6)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllEqual(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllEqual(x[0] * 6, av2.accumulate_n_v2([tf_x[0]] * 6).eval()) + self.assertAllEqual(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllEqual(x[0] * 6, + math_ops.accumulate_n([tf_x[0]] * 6).eval()) def testGrad(self): np.random.seed(42) @@ -55,9 +55,9 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with 
self.test_session(use_gpu=True) as sess: input_vars = [ variables.Variable(10.0 * np.random.random()) - for i in range(0, num_inputs) + for _ in range(0, num_inputs) ] - accum_n = av2.accumulate_n_v2(input_vars) + accum_n = math_ops.accumulate_n(input_vars) sess.run(variables.global_variables_initializer()) accum_n_grad = gradients.gradients(accum_n, input_vars) self.assertAllEqual( @@ -77,7 +77,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): ops.convert_to_tensor(x, dtype=dtypes_lib.float32) for x in random_arrays ] - tf_val = av2.accumulate_n_v2(random_tensors) + tf_val = math_ops.accumulate_n(random_tensors) np_val = random_arrays[0] for random_array in random_arrays[1:]: np_val += random_array @@ -86,7 +86,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): def testZeroArgs(self): with self.test_session(): with self.assertRaises(ValueError): - tf_val = av2.accumulate_n_v2([]) + tf_val = math_ops.accumulate_n([]) tf_val.eval() def testWrongShape(self): @@ -94,28 +94,28 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): a = variables.Variable(0.2) b = variables.Variable(0.1) - tf_val = av2.accumulate_n_v2([a, b], shape=[2, 2]) # Should be shape=[] + math_ops.accumulate_n([a, b], shape=[2, 2]) # Should be shape=[] def testIncompatibleShapes(self): with self.test_session(): with self.assertRaises(ValueError): a = variables.Variable(np.array([0.1, 0.2])) b = variables.Variable(np.array([[0.3], [0.4]])) - tf_val = av2.accumulate_n_v2([a, b]) + math_ops.accumulate_n([a, b]) def testWrongType(self): with self.test_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) b = variables.Variable(0.1, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a, b], tensor_dtype=np.int32) + math_ops.accumulate_n([a, b], tensor_dtype=np.int32) def testWrongTypeOneInput(self): # Scenario that used to trigger a bug, even when testWrongType() worked with self.test_session(): with 
self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) + math_ops.accumulate_n([a], tensor_dtype=np.int32) if __name__ == "__main__": diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index a09540028f..c3899c7e12 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -158,14 +158,11 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_control_flow_ops from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import gen_spectral_ops -from tensorflow.python.ops import gen_state_ops -from tensorflow.python.ops import state_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_math_ops import * @@ -2181,14 +2178,12 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): Optionally, pass `shape` and `tensor_dtype` for shape and type checking, otherwise, these are inferred. - NOTE: This operation is not differentiable and cannot be used if inputs depend - on trainable variables. Please use `tf.add_n` for such cases. + `tf.accumulate_n` performs the same operation as `tf.add_n`, but does not + wait for all of its inputs to be ready before beginning to sum. This can + save memory if inputs are ready at different times, since minimum temporary + storage is proportional to the output size rather than the inputs size. - Aside from differentiability, `tf.accumulate_n` performs the same operation as - `tf.add_n`, but does not wait for all of its inputs to be ready before - beginning to sum. 
This can save memory if inputs are ready at different times, - since minimum temporary storage is proportional to the output size rather than - the inputs size. + `accumulate_n` is differentiable (but wasn't previous to TensorFlow 1.7). For example: @@ -2198,8 +2193,9 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): tf.accumulate_n([a, b, a]) # [[7, 4], [6, 14]] # Explicitly pass shape and type - tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) # [[7, 4], - # [6, 14]] + tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) + # [[7, 4], + # [6, 14]] ``` Args: @@ -2215,20 +2211,17 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): ValueError: If `inputs` don't all have same shape and dtype or the shape cannot be inferred. """ - if context.in_eager_mode(): - # TODO(apassos) remove this once the lifetime of eager variables gets - # addressed. - raise ValueError("accumulate_n not supported in eager mode") + def _input_error(): + return ValueError( + "inputs must be a list of at least one Tensor with the " + "same dtype and shape") if not inputs or not isinstance(inputs, (list, tuple)): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) if not all(isinstance(x, ops.Tensor) for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if not all(x.dtype == inputs[0].dtype for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if shape is not None: shape = tensor_shape.as_shape(shape) else: @@ -2236,27 +2229,31 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): for input_tensor in inputs: if isinstance(input_tensor, ops.Tensor): shape = shape.merge_with(input_tensor.get_shape()) - if 
tensor_dtype is None: - tensor_dtype = inputs[0].dtype - if tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}".format( - tensor_dtype, inputs[0].dtype)) - if len(inputs) == 1: + + # tensor_dtype is for safety only; operator's output type computed in C++ + if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: + raise TypeError("tensor_dtype is {}, but input is of type {}" + .format(tensor_dtype, inputs[0].dtype)) + + if len(inputs) == 1 and name is None: return inputs[0] - with ops.name_scope(name, "AccumulateN", inputs) as name: - var = gen_state_ops._temporary_variable( - shape=tensor_shape.vector(0), dtype=tensor_dtype) - with ops.colocate_with(var): - zeros = array_ops.zeros_like(gen_control_flow_ops._merge(inputs)[0]) - zeros.set_shape(shape) - ref = state_ops.assign(var, zeros, validate_shape=False) - update_ops = [ - state_ops.assign_add(ref, input_tensor, use_locking=True) - for input_tensor in inputs - ] - with ops.control_dependencies(update_ops): - return gen_state_ops._destroy_temporary_variable( - ref, var_name=var.op.name, name=name) + elif len(inputs) == 1 and name is not None: + return array_ops.identity(inputs[0], name=name) + elif context.in_eager_mode(): + # TemporaryVariable not currently supported in eager mode; fall back + # onto AddN for now. + # TODO(frreiss) remove this once the lifetime of eager variables gets + # addressed + return add_n(inputs, name=name) + else: + return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access + + +@ops.RegisterGradient("AccumulateNV2") +def _accumulate_n_grad(op, grad): + """Same as gradient for AddN. Copies the gradient to all inputs.""" + # Not broadcasting. 
+ return [grad] * len(op.inputs) @tf_export("nn.sigmoid", "sigmoid") -- GitLab From 0b94d6270866789d210d1914e60937b6f231a669 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 26 Feb 2018 10:41:44 -0800 Subject: [PATCH 076/884] Deleting references to outdated `translate/seq2seq` tutorial. PiperOrigin-RevId: 187044697 --- tensorflow/tools/ci_build/builds/test_tutorials.sh | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_tutorials.sh b/tensorflow/tools/ci_build/builds/test_tutorials.sh index 67e5af5564..db335f14ca 100755 --- a/tensorflow/tools/ci_build/builds/test_tutorials.sh +++ b/tensorflow/tools/ci_build/builds/test_tutorials.sh @@ -277,17 +277,6 @@ test_ptb_word_lm() { fi } - -# ----------------------------------------------------------- -# translate_test -test_translate_test() { - LOG_FILE=$1 - - run_in_directory "${TEST_DIR}" "${LOG_FILE}" \ - "${TF_MODELS_DIR}/tutorials/rnn/translate/translate.py" --self_test=True -} - - # Run the tutorial tests test_runner "tutorial test-on-install" \ "${TUT_TESTS}" "${TF_BUILD_TUT_TEST_BLACKLIST}" "${LOGS_DIR}" -- GitLab From ca328de4d8805a7495485e787811484d843c43a2 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 26 Feb 2018 10:42:59 -0800 Subject: [PATCH 077/884] [XLA] Add kConvert to EffectiveOperandPrecisionIsOutputPrecision list. 
PiperOrigin-RevId: 187044921 --- tensorflow/compiler/xla/service/bfloat16_support.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc index 3fd9e24601..07b4b14b5e 100644 --- a/tensorflow/compiler/xla/service/bfloat16_support.cc +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -79,6 +79,7 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( case HloOpcode::kBroadcast: case HloOpcode::kClamp: case HloOpcode::kConcatenate: + case HloOpcode::kConvert: case HloOpcode::kCopy: case HloOpcode::kGetTupleElement: case HloOpcode::kMaximum: -- GitLab From 7735b2db761fba6e76c170066b2e5c3b7f10688b Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 10:52:05 -0800 Subject: [PATCH 078/884] [XLA] Do not recompute flattened sets inside layout assignment. Cache the flattened sets instead of recomputing them. This matters for large graphs, since we may request the flattened set thousands of times on the same instruction, and it may be fairly expensive to construct for large tuples. 
PiperOrigin-RevId: 187046642 --- .../compiler/xla/service/layout_assignment.cc | 31 ++++++++++++++----- .../compiler/xla/service/layout_assignment.h | 10 ++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 0668f66051..4929300f7d 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -192,17 +192,34 @@ LayoutConstraints::LayoutConstraints( } } +PointsToSet::BufferSet* LayoutConstraints::GetBufferSet( + const HloInstruction* instruction) const { + auto it = buffer_sets_cache_.find(instruction); + if (it != buffer_sets_cache_.end()) { + return it->second.get(); + } + auto& buffer_set = + buffer_sets_cache_ + .emplace(instruction, MakeUnique()) + .first->second; + const auto& points_to_set = points_to_analysis_.GetPointsToSet(instruction); + points_to_set.ForEachElement( + [&buffer_set](const ShapeIndex& /*index*/, + const PointsToSet::BufferList& buffers) { + buffer_set->insert(buffers.begin(), buffers.end()); + }); + return buffer_set.get(); +} + bool LayoutConstraints::OperandBufferForwarded( const HloInstruction* instruction, int64 operand_no) const { // The operand is potentially forwarded if the intersection of points-to sets // of the operand and the instruction is non-empty. 
- auto output_buffers = - points_to_analysis_.GetPointsToSet(instruction).CreateFlattenedSet(); - auto operand_buffers = - points_to_analysis_.GetPointsToSet(instruction->operand(operand_no)) - .CreateFlattenedSet(); - for (const LogicalBuffer* output_buffer : output_buffers) { - if (operand_buffers.count(output_buffer) > 0) { + PointsToSet::BufferSet* output_buffers = GetBufferSet(instruction); + PointsToSet::BufferSet* operand_buffers = + GetBufferSet(instruction->operand(operand_no)); + for (const LogicalBuffer* output_buffer : *output_buffers) { + if (operand_buffers->count(output_buffer) > 0) { return true; } } diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 2901858448..7126cb50cf 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -199,6 +200,11 @@ class LayoutConstraints { string ToString() const; private: + // Find a bufferset in the bufferset cache. This is useful since we can + // currently create the flattened buffer set for the same instruction many + // times, which is often slow. + PointsToSet::BufferSet* GetBufferSet(const HloInstruction* instruction) const; + // The set of BufferLayoutConstraints applied to the computation. std::unordered_map buffer_constraints_; @@ -221,6 +227,10 @@ class LayoutConstraints { // Array-shaped buffers which have not yet been constrained. 
std::set unconstrained_buffer_ids_; + mutable tensorflow::gtl::FlatMap> + buffer_sets_cache_; + HloComputation* computation_; }; -- GitLab From 5a657b47f724b96730f764d3fb21c89e342e9c35 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 10:54:31 -0800 Subject: [PATCH 079/884] Integrate ClusterResolvers with TPUEstimator. PiperOrigin-RevId: 187047094 --- tensorflow/contrib/cluster_resolver/BUILD | 1 + .../python/training/cluster_resolver.py | 23 +- .../python/training/cluster_resolver_test.py | 2 + .../python/training/gce_cluster_resolver.py | 3 + .../python/training/tpu_cluster_resolver.py | 150 +++++++++--- .../training/tpu_cluster_resolver_test.py | 226 +++++++++++++----- .../contrib/tpu/python/tpu/tpu_config.py | 31 +++ 7 files changed, 345 insertions(+), 91 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/BUILD b/tensorflow/contrib/cluster_resolver/BUILD index 6b03df2b8e..1a124eca36 100644 --- a/tensorflow/contrib/cluster_resolver/BUILD +++ b/tensorflow/contrib/cluster_resolver/BUILD @@ -110,5 +110,6 @@ tf_py_test( "//tensorflow/python:platform_test", "//tensorflow/python:training", ], + grpc_enabled = True, main = "python/training/tpu_cluster_resolver_test.py", ) diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py index b04822fa9d..1c480b2513 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py @@ -53,11 +53,16 @@ class ClusterResolver(object): raise NotImplementedError( 'cluster_spec is not implemented for {}.'.format(self)) + @abc.abstractmethod + def master(self): + """...""" + raise NotImplementedError('master is not implemented for {}.'.format(self)) + class SimpleClusterResolver(ClusterResolver): """Simple implementation of ClusterResolver that accepts a ClusterSpec.""" - def __init__(self, cluster_spec): + def 
__init__(self, cluster_spec, master=''): """Creates a SimpleClusterResolver from a ClusterSpec.""" super(SimpleClusterResolver, self).__init__() @@ -65,10 +70,18 @@ class SimpleClusterResolver(ClusterResolver): raise TypeError('cluster_spec must be a ClusterSpec.') self._cluster_spec = cluster_spec + if not isinstance(master, str): + raise TypeError('master must be a string.') + self._master = master + def cluster_spec(self): """Returns the ClusterSpec passed into the constructor.""" return self._cluster_spec + def master(self): + """Returns the master address to use when creating a session.""" + return self._master + class UnionClusterResolver(ClusterResolver): """Performs a union on underlying ClusterResolvers. @@ -87,9 +100,13 @@ class UnionClusterResolver(ClusterResolver): Raises: TypeError: If any argument is not a subclass of `ClusterResolvers`. + ValueError: If there are no arguments passed. """ super(UnionClusterResolver, self).__init__() + if not args: + raise ValueError('At least one ClusterResolver is required.') + for cluster_resolver in args: if not isinstance(cluster_resolver, ClusterResolver): raise TypeError('All arguments must be a sub-class of ' @@ -169,3 +186,7 @@ class UnionClusterResolver(ClusterResolver): merged_cluster[job_name].update(task_dict) return ClusterSpec(merged_cluster) + + def master(self): + """master returns the master address from the first cluster resolver.""" + return self._cluster_resolvers[0].master() diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py index dbfb77723c..d9c97d53eb 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py @@ -234,5 +234,7 @@ class UnionClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(cluster_spec, expected_proto) +# TODO(saeta): Include tests 
for master resolution + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py index d6f2eced93..3f58241289 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py @@ -134,3 +134,6 @@ class GceClusterResolver(ClusterResolver): worker_list.sort() return ClusterSpec({self._job_name: worker_list}) + + def master(self): + return '' diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index a6a6e642e4..aeccf4c06b 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -23,7 +23,8 @@ from six.moves.urllib.request import Request from six.moves.urllib.request import urlopen from tensorflow.contrib.cluster_resolver.python.training.cluster_resolver import ClusterResolver -from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat _GOOGLE_API_CLIENT_INSTALLED = True try: @@ -46,13 +47,23 @@ class TPUClusterResolver(ClusterResolver): req = Request('http://metadata/computeMetadata/v1/%s' % path, headers={'Metadata-Flavor': 'Google'}) resp = urlopen(req) - return resp.read() + return compat.as_bytes(resp.read()) + + def _shouldResolve(self): + if (self._tpu == compat.as_bytes('') or + self._tpu == compat.as_bytes('local') or + self._tpu.startswith(compat.as_bytes('/bns')) or + self._tpu.startswith(compat.as_bytes('grpc://'))): + return False + return True def __init__(self, - tpu_names, + tpu, zone=None, project=None, - job_name='tpu_worker', + job_name='worker', + 
coordinator_name='coordinator', + coordinator_address=None, credentials='default', service=None): """Creates a new TPUClusterResolver object. @@ -61,7 +72,11 @@ class TPUClusterResolver(ClusterResolver): for the IP addresses and ports of each Cloud TPU listed. Args: - tpu_names: A list of names of the target Cloud TPUs. + tpu: Either a string, or a list of strings corresponding to the TPUs to + use. If the single string is the empty string, the string 'local', or a + string that begins with 'grpc://' or '/bns', then it is assumed to not + correspond with a Cloud TPU and will instead be passed as the session + master and no ClusterSpec propagation will be done. zone: Zone where the TPUs are located. If omitted or empty, we will assume that the zone of the TPU is the same as the zone of the GCE VM, which we will try to discover from the GCE metadata service. @@ -69,6 +84,12 @@ class TPUClusterResolver(ClusterResolver): empty, we will try to discover the project name of the GCE VM from the GCE metadata service. job_name: Name of the TensorFlow job the TPUs belong to. + coordinator_name: The name to use for the coordinator. Set to None if the + coordinator should not be included in the computed ClusterSpec. + coordinator_address: The address of the coordinator (typically an ip:port + pair). If set to None, a TF server will be started. If coordinator_name + is None, a TF server will not be started even if coordinator_address is + None. credentials: GCE Credentials. If None, then we use default credentials from the oauth2client service: The GCE API object returned by the googleapiclient.discovery @@ -77,26 +98,36 @@ class TPUClusterResolver(ClusterResolver): Raises: ImportError: If the googleapiclient is not installed. + ValueError: If no TPUs are specified. 
""" + if isinstance(tpu, list): + if not tpu: + raise ValueError('At least one TPU must be specified.') + if len(tpu) != 1: + raise NotImplementedError( + 'Using multiple TPUs in a single session is not yet implemented') + tpu = tpu[0] + self._tpu = compat.as_bytes(tpu) # self._tpu is always bytes + self._job_name = job_name + self._credentials = credentials - if not project: - project = self._requestComputeMetadata('/project/project-id') + should_resolve = self._shouldResolve() - if not zone: - zone_path = self._requestComputeMetadata('/instance/zone') + if not project and should_resolve: + project = self._requestComputeMetadata('project/project-id') + + if not zone and should_resolve: + zone_path = self._requestComputeMetadata('instance/zone') zone = zone_path.split('/')[-1] self._project = project self._zone = zone - self._tpu_names = tpu_names - self._job_name = job_name - self._credentials = credentials - if credentials == 'default': + if credentials == 'default' and should_resolve: if _GOOGLE_API_CLIENT_INSTALLED: self._credentials = GoogleCredentials.get_application_default() - if service is None: + if service is None and should_resolve: if not _GOOGLE_API_CLIENT_INSTALLED: raise ImportError('googleapiclient must be installed before using the ' 'TPU cluster resolver') @@ -107,25 +138,41 @@ class TPUClusterResolver(ClusterResolver): else: self._service = service - def get_master(self): - """Get the ClusterSpec grpc master path. + self._coordinator_name = coordinator_name + if coordinator_name and not coordinator_address and should_resolve: + self._start_local_server() + else: + self._coordinator_address = coordinator_address + + def master(self): + """Get the Master string to be used for the session. + + In the normal case, this returns the grpc path (grpc://1.2.3.4:8470) of + first instance in the ClusterSpec returned by the cluster_spec function. 
- This returns the grpc path (grpc://1.2.3.4:8470) of first instance in the - ClusterSpec returned by the cluster_spec function. This is suitable for use - for the `master` argument in tf.Session() when you are using one TPU. + If a non-TPU name is used when constructing a TPUClusterResolver, that will + be returned instead (e.g. If the tpus argument's value when constructing + this TPUClusterResolver was 'grpc://10.240.1.2:8470', + 'grpc://10.240.1.2:8470' will be returned). Returns: - string, the grpc path of the first instance in the ClusterSpec. + string, the connection string to use when creating a session. Raises: ValueError: If none of the TPUs specified exists. """ + if not self._shouldResolve(): + return self._tpu + job_tasks = self.cluster_spec().job_tasks(self._job_name) if not job_tasks: raise ValueError('No TPUs exists with the specified names exist.') return 'grpc://' + job_tasks[0] + def get_master(self): + return self.master() + def cluster_spec(self): """Returns a ClusterSpec object based on the latest TPU information. @@ -134,17 +181,54 @@ class TPUClusterResolver(ClusterResolver): Returns: A ClusterSpec containing host information returned from Cloud TPUs. - """ - worker_list = [] - - for tpu_name in self._tpu_names: - full_name = 'projects/%s/locations/%s/nodes/%s' % ( - self._project, self._zone, tpu_name) - request = self._service.projects().locations().nodes().get(name=full_name) - response = request.execute() - if 'health' in response and response['health'] == 'HEALTHY': - instance_url = '%s:%s' % (response['ipAddress'], response['port']) - worker_list.append(instance_url) - - return ClusterSpec({self._job_name: worker_list}) + Raises: + RuntimeError: If the provided TPU is not healthy. 
+ """ + if not self._shouldResolve(): + return server_lib.ClusterSpec({}) + + full_name = 'projects/%s/locations/%s/nodes/%s' % ( + self._project, self._zone, compat.as_text(self._tpu)) + request = self._service.projects().locations().nodes().get(name=full_name) + response = request.execute() + + if 'health' in response and response['health'] != 'HEALTHY': + raise RuntimeError('TPU "%s" is unhealthy: "%s"' % (self._tpu, + response['health'])) + + if 'networkEndpoints' in response: + worker_list = [ + '%s:%s' % (endpoint['ipAddress'], endpoint['port']) + for endpoint in response['networkEndpoints'] + ] + else: + # Fall back to the deprecated response format + instance_url = '%s:%s' % (response['ipAddress'], response['port']) + worker_list = [instance_url] + + cluster_spec = {self._job_name: worker_list} + + if self._coordinator_address: + cluster_spec[self._coordinator_name] = [self._coordinator_address] + + return server_lib.ClusterSpec(cluster_spec) + + def _start_local_server(self): + address = self._requestComputeMetadata('instance/network-interfaces/0/ip') + self._server = server_lib.Server( + { + 'local': ['0.0.0.0:0'] + }, protocol='grpc', config=None, start=True) + # self._server.target is of the form: grpc://ipaddress:port + target = compat.as_bytes(self._server.target) + splits = target.split(compat.as_bytes(':')) + assert len(splits) == 3, self._server.target + assert splits[0] == compat.as_bytes('grpc'), self._server.target + self._coordinator_port = compat.as_text(splits[2]) + self._coordinator_address = '%s:%s' % ( + address, compat.as_text(self._coordinator_port)) + + def __deepcopy__(self, memo): + # TODO(b/73668574): Remove this once RunConfig avoids performing deepcopy. 
+ return self diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 4fd34629cf..6b4a155152 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.contrib.cluster_resolver.python.training.tpu_cluster_resolver import TPUClusterResolver from tensorflow.python.platform import test from tensorflow.python.training import server_lib - +from tensorflow.python.util import compat mock = test.mock @@ -50,10 +50,12 @@ class MockNodeClass(object): def mock_request_compute_metadata(cls, *args, **kwargs): del cls, kwargs # Unused. - if args[0] == '/project/project-id': + if args[0] == 'project/project-id': return 'test-project' - elif args[0] == '/instance/zone': + elif args[0] == 'instance/zone': return 'projects/test-project/locations/us-central1-c' + elif args[0] == 'instance/network-interfaces/0/ip': + return '10.128.1.2' return '' @@ -113,17 +115,26 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver( project=None, zone=None, - tpu_names=['test-tpu-1'], + tpu=['test-tpu-1'], credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } - """ - self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + job { + name: 'coordinator' + tasks { key: 0 value: '10.128.1.2:%s' } + } + job { + name: 'worker' + tasks { key: 0 value: '10.1.2.3:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) - def testSimpleSuccessfulRetrieval(self): + @mock.patch.object(TPUClusterResolver, 
'_requestComputeMetadata', + mock_request_compute_metadata) + def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', @@ -133,116 +144,217 @@ class TPUClusterResolverTest(test.TestCase): } tpu_cluster_resolver = TPUClusterResolver( - project='test-project', - zone='us-central1-c', - tpu_names=['test-tpu-1'], + project=None, + zone=None, + tpu=['test-tpu-1'], + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testMultipleSuccessfulRetrieval(self): + def testSimpleSuccessfulRetrieval(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', 'port': '8470', 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu=['test-tpu-1'], + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.4.5.6:8470' } - tasks { key: 1 value: '10.1.2.3:8470' } } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testHealthyTpuNodeRetrieval(self): + def testNewNetworkEndpointFormat(self): tpu_map = { 
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-3': { - 'ipAddress': '10.7.8.9', - 'port': '8470', - 'health': 'UNHEALTHY' + 'health': 'HEALTHY', + 'networkEndpoints': [{ + 'ipAddress': '10.2.3.4', + 'port': 8470, + }] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1', 'test-tpu-3'], + tpu='test-tpu-1', + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { - name: 'tpu_worker' - tasks { - key: 0 - value: '10.1.2.3:8470' - } - } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.2.3.4:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + self.assertEqual('grpc://10.2.3.4:8470', tpu_cluster_resolver.master()) - def testGetMasterMultipleEntries(self): + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', + mock_request_compute_metadata) + def testPodResolution(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] + } + } + + tpu_cluster_resolver = TPUClusterResolver( 
+ tpu='test-tpu-1', + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'coordinator', + tasks { key: 0 value: '10.128.1.2:%s'} + } + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) + + def testPodResolutionNoCoordinator(self): + tpu_map = { + 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu='test-tpu-1', + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) - self.assertEqual('grpc://10.4.5.6:8470', tpu_cluster_resolver.get_master()) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ + self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) def testGetMasterNoEntries(self): tpu_map = {} + with self.assertRaises(ValueError): + TPUClusterResolver( + project='test-project', + zone='us-central1-c', + tpu=[], + coordinator_name=None, + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + # TODO(saeta): Convert to parameterized test when included in OSS TF. 
+ def verifyShouldResolve(self, tpu, should_resolve): tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=[], + tpu=tpu, + coordinator_name=None, credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) - with self.assertRaises(ValueError): - tpu_cluster_resolver.get_master() + service=self.mock_service_client(tpu_map={})) + self.assertEqual(should_resolve, tpu_cluster_resolver._shouldResolve(), + "TPU: '%s'" % tpu) + + def testShouldResolveNoName(self): + self.verifyShouldResolve('', False) + + def testShouldResolveLocal(self): + self.verifyShouldResolve('local', False) + + def testShouldResolveGrpc(self): + self.verifyShouldResolve('grpc://10.1.2.3:8470', False) + + def testShouldResolveBns(self): + self.verifyShouldResolve('/bns/foo/bar', False) + + def testShouldResolveName(self): + self.verifyShouldResolve('mytpu', True) + + def testShouldResolveList(self): + self.verifyShouldResolve(['myothertpu'], True) + + def testShouldResolveGrpcPrefix(self): + self.verifyShouldResolve('grpctpu', True) + + def testNoCallComputeMetadata(self): + tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar') + self.assertEqual(compat.as_bytes('/bns/foo/bar'), + tpu_cluster_resolver.master()) + self.assertEqual( + server_lib.ClusterSpec({}), tpu_cluster_resolver.cluster_spec()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 6440702182..7ceb4069cf 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -26,6 +26,7 @@ import os import numpy as np from tensorflow.contrib.tpu.python.tpu import util as util_lib +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.platform import tf_logging as logging @@ -140,6 +141,7 @@ class RunConfig(run_config_lib.RunConfig): 
tpu_config=None, evaluation_master=None, master=None, + cluster=None, **kwargs): """Constructs a RunConfig. @@ -148,15 +150,26 @@ class RunConfig(run_config_lib.RunConfig): evaluation_master: a string. The address of the master to use for eval. Defaults to master if not set. master: a string. The address of the master to use for training. + cluster: a ClusterResolver **kwargs: keyword config parameters. + + Raises: + ValueError: if cluster is not None and the provided session_config has a + cluster_def already. """ super(RunConfig, self).__init__(**kwargs) self._tpu_config = tpu_config or TPUConfig() + self._cluster = cluster # If user sets master and/or evaluation_master explicilty, including empty # string '', take it. Otherwise, take the values set by parent class. if master is not None: + if cluster is not None: + raise ValueError('Both master and cluster are set.') self._master = master + else: + if cluster: + self._master = cluster.master() if evaluation_master is not None: self._evaluation_master = evaluation_master @@ -170,6 +183,20 @@ class RunConfig(run_config_lib.RunConfig): # evaluation_master to master, unless user overwrites it. self._evaluation_master = self._master + # Set the ClusterSpec to use + if cluster: + self._cluster_spec = cluster.cluster_spec() + + # Merge the cluster_def into the ConfigProto. 
+ if self._session_config is None: # pylint: disable=access-member-before-definition + self._session_config = config_pb2.ConfigProto(allow_soft_placement=True) + if self._session_config.HasField('cluster_def'): + raise ValueError( + 'You cannot provide a ClusterResolver and ' + 'session_config.cluster_def.') + self._session_config.cluster_def.CopyFrom( + self._cluster_spec.as_cluster_def()) + @property def evaluation_master(self): return self._evaluation_master @@ -182,6 +209,10 @@ class RunConfig(run_config_lib.RunConfig): def tpu_config(self): return self._tpu_config + @property + def cluster(self): + return self._cluster + def replace(self, **kwargs): if 'tpu_config' not in kwargs: return super(RunConfig, self).replace(**kwargs) -- GitLab From 24c619b6c4dd38fc4ef0f51b92e5f16809cc4ec8 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Mon, 26 Feb 2018 10:59:54 -0800 Subject: [PATCH 080/884] Automated g4 rollback of changelist 185324160 PiperOrigin-RevId: 187048135 --- tensorflow/contrib/cmake/tf_core_cpu.cmake | 7 ++ tensorflow/contrib/makefile/Makefile | 1 + .../core/common_runtime/gpu/gpu_id_manager.cc | 50 +++++++-- .../core/common_runtime/gpu/gpu_id_manager.h | 14 ++- tensorflow/core/grappler/clusters/BUILD | 26 ++++- .../core/grappler/clusters/single_machine.cc | 17 ++- tensorflow/core/grappler/clusters/utils.cc | 71 ++++++++----- tensorflow/core/grappler/clusters/utils.h | 3 +- .../core/grappler/clusters/utils_test.cc | 100 ++++++++++++++++++ tensorflow/core/grappler/costs/BUILD | 1 + tensorflow/core/grappler/costs/utils.cc | 18 +++- 11 files changed, 262 insertions(+), 46 deletions(-) create mode 100644 tensorflow/core/grappler/clusters/utils_test.cc diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake index 96ac60d095..a54cbff33b 100644 --- a/tensorflow/contrib/cmake/tf_core_cpu.cmake +++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake @@ -63,6 +63,12 @@ file(GLOB_RECURSE tf_core_cpu_exclude_srcs 
"${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" "${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc" ) +file(GLOB_RECURSE tf_core_cpu_whitelisted_srcs + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.h" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc" +) +list(REMOVE_ITEM tf_core_cpu_exclude_srcs ${tf_core_cpu_whitelisted_srcs}) list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs}) if (tensorflow_ENABLE_GPU) @@ -79,6 +85,7 @@ if (tensorflow_ENABLE_GPU) "${tensorflow_source_dir}/tensorflow/core/*test*.cc" ) list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_gpu_exclude_srcs}) + list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_cpu_whitelisted_srcs}) list(APPEND tf_core_cpu_srcs ${tf_core_gpu_srcs}) endif() diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 81327407d4..05e8d9064b 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -677,6 +677,7 @@ endif # TEGRA TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) # Add in any extra files that don't fit the patterns easily TF_CC_SRCS += tensorflow/contrib/makefile/downloads/fft2d/fftsg.c +TF_CC_SRCS += tensorflow/core/common_runtime/gpu/gpu_id_manager.cc # Also include the op and kernel definitions. TF_CC_SRCS += $(shell cat $(MAKEFILE_DIR)/tf_op_files.txt) PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt) diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc index 207afdca75..7dfff3269c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc @@ -18,7 +18,10 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" namespace tensorflow { @@ -27,8 +30,8 @@ namespace { class TfToCudaGpuIdMap { public: static TfToCudaGpuIdMap* singleton() { - static auto* manager = new TfToCudaGpuIdMap; - return manager; + static auto* id_map = new TfToCudaGpuIdMap; + return id_map; } void InsertOrDie(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) @@ -47,18 +50,41 @@ class TfToCudaGpuIdMap { } } - int32 FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { + CudaGpuId FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); + return FindOrDieLocked(tf_gpu_id); + } + + bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const + LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + if (id_map_.count(tf_gpu_id.value()) == 0) return false; + *cuda_gpu_id = FindOrDieLocked(tf_gpu_id); + return true; + } + + private: + TfToCudaGpuIdMap() = default; + + CudaGpuId FindOrDieLocked(TfGpuId tf_gpu_id) const + EXCLUSIVE_LOCKS_REQUIRED(mu_) { auto result = id_map_.find(tf_gpu_id.value()); CHECK(result != id_map_.end()) << "Could not find the mapping for TfGpuId: " << tf_gpu_id; - return result->second; + return CudaGpuId(result->second); + } + + void TestOnlyReset() LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + id_map_.clear(); } - private: using IdMapType = std::unordered_map; mutable mutex mu_; IdMapType id_map_ GUARDED_BY(mu_); + + friend class ::tensorflow::GpuIdManager; + TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap); }; } // namespace @@ -67,8 +93,20 @@ void GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, TfToCudaGpuIdMap::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id); } +Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { + if 
(TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) { + return Status::OK(); + } + return errors::NotFound("TF GPU device with id ", tf_gpu_id.value(), + " was not registered"); +} + CudaGpuId GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id) { - return CudaGpuId(TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id)); + return TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id); +} + +void GpuIdManager::TestOnlyReset() { + TfToCudaGpuIdMap::singleton()->TestOnlyReset(); } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h index 33925d8c36..2b54cc184c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h @@ -17,15 +17,25 @@ limitations under the License. #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_MANAGER_H_ #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/status.h" namespace tensorflow { -// Class that manages the translation between Tensorflow GPU ids and CUDA GPU -// ids. +// Class that maintains a map from TfGpuId to CudaGpuId, and manages the +// translation between them. class GpuIdManager { public: + // Adds a mapping from tf_gpu_id to cuda_gpu_id. static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); + + // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found. + static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id); + // Similar to the above version, but returns the result, and checks fail if + // no result is found. static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id); + + // Clears the map. Used in unit tests only. 
+ static void TestOnlyReset(); }; } // namespace tensorflow diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD index b8f8e13c9a..b653f902e8 100644 --- a/tensorflow/core/grappler/clusters/BUILD +++ b/tensorflow/core/grappler/clusters/BUILD @@ -1,7 +1,12 @@ licenses(["notice"]) # Apache 2.0 +load("//tensorflow:tensorflow.bzl", "if_cuda") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") +load( + "//tensorflow/core:platform/default/build_config_root.bzl", + "tf_cuda_tests_tags", +) filegroup( name = "all_files", @@ -26,13 +31,12 @@ config_setting( tf_cuda_library( name = "utils", srcs = ["utils.cc"], - hdrs = [ - "utils.h", - ], + hdrs = ["utils.h"], visibility = ["//visibility:public"], deps = [ "//third_party/eigen3", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ] + select({ @@ -41,6 +45,21 @@ tf_cuda_library( }), ) +tf_cc_test( + name = "utils_test", + srcs = ["utils_test.cc"], + linkstatic = if_cuda(1, 0), + tags = tf_cuda_tests_tags(), + deps = [ + ":utils", + "//tensorflow/core:gpu_id", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "cluster", srcs = ["cluster.cc"], @@ -104,6 +123,7 @@ cc_library( "//tensorflow/core:core_cpu_lib", "//tensorflow/core:direct_session", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core/grappler:utils", "//tensorflow/core/kernels:ops_util", diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index cc7f418d49..8e236c9ee8 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -21,6 +21,8 @@ limitations under the License. 
#include "tensorflow/cc/training/queue_runner.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/grappler/clusters/utils.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/kernels/ops_util.h" @@ -80,13 +82,24 @@ Status SingleMachine::Provision() { std::vector devices; TF_RETURN_IF_ERROR(session_->ListDevices(&devices)); - int gpu_id = 0; for (const auto& dev : devices) { DeviceProperties attr; if (dev.device_type() == "CPU") { attr = GetLocalCPUInfo(); } else if (dev.device_type() == "GPU") { - attr = GetLocalGPUInfo(gpu_id++); + DeviceNameUtils::ParsedName parsed; + if (!DeviceNameUtils::ParseFullName(dev.name(), &parsed)) { + return errors::InvalidArgument( + strings::StrCat("Not able to parse GPU device name: ", dev.name())); + } + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + return errors::Unavailable("Unknown TF GPU device with id ", + tf_gpu_id.value(), ": ", s.ToString()); + } + attr = GetLocalGPUInfo(cuda_gpu_id); } else if (dev.device_type().find("XLA") == string::npos) { // Filter out the fake XLA devices to avoid double counting the actual // hardware resources that are available. diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc index 607e10e1ab..b54b34959a 100644 --- a/tensorflow/core/grappler/clusters/utils.cc +++ b/tensorflow/core/grappler/clusters/utils.cc @@ -27,6 +27,9 @@ limitations under the License. 
#include "include/libxsmm.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" @@ -66,36 +69,40 @@ DeviceProperties GetLocalCPUInfo() { return device; } -DeviceProperties GetLocalGPUInfo(int gpu_id) { +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) { DeviceProperties device; device.set_type("GPU"); #if GOOGLE_CUDA cudaDeviceProp properties; - cudaError_t error = cudaGetDeviceProperties(&properties, gpu_id); - if (error == cudaSuccess) { - device.set_vendor("NVidia"); - device.set_model(properties.name); - device.set_frequency(properties.clockRate * 1e-3); - device.set_num_cores(properties.multiProcessorCount); - device.set_num_registers(properties.regsPerMultiprocessor); - // For compute capability less than 5, l1 cache size is configurable to - // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For - // compute capability larger or equal to 5, l1 cache (unified with texture - // cache) size is 24 KB. This number may need to be updated for future - // compute capabilities. - device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); - device.set_l2_cache_size(properties.l2CacheSize); - device.set_l3_cache_size(0); - device.set_shared_memory_size_per_multiprocessor( - properties.sharedMemPerMultiprocessor); - device.set_memory_size(properties.totalGlobalMem); - // 8 is the number of bits per byte. 2 is accounted for - // double data rate (DDR). 
- device.set_bandwidth(properties.memoryBusWidth / 8 * - properties.memoryClockRate * 2); + cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value()); + if (error != cudaSuccess) { + device.set_type("UNKNOWN"); + LOG(ERROR) << "Failed to get device properties, error code: " << error; + return device; } + device.set_vendor("NVIDIA"); + device.set_model(properties.name); + device.set_frequency(properties.clockRate * 1e-3); + device.set_num_cores(properties.multiProcessorCount); + device.set_num_registers(properties.regsPerMultiprocessor); + // For compute capability less than 5, l1 cache size is configurable to + // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For + // compute capability larger or equal to 5, l1 cache (unified with texture + // cache) size is 24 KB. This number may need to be updated for future + // compute capabilities. + device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); + device.set_l2_cache_size(properties.l2CacheSize); + device.set_l3_cache_size(0); + device.set_shared_memory_size_per_multiprocessor( + properties.sharedMemPerMultiprocessor); + device.set_memory_size(properties.totalGlobalMem); + // 8 is the number of bits per byte. 2 is accounted for + // double data rate (DDR). 
+ device.set_bandwidth(properties.memoryBusWidth / 8 * + properties.memoryClockRate * 2); + (*device.mutable_environment())["architecture"] = strings::StrCat(properties.major, ".", properties.minor); (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION); @@ -106,18 +113,26 @@ DeviceProperties GetLocalGPUInfo(int gpu_id) { } DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + if (device.type == "CPU") { return GetLocalCPUInfo(); } else if (device.type == "GPU") { if (device.has_id) { - return GetLocalGPUInfo(device.id); + TfGpuId tf_gpu_id(device.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else { - return GetLocalGPUInfo(0); + return GetLocalGPUInfo(CudaGpuId(0)); } } - DeviceProperties result; - result.set_type("UNKNOWN"); - return result; + return unknown; } } // end namespace grappler diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h index 191942040a..df8e7dca44 100644 --- a/tensorflow/core/grappler/clusters/utils.h +++ b/tensorflow/core/grappler/clusters/utils.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ #define TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" #include "tensorflow/core/protobuf/device_properties.pb.h" #include "tensorflow/core/util/device_name_utils.h" @@ -27,7 +28,7 @@ DeviceProperties GetLocalCPUInfo(); // Returns the DeviceProperties for the specified GPU attached to the server on // which grappler is running. 
-DeviceProperties GetLocalGPUInfo(int gpu_id); +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id); // Returns the DeviceProperties of the specified device DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device); diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc new file mode 100644 index 0000000000..74218adbac --- /dev/null +++ b/tensorflow/core/grappler/clusters/utils_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/clusters/utils.h" + +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/device_properties.pb.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(UtilsTest, GetLocalGPUInfo) { + GpuIdManager::TestOnlyReset(); +#if GOOGLE_CUDA + LOG(INFO) << "CUDA is enabled."; + DeviceProperties properties; + + // Invalid CUDA GPU ID. + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Succeed when a valid CUDA GPU id was inserted. 
+ properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#else + LOG(INFO) << "CUDA is not enabled."; + DeviceProperties properties; + + properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("GPU", properties.type()); +#endif +} + +TEST(UtilsTest, GetDeviceInfo) { + GpuIdManager::TestOnlyReset(); + DeviceNameUtils::ParsedName device; + DeviceProperties properties; + + // Invalid type. + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Cpu info. + device.type = "CPU"; + properties = GetDeviceInfo(device); + EXPECT_EQ("CPU", properties.type()); + + // No TF GPU id provided. + device.type = "GPU"; + device.has_id = false; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); +#if GOOGLE_CUDA + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif + + // TF to CUDA GPU id mapping entry doesn't exist. + device.has_id = true; + device.id = 0; + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + +#if GOOGLE_CUDA + // Invalid CUDA GPU id. + GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100)); + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Valid CUDA GPU id. 
+ GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0)); + device.id = 1; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 0fe01e9c9e..5336df1f51 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -142,6 +142,7 @@ tf_cuda_library( "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:graph", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 602f69f12e..076945d5c6 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -26,6 +26,8 @@ limitations under the License. 
#include "cuda/include/cudnn.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" @@ -200,17 +202,25 @@ std::vector FindInputFeatures( } DeviceProperties GetDeviceInfo(const string& device_str) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + DeviceNameUtils::ParsedName parsed; if (DeviceNameUtils::ParseFullName(device_str, &parsed)) { if (parsed.type == "GPU") { - return GetLocalGPUInfo(parsed.id); + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else if (parsed.type == "CPU") { return GetLocalCPUInfo(); } } - DeviceProperties device; - device.set_type("UNKNOWN"); - return device; + return unknown; } DeviceProperties GetDeviceInfo(const CostGraphDef::Node& node) { -- GitLab From 49b666dbbd58958a7499fa3961c1c8c75757ad7c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:08:54 -0800 Subject: [PATCH 081/884] Bring in `isbuiltin`. 
PiperOrigin-RevId: 187049824 --- tensorflow/python/util/tf_inspect.py | 5 +++++ tensorflow/python/util/tf_inspect_test.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index c2fe6fc449..a7cead5555 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -149,6 +149,11 @@ def getsource(object): # pylint: disable=redefined-builtin return _inspect.getsource(tf_decorator.unwrap(object)[1]) +def isbuiltin(object): # pylint: disable=redefined-builtin + """TFDecorator-aware replacement for inspect.isbuiltin.""" + return _inspect.isbuiltin(tf_decorator.unwrap(object)[1]) + + def isclass(object): # pylint: disable=redefined-builtin """TFDecorator-aware replacement for inspect.isclass.""" return _inspect.isclass(tf_decorator.unwrap(object)[1]) diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py index 8903e1156b..129408449e 100644 --- a/tensorflow/python/util/tf_inspect_test.py +++ b/tensorflow/python/util/tf_inspect_test.py @@ -144,6 +144,19 @@ def test_decorated_function_with_defaults(a, b=2, c='Hello'): self.assertEqual( expected, tf_inspect.getsource(test_decorated_function_with_defaults)) + def testIsBuiltin(self): + self.assertEqual( + tf_inspect.isbuiltin(TestDecoratedClass), + inspect.isbuiltin(TestDecoratedClass)) + self.assertEqual( + tf_inspect.isbuiltin(test_decorated_function), + inspect.isbuiltin(test_decorated_function)) + self.assertEqual( + tf_inspect.isbuiltin(test_undecorated_function), + inspect.isbuiltin(test_undecorated_function)) + self.assertEqual(tf_inspect.isbuiltin(range), inspect.isbuiltin(range)) + self.assertEqual(tf_inspect.isbuiltin(max), inspect.isbuiltin(max)) + def testIsClass(self): self.assertTrue(tf_inspect.isclass(TestDecoratedClass)) self.assertFalse(tf_inspect.isclass(test_decorated_function)) -- GitLab From 59e59b7b1065715e0e59ee134e769f625ec28edd Mon Sep 
17 00:00:00 2001 From: Asim Shankar Date: Mon, 26 Feb 2018 11:10:20 -0800 Subject: [PATCH 082/884] eager/examples/resnet50: Fix breakage. PiperOrigin-RevId: 187050075 --- .../contrib/eager/python/examples/resnet50/resnet50_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index c106ab0a06..65dcc53aab 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -194,11 +194,11 @@ class ResNet50Benchmarks(tf.test.Benchmark): with tf.device(device): images, _ = random_batch(batch_size) for _ in xrange(num_burn): - model(images).cpu() + model(images, training=False).cpu() gc.collect() start = time.time() for _ in xrange(num_iters): - model(images).cpu() + model(images, training=False).cpu() self._report(label, start, num_iters, device, batch_size, data_format) def benchmark_eager_apply(self): -- GitLab From 98f38b608073e761d75227373b2b2c7d26c483e5 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 11:12:04 -0800 Subject: [PATCH 083/884] Add support for parsing the "gather" HLO PiperOrigin-RevId: 187050345 --- .../compiler/xla/tools/parser/hlo_parser.cc | 37 +++++++++++++++++-- .../xla/tools/parser/hlo_parser_test.cc | 24 ++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index cd2b843ad3..e60a5a4919 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -1049,9 +1049,40 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateDot(shape, operands[0], operands[1], dnum)); break; } - case HloOpcode::kGather: - // TODO(b/72710576): HLO parsing is not implemented for Gather. 
- return TokenError("HLO parsing is not implemented for Gather"); + case HloOpcode::kGather: { + optional> output_window_dims; + attrs["output_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &output_window_dims}; + optional> elided_window_dims; + attrs["elided_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &elided_window_dims}; + optional> gather_dims_to_operand_dims; + attrs["gather_dims_to_operand_dims"] = {/*required=*/true, + AttrTy::kBracedInt64List, + &gather_dims_to_operand_dims}; + optional index_vector_dim; + attrs["index_vector_dim"] = {/*required=*/true, AttrTy::kInt64, + &index_vector_dim}; + optional> window_bounds; + attrs["window_bounds"] = {/*required=*/true, AttrTy::kBracedInt64List, + &window_bounds}; + + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + + GatherDimensionNumbers dim_numbers = HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/*output_window_dims, + /*elided_window_dims=*/*elided_window_dims, + /*gather_dims_to_operand_dims=*/*gather_dims_to_operand_dims, + /*index_vector_dim=*/*index_vector_dim); + + instruction = builder->AddInstruction(HloInstruction::CreateGather( + shape, /*operand=*/operands[0], /*gather_indices=*/operands[1], + dim_numbers, *window_bounds)); + break; + } case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index b8c6b59204..863081d654 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -716,6 +716,18 @@ ENTRY %sparse_f32_r1 () -> f32[9] { ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6}) } +)" +}, +{ +"gather", +R"(HloModule StringifyGather + +ENTRY %Gather (input_tensor: f32[50,49,48,47,46], gather_indices: s64[10,9,8,7,5]) -> 
f32[10,9,8,7,30,29,28,27,26] { + %input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + %gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT %gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); @@ -860,6 +872,18 @@ ENTRY dot { ROOT dot = f32[2,3]{1,0} dot(a, b), lhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={0} } +)" +}, +{ +"gather", +R"(HloModule gather + +ENTRY Gather { + input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(input_tensor, gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); -- GitLab From b7b4fe66ee8adf936b1c2508a298c1e26a858af1 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 26 Feb 2018 11:13:09 -0800 Subject: [PATCH 084/884] Added const to Node* in various parts of the code base. 
PiperOrigin-RevId: 187050526 --- tensorflow/compiler/tf2xla/const_analysis.cc | 4 ++-- tensorflow/compiler/tf2xla/graph_compiler.cc | 2 +- .../core/common_runtime/shape_refiner.cc | 4 ++-- .../core/distributed_runtime/scheduler.cc | 18 +++++++++--------- .../core/distributed_runtime/scheduler.h | 6 +++--- tensorflow/core/graph/costmodel.cc | 2 +- tensorflow/core/graph/graph.cc | 2 +- tensorflow/core/graph/graph.h | 2 +- tensorflow/core/graph/graph_constructor.cc | 2 +- tensorflow/core/graph/graph_partition.cc | 6 +++--- tensorflow/core/graph/node_builder.cc | 6 +++--- tensorflow/core/graph/node_builder.h | 6 +++--- tensorflow/core/graph/optimizer_cse.cc | 16 ++++++++-------- 13 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index 82923722c5..6f46532419 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -37,7 +37,7 @@ Status BackwardsConstAnalysis(const Graph& g, }; Status status; - std::unordered_set<Node*> must_be_const; + std::unordered_set<const Node*> must_be_const; auto visit = [&status, &metadata_ops, &must_be_const, compile_time_const_args](Node* node) { if (!status.ok()) return; @@ -55,7 +55,7 @@ Status BackwardsConstAnalysis(const Graph& g, compile_time_const_args->at(index) = true; return; } - for (Node* pred : node->in_nodes()) { + for (const Node* pred : node->in_nodes()) { must_be_const.insert(pred); } return; diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index 058a1f2621..b20c1ffc7d 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -130,7 +130,7 @@ Status GraphCompiler::Compile() { // Set up inputs from outputs of previous nodes.
for (auto* e : n->in_edges()) { if (e->IsControlEdge()) continue; - Node* src = e->src(); + const Node* src = e->src(); TF_RET_CHECK(src->id() < output_registry.size()); const NodeOutputs& src_outputs = output_registry[src->id()]; diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 45cdab98e0..2acaa31d32 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -211,14 +211,14 @@ Status ShapeRefiner::AddNode(const Node* node) { // For each 'input' of this node, fetch the corresponding shape // from 'input's InferenceContext, and store into a vector // indexed by 'node's input. - std::vector input_nodes(node->num_inputs()); + std::vector input_nodes(node->num_inputs()); std::vector input_shapes(node->num_inputs()); std::vector>> input_handle_shapes_and_types(node->num_inputs()); for (const Edge* e : node->in_edges()) { if (e->IsControlEdge()) continue; - Node* input = e->src(); + const Node* input = e->src(); auto it = node_to_context_.find(input); if (it == node_to_context_.end()) { return errors::FailedPrecondition( diff --git a/tensorflow/core/distributed_runtime/scheduler.cc b/tensorflow/core/distributed_runtime/scheduler.cc index 9dae5b3b92..8403636197 100644 --- a/tensorflow/core/distributed_runtime/scheduler.cc +++ b/tensorflow/core/distributed_runtime/scheduler.cc @@ -80,7 +80,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { std::vector pending_count(graph_->num_node_ids()); InitializePending(graph_, &pending_count); - std::deque queue; + std::deque queue; Node* srcNode = graph_->source_node(); queue.push_back(srcNode); (*asap_times)[srcNode->id()] = 0; @@ -92,7 +92,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { for (const Edge* out_edge : curr->out_edges()) { // The time needed for 'out' to get its input from 'curr'. 
Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && curr->assigned_device_name() != out->assigned_device_name()) { // Add an arbitrary 10microsecs for each copy. @@ -137,7 +137,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { } } - std::deque queue; + std::deque queue; Node* sinkNode = graph_->sink_node(); queue.push_back(sinkNode); (*alap_times)[sinkNode->id()] = 0; @@ -148,7 +148,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { for (const Edge* in_edge : curr->in_edges()) { // The time needed for 'curr' to get its input from 'src'. Microseconds copy_time(0); - Node* src = in_edge->src(); + const Node* src = in_edge->src(); if (!in_edge->IsControlEdge() && src->assigned_device_name() != curr->assigned_device_name()) { // TODO(yuanbyu): Use the real cost model @@ -236,7 +236,7 @@ Microseconds GreedyScheduler::ComputeSchedule( for (const Edge* out_edge : event.node->out_edges()) { Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && event.node->assigned_device_name() != out->assigned_device_name()) { // TODO(yuanbyu): Use below with the real cost model. 
@@ -277,11 +277,11 @@ Microseconds GreedyScheduler::ComputeSchedule( return max_completion; } -Node* GreedyScheduler::GetNodeWithHighestPriority( - const std::vector<Node*>& nodes) { - Node* curr_node = nullptr; +const Node* GreedyScheduler::GetNodeWithHighestPriority( + const std::vector<const Node*>& nodes) { + const Node* curr_node = nullptr; int64 curr_priority = kint64max; - for (Node* n : nodes) { + for (const Node* n : nodes) { if ((*priority_)[n->id()] < curr_priority) { curr_node = n; curr_priority = (*priority_)[n->id()]; diff --git a/tensorflow/core/distributed_runtime/scheduler.h b/tensorflow/core/distributed_runtime/scheduler.h index ef87b9834d..bf9d0d1bec 100644 --- a/tensorflow/core/distributed_runtime/scheduler.h +++ b/tensorflow/core/distributed_runtime/scheduler.h @@ -57,11 +57,11 @@ class GreedyScheduler { struct Sim { int degree_parallelism; int num_running; - std::vector<Node*> ready_nodes; + std::vector<const Node*> ready_nodes; }; struct Event { - Node* node; + const Node* node; Microseconds time; bool is_completion; @@ -79,7 +79,7 @@ class GreedyScheduler { private: // Returns the ready node with the highest priority for a sim.
- Node* GetNodeWithHighestPriority(const std::vector<Node*>& nodes); + const Node* GetNodeWithHighestPriority(const std::vector<const Node*>& nodes); const DeviceSet* devices_; const CostModel* cost_model_; diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc index 4f3a6ec38c..1df45d9b89 100644 --- a/tensorflow/core/graph/costmodel.cc +++ b/tensorflow/core/graph/costmodel.cc @@ -427,7 +427,7 @@ static void AssignSizes(const Graph& g, CostModel* cost_model) { if (e->IsControlEdge()) { continue; } - Node* src = e->src(); + const Node* src = e->src(); // TODO(josh11b): Get an estimate from the Op Bytes size(1); diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 9b56216f1f..a7af5e2312 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -339,7 +339,7 @@ Node* Graph::AddNode(const NodeDef& node_def, Status* status) { return node; } -Node* Graph::CopyNode(Node* node) { +Node* Graph::CopyNode(const Node* node) { DCHECK(!node->IsSource()); DCHECK(!node->IsSink()); Node* copy = AllocateNode(node->props_, node); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 9d96cd4654..cbd58b051a 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -422,7 +422,7 @@ class Graph { // Copies *node, which may belong to another graph, to a new node, // which is returned. Does not copy any edges. *this owns the // returned instance. - Node* CopyNode(Node* node); + Node* CopyNode(const Node* node); // Removes a node from this graph, including all edges from or to it. // *node should not be accessed after calling this function.
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 0629ff32d0..627309078a 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -1271,7 +1271,7 @@ void CopyGraph(const Graph& src, Graph* dest) { dest->set_versions(src.versions()); // Copy the nodes - std::unordered_map<Node*, Node*> + std::unordered_map<const Node*, Node*> node_map; // "Node in src" -> "Node in *dest" node_map[src.source_node()] = dest->source_node(); node_map[src.sink_node()] = dest->sink_node(); diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index add80eda23..17a174101b 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -123,8 +123,8 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { return false; } - Node* src = edge->src(); - Node* dst = edge->dst(); + const Node* src = edge->src(); + const Node* dst = edge->dst(); if (src->assigned_device_name() == dst->assigned_device_name()) { int src_port = edge->src_output(); int dst_port = edge->dst_input(); @@ -141,7 +141,7 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { // Return true iff (dst, dst_input) is specified on host memory.
bool IsDstInputOnHost(const Edge* edge, const GraphInfo& info) { - Node* dst = edge->dst(); + const Node* dst = edge->dst(); int dst_port = edge->dst_input(); if (info.device_types[dst->id()] != DEVICE_CPU) { if (edge->IsControlEdge()) return false; diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc index 138952dcb3..114962c0e4 100644 --- a/tensorflow/core/graph/node_builder.cc +++ b/tensorflow/core/graph/node_builder.cc @@ -88,7 +88,7 @@ NodeBuilder& NodeBuilder::ControlInput(Node* src_node) { NodeBuilder& NodeBuilder::ControlInputs(gtl::ArraySlice src_nodes) { control_inputs_.insert(control_inputs_.end(), src_nodes.begin(), src_nodes.end()); - for (Node* src_node : src_nodes) { + for (const Node* src_node : src_nodes) { def_builder_.ControlInput(src_node->name()); } return *this; @@ -127,7 +127,7 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const { return Status::OK(); } -void NodeBuilder::AddIndexError(Node* node, int i) { +void NodeBuilder::AddIndexError(const Node* node, int i) { if (node == nullptr) { errors_.emplace_back( strings::StrCat("Attempt to add nullptr Node to node with type ", @@ -140,7 +140,7 @@ void NodeBuilder::AddIndexError(Node* node, int i) { } } -bool NodeBuilder::GetOutputType(Node* node, int i, DataType* dt) { +bool NodeBuilder::GetOutputType(const Node* node, int i, DataType* dt) { bool error; *dt = SafeGetOutput(node, i, &error); if (error) AddIndexError(node, i); diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h index 86647a49c1..f6b7b5674b 100644 --- a/tensorflow/core/graph/node_builder.h +++ b/tensorflow/core/graph/node_builder.h @@ -120,7 +120,7 @@ class NodeBuilder { const OpDef& op_def() const { return def_builder_.op_def(); } private: - static DataType SafeGetOutput(Node* node, int i, bool* error) { + static DataType SafeGetOutput(const Node* node, int i, bool* error) { if (node != nullptr && i >= 0 && i < node->num_outputs()) { 
*error = false; return node->output_type(i); @@ -131,11 +131,11 @@ class NodeBuilder { } // If SafeGetOutput indicates a range error, add it to errors_. - void AddIndexError(Node* node, int i); + void AddIndexError(const Node* node, int i); // Set *dt and returns true if i is in range. Combines // SafeGetOutput() and AddIndexError(). - bool GetOutputType(Node* node, int i, DataType* dt); + bool GetOutputType(const Node* node, int i, DataType* dt); NodeDefBuilder def_builder_; std::vector inputs_; diff --git a/tensorflow/core/graph/optimizer_cse.cc b/tensorflow/core/graph/optimizer_cse.cc index 6b452a1d5d..4073255db3 100644 --- a/tensorflow/core/graph/optimizer_cse.cc +++ b/tensorflow/core/graph/optimizer_cse.cc @@ -65,8 +65,8 @@ class OptimizerCSE { }; static void FillInputs(const Node* n, - gtl::InlinedVector* control_edges, - gtl::InlinedVector, 4>* in) { + gtl::InlinedVector* control_edges, + gtl::InlinedVector, 4>* in) { DCHECK_EQ(in->size(), n->num_inputs()); control_edges->clear(); for (const Edge* e : n->in_edges()) { @@ -96,8 +96,8 @@ size_t OptimizerCSE::NodeHash(const Node* n) { const int N_in = n->num_inputs(); strings::StrAppend(&str_to_hash, N_in); - gtl::InlinedVector control_edges; - gtl::InlinedVector, 4> in(N_in); + gtl::InlinedVector control_edges; + gtl::InlinedVector, 4> in(N_in); FillInputs(n, &control_edges, &in); for (const auto& edge : in) { strings::StrAppend(&str_to_hash, edge.first->id(), edge.second); @@ -147,10 +147,10 @@ bool OptimizerCSE::Equivalent(const Node* a, const Node* b, // Compare input sources if (a->num_inputs() != b->num_inputs()) return false; const int N_in = a->num_inputs(); - gtl::InlinedVector a_control_edges; - gtl::InlinedVector b_control_edges; - gtl::InlinedVector, 4> a_in(N_in); - gtl::InlinedVector, 4> b_in(N_in); + gtl::InlinedVector a_control_edges; + gtl::InlinedVector b_control_edges; + gtl::InlinedVector, 4> a_in(N_in); + gtl::InlinedVector, 4> b_in(N_in); FillInputs(a, &a_control_edges, &a_in); 
FillInputs(b, &b_control_edges, &b_in); if (a_in != b_in) return false; -- GitLab From e5b73fc9a8df0d87cb964ed49e946d2477c73e19 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 26 Feb 2018 11:22:43 -0800 Subject: [PATCH 085/884] TFLite: Ensures pointers to tensors won't be invalidated unless 16+ tensors are added. PiperOrigin-RevId: 187052100 --- tensorflow/contrib/lite/interpreter.cc | 13 +++---- tensorflow/contrib/lite/interpreter.h | 20 +++++++++++ tensorflow/contrib/lite/interpreter_test.cc | 40 +++++++++++++++++++++ 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 370e495527..0f5e17f0de 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -27,13 +27,6 @@ limitations under the License. #include "tensorflow/contrib/lite/nnapi_delegate.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" -namespace { - -// std::vector preallocation tuning. -constexpr const int kSlotsToReserve = 128; - -} // namespace - namespace tflite { // A trivial implementation of GraphInfo around the Interpreter. @@ -85,8 +78,8 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) context_.GetExecutionPlan = nullptr; // Reserve some space for the tensors to avoid excessive resizing. 
- tensors_.reserve(kSlotsToReserve); - nodes_and_registration_.reserve(kSlotsToReserve); + tensors_.reserve(kTensorsReservedCapacity); + nodes_and_registration_.reserve(kTensorsReservedCapacity); next_execution_plan_index_to_prepare_ = 0; UseNNAPI(false); } @@ -353,6 +346,7 @@ TfLiteStatus Interpreter::PrepareOpsStartingAt( TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpPrepare(registration, &node) == kTfLiteError) { return kTfLiteError; } @@ -430,6 +424,7 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpInvoke(registration, &node) == kTfLiteError) { status = kTfLiteError; } diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index a9df2627e0..04c19644a0 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -265,6 +265,14 @@ class Interpreter { void set_model(const Model* model) { model_ = const_cast(model); } Model* model() const { return model_; } + // The default capacity of `tensors_` vector. + static constexpr int kTensorsReservedCapacity = 128; + // The capacity headroom of `tensors_` vector before calling ops' + // `prepare` and `invoke` function. In these functions, it's guaranteed + // allocating up to `kTensorsCapacityHeadroom` more tensors won't invalidate + // pointers to existing tensors. + static constexpr int kTensorsCapacityHeadroom = 16; + private: // Give 'op_reg' a chance to initialize itself using the contents of // 'buffer'. @@ -377,6 +385,18 @@ class Interpreter { static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context, TfLiteIntArray** execution_plan); + // Ensures that `tensors_` has at least `kTensorsCapacityHeadroom` extra + // capacity. 
Calling this function may invalidate existing pointers to + // tensors. After calling this function, adding `kTensorsCapacityHeadroom` + // more tensors won't invalidate the pointer to existing tensors. + void EnsureTensorsVectorCapacity() { + const int required_capacity = tensors_size() + kTensorsCapacityHeadroom; + if (required_capacity > tensors_.capacity()) { + tensors_.reserve(required_capacity); + context_.tensors = tensors_.data(); + } + } + // A pure C data structure used to communicate with the pure C plugin // interface. To avoid copying tensor metadata, this is also the definitive // structure to store tensors. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 28c96e5dde..2e6727b323 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -561,6 +561,46 @@ TEST(BasicInterpreter, TestCustomErrorReporter) { ASSERT_EQ(reporter.calls, 1); } +TEST(InterpreterTensorsCapacityTest, TestWithinHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom, + &new_tensor_index); + EXPECT_EQ(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + &registration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + +TEST(InterpreterTensorsCapacityTest, TestExceedHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context,
TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom + 1, + &new_tensor_index); + EXPECT_NE(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + &registration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + // Test fixture that allows playing with execution plans. It creates a two // node graph that can be executed in either [0,1] order or [1,0] order. // The CopyOp records when it is invoked in the class member run_order_ -- GitLab From c6b6af31e11cfb115c26c76277ea71b13fa0e326 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 14:22:53 -0800 Subject: [PATCH 086/884] * CUB updated to 1.8.0 * updated ShuffleIndex because of API change PiperOrigin-RevId: 186822637 --- tensorflow/core/kernels/reduction_gpu_kernels.cu.h | 4 ++-- tensorflow/workspace.bzl | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 15ae4c1fc5..9237fa51d8 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -280,8 +280,8 @@ __global__ void ColumnReduceMax16ColumnsKernel( const int rows_in_this_warp = min(rows_per_warp, num_rows - start_row_warp); // not the most efficient way to do this sum for (int i = 1; i < rows_in_this_warp; ++i) { - value_type tmp = - cub::ShuffleIndex(sum, threadIdx.x + i * num_cols, 32, 0xffffffff); + value_type tmp = cub::ShuffleIndex<32, value_type>( + sum, static_cast<int>(threadIdx.x + i * num_cols), 0xffffffff); if (lane < num_cols) sum = op(sum, tmp); } diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index b6bba78401..70cb65f3e7 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -664,11
+664,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "cub_archive", urls = [ - "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip", - "https://github.com/NVlabs/cub/archive/1.7.4.zip", + "https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip", + "https://github.com/NVlabs/cub/archive/1.8.0.zip", ], - sha256 = "20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31", - strip_prefix = "cub-1.7.4", + sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", + strip_prefix = "cub-1.8.0", build_file = str(Label("//third_party:cub.BUILD")), ) -- GitLab From e4b7f8d2a231e712f203b29055fe3fd0f8be502c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 23 Feb 2018 15:43:09 -0800 Subject: [PATCH 087/884] Add test for bug in CUB that caused dynamic partition to fail on the GPU. PiperOrigin-RevId: 186834668 --- .../python/kernel_tests/dynamic_partition_op_test.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py index fedbf9e696..5e8937ad2c 100644 --- a/tensorflow/python/kernel_tests/dynamic_partition_op_test.py +++ b/tensorflow/python/kernel_tests/dynamic_partition_op_test.py @@ -326,6 +326,18 @@ class DynamicPartitionTest(test.TestCase): with self.assertRaises(ValueError): data_flow_ops.dynamic_partition(data, indices, num_partitions=4) + # see https://github.com/tensorflow/tensorflow/issues/17106 + def testCUBBug(self): + x = constant_op.constant(np.random.randn(3072)) + inds = [0]*189 + [1]*184 + [2]*184 + [3]*191 + [4]*192 + [5]*195 + [6]*195 + inds += [7]*195 + [8]*188 + [9]*195 + [10]*188 + [11]*202 + [12]*194 + inds += [13]*194 + [14]*194 + [15]*192 + self.assertEqual(len(inds), x.shape[0]) + partitioned = data_flow_ops.dynamic_partition(x, inds, 16) + with self.test_session() as sess: + res = sess.run(partitioned) + 
self.assertEqual(res[-1].shape[0], 192) + if __name__ == "__main__": test.main() -- GitLab From 0f8ee19ef830fc7d28ae611194bcd66f4383b038 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 11:43:14 -0800 Subject: [PATCH 088/884] Actually expose smart_cond and smart_constant_value in tf.contrib.framework Also moves these methods into their own file in python/framework. This avoids further bloating control_flow_ops.py and makes the BUILD deps easier for a future change I'm working on. PiperOrigin-RevId: 187055501 --- tensorflow/contrib/framework/BUILD | 1 + tensorflow/contrib/framework/__init__.py | 7 +- tensorflow/python/BUILD | 26 ++++++ tensorflow/python/framework/smart_cond.py | 79 +++++++++++++++++++ .../python/framework/smart_cond_test.py | 66 ++++++++++++++++ tensorflow/python/layers/utils.py | 5 +- tensorflow/python/ops/control_flow_ops.py | 56 ------------- .../python/ops/control_flow_ops_test.py | 36 --------- 8 files changed, 180 insertions(+), 96 deletions(-) create mode 100644 tensorflow/python/framework/smart_cond.py create mode 100644 tensorflow/python/framework/smart_cond_test.py diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index 1accb319d2..50868c6d6c 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -63,6 +63,7 @@ tf_custom_op_py_library( "//tensorflow/python:platform", "//tensorflow/python:pywrap_tensorflow", "//tensorflow/python:script_ops", + "//tensorflow/python:smart_cond", "//tensorflow/python:sparse_tensor", "//tensorflow/python:state_ops", "//tensorflow/python:state_ops_gen", diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index deeb5bec79..8063250091 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -87,6 +87,9 @@ See the @{$python/contrib.framework} guide. 
@@get_placeholders +@@smart_cond +@@smart_constant_value + @@CriticalSection @@BoundedTensorSpec @@ -104,10 +107,10 @@ from tensorflow.contrib.framework.python.ops import * from tensorflow.python.framework.ops import prepend_name_scope from tensorflow.python.framework.ops import strip_name_scope +from tensorflow.python.framework.smart_cond import smart_cond +from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.control_flow_ops import smart_cond -from tensorflow.python.ops.control_flow_ops import smart_constant_value from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['nest'] diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4c8c73548c..b0cb48c80c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -765,6 +765,31 @@ py_library( ], ) +py_library( + name = "smart_cond", + srcs = ["framework/smart_cond.py"], + srcs_version = "PY2AND3", + deps = [ + ":control_flow_ops", + ":tensor_util", + ], +) + +py_test( + name = "smart_cond_test", + size = "small", + srcs = ["framework/smart_cond_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":client_testlib", + ":constant_op", + ":framework_ops", + ":math_ops", + ":session", + ":smart_cond", + ], +) + py_library( name = "sparse_tensor", srcs = ["framework/sparse_tensor.py"], @@ -4091,6 +4116,7 @@ py_library( ":control_flow_ops", ":framework_for_generated_wrappers", ":platform", + ":smart_cond", ":tensor_util", ":util", ":variable_scope", diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py new file mode 100644 index 0000000000..f97bb01f54 --- /dev/null +++ b/tensorflow/python/framework/smart_cond.py @@ -0,0 +1,79 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""smart_cond and related utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import control_flow_ops + + +def smart_cond(pred, true_fn=None, false_fn=None, name=None): + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Arguments: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable.
+ """ + if not callable(true_fn): + raise TypeError("`true_fn` must be callable.") + if not callable(false_fn): + raise TypeError("`false_fn` must be callable.") + + pred_value = smart_constant_value(pred) + if pred_value is not None: + if pred_value: + return true_fn() + else: + return false_fn() + else: + return control_flow_ops.cond(pred, true_fn=true_fn, false_fn=false_fn, + name=name) + + +def smart_constant_value(pred): + """Return the bool value for `pred`, or None if `pred` had a dynamic value. + + Arguments: + pred: A scalar, either a Python bool or tensor. + + Returns: + True or False if `pred` has a constant boolean value, None otherwise. + + Raises: + TypeError: If `pred` is not a Tensor or bool. + """ + if isinstance(pred, bool): + pred_value = pred + elif isinstance(pred, ops.Tensor): + pred_value = tensor_util.constant_value(pred) + else: + raise TypeError("`pred` must be a Tensor or a Python bool.") + return pred_value diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py new file mode 100644 index 0000000000..b682506da0 --- /dev/null +++ b/tensorflow/python/framework/smart_cond_test.py @@ -0,0 +1,66 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond +from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import googletest + + +@test_util.with_c_api +class SmartCondTest(test_util.TensorFlowTestCase): + + def testSmartCondTrue(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(2) + y = constant_op.constant(5) + z = smart_cond.smart_cond(True, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 5)) + self.assertEqual(z.eval(), 32) + + def testSmartCondFalse(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(4) + y = constant_op.constant(3) + z = smart_cond.smart_cond(False, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 3)) + self.assertEqual(z.eval(), 9) + + def testSmartCondMissingArg1(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, false_fn=lambda: x) + + def testSmartCondMissingArg2(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, lambda: x) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py index 484c6fc466..3b156c36a2 100644 --- a/tensorflow/python/layers/utils.py +++ b/tensorflow/python/layers/utils.py @@ -24,6 +24,7 @@ from tensorflow.python.eager import context from tensorflow.python.ops import variables from 
tensorflow.python.ops import control_flow_ops from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond as smart_module from tensorflow.python.framework import tensor_util from tensorflow.python.util import nest @@ -201,7 +202,7 @@ def smart_cond(pred, true_fn=None, false_fn=None, name=None): if isinstance(pred, variables.Variable): return control_flow_ops.cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) - return control_flow_ops.smart_cond( + return smart_module.smart_cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) @@ -228,7 +229,7 @@ def constant_value(pred): if isinstance(pred, variables.Variable): return None - return control_flow_ops.smart_constant_value(pred) + return smart_module.smart_constant_value(pred) def object_list_uid(object_list): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index c78a5aa8c2..8d5ab72670 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -23,7 +23,6 @@ See the @{$python/control_flow_ops} guide. @@no_op @@count_up_to @@cond -@@smart_cond @@case @@while_loop @@logical_and @@ -2130,61 +2129,6 @@ def cond(pred, # pylint: enable=redefined-outer-name -def smart_cond(pred, true_fn=None, false_fn=None, name=None): - """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. - - If `pred` is a bool or has a constant value, we return either `true_fn()` - or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. - - Arguments: - pred: A scalar determining whether to return the result of `true_fn` or - `false_fn`. - true_fn: The callable to be performed if pred is true. - false_fn: The callable to be performed if pred is false. - name: Optional name prefix when using `tf.cond`. - - Returns: - Tensors returned by the call to either `true_fn` or `false_fn`. - - Raises: - TypeError: If `true_fn` or `false_fn` is not callable. 
- """ - if not callable(true_fn): - raise TypeError("`true_fn` must be callable.") - if not callable(false_fn): - raise TypeError("`false_fn` must be callable.") - - pred_value = smart_constant_value(pred) - if pred_value is not None: - if pred_value: - return true_fn() - else: - return false_fn() - else: - return cond(pred, true_fn=true_fn, false_fn=false_fn, name=name) - - -def smart_constant_value(pred): - """Return the bool value for `pred`, or None if `pred` had a dynamic value. - - Arguments: - pred: A scalar, either a Python bool or tensor. - - Returns: - True or False if `pred` has a constant boolean value, None otherwise. - - Raises: - TypeError: If `pred` is not a Tensor or bool. - """ - if isinstance(pred, bool): - pred_value = pred - elif isinstance(pred, ops.Tensor): - pred_value = tensor_util.constant_value(pred) - else: - raise TypeError("`pred` must be a Tensor or a Python bool.") - return pred_value - - def _resource_safe_shape(t): """Returns the shape of t or the variable it points to.""" if t.dtype == dtypes.resource: diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py index adc8c51e11..f22f3059d1 100644 --- a/tensorflow/python/ops/control_flow_ops_test.py +++ b/tensorflow/python/ops/control_flow_ops_test.py @@ -349,42 +349,6 @@ class SwitchTestCase(test_util.TensorFlowTestCase): self.assertEquals(grad_x_false.eval(), 0.) 
-@test_util.with_c_api -class SmartCondTest(test_util.TensorFlowTestCase): - - def testSmartCondTrue(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(2) - y = constant_op.constant(5) - z = control_flow_ops.smart_cond(True, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 5)) - self.assertEqual(z.eval(), 32) - - def testSmartCondFalse(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(4) - y = constant_op.constant(3) - z = control_flow_ops.smart_cond(False, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 3)) - self.assertEqual(z.eval(), 9) - - def testSmartCondMissingArg1(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, false_fn=lambda: x) - - def testSmartCondMissingArg2(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, lambda: x) - - @test_util.with_c_api class CondTest(test_util.TensorFlowTestCase): -- GitLab From 72eef4b7cf49956a3c675c6dc9d0488176a224cb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:50:49 -0800 Subject: [PATCH 089/884] Add the internal module name prefix to the white list. PiperOrigin-RevId: 187056701 --- tensorflow/contrib/py2tf/impl/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/impl/config.py b/tensorflow/contrib/py2tf/impl/config.py index c90e85c96b..bdbc6663dd 100644 --- a/tensorflow/contrib/py2tf/impl/config.py +++ b/tensorflow/contrib/py2tf/impl/config.py @@ -31,12 +31,16 @@ PYTHON_LITERALS = { DEFAULT_UNCOMPILED_MODULES = set(( ('tensorflow',), (utils.__name__,), + + # All of tensorflow's subpackages. Unlike the root tf module, they don't + # have well-known names. 
Not referring to the module directly to avoid + # circular imports. + (utils.__name__[:-len('.contrib.py2tf.utils')],), )) NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) # TODO(mdan): Also allow controlling the generated names (for testability). -# TODO(mdan): Make sure copybara renames the reference below. COMPILED_IMPORT_STATEMENTS = ( 'from __future__ import print_function', 'import tensorflow as tf', -- GitLab From fd1a54b00b265a09d7026c05c074af6b8839e593 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 26 Feb 2018 11:52:26 -0800 Subject: [PATCH 090/884] Internal change. PiperOrigin-RevId: 187056963 --- tensorflow/tools/api/tests/api_compatibility_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index c1e09cc531..2a784973e1 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -165,7 +165,7 @@ class ApiCompatibilityTest(test.TestCase): logging.error('%d differences found between API and golden.', diff_count) messages = verbose_diffs if verbose else diffs for i in range(diff_count): - logging.error('Issue %d\t: %s', i + 1, messages[i]) + print('Issue %d\t: %s' % (i + 1, messages[i]), file=sys.stderr) if update_goldens: # Write files if requested. -- GitLab From 16dbf4b8b08a587329900c71da5cb1bcab075b19 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 11:57:30 -0800 Subject: [PATCH 091/884] Use optimized ops to handle GPU memory swapping: this avoids the need for 2 pairs of extra _send/_recv nodes which speeds things up a bit. This also ensures that performance doesn't depend on the recv scheduling built in TF, which isn't always optimal.
PiperOrigin-RevId: 187057831 --- tensorflow/core/grappler/optimizers/BUILD | 36 +++++++- .../optimizers/gpu_swapping_kernels.cc | 88 +++++++++++++++++++ .../grappler/optimizers/gpu_swapping_ops.cc | 58 ++++++++++++ .../grappler/optimizers/memory_optimizer.cc | 9 +- .../optimizers/memory_optimizer_test.cc | 65 +++++++++++--- tensorflow/core/grappler/utils/BUILD | 1 + .../core/grappler/utils/grappler_test.cc | 17 ++++ .../core/grappler/utils/grappler_test.h | 3 + 8 files changed, 258 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 50ba48ea7a..908e58bcc7 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -1,6 +1,8 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") filegroup( name = "all_files", @@ -282,18 +284,48 @@ tf_cc_test( ], ) +tf_kernel_library( + name = "gpu_swapping_kernels", + srcs = [ + "gpu_swapping_kernels.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "gpu_swapping_ops", + srcs = [ + "gpu_swapping_ops.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + cc_library( name = "memory_optimizer", - srcs = ["memory_optimizer.cc"], + srcs = [ + "memory_optimizer.cc", + ], hdrs = [ "memory_optimizer.h", ], visibility = ["//visibility:public"], deps = [ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", + "//tensorflow/core:lib", 
"//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -307,7 +339,7 @@ cc_library( ], ) -tf_cc_test( +tf_cc_test_gpu( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], deps = [ diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc new file mode 100644 index 0000000000..1820af6844 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc @@ -0,0 +1,88 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Op kernels used to swap data in and out of GPU memory. 
+ +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +class CopyFromGpuToHostKernel : public AsyncOpKernel { + public: + explicit CopyFromGpuToHostKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, !ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromGpuToHost kernel " + "must reside on the device."), + done); + + AllocatorAttributes alloc_attrs; + alloc_attrs.set_gpu_compatible(true); + alloc_attrs.set_on_host(true); + Tensor* output; + OP_REQUIRES_OK_ASYNC( + ctx, ctx->allocate_output(0, input.shape(), &output, alloc_attrs), + done); + + ctx->op_device_context()->CopyDeviceTensorToCPU( + &input, "CopyFromGpuToHost", static_cast(ctx->device()), + output, [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( + Name("_CopyFromGpuToHost").Device(DEVICE_GPU).HostMemory("output"), + CopyFromGpuToHostKernel); + +class CopyFromHostToGpuKernel : public AsyncOpKernel { + public: + explicit CopyFromHostToGpuKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromHostToGpu kernel " + "must reside on the host."), + done); + + Tensor* output; + OP_REQUIRES_OK_ASYNC(ctx, ctx->allocate_output(0, input.shape(), &output), + done); + + ctx->op_device_context()->CopyCPUTensorToDevice( + &input, static_cast(ctx->device()), output, + [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( + 
Name("_CopyFromHostToGpu").Device(DEVICE_GPU).HostMemory("input"), + CopyFromHostToGpuKernel); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc new file mode 100644 index 0000000000..46828346da --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Definition for the ops used to swap data in and out of GPU memory. + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +// The _CopyFromGpuToHost op copies its input tensor to the host. The input must +// reside on GPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromGpuToHost") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from gpu to the host."); + +// The _CopyFromHostToGpu op copies its input tensor from the host to the GPU. 
+// The input must reside on CPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromHostToGpu") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from the host to the GPU."); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index dec4f04a1c..694139fa50 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -720,18 +720,19 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap, // Force the tensor to be copied to cpu. NodeDef* swap_out_node = graph->add_node(); swap_out_node->set_name(swap_out_name); - swap_out_node->set_op("Identity"); - swap_out_node->set_device("/device:CPU:0"); + swap_out_node->set_op("_CopyFromGpuToHost"); // Force the tensor to be restored to the device. NodeDef* swap_in_node = graph->add_node(); swap_in_node->set_name(swap_in_name); - swap_in_node->set_op("Identity"); + swap_in_node->set_op("_CopyFromHostToGpu"); *swap_in_node->add_input() = swap_out_node->name(); - // Colocate the swap_in_ node with the node itself. + // Colocate the swap_out_ and swap_in_ nodes with the node itself. 
+ swap_out_node->set_device(node->device()); swap_in_node->set_device(node->device()); string coloc_group = strings::StrCat("loc@", tensor_to_swap); + (*swap_out_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index 5d7913e0c0..9595936e9e 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -221,16 +221,20 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { // Build a simple graph with an op that's marked for swapping. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = ops::Variable(s.WithOpName("a"), {10, 10}, DT_FLOAT); - Output b = ops::AddN(s.WithOpName("b"), {a}); - Output c = ops::AddN(s.WithOpName("c"), {b}); - Output d = ops::AddN(s.WithOpName("d"), {c}); - Output e = ops::AddN(s.WithOpName("e"), {b, d}); + Output a = + ops::Variable(s.WithOpName("a").WithDevice("/gpu:0"), {10, 10}, DT_FLOAT); + Output b = ops::AddN(s.WithOpName("b").WithDevice("/gpu:0"), {a}); + Output c = ops::AddN(s.WithOpName("c").WithDevice("/gpu:0"), {b}); + Output d = ops::AddN(s.WithOpName("d").WithDevice("/gpu:0"), {c}); + Output e = ops::AddN(s.WithOpName("e").WithDevice("/gpu:0"), {b, d}); + + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {10, 10}); + Output init = ops::Assign(s.WithOpName("init"), a, constant); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - EXPECT_EQ(5, item.graph.node_size()); + EXPECT_EQ(7, item.graph.node_size()); EXPECT_EQ(NodeName(e.name()), item.graph.node(4).name()); AttrValue& val = (*item.graph.mutable_node(4)->mutable_attr())["_swap_to_host"]; @@ -243,32 +247,43 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { Status 
status = optimizer.Optimize(cluster.get(), item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(7, output.node_size()); - const NodeDef& new_e = output.node(4); + EXPECT_EQ(9, output.node_size()); + const NodeDef& new_e = output.node(6); EXPECT_EQ(NodeName(e.name()), new_e.name()); EXPECT_EQ(2, new_e.input_size()); EXPECT_EQ(NodeName(d.name()), new_e.input(1)); EXPECT_EQ("swap_in_e_0", new_e.input(0)); - const NodeDef& swap_out = output.node(5); + const NodeDef& swap_out = output.node(7); EXPECT_EQ("swap_out_e_0", swap_out.name()); + EXPECT_EQ("_CopyFromGpuToHost", swap_out.op()); - const NodeDef& swap_in = output.node(6); + const NodeDef& swap_in = output.node(8); EXPECT_EQ("swap_in_e_0", swap_in.name()); + EXPECT_EQ("_CopyFromHostToGpu", swap_in.op()); EXPECT_EQ(NodeName(b.name()), swap_out.input(0)); EXPECT_EQ(NodeName(swap_out.name()), swap_in.input(0)); EXPECT_EQ("^c", swap_in.input(1)); - const NodeDef& new_c = output.node(2); + const NodeDef& new_c = output.node(4); EXPECT_EQ(NodeName(c.name()), new_c.name()); EXPECT_EQ("^swap_out_e_0", new_c.input(1)); // Run the optimizer a second time to ensure it's idempotent. 
- item.graph.Swap(&output); - status = optimizer.Optimize(cluster.get(), item, &output); + GrapplerItem item_copy(item, std::move(output)); + status = optimizer.Optimize(cluster.get(), item_copy, &output); TF_EXPECT_OK(status); + +#if GOOGLE_CUDA + item.fetch = {"e"}; + item.init_ops = {init.name()}; + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, SwappingHeuristics) { @@ -287,9 +302,13 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { Output h = ops::Exp(s.WithOpName("h").WithDevice("/gpu:0"), c); Output i = ops::Log(s.WithOpName("i").WithDevice("/gpu:0"), d); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e", "f", "g", "h", "i"}; + item.init_ops = {init.name()}; std::unique_ptr cluster(CreateVirtualCluster()); @@ -308,6 +327,15 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { EXPECT_EQ("axis", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + for (int i = 0; i < item.fetch.size(); ++i) { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } +#endif } TEST_F(MemoryOptimizerTest, UnswappableInputs) { @@ -325,9 +353,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { Output e = ops::Concat(s.WithOpName("e").WithDevice("/gpu:0"), {b, c, d}, axis); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e"}; + item.init_ops = {init.name()}; std::unique_ptr 
cluster(CreateVirtualCluster()); @@ -344,6 +376,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { EXPECT_EQ("^swap_out_d_2", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, AccumulationRewrites) { diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 0a9dbe22cf..5d32609434 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -142,6 +142,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", + "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", ], ) diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index fed46c05fb..fef8e97b6e 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -35,6 +35,23 @@ std::vector GrapplerTest::EvaluateNodes( return output_tensors; } +std::vector GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(item.graph)); + RunOptions run_options; + if (!item.init_ops.empty()) { + std::vector dummy; + TF_CHECK_OK( + session->Run(run_options, {}, {}, item.init_ops, &dummy, nullptr)); + } + std::vector output_tensors; + TF_CHECK_OK( + session->Run(run_options, {}, item.fetch, {}, &output_tensors, nullptr)); + TF_CHECK_OK(session->Close()); + return output_tensors; +} + void GrapplerTest::AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph) { auto* node = graph->add_node(); diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 
042b616aa4..fd6809b6e2 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -30,6 +31,8 @@ class GrapplerTest : public ::testing::Test { std::vector EvaluateNodes(const GraphDef& graph, const std::vector& node_names); + std::vector EvaluateFetchNodes(const GrapplerItem& item); + void AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph); -- GitLab From 63d4c46a613c4d0e44d966c040bdfbbd0b16d13d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 12:10:01 -0800 Subject: [PATCH 092/884] Fix bug calling gradients_function inside custom_gradient PiperOrigin-RevId: 187059871 --- tensorflow/python/eager/backprop_test.py | 13 +++++++++++++ tensorflow/python/eager/custom_gradient.py | 9 ++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 734558dee2..48fd170764 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -115,6 +115,19 @@ class BackpropTest(test.TestCase): with self.assertRaises(RuntimeError): backprop.gradients_function(f)(constant_op.constant(1.0)) + def testGradientsFunctionInCustomGradient(self): + + @custom_gradient.custom_gradient + def f(x): + (y,) = backprop.gradients_function(lambda x: x * x)(x) + + def grad(dy): + return [2 * dy] + + return y, grad + + self.assertAllEqual(f(1.0), 2.0) + def testImplicitGradOverEmbeddingLookup(self): batch_size = 8 embedding_size = 512 diff --git a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py index 05460ff996..fb932a9372 100644 --- 
a/tensorflow/python/eager/custom_gradient.py +++ b/tensorflow/python/eager/custom_gradient.py @@ -71,11 +71,10 @@ def custom_gradient(f): input_tensors = [tf_ops.convert_to_tensor(x) for x in args] - with tape.stop_recording(): - result, grad_fn = f(*args, **kwargs) - flat_result = nest.flatten(result) - # TODO(apassos) consider removing the identity below. - flat_result = [gen_array_ops.identity(x) for x in flat_result] + result, grad_fn = f(*args, **kwargs) + flat_result = nest.flatten(result) + # TODO(apassos) consider removing the identity below. + flat_result = [gen_array_ops.identity(x) for x in flat_result] def actual_grad_fn(*outputs): return nest.flatten(grad_fn(*outputs)) -- GitLab From 1120deaf0bf5a51db5351c12b548994b35ba71c8 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Mon, 26 Feb 2018 12:23:36 -0800 Subject: [PATCH 093/884] Internal change. PiperOrigin-RevId: 187061863 --- tensorflow/contrib/bayesflow/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 08b29fb6bc..270c309ec3 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -210,7 +210,7 @@ cuda_py_test( cuda_py_test( name = "hmc_test", - size = "medium", + size = "large", srcs = ["python/kernel_tests/hmc_test.py"], additional_deps = [ ":bayesflow_py", -- GitLab From da492741630f62bfd4f8475fa532ef216f0d2bfd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 12:33:17 -0800 Subject: [PATCH 094/884] Maintain a cache of output dtypes of ops in TFE_Context. 
PiperOrigin-RevId: 187062992 --- tensorflow/c/eager/c_api.cc | 20 ++++++++++++++++++++ tensorflow/c/eager/runtime.cc | 15 ++++++++++++--- tensorflow/c/eager/runtime.h | 6 ++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index c27a7129fa..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/rendezvous.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" @@ -823,6 +824,25 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, delete kernel; return; } + // Update output_dtypes inside `kernel`. + const tensorflow::OpDef* op_def = nullptr; + const tensorflow::FunctionDef* function_def = + ctx->func_lib_def.Find(ndef.op()); + if (function_def != nullptr) { + op_def = &(function_def->signature()); + } + if (op_def == nullptr) { + status->status = OpDefForOp(ndef.op().c_str(), &op_def); + if (!status->status.ok()) { + return; + } + } + tensorflow::DataTypeVector input_dtypes; + status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes, + kernel->output_dtypes()); + if (!status->status.ok()) { + return; + } tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index f77a937f1f..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -41,17 +41,26 @@ const uint32 kIsList = 1U << 31; } // namespace +Status OpDefForOp(const char* op_name, const OpDef** op_def) { + const OpRegistrationData* op_reg_data = nullptr; + 
Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + if (s.ok()) { + *op_def = &op_reg_data->op_def; + } + return s; +} + Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) { mutex_lock l(g_op_name_to_attr_type_map_lock); *out = gtl::FindPtrOrNull(*OpNameToAttrTypeMap(), op_name); if (*out != nullptr) return Status::OK(); - const OpRegistrationData* op_reg_data = nullptr; - Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + const OpDef* op_def = nullptr; + Status s = OpDefForOp(op_name, &op_def); if (!s.ok()) return s; std::unique_ptr m(new AttrTypeMap); // TODO(agarwal): Avoid having to create this "registry" at runtime, // perhaps can be done at op registration time? - for (const auto& attr : op_reg_data->op_def.attr()) { + for (const auto& attr : op_def->attr()) { string type = attr.type(); const bool is_list = (type.length() > 6 && type.compare(0, 4, "list") == 0); if (is_list) { diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 4d20b5244a..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -39,6 +39,9 @@ namespace tensorflow { // represent the TF_AttrType type of the values in the list. typedef std::unordered_map AttrTypeMap; +// Look up OpDef for `op_name`. +Status OpDefForOp(const char* op_name, const OpDef** op_def); + // Returns the AttrTypeMap for the TensorFlow operation named op_name. 
Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out); @@ -180,12 +183,15 @@ class KernelAndDevice { const OpKernel* kernel() const { return kernel_.get(); } + DataTypeVector* output_dtypes() { return &output_dtypes_; } + private: std::unique_ptr kernel_; Device* device_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; + DataTypeVector output_dtypes_; }; } // namespace tensorflow -- GitLab From c7ea6ace71ed503a316cc5eb3dd087c5e7709725 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Mon, 26 Feb 2018 13:06:59 -0800 Subject: [PATCH 095/884] Include c_api_experimental in libtensorflow.so's dependencies. PiperOrigin-RevId: 187068103 --- tensorflow/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 2e71783b0d..a4e7602bea 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -779,6 +779,7 @@ tf_cc_shared_object( }), deps = [ "//tensorflow/c:c_api", + "//tensorflow/c:c_api_experimental", "//tensorflow/c:exported_symbols.lds", "//tensorflow/c:version_script.lds", "//tensorflow/c/eager:c_api", -- GitLab From acb1ef68f5aea3b6f7f1e14db588b74134719b5e Mon Sep 17 00:00:00 2001 From: Daniel Trebbien Date: Mon, 26 Feb 2018 13:42:07 -0800 Subject: [PATCH 096/884] Add missing `override' (#17098) This fixes a warning produced by clang: ./tensorflow/core/common_runtime/gpu/gpu_device.h:70:10: warning: 'FillContextMap' overrides a member function but is not marked 'override' [-Winconsistent-missing-override] Status FillContextMap(const Graph* graph, ^ ./tensorflow/core/common_runtime/device.h:124:18: note: overridden virtual function is here virtual Status FillContextMap(const Graph* graph, --- tensorflow/core/common_runtime/gpu/gpu_device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.h b/tensorflow/core/common_runtime/gpu/gpu_device.h index c88daa8ff8..d817c7dd1f 100644 --- 
a/tensorflow/core/common_runtime/gpu/gpu_device.h +++ b/tensorflow/core/common_runtime/gpu/gpu_device.h @@ -68,7 +68,7 @@ class BaseGPUDevice : public LocalDevice { const TensorReferenceVector& tensor_refs) override; Status FillContextMap(const Graph* graph, - DeviceContextMap* device_context_map); + DeviceContextMap* device_context_map) override; void Compute(OpKernel* op_kernel, OpKernelContext* context) override; -- GitLab From ba2cc572f99b09ddd6a60e0557059cb1da51b356 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 26 Feb 2018 13:54:02 -0800 Subject: [PATCH 097/884] Update eager uniform replay buffer microbenchmarks to compare against graph functions when possible. PiperOrigin-RevId: 187075418 --- .../contrib/framework/python/ops/critical_section_ops.py | 6 ++++-- tensorflow/python/framework/ops.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index 3c5c55ed65..ab603cc18e 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -143,7 +143,7 @@ class CriticalSection(object): def _init_from_args(self, name, shared_name): # pylint: disable=invalid-name """Initialize the CriticalSection from constructor arguments.""" with ops.name_scope(name, "CriticalSection", []) as name: - with ops.control_dependencies(None): + with ops.init_scope(): # pylint: disable=protected-access container = ops.get_default_graph()._container # pylint: enable=protected-access @@ -226,7 +226,9 @@ class CriticalSection(object): # mode. This is generally ok; since eager mode (as of # writing) executes sequentially anyway. 
for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): - if sg.handle.name == self._handle.name: + sg_handle_name = ops.convert_to_tensor(sg.handle).name + self_handle_name = ops.convert_to_tensor(self._handle).name + if sg_handle_name == self_handle_name: # Other executions in the same critical section are allowed. continue if not (exclusive_resource_access or sg.exclusive_resource_access): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 5a14ea4176..b0d2704c07 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4805,7 +4805,14 @@ def container(container_name): @tf_export("colocate_with") def colocate_with(op, ignore_existing=False): if context.in_graph_mode(): - return get_default_graph().colocate_with(op, ignore_existing) + default_graph = get_default_graph() + if isinstance(op, EagerTensor): + if default_graph.building_function: + op = internal_convert_to_tensor(op) + else: + raise ValueError("Encountered an Eager-defined Tensor during graph " + "construction, but a function was not being built.") + return default_graph.colocate_with(op, ignore_existing) else: if op is not None: return device(op.device) -- GitLab From 7765066e6a686c7d6b1bed44248fafaa859db4eb Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 26 Feb 2018 14:00:07 -0800 Subject: [PATCH 098/884] TFTS: Switch to using core feature columns This fixes some shape issues that came up when using the tf.contrib.layers parsing functions. Adds a string -> embedding column API example to the LSTM example. 
PiperOrigin-RevId: 187076400 --- .../examples/data/multivariate_periods.csv | 200 +++++++++--------- .../timeseries/examples/known_anomaly.py | 8 +- .../contrib/timeseries/examples/lstm.py | 26 ++- .../python/timeseries/estimators.py | 53 +++-- .../timeseries/python/timeseries/model.py | 38 ++-- .../state_space_models/state_space_model.py | 10 +- 6 files changed, 177 insertions(+), 158 deletions(-) diff --git a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv index b49a0662c2..9b15b4f0b2 100644 --- a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv +++ b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv @@ -1,100 +1,100 @@ -0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0. -1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0. -2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0. -3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0. -4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0. -5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0. -6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0. -7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0. -8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0. -9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0. -10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0. -11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0. -12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0. -13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0. -14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0. 
-15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0. -16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0. -17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0. -18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0. -19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0. -20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0. -21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0. -22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0. -23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0. -24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0. -25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0. -26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0. -27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0. -28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0. -29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0. -30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0. -31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0. -32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0. -33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0. -34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0. -35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0. -36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0. -37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0. -38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0. 
-39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0. -40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0. -41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0. -42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0. -43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0. -44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0. -45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0. -46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0. -47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0. -48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0. -49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0. -50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0. -51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0. -52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0. -53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0. -54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0. -55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0. -56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0. -57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0. -58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0. -59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0. -60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0. -61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0. -62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0. 
-63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0. -64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0. -65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0. -66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0. -67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0. -68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0. -69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0. -70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0. -71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0. -72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0. -73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0. -74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0. -75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0. -76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0. -77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0. -78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0. -79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0. -80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0. -81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0. -82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0. -83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0. -84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0. -85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0. -86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0. 
-87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0. -88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0. -89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0. -90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0. -91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0. -92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0. -93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0. -94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0. -95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0. -96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0. -97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0. -98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0. -99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0. 
+0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0.,strkeya +1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0.,strkeyb +2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0.,strkey +3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0.,strkey +4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0.,strkey +5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0.,strkey +6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0.,strkey +7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0.,strkey +8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0.,strkey +9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0.,strkey +10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0.,strkeyc +11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0.,strkey +12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0.,strkey +13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0.,strkey +14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0.,strkey +15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0.,strkey +16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0.,strkey +17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0.,strkey +18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0.,strkey +19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0.,strkey +20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0.,strkey +21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0.,strkey 
+22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0.,strkey +23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0.,strkey +24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0.,strkey +25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0.,strkey +26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0.,strkey +27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0.,strkey +28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0.,strkey +29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0.,strkey +30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0.,strkey +31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0.,strkey +32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0.,strkey +33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0.,strkey +34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0.,strkey +35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0.,strkey +36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0.,strkey +37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0.,strkeyd +38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0.,strkey +39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0.,strkey +40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0.,strkey +41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0.,strkey +42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0.,strkey +43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0.,strkey 
+44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0.,strkey +45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0.,strkey +46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0.,strkey +47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0.,strkey +48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0.,strkey +49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0.,strkey +50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0.,strkey +51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0.,strkey +52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0.,strkey +53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0.,strkey +54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0.,strkey +55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0.,strkey +56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0.,strkey +57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0.,strkey +58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0.,strkey +59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0.,strkey +60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0.,strkey +61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0.,strkey +62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0.,strkey +63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0.,strkey +64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0.,strkey +65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0.,strkey 
+66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0.,strkey +67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0.,strkey +68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0.,strkey +69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0.,strkey +70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0.,strkey +71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0.,strkey +72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0.,strkey +73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0.,strkey +74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0.,strkey +75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0.,strkey +76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0.,strkey +77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0.,strkey +78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0.,strkey +79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0.,strkey +80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0.,strkey +81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0.,strkey +82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0.,strkey +83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0.,strkey +84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0.,strkey +85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0.,strkey +86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0.,strkey +87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0.,strkey 
+88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0.,strkey +89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0.,strkey +90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0.,strkey +91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0.,strkey +92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0.,strkey +93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0.,strkey +94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0.,strkey +95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0.,strkey +96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0.,strkey +97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0.,strkey +98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0.,strkey +99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0.,strkey diff --git a/tensorflow/contrib/timeseries/examples/known_anomaly.py b/tensorflow/contrib/timeseries/examples/known_anomaly.py index 7659dd308a..c08c0b0acb 100644 --- a/tensorflow/contrib/timeseries/examples/known_anomaly.py +++ b/tensorflow/contrib/timeseries/examples/known_anomaly.py @@ -46,12 +46,12 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): # Indicate the format of our exogenous feature, in this case a string # representing a boolean value. - string_feature = tf.contrib.layers.sparse_column_with_keys( - column_name="is_changepoint", keys=["no", "yes"]) + string_feature = tf.feature_column.categorical_column_with_vocabulary_list( + key="is_changepoint", vocabulary_list=["no", "yes"]) # Specify the way this feature is presented to the model, here using a one-hot # encoding. 
- one_hot_feature = tf.contrib.layers.one_hot_column( - sparse_id_column=string_feature) + one_hot_feature = tf.feature_column.indicator_column( + categorical_column=string_feature) estimator = tf.contrib.timeseries.StructuralEnsembleRegressor( periodicities=12, diff --git a/tensorflow/contrib/timeseries/examples/lstm.py b/tensorflow/contrib/timeseries/examples/lstm.py index f37cafcc50..2eee878196 100644 --- a/tensorflow/contrib/timeseries/examples/lstm.py +++ b/tensorflow/contrib/timeseries/examples/lstm.py @@ -59,10 +59,10 @@ class _LSTMModel(ts_model.SequentialTimeSeriesModel): num_units: The number of units in the model's LSTMCell. num_features: The dimensionality of the time series (features per timestep). - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects representing features which are inputs to the model but are - not predicted by it. These must then be present for training, - evaluation, and prediction. + exogenous_feature_columns: A list of `tf.feature_column`s representing + features which are inputs to the model but are not predicted by + it. These must then be present for training, evaluation, and + prediction. dtype: The floating point data type to use. """ super(_LSTMModel, self).__init__( @@ -189,12 +189,16 @@ def train_and_predict( export_directory=None): """Train and predict using a custom time series model.""" # Construct an Estimator from our LSTM model. + categorical_column = tf.feature_column.categorical_column_with_hash_bucket( + key="categorical_exogenous_feature", hash_bucket_size=16) exogenous_feature_columns = [ # Exogenous features are not part of the loss, but can inform # predictions. In this example the features have no extra information, but # are included as an API example. 
- tf.contrib.layers.real_valued_column( - "2d_exogenous_feature", dimension=2)] + tf.feature_column.numeric_column( + "2d_exogenous_feature", shape=(2,)), + tf.feature_column.embedding_column( + categorical_column=categorical_column, dimension=10)] estimator = ts_estimators.TimeSeriesRegressor( model=_LSTMModel(num_features=5, num_units=128, exogenous_feature_columns=exogenous_feature_columns), @@ -205,7 +209,11 @@ def train_and_predict( csv_file_name, column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,) + (tf.contrib.timeseries.TrainEvalFeatures.VALUES,) * 5 - + ("2d_exogenous_feature",) * 2)) + + ("2d_exogenous_feature",) * 2 + + ("categorical_exogenous_feature",)), + # Data types other than for `times` need to be specified if they aren't + # float32. In this case one of our exogenous features has string dtype. + column_dtypes=((tf.int64,) + (tf.float32,) * 7 + (tf.string,))) train_input_fn = tf.contrib.timeseries.RandomWindowInputFn( reader, batch_size=4, window_size=32) estimator.train(input_fn=train_input_fn, steps=training_steps) @@ -215,7 +223,9 @@ def train_and_predict( predict_exogenous_features = { "2d_exogenous_feature": numpy.concatenate( [numpy.ones([1, 100, 1]), numpy.zeros([1, 100, 1])], - axis=-1)} + axis=-1), + "categorical_exogenous_feature": numpy.array( + ["strkey"] * 100)[None, :, None]} (predictions,) = tuple(estimator.predict( input_fn=tf.contrib.timeseries.predict_continuation_input_fn( evaluation, steps=100, diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index f8355f366f..8d13343e82 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.layers.python.layers import feature_column - from 
tensorflow.contrib.timeseries.python.timeseries import ar_model from tensorflow.contrib.timeseries.python.timeseries import feature_keys from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib @@ -31,10 +29,12 @@ from tensorflow.contrib.timeseries.python.timeseries.state_space_models.filterin from tensorflow.python.estimator import estimator_lib from tensorflow.python.estimator.export import export_lib +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.training import training as train @@ -117,22 +117,29 @@ class TimeSeriesRegressor(estimator_lib.Estimator): dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) - with ops.Graph().as_default(): - # Default placeholders have only an unknown batch dimension. Make them - # in a separate graph, then splice in the series length to the shapes - # and re-create them in the outer graph. - exogenous_feature_shapes = { - key: (value.get_shape(), value.dtype) for key, value - in feature_column.make_place_holder_tensors_for_base_features( - self._model.exogenous_feature_columns).items()} - for feature_key, (batch_only_feature_shape, value_dtype) in ( - exogenous_feature_shapes.items()): - batch_only_feature_shape = batch_only_feature_shape.with_rank_at_least( - 1).as_list() - feature_shape = ([default_batch_size, default_series_length] - + batch_only_feature_shape[1:]) - placeholders[feature_key] = array_ops.placeholder( - dtype=value_dtype, name=feature_key, shape=feature_shape) + if self._model.exogenous_feature_columns: + with ops.Graph().as_default(): + # Default placeholders have only an unknown batch dimension. 
Make them + # in a separate graph, then splice in the series length to the shapes + # and re-create them in the outer graph. + parsed_features = ( + feature_column.make_parse_example_spec( + self._model.exogenous_feature_columns)) + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder( + shape=[None], dtype=dtypes.string), + features=parsed_features) + exogenous_feature_shapes = { + key: (value.get_shape(), value.dtype) for key, value + in placeholder_features.items()} + for feature_key, (batch_only_feature_shape, value_dtype) in ( + exogenous_feature_shapes.items()): + batch_only_feature_shape = ( + batch_only_feature_shape.with_rank_at_least(1).as_list()) + feature_shape = ([default_batch_size, default_series_length] + + batch_only_feature_shape[1:]) + placeholders[feature_key] = array_ops.placeholder( + dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. @@ -333,11 +340,11 @@ class StructuralEnsembleRegressor(StateSpaceRegressor): determine the model size. Learning autoregressive coefficients typically requires more steps and a smaller step size than other components. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. 
exogenous_update_condition: A function taking two Tensor arguments, `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]), and diff --git a/tensorflow/contrib/timeseries/python/timeseries/model.py b/tensorflow/contrib/timeseries/python/timeseries/model.py index bac7d1ebf5..7644764a74 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/model.py @@ -21,18 +21,17 @@ from __future__ import print_function import abc import collections -from tensorflow.contrib import layers -from tensorflow.contrib.layers import feature_column - from tensorflow.contrib.timeseries.python.timeseries import math_utils from tensorflow.contrib.timeseries.python.timeseries.feature_keys import PredictionFeatures from tensorflow.contrib.timeseries.python.timeseries.feature_keys import TrainEvalFeatures +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope @@ -66,11 +65,11 @@ class TimeSeriesModel(object): Args: num_features: Number of features for the time series - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. 
+ exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not + part of the series to be predicted. Passed to + `tf.feature_column.input_layer`. dtype: The floating point datatype to use. """ if exogenous_feature_columns: @@ -86,7 +85,7 @@ class TimeSeriesModel(object): @property def exogenous_feature_columns(self): - """`FeatureColumn` objects for features which are not predicted.""" + """`tf.feature_column`s for features which are not predicted.""" return self._exogenous_feature_columns # TODO(allenl): Move more of the generic machinery for generating and @@ -265,11 +264,14 @@ class TimeSeriesModel(object): if not self._exogenous_feature_columns: return (0,) with ops.Graph().as_default(): - placeholder_features = ( - feature_column.make_place_holder_tensors_for_base_features( + parsed_features = ( + feature_column.make_parse_example_spec( self._exogenous_feature_columns)) - embedded = layers.input_from_feature_columns( - columns_to_tensors=placeholder_features, + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), + features=parsed_features) + embedded = feature_column.input_layer( + features=placeholder_features, feature_columns=self._exogenous_feature_columns) return embedded.get_shape().as_list()[1:] @@ -308,13 +310,13 @@ class TimeSeriesModel(object): # Avoid shape warnings when embedding "scalar" exogenous features (those # with only batch and window dimensions); input_from_feature_columns # expects input ranks to match the embedded rank.
- if tensor.get_shape().ndims == 1: + if tensor.get_shape().ndims == 1 and tensor.dtype != dtypes.string: exogenous_features_single_batch_dimension[name] = tensor[:, None] else: exogenous_features_single_batch_dimension[name] = tensor embedded_exogenous_features_single_batch_dimension = ( - layers.input_from_feature_columns( - columns_to_tensors=exogenous_features_single_batch_dimension, + feature_column.input_layer( + features=exogenous_features_single_batch_dimension, feature_columns=self._exogenous_feature_columns, trainable=True)) exogenous_regressors = array_ops.reshape( @@ -381,8 +383,8 @@ class SequentialTimeSeriesModel(TimeSeriesModel): may use _scale_back_data or _scale_back_variance to return predictions to the input scale. dtype: The floating point datatype to use. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects. See `TimeSeriesModel`. + exogenous_feature_columns: A list of `tf.feature_column`s. See + `TimeSeriesModel`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py index 6257002647..951c6546d5 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py @@ -112,11 +112,11 @@ class StateSpaceModelConfiguration( exogenous_noise_decreases: If True, exogenous regressors can "set" model state, decreasing uncertainty. If both this parameter and exogenous_noise_increases are False, exogenous regressors are ignored.
- exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a -- GitLab From a05488be720fc803ac56738c8bc0222fb8a36d7f Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Mon, 26 Feb 2018 14:11:08 -0800 Subject: [PATCH 099/884] Adding documentation for dataset/iterator checkpointing. PiperOrigin-RevId: 187078347 --- .../docs_src/programmers_guide/datasets.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index d19200e80c..d38fbddfa1 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -327,6 +327,35 @@ same op/node (created by `Iterator.get_next()`). Therefore, evaluating *any* of these tensors will advance the iterator for all components. A typical consumer of an iterator will include all components in a single expression. +### Saving iterator state + +The @{tf.contrib.data.make_saveable_from_iterator} function creates a +`SaveableObject` from an iterator, which can be used to save and +restore the current state of the iterator (and, effectively, the whole input +pipeline). 
A saveable object thus created can be added to @{tf.train.Saver} +variables list or the `tf.GraphKeys.SAVEABLE_OBJECTS` collection for saving and +restoring in the same manner as a @{tf.Variable}. Refer to +@{$saved_model$Saving and Restoring} for details on how to save and restore +variables. + +```python +# Create saveable object from iterator. +saveable = tf.contrib.data.make_saveable_from_iterator(iterator) + +# Save the iterator state by adding it to the saveable objects collection. +tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable) +saver = tf.train.Saver() + +with tf.Session() as sess: + + if should_checkpoint: + saver.save(sess, path_to_checkpoint) + +# Restore the iterator state. +with tf.Session() as sess: + saver.restore(sess, path_to_checkpoint) +``` + ## Reading input data ### Consuming NumPy arrays -- GitLab From d98e7fc5720c1597b6f2034ba2ad62438ac5ef39 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 14:19:56 -0800 Subject: [PATCH 100/884] [XLA] GTE of a certain element of the tuple does not need to keep other elements alive. This achieves two things: 1. Heap simulation runtime is no longer quadratic in the number of tuple elements (as we don't add each GetTupleElement to the liveset of each buffer defined by the tuple). 2. A reduction in the heap memory footprint.
PiperOrigin-RevId: 187079787 --- .../compiler/xla/service/heap_simulator.cc | 135 ++++++++++-------- .../xla/service/heap_simulator_test.cc | 50 +++++++ 2 files changed, 127 insertions(+), 58 deletions(-) diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index a2d13c013c..3dd4c4a079 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -27,38 +27,6 @@ namespace xla { using tensorflow::gtl::FlatMap; using tensorflow::gtl::FlatSet; -namespace { - -// Returns the set of buffers that may be sources of all operands of the given -// instruction. The returned buffers are guaranteed to have no duplicates, and -// to be sorted in a deterministic order. -std::vector UniqueOperandSourceBuffers( - const HloInstruction* instruction, - const TuplePointsToAnalysis& points_to_analysis) { - std::vector buffers; - for (const HloInstruction* operand : instruction->operands()) { - points_to_analysis.GetPointsToSet(operand).ForEachElement( - [&](const ShapeIndex& /*index*/, - const PointsToSet::BufferList& points_to) { - buffers.insert(buffers.end(), points_to.begin(), points_to.end()); - }); - } - - // Sort and then remove duplicates from buffers. 
- std::sort(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() < b->id(); - }); - buffers.erase(std::unique(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() == b->id(); - }), - buffers.end()); - return buffers; -} - -} // namespace - /*static*/ StatusOr HeapSimulator::Run( std::unique_ptr algorithm, const HloModule& module, @@ -93,6 +61,7 @@ Status HeapSimulator::RunComputation( const HloComputation& computation, const std::vector& instruction_sequence, const TuplePointsToAnalysis& points_to_analysis) { + VLOG(3) << "Computation:\n" << computation.ToString(); // The goal here is to minimize memory usage, assuming the given sequential // ordering of instructions. The strategy is to walk through the instruction // sequence, calling Alloc and Free on the underlying heap algorithm. The @@ -101,7 +70,51 @@ Status HeapSimulator::RunComputation( // 'live_buffers' tracks the liveness of each buffer that we assign, by // associating it with a set of HloInstructions that need to be visited. When // the set becomes empty, the buffer is no longer used, and can be freed. + // 'used_buffers' is the reverse map - it tracks which buffers were used by an + // instruction, so that we can remove the instructions from a buffer's live + // set after they are visited. FlatMap> live_buffers; + FlatMap> used_buffers; + auto add_user_to_buffer = [this, &live_buffers, &used_buffers]( + const HloInstruction* user, + const LogicalBuffer* buffer) { + if (!IgnoreBuffer(buffer)) { + VLOG(4) << " Adding user " << user->name() << " to buffer " + << buffer->ToString(); + live_buffers[buffer].insert(user); + used_buffers[user].insert(buffer); + } + }; + + // Initialize live_buffers for each buffer that we're going to assign. 
The + // set of instructions that need to be visited contains all users of all + // aliases, that is, all users of all instructions that have the buffer + // contained in their points-to set. + for (const HloInstruction* instruction : instruction_sequence) { + const PointsToSet& points_to = + points_to_analysis.GetPointsToSet(instruction); + const PointsToSet::BufferSet& buffer_set = points_to.CreateFlattenedSet(); + for (const HloInstruction* user : instruction->users()) { + if (user->opcode() != HloOpcode::kGetTupleElement) { + for (const LogicalBuffer* buffer : buffer_set) { + add_user_to_buffer(user, buffer); + } + } else { + // A GetTupleElement doesn't need to keep all of its operand's buffers + // alive. It only needs the buffers that relate to the element it's + // extracting, and the tuple it's extracting from, but not the buffers + // for the other elements. + for (const LogicalBuffer* buffer : points_to.element({})) { + add_user_to_buffer(user, buffer); + } + const PointsToSet& gte_points_to = + points_to_analysis.GetPointsToSet(user); + for (const LogicalBuffer* buffer : gte_points_to.CreateFlattenedSet()) { + add_user_to_buffer(user, buffer); + } + } + } + } const HloInstruction* root = computation.root_instruction(); auto output_source_buffers = @@ -114,34 +127,17 @@ Status HeapSimulator::RunComputation( buffers_defined_by_instruction = points_to_analysis.GetBuffersDefinedByInstruction(instruction); - // Initialize live_buffers for each buffer that we're going to assign. The - // set of instructions that need to be visited contains all users of all - // aliases. The alias itself is not necessary; if it has users, the users - // are necessarily scheduled after the alias. And if it has no users, it is - // either a dead value or an output, both of which are handled below. - // - // We ignore control dependencies here.
The reasoning is that the control - // dependencies have already been accounted for in the ordering of the given - // 'instruction_sequence', and should not otherwise artificially extend the - // lifetime of buffers that aren't already connected by a data dependency. + VLOG(3) << "Instruction: " << instruction->ToString(); + for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { + VLOG(4) << " Defines: " << buffer->ToString() + << (IgnoreBuffer(buffer) ? " (Ignored)" : ""); + } + dead_buffers_to_free.clear(); for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { if (IgnoreBuffer(buffer)) { continue; } - FlatSet* live_set = nullptr; - for (const BufferAlias& alias : - points_to_analysis.GetBufferAliases(*buffer)) { - const std::vector& users = - alias.instruction()->users(); - if (!users.empty()) { - if (live_set == nullptr) { - live_set = &live_buffers[buffer]; - } - live_set->insert(users.begin(), users.end()); - } - } - // Add a nullptr sentry to ensure entry parameters and output source // buffers are not freed until the very end. const bool entry_parameter = @@ -165,11 +161,12 @@ Status HeapSimulator::RunComputation( // have no instructions left to visit are moved from live_buffers to // operand_buffers_to_free. operand_buffers_to_free.clear(); - for (const LogicalBuffer* operand_buffer : - UniqueOperandSourceBuffers(instruction, points_to_analysis)) { + for (const LogicalBuffer* operand_buffer : used_buffers[instruction]) { if (IgnoreBuffer(operand_buffer)) { continue; } + VLOG(4) << " Removing user " << instruction->name() << " from buffer " + << operand_buffer->ToString(); auto it = live_buffers.find(operand_buffer); FlatSet* live_set = &it->second; live_set->erase(instruction); @@ -178,6 +175,11 @@ Status HeapSimulator::RunComputation( operand_buffers_to_free.push_back(operand_buffer); } } + // Sort to get a deterministic iteration order. 
+ std::sort(operand_buffers_to_free.begin(), operand_buffers_to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); // Allocate buffers defined by this instruction. This is the latest point // that we can allocate; right before the buffer is first used. This must @@ -203,6 +205,8 @@ Status HeapSimulator::RunComputation( CanShareOperandBufferWithUser( operand_buffer->instruction(), operand_buffer->index(), buffer->instruction(), buffer->index(), points_to_analysis)) { + VLOG(3) << " Sharing: " << buffer->ToString() << " with " + << operand_buffer->ToString(); ShareBuffer(buffer, operand_buffer, instruction); shared = true; break; @@ -211,6 +215,7 @@ Status HeapSimulator::RunComputation( } if (!shared) { + VLOG(3) << " Allocating: " << buffer->ToString(); Alloc(buffer, instruction); } } @@ -244,20 +249,34 @@ Status HeapSimulator::RunComputation( // Free buffers that are no longer live. This is the earliest point that we // can de-allocate; right after the last use of the buffer. for (const LogicalBuffer* buffer : dead_buffers_to_free) { + VLOG(3) << " Freeing dead: " << buffer->ToString(); Free(buffer, instruction); } for (const LogicalBuffer* buffer : operand_buffers_to_free) { + VLOG(3) << " Freeing operand: " << buffer->ToString(); Free(buffer, instruction); } } // Any remaining live buffers must be entry parameters or output source - // buffers, which had a nullptr sentry added. Free them now. + // buffers, which had a nullptr sentry added. Free them now, in a + // deterministic order. 
+ std::vector to_free; + to_free.reserve(live_buffers.size()); for (const auto& buffer_pending : live_buffers) { const LogicalBuffer* buffer = buffer_pending.first; const FlatSet& pending = buffer_pending.second; CHECK_EQ(pending.size(), 1) << *buffer; CHECK(*pending.begin() == nullptr) << *buffer; + to_free.push_back(buffer); + } + + std::sort(to_free.begin(), to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); + for (const LogicalBuffer* buffer : to_free) { + VLOG(3) << "Freeing pending: " << buffer->ToString(); Free(buffer, root); } diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc index 387b649a73..688a271712 100644 --- a/tensorflow/compiler/xla/service/heap_simulator_test.cc +++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc @@ -410,6 +410,56 @@ TEST_F(HeapSimulatorTest, MultiplyDotDotTuple) { }); } +TEST_F(HeapSimulatorTest, IndependentTupleElements) { + auto builder = HloComputation::Builder(TestName()); + auto paramA = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32scalar_, "paramA")); + auto paramB = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32scalar_, "paramB")); + auto mul = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kMultiply, paramA, paramB)); + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kAdd, paramA, paramB)); + auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({mul, add})); + auto element0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 0)); + auto broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(f32vec4_, element0, {0})); + auto sub = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kSubtract, paramA, paramB)); + auto element1 = builder.AddInstruction( + 
HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 1)); + auto output = builder.AddInstruction( + HloInstruction::CreateTuple({broadcast, sub, element1})); + + HeapSimulatorTracker tracker(TestName(), builder.Build(), + {paramA, paramB, mul, add, tuple, element0, + broadcast, sub, element1, output}); + tracker.ExpectCallSequence({ + {kAlloc, tracker.BufferAt(paramA, {})}, + {kAlloc, tracker.BufferAt(paramB, {})}, + {kAlloc, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(add, {})}, + {kAlloc, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(broadcast, {})}, + // The mul can be freed right after the broadcast happens, even though + // The other GetTupleElement is still alive. + {kFree, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(sub, {})}, + // The temporary tuple is now dead. + {kFree, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(output, {})}, + // All params and outputs are freed at the end. + {kFree, tracker.BufferAt(paramA, {})}, + {kFree, tracker.BufferAt(paramB, {})}, + {kFree, tracker.BufferAt(add, {})}, + {kFree, tracker.BufferAt(broadcast, {})}, + {kFree, tracker.BufferAt(sub, {})}, + {kFree, tracker.BufferAt(output, {})}, + {kFinish, nullptr}, + }); +} + TEST_F(HeapSimulatorTest, WholeModule) { HeapSimulatorTracker tracker(TestName()); -- GitLab From 5b7f78c767b30076850f9b9f88b8730767a0437c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:30 -0800 Subject: [PATCH 101/884] 1st version of sequential feature columns. 
PiperOrigin-RevId: 187080635 --- tensorflow/contrib/feature_column/BUILD | 31 +- .../sequential_feature_column.py | 308 +++++++++++- .../sequential_feature_column_test.py | 471 ++++++++++++++++++ 3 files changed, 808 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index 6fc053759c..a53e36c2d5 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -33,5 +33,34 @@ py_library( name = "sequential_feature_column", srcs = ["python/feature_column/sequential_feature_column.py"], srcs_version = "PY2AND3", - deps = [], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:variable_scope", + "//tensorflow/python/feature_column", + ], +) + +py_test( + name = "sequential_feature_column_test", + srcs = ["python/feature_column/sequential_feature_column_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":sequential_feature_column", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:training", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + ], ) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py index 690a44ff43..4ed7268e7a 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py +++ 
b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py @@ -12,8 +12,314 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experimental methods for tf.feature_column sequential input.""" +"""Experimental methods for tf.feature_column sequence input.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function + + +import abc +import collections + + +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import variable_scope + +# TODO(b/73160931): Fix pydoc. +# pylint: disable=g-doc-args,missing-docstring,protected-access +# TODO(b/73827486): Support SequenceExample. + + +def sequence_input_layer( + features, + feature_columns, + weight_collections=None, + trainable=True, + scope=None): + """"Builds input layer for sequence input. + + All `feature_columns` must be sequence dense columns with the same + `sequence_length`. The output of this method can be fed into sequence + networks, such as RNN. + + The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ from + batch to batch. + + If multiple `feature_columns` are given with `Di` `num_elements` each, their + outputs are concatenated. So, the final `Tensor` has shape + `[batch_size, T, D0 + D1 + ... + Dn]`. 
+ + Example: + + ```python + rating = sequence_numeric_column('rating') + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [rating, watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Returns: + An `(input_layer, sequence_length)` tuple where: + - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ + from batch to batch. `D` is the sum of `num_elements` for all + `feature_columns`. + - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence + length for each example. + Raises: + ValueError: If any of the `feature_columns` is the wrong type. + """ + feature_columns = fc._clean_feature_columns(feature_columns) + for c in feature_columns: + if not isinstance(c, _SequenceDenseColumn): + raise ValueError( + 'All feature_columns must be of type _SequenceDenseColumn. ' + 'Given (type {}): {}'.format(type(c), c)) + + with variable_scope.variable_scope( + scope, default_name='sequence_input_layer', values=features.values()): + builder = fc._LazyBuilder(features) + output_tensors = [] + sequence_lengths = [] + ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): + ordered_columns.append(column) + with variable_scope.variable_scope( + None, default_name=column._var_scope_name): + dense_tensor, sequence_length = column._get_sequence_dense_tensor( + builder, + weight_collections=weight_collections, + trainable=trainable) + # Flattens the final dimension to produce a 3D Tensor. 
+ num_elements = column._variable_shape.num_elements() + shape = array_ops.shape(dense_tensor) + output_tensors.append( + array_ops.reshape( + dense_tensor, + shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) + # TODO(b/73160931): Verify sequence_length equality. + return array_ops.concat(output_tensors, -1), sequence_lengths[0] + + +# TODO(b/73160931): Add remaining categorical columns. +def sequence_categorical_column_with_identity( + key, num_buckets, default_value=None): + return _SequenceCategoricalColumn( + fc.categorical_column_with_identity( + key=key, + num_buckets=num_buckets, + default_value=default_value)) + + +# TODO(b/73160931): Merge with embedding_column +def _sequence_embedding_column( + categorical_column, dimension, initializer=None, ckpt_to_load_from=None, + tensor_name_in_ckpt=None, max_norm=None, trainable=True): + if not isinstance(categorical_column, _SequenceCategoricalColumn): + raise ValueError( + 'categorical_column must be of type _SequenceCategoricalColumn. ' + 'Given (type {}): {}'.format( + type(categorical_column), categorical_column)) + return _SequenceEmbeddingColumn( + fc.embedding_column( + categorical_column, + dimension=dimension, + initializer=initializer, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable)) + + +def sequence_numeric_column( + key, + shape=(1,), + default_value=0., + dtype=dtypes.float32): + # TODO(b/73160931): Add validations. 
+ return _SequenceNumericColumn( + key, + shape=shape, + default_value=default_value, + dtype=dtype) + + +class _SequenceDenseColumn(fc._FeatureColumn): + """Represents dense sequence data.""" + + __metaclass__ = abc.ABCMeta + + TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name + 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) + + @abc.abstractproperty + def _variable_shape(self): + """`TensorShape` without batch and sequence dimensions.""" + pass + + @abc.abstractmethod + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + """Returns a `TensorSequenceLengthPair`.""" + pass + + +def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): + with ops.name_scope(None, 'sequence_length') as name_scope: + row_ids = sp_tensor.indices[:, 0] + column_ids = sp_tensor.indices[:, 1] + column_ids += array_ops.ones_like(column_ids) + seq_length = ( + math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) + # If the last n rows do not have ids, seq_length will have shape + # [batch_size - n]. Pad the remaining values with zeros. 
+ n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] + padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) + return array_ops.concat([seq_length, padding], axis=0, name=name_scope) + + +class _SequenceCategoricalColumn( + fc._CategoricalColumn, + collections.namedtuple( + '_SequenceCategoricalColumn', ['categorical_column'])): + + @property + def name(self): + return self.categorical_column.name + + @property + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.categorical_column._transform_feature(inputs) + + @property + def _num_buckets(self): + return self.categorical_column._num_buckets + + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + id_tensor = sparse_tensors.id_tensor + weight_tensor = sparse_tensors.weight_tensor + # Expands final dimension, so that embeddings are not combined during + # embedding lookup. + check_id_rank = check_ops.assert_equal( + array_ops.rank(id_tensor), 2, + data=[ + 'Column {} expected ID tensor of rank 2. 
'.format(self.name), + 'id_tensor shape: ', array_ops.shape(id_tensor)]) + with ops.control_dependencies([check_id_rank]): + id_tensor = sparse_ops.sparse_reshape( + id_tensor, + shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) + if weight_tensor is not None: + check_weight_rank = check_ops.assert_equal( + array_ops.rank(weight_tensor), 2, + data=[ + 'Column {} expected weight tensor of rank 2.'.format(self.name), + 'weight_tensor shape:', array_ops.shape(weight_tensor)]) + with ops.control_dependencies([check_weight_rank]): + weight_tensor = sparse_ops.sparse_reshape( + weight_tensor, + shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) + return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) + + def _sequence_length(self, inputs): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) + + +class _SequenceEmbeddingColumn( + _SequenceDenseColumn, + collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): + + @property + def name(self): + return self.embedding_column.name + + @property + def _parse_example_spec(self): + return self.embedding_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.embedding_column._transform_feature(inputs) + + @property + def _variable_shape(self): + return self.embedding_column._variable_shape + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + dense_tensor = self.embedding_column._get_dense_tensor( + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) + sequence_length = self.embedding_column.categorical_column._sequence_length( + inputs) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + + +class _SequenceNumericColumn( + _SequenceDenseColumn, + collections.namedtuple( + '_SequenceNumericColumn', + ['key', 'shape', 
'default_value', 'dtype'])): + + @property + def name(self): + return self.key + + @property + def _parse_example_spec(self): + return {self.key: parsing_ops.VarLenFeature(self.dtype)} + + def _transform_feature(self, inputs): + return inputs.get(self.key) + + @property + def _variable_shape(self): + return tensor_shape.TensorShape(self.shape) + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + # Do nothing with weight_collections and trainable since no variables are + # created in this function. + del weight_collections + del trainable + sp_tensor = inputs.get(self) + dense_tensor = sparse_ops.sparse_tensor_to_dense( + sp_tensor, default_value=self.default_value) + # Reshape into [batch_size, T, variable_shape]. + dense_shape = array_ops.concat( + [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], + axis=0) + dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) + sequence_length = _sequence_length_from_sparse_tensor( + sp_tensor, num_elements=self._variable_shape.num_elements()) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + +# pylint: enable=g-doc-args,missing-docstring,protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py new file mode 100644 index 0000000000..59674869a2 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py @@ -0,0 +1,471 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sequential_feature_column.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc +from tensorflow.python.feature_column.feature_column import _LazyBuilder +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.platform import test +from tensorflow.python.training import monitored_session + + +class SequenceInputLayerTest(test.TestCase): + + def test_embedding_column(self): + vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + + embedding_dimension_a = 2 + embedding_values_a = ( + (1., 2.), # id 0 + (3., 4.), # id 1 + (5., 6.) # id 2 + ) + embedding_dimension_b = 3 + embedding_values_b = ( + (11., 12., 13.), # id 0 + (14., 15., 16.), # id 1 + (17., 18., 19.) 
# id 2 + ) + def _get_initializer(embedding_dimension, embedding_values): + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + return _initializer + + expected_input_layer = [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], + ] + expected_sequence_length = [1, 2] + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = sfc._sequence_embedding_column( + categorical_column_a, dimension=embedding_dimension_a, + initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) + categorical_column_b = sfc.sequence_categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_b = sfc._sequence_embedding_column( + categorical_column_b, dimension=embedding_dimension_b, + initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + }, + # Test that columns are reordered alphabetically. 
+ feature_columns=[embedding_column_b, embedding_column_a]) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('sequence_input_layer/aaa_embedding/embedding_weights:0', + 'sequence_input_layer/bbb_embedding/embedding_weights:0'), + tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess)) + self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess)) + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_input_layer = [ + [[0.], [1.]], + [[10.], [0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column_multi_dim(self): + """Tests sequence_input_layer for multi-dimensional numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + # The output of numeric_column._get_dense_tensor should be flattened. 
+ expected_input_layer = [ + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +def _assert_sparse_tensor_value(test_case, expected, actual): + test_case.assertEqual(np.int64, np.array(actual.indices).dtype) + test_case.assertAllEqual(expected.indices, actual.indices) + + test_case.assertEqual( + np.array(expected.values).dtype, np.array(actual.values).dtype) + test_case.assertAllEqual(expected.values, actual.values) + + test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype) + test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) + + +class SequenceCategoricalColumnWithIdentityTest(test.TestCase): + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((1, 2, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_value( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_get_sparse_tensors_inputs3d(self): + """Tests _get_sparse_tensors when the input is already 3D Tensor.""" + column = 
sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 2, 0), + dense_shape=(2, 2, 1)) + + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'Column aaa expected ID tensor of rank 2\.\s*' + r'id_tensor shape:\s*\[2 2 1\]'): + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({'aaa': inputs})) + with monitored_session.MonitoredSession() as sess: + id_weight_pair.id_tensor.eval(session=sess) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_zeros(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((1, 0), (3, 0), (3, 1)), + values=(1, 2, 0), + dense_shape=(5, 2)) + expected_sequence_length = [0, 1, 0, 2, 0] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceEmbeddingColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)) + + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 
11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + expected_lookups = [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]], + ] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=embedding_dimension, + initializer=_initializer) + + embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('embedding_weights:0',), tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) + self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) + + def test_sequence_length(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some 
examples do not have ids.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [] + # example 1, ids [2] + # example 2, ids [0, 1] + # example 3, ids [] + # example 4, ids [1] + # example 5, ids [] + indices=((1, 0), (2, 0), (2, 1), (4, 0)), + values=(2, 0, 1, 1), + dense_shape=(6, 2)) + expected_sequence_length = [0, 1, 2, 0, 1, 0] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceNumericColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_dense_tensor = [ + [[0.], [1.]], + [[10.], [0.]], + ] + numeric_column = sfc.sequence_numeric_column('aaa') + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_sequence_dense_tensor_with_shape(self): + """Tests get_sequence_dense_tensor with shape !=(1,).""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_dense_tensor = [ + [[0., 1., 2.], [3., 4., 5.]], + [[10., 11., 12.], [0., 0., 0.]], + ] + numeric_column 
= sfc.sequence_numeric_column('aaa', shape=(3,)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_dense_tensor_multi_dim(self): + """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + expected_dense_tensor = [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], + ] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_sequence_length(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_shape(self): + """Tests _sequence_length with shape !=(1,).""" + sparse_input = 
sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some examples do not have ids.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [] + # example 1, values [[0.], [1.]] + # example 2, [[2.]] + # example 3, values [] + # example 4, [[3.]] + # example 5, values [] + indices=((1, 0), (1, 1), (2, 0), (4, 0)), + values=(0., 1., 2., 3.), + dense_shape=(6, 2)) + expected_sequence_length = [0, 2, 1, 0, 1, 0] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +if __name__ == '__main__': + test.main() -- GitLab From ecace69b5e28f508f76264e66778935e84c37715 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:37 -0800 Subject: [PATCH 102/884] Add a function that allows to dynamically verify whether a function is white listed for graph mode. 
PiperOrigin-RevId: 187080654 --- tensorflow/contrib/py2tf/impl/conversion.py | 18 ++++++++++++++++++ .../contrib/py2tf/impl/conversion_test.py | 11 +++++++++++ 2 files changed, 29 insertions(+) diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index 044de33568..d95469ea53 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -97,6 +97,24 @@ class ConversionMap(object): self.dependency_cache[original_entity] = converted_ast +def is_whitelisted_for_graph(o): + """Check whether an entity is whitelisted for use in graph mode. + + Examples of whitelisted entities include all members of the tensorflow + package. + + Args: + o: A Python entity. + Returns: + Boolean + """ + m = tf_inspect.getmodule(o) + for prefix, in config.DEFAULT_UNCOMPILED_MODULES: + if m.__name__.startswith(prefix): + return True + return False + + def entity_to_graph(o, conversion_map, arg_values, arg_types): """Compile a Python entity into equivalent TensorFlow. 
diff --git a/tensorflow/contrib/py2tf/impl/conversion_test.py b/tensorflow/contrib/py2tf/impl/conversion_test.py index 7816f95857..9ff256aace 100644 --- a/tensorflow/contrib/py2tf/impl/conversion_test.py +++ b/tensorflow/contrib/py2tf/impl/conversion_test.py @@ -20,12 +20,23 @@ from __future__ import print_function import gast +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl import conversion +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test class ConversionTest(test.TestCase): + def test_is_whitelisted_for_graph(self): + + def test_fn(): + return constant_op.constant(1) + + self.assertFalse(conversion.is_whitelisted_for_graph(test_fn)) + self.assertTrue(conversion.is_whitelisted_for_graph(utils)) + self.assertTrue(conversion.is_whitelisted_for_graph(constant_op.constant)) + def test_entity_to_graph_unsupported_types(self): with self.assertRaises(ValueError): conversion_map = conversion.ConversionMap(True, (), (), None) -- GitLab From 26a765f95acc7cbc762b8e1fef94921cab8f181d Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:31:29 -0800 Subject: [PATCH 103/884] [TF:XLA] Bump open source llvm revision to r326083 PiperOrigin-RevId: 187081592 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index d6ac7be8b5..5b09c5e67d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", + 
"https://github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", ], - sha256 = "f5721d9cc18a9109c9e9f847f48e69b710b961cee83e6691227e310cb3b5da58", - strip_prefix = "llvm-fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14", + sha256 = "63d4da54dc7bc9a79e2ad266d230f4f759520cccb344a2dd49c2c6383ab75285", + strip_prefix = "llvm-8f7bcdf3c65b9a47e35653d525135beb18f3ac25", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From a80896d3b3a2358f324dc4cd429409ea9acc8a09 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:32:08 -0800 Subject: [PATCH 104/884] Track DebugOptions in AotCompilationOptions In particular, I need this for supporting HLO profiling in the AOT backend. PiperOrigin-RevId: 187081674 --- tensorflow/compiler/xla/service/compile_only_service.cc | 3 +-- tensorflow/compiler/xla/service/compiler.cc | 3 +++ tensorflow/compiler/xla/service/compiler.h | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index dab73596e1..6664496ab6 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -72,8 +72,7 @@ CompileOnlyService::CompileAheadOfTime( VersionedComputationHandle versioned_handle = user_computation->GetVersionedHandle(); - // TODO(b/63773457): Track DebugOptions in AotCompilationOptions. - DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags(); + const DebugOptions& debug_options = options.debug_options(); // Dump computation proto state if flag is set. 
const string& directory_path = debug_options.xla_dump_computations_to(); diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index e2e9d2a0c0..0392d4af48 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -86,4 +86,7 @@ Compiler::GetPlatformCompilers() { return compilers->at(platform->id()).get(); } +AotCompilationOptions::AotCompilationOptions() + : debug_options_(legacy_flags::GetDebugOptionsFromFlags()) {} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 74fd24edf8..33e19efc72 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -79,11 +79,15 @@ class AotCompilationOptions { device_allocator_ = device_allocator; } + const DebugOptions& debug_options() const { return debug_options_; } + DebugOptions* mutable_debug_options() { return &debug_options_; } + protected: - AotCompilationOptions() = default; + AotCompilationOptions(); private: DeviceMemoryAllocator* device_allocator_ = nullptr; + DebugOptions debug_options_; }; // Abstract compiler interface that is subclassed for compilation on a -- GitLab From 153e10a037c5e348834108ff46d9dccdf0cfb9a9 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 14:38:31 -0800 Subject: [PATCH 105/884] Enable de/serialization of nested control flow. This is a follow-up to the previous commit (https://github.com/tensorflow/tensorflow/commit/23851760b7b099214bdd4f1b88156d7ac2bdd2a2). It adds the new proto schemas, enables the behavior for reading and writing the new protos, and adds a test for de/serializing nested while loops. There's still a bug preventing deserializing conds, which will be addressed in another change. 
PiperOrigin-RevId: 187082713 --- tensorflow/core/protobuf/control_flow.proto | 17 ++++++- tensorflow/python/ops/control_flow_ops.py | 54 ++++++-------------- tensorflow/python/training/saver_test.py | 56 +++++++++++++++++++++ 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto index 2c9476a08a..3c05b4f0e2 100644 --- a/tensorflow/core/protobuf/control_flow.proto +++ b/tensorflow/core/protobuf/control_flow.proto @@ -17,6 +17,15 @@ message ValuesDef { map external_values = 2; } +// Container for any kind of control flow context. Any other control flow +// contexts that are added below should also be added here. +message ControlFlowContextDef { + oneof ctxt { + CondContextDef cond_ctxt = 1; + WhileContextDef while_ctxt = 2; + } +} + // Protocol buffer representing a CondContext object. message CondContextDef { // Name of the context. @@ -33,6 +42,9 @@ message CondContextDef { // Values and external values in control flow context. ValuesDef values_def = 5; + + // Contexts contained inside this context (e.g. nested conds). + repeated ControlFlowContextDef nested_contexts = 6; } // Protocol buffer representing a WhileContext object. @@ -70,5 +82,8 @@ message WhileContextDef { // Optional name of the maximum_iterations tensor. string maximum_iterations_name = 11; - // Next available id: 12. + // Contexts contained inside this context (e.g. nested whiles). + repeated ControlFlowContextDef nested_contexts = 12; + + // Next available id: 13. 
} diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 8d5ab72670..85944efbe8 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1767,13 +1767,9 @@ class CondContext(ControlFlowContext): context_def.branch = self._branch context_def.values_def.MergeFrom(super(CondContext, self)._to_values_def( export_scope)) - # TODO(b/72868227): enable this once the corresponding control_flow.proto - # changes have been checked in (they aren't checked in and this is - # disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -1785,14 +1781,10 @@ class CondContext(ControlFlowContext): ret = CondContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is here for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def) + ret.Exit() return ret def to_control_flow_context_def(self, context_def, export_scope=None): @@ -2110,10 +2102,7 @@ def cond(pred, # Only add non-nested conds to the collection. Any nested control flow will # be encapsulated in the root context. assert context_t.outer_context == context_f.outer_context - # TODO(b/72868227): remove "if True..." 
once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or context_t.outer_context is None: + if context_t.outer_context is None: ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_t) ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_f) @@ -2336,13 +2325,9 @@ class WhileContext(ControlFlowContext): context_def.values_def.MergeFrom( super(WhileContext, self)._to_values_def( export_scope=export_scope)) - # TODO(b/72868227): remove "if True..." once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -2364,14 +2349,10 @@ class WhileContext(ControlFlowContext): """ ret = WhileContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def, import_scope=import_scope) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def, import_scope=import_scope) + ret.Exit() return ret def GetWhileContext(self): @@ -3216,10 +3197,7 @@ def while_loop(cond, swap_memory=swap_memory) # Only add non-nested loops to the collection. 
Any nested control flow will # be encapsulated in the root context. - # TODO(b/72868227): enable condition once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or loop_context.outer_context is None: + if loop_context.outer_context is None: ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context) result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants) if maximum_iterations is not None: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index f00f98db00..b366ed30f3 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -53,6 +53,7 @@ from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import partitioned_variables @@ -2040,6 +2041,61 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) + def testNestedWhileLoops(self): + test_dir = self._get_test_dir("nested_whiles") + filename = os.path.join(test_dir, "metafile") + saver_ckpt = os.path.join(test_dir, "saver.ckpt") + + # Create two simple nested while loops. 
+ with ops_lib.Graph().as_default(): + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + + var = variables.Variable(0) + var_name = var.name + + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + [0, var]) + output_name = output.name + + init_op = variables.global_variables_initializer() + + # Generate a MetaGraphDef containing the nested loops. + with session.Session() as sess: + sess.run(init_op) + sess.run(output) + saver = saver_module.Saver() + saver.save(sess, saver_ckpt) + saver.export_meta_graph(filename) + + # Build and run the gradients of the nested while loop. We use this below + # to verify that the gradients are correct with an imported MetaGraphDef. + grad = gradients_impl.gradients([output], [var]) + with session.Session() as sess: + sess.run(init_op) + expected_grad_value = sess.run(grad) + + # Restore the MetaGraphDef into a new Graph. + with ops_lib.Graph().as_default(): + with session.Session() as sess: + saver = saver_module.import_meta_graph(filename) + saver.restore(sess, saver_ckpt) + + # Make sure we can still build gradients and get the same result. + var = ops_lib.get_default_graph().get_tensor_by_name(var_name) + output = ops_lib.get_default_graph().get_tensor_by_name(output_name) + grad = gradients_impl.gradients([output], [var]) + + init_op = variables.global_variables_initializer() + + with session.Session() as sess: + sess.run(init_op) + actual_grad_value = sess.run(grad) + self.assertEqual(expected_grad_value, actual_grad_value) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From 95d36c770b24a343008d32eda85e8f91278f6df0 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 15:37:27 -0800 Subject: [PATCH 106/884] [XLA::Interpreter] Add support for kCall to HloEvaluator. Also enable xla/tests/call_test to run on interpreter. 
PiperOrigin-RevId: 187092587 --- .../compiler/xla/service/hlo_evaluator.cc | 20 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 3 +++ 3 files changed, 25 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 15ae53128a..fd06b19144 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2445,6 +2445,26 @@ Status HloEvaluator::HandleCopy(HloInstruction* copy) { return Status::OK(); } +Status HloEvaluator::HandleCall(HloInstruction* call) { + auto* computation = call->to_apply(); + auto operands = call->operands(); + + std::vector arg_literals; + arg_literals.reserve(operands.size()); + for (auto operand : operands) { + const Literal& arg_literal = GetEvaluatedLiteralFor(operand); + arg_literals.push_back(&arg_literal); + } + + HloEvaluator embedded_evaluator; + std::unique_ptr result = + embedded_evaluator.Evaluate(*computation, arg_literals) + .ConsumeValueOrDie(); + + evaluated_[call] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 3b2b697e49..c65d9915e3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCopy(HloInstruction* copy) override; + Status HandleCall(HloInstruction* call) override; + private: // Returns the already-evaluated literal result for the instruction. 
// A Constant instruction is considered evaluated and its literal will be diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 97abf217d7..33fde9737d 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1143,6 +1143,9 @@ xla_test( xla_test( name = "call_test", srcs = ["call_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", -- GitLab From aa2f0b68fb7052ea46547bf15fb8a46f6447f182 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 15:37:40 -0800 Subject: [PATCH 107/884] Uses a thread pool for graph functions in eager mode with inter_op_parallelism_threads. PiperOrigin-RevId: 187092622 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +++++++++++++- tensorflow/c/eager/runtime.cc | 14 ++++++++++---- tensorflow/c/eager/runtime.h | 3 +++ tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index e55cb672e9..16a2a15072 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,6 +21,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ + "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index bebb63c746..b233dd5b93 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. 
tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = - tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); + status->status = tensorflow::KernelAndDevice::Init( + ndef, ctx->func_lib(device), &ctx->runner, kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 3356054cd0..29944df4c2 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -45,7 +46,15 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : policy(opts.policy), + : thread_pool(new tensorflow::thread::ThreadPool( + opts.session_options.options.env, "EagerCompute", + opts.session_options.options.config + .inter_op_parallelism_threads() != 0 + ? 
opts.session_options.options.config + .inter_op_parallelism_threads() + : tensorflow::port::NumSchedulableCPUs())), + runner([this](std::function f) { thread_pool->Schedule(f); }), + policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -54,6 +63,9 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} + const std::unique_ptr thread_pool; + std::function)> runner; + const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index 4bf24fec2c..b9618420f0 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,17 +255,22 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; + out->runner_ = nullptr; + out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = flib->device(); out->kernel_.reset(k); out->flib_ = flib; + out->runner_ = runner; + out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -296,10 +301,11 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - // TODO(apassos): use a thread pool. 
- std::function)> runner = - [](std::function f) { f(); }; - params.runner = &runner; + if (runner_ == nullptr) { + params.runner = &default_runner_; + } else { + params.runner = runner_; + } OpKernelContext context(&params); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 7fede4dae9..fa5f839977 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,6 +169,7 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -188,6 +189,8 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; + std::function)>* runner_; + std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index 643153058c..ab0b535e1a 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); + Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &k)); } } 
BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From 175730d3791618a496a5c66d7d6fef9c7768cf34 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 26 Feb 2018 15:42:52 -0800 Subject: [PATCH 108/884] [XLA] Fix #17090 a problem in IrArray::Index::SourceIndexOfTranspose. Algebraic simplification transforms bitcast-equivalent transpose/reshape instructions to bitcast instructions before IR emission. As such, we should skip the checking on whether a transpose/reshape instruction is bitcast-equivalent or not during IR emission. Remove the call from IrArray::Index::SourceIndexOfTranspose to ShapeUtil::TransposeIsBitcast. Also remove the call from IrArray::Index::SourceIndexOfReshape to ShapeUtil::ReshapeIsBitcast. Remove the calls to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast from NotWorthHoistingIndividually because layout assignment hasn't been done there yet. Instead, returns true when the input is a transpose or reshape instruction, to prevent it from being hoisted out of loops. Add a check to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast to make sure that both input shape and output shape have layouts. Add two test cases. 
PiperOrigin-RevId: 187093399 --- .../xla/service/layout_assignment_test.cc | 79 +++++++++++++++++++ .../compiler/xla/service/llvm_ir/ir_array.cc | 8 +- .../while_loop_invariant_code_motion.cc | 12 +-- tensorflow/compiler/xla/shape_util.cc | 14 +--- tensorflow/compiler/xla/shape_util.h | 4 + 5 files changed, 95 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 88e5caaf47..62feb7c1e9 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -590,6 +590,85 @@ TEST_F(LayoutAssignmentTest, TransposeToBitcastToUser) { transpose->shape(), {2, 3, 0, 1})); } +// TransposeIsBitcast shouldn't be called without layout information. +TEST_F(LayoutAssignmentTest, TransposeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = builder.AddInstruction( + HloInstruction::CreateTranspose(input_shape, param, {0, 2, 1})); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH(ShapeUtil::TransposeIsBitcast(hlo->operand(0)->shape(), + hlo->shape(), hlo->dimensions()), + "LayoutUtil::HasLayout"); +} + +// ReshapeIsBitcast shouldn't be called without layout information. 
+TEST_F(LayoutAssignmentTest, ReshapeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = + builder.AddInstruction(HloInstruction::CreateReshape(input_shape, param)); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH( + ShapeUtil::ReshapeIsBitcast(hlo->operand(0)->shape(), hlo->shape()), + "LayoutUtil::HasLayout"); +} + +// Check that the computation below doesn't crash the compiler. +// +// Within a fusion computation, only the parameters and result get assigned a +// layout. When we run the algebraic simplifier on this computation post layout +// assignment, it should not call TransposeIsBitcast on the `transpose` node +// inside the fusion computation as TransposeIsBitcast checks both input_shape +// and output_shape have layouts. 
+TEST_F(LayoutAssignmentTest, TransposeWithinFusionDoesNotCrash) { + const char* module_str = R"( + HloModule test_module + + fused_computation { + param_1 = f32[2,2,2]{2,1,0} parameter(1) + transpose = f32[2,2,2]{2,1,0} transpose(param_1), dimensions={0,2,1} + reduce_1 = f32[] parameter(0) + broadcast_1 = f32[2,2,2]{2,1,0} broadcast(reduce_1), dimensions={} + ROOT divide_1 = f32[2,2,2]{2,1,0} divide(transpose, broadcast_1) + } + + ENTRY entry_computation { + fusion.1 = f32[2,2,2]{2,1,0} parameter(1) + reduce.1 = f32[] parameter(0) + fusion.2 = f32[2,2,2]{2,1,0} fusion(reduce.1, fusion.1), kind=kLoop, calls=fused_computation + ROOT tuple.1 = (f32[2,2,2]{2,1,0}) tuple(fusion.2) + } + )"; + + auto module = tools::Parse(module_str).ValueOrDie(); + + module = + backend() + .compiler() + ->RunHloPasses(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .ConsumeValueOrDie(); + + EXPECT_EQ( + ::tensorflow::Status::OK(), + backend() + .compiler() + ->RunBackend(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .status()); +} + // A GTE inside of a fusion node inherits the layout of its operand (which // should, if we keep following operands, eventually be a parameter). 
TEST_F(LayoutAssignmentTest, GTEInheritsLayoutFromOperand) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 6384c7f46f..f3642cf0a1 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -160,7 +160,8 @@ IrArray::Index IrArray::Index::SourceIndexOfReshape( } } - if (linear() != nullptr && + if (linear() != nullptr && LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape) && ShapeUtil::ReshapeIsBitcast(input_shape, output_shape)) { return Index(source_multidim_index, linear(), input_shape); } @@ -195,10 +196,13 @@ IrArray::Index IrArray::Index::SourceIndexOfTranspose( llvm::IRBuilder<>* builder) const { std::vector operand_multidim_index = Permute(dimension_mapping, multidim()); - if (linear() != nullptr && + + if (linear() != nullptr && LayoutUtil::HasLayout(operand_shape) && + LayoutUtil::HasLayout(shape) && ShapeUtil::TransposeIsBitcast(operand_shape, shape, dimension_mapping)) { return Index(operand_multidim_index, linear(), operand_shape); } + return Index(operand_multidim_index); } diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index a5f9b01f01..3ef0cdff67 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -106,20 +106,12 @@ static bool NotWorthHoistingIndividually(const HloInstruction& instruction) { case HloOpcode::kBitcast: case HloOpcode::kBroadcast: case HloOpcode::kConstant: + case HloOpcode::kReshape: case HloOpcode::kReverse: case HloOpcode::kSlice: + case HloOpcode::kTranspose: case HloOpcode::kTuple: return true; - - case HloOpcode::kTranspose: - return ShapeUtil::TransposeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape(), 
instruction.dimensions()); - - case HloOpcode::kReshape: - return ShapeUtil::ReshapeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape()); } } diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 604e0173e7..3152789016 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1073,11 +1073,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping) { - // Can't insert bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) && - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) && LayoutUtil::IsPadded(output_shape)) { @@ -1106,11 +1103,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape) { - // Can't convert reshapes into bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) || - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. 
if (LayoutUtil::IsPadded(input_shape) || LayoutUtil::IsPadded(output_shape)) { diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 19b1aa93bd..8ee263fe5e 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -522,12 +522,16 @@ class ShapeUtil { // Returns whether a transpose from input_shape to output_shape with dimension // mapping "dimension_mapping" produces a result which is bit-wise identical // to its input and thus may be replaced with a bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping); // Returns whether a reshape from "input_shape" to "output_shape" is a // bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape); -- GitLab From 7c512d5461eeff635acf1c7d0f301f5bb880b6b3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 16:01:04 -0800 Subject: [PATCH 109/884] [XLA] Add more supported dtypes to the local Python client. PiperOrigin-RevId: 187096144 --- tensorflow/compiler/xla/python/xla_client.py | 38 ++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 3b8ec851d5..90cda42f32 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -30,9 +30,9 @@ from tensorflow.compiler.xla import xla_data_pb2 from tensorflow.compiler.xla.python import pywrap_xla as c_api -# Most functions are snake_case for consistency with other modules, -# whereas method names of ComputationBuilder and LocalComputation are -# CamelCase for consistency with XLA. 
+# Most functions are snake_case for consistency with other modules, whereas +# method names of ComputationBuilder and LocalComputation are CamelCase for +# consistency with XLA. # pylint: disable=invalid-name @@ -123,24 +123,34 @@ _BINARY_OPS = [ 'Pow', ] + XLA_ELEMENT_TYPE_TO_DTYPE = { - xla_data_pb2.F32: np.dtype(np.float32), - xla_data_pb2.F64: np.dtype(np.float64), - xla_data_pb2.S32: np.dtype(np.int32), - xla_data_pb2.S64: np.dtype(np.int64), - xla_data_pb2.U32: np.dtype(np.uint32), - xla_data_pb2.U64: np.dtype(np.uint64), - xla_data_pb2.PRED: np.dtype(np.bool), + xla_data_pb2.PRED: np.dtype('bool'), + xla_data_pb2.S8: np.dtype('int8'), + xla_data_pb2.S16: np.dtype('int16'), + xla_data_pb2.S32: np.dtype('int32'), + xla_data_pb2.S64: np.dtype('int64'), + xla_data_pb2.U8: np.dtype('uint8'), + xla_data_pb2.U16: np.dtype('uint16'), + xla_data_pb2.U32: np.dtype('uint32'), + xla_data_pb2.U64: np.dtype('uint64'), + xla_data_pb2.F16: np.dtype('float16'), + xla_data_pb2.F32: np.dtype('float32'), + xla_data_pb2.F64: np.dtype('float64'), + xla_data_pb2.C64: np.dtype('complex64'), xla_data_pb2.TUPLE: np.dtype(np.object), } # Note the conversion on the key. Numpy has a known issue wherein dtype hashing # doesn't work as expected (https://github.com/numpy/numpy/issues/7242). Thus, # when keying by dtype in this dict, we use the string form of dtypes. -DTYPE_TO_XLA_ELEMENT_TYPE = { - str(v): k - for k, v in XLA_ELEMENT_TYPE_TO_DTYPE.items() -} +DTYPE_TO_XLA_ELEMENT_TYPE = {str(dt): et + for et, dt in XLA_ELEMENT_TYPE_TO_DTYPE.items()} + + +def dtype_to_etype(dtype): + """Convenience function for reading DTYPE_TO_XLA_ELEMENT_TYPE.""" + return DTYPE_TO_XLA_ELEMENT_TYPE[str(np.dtype(dtype))] class LocalBuffer(object): -- GitLab From 511cf67f2327e9186124a92c9469dc60fd64a6a2 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Mon, 26 Feb 2018 16:23:46 -0800 Subject: [PATCH 110/884] Deprecate tf.contrib.learn. RELNOTES: Deprecated tf.contrib.learn. 
Please check contrib/learn/README.md for instructions on how to convert existing code. PiperOrigin-RevId: 187099439 --- .../python/framework/experimental_test.py | 1 - tensorflow/contrib/learn/README.md | 143 ++++++++++++++++++ tensorflow/contrib/learn/__init__.py | 7 +- tensorflow/contrib/learn/python/__init__.py | 7 +- .../contrib/learn/python/learn/__init__.py | 7 +- .../python/learn/basic_session_run_hooks.py | 43 +++++- .../learn/python/learn/datasets/__init__.py | 12 +- .../learn/python/learn/datasets/base.py | 26 +++- .../learn/python/learn/datasets/mnist.py | 23 ++- .../learn/datasets/produce_small_datasets.py | 7 +- .../learn/python/learn/datasets/synthetic.py | 10 +- .../python/learn/datasets/text_datasets.py | 10 +- .../learn/python/learn/estimators/__init__.py | 7 +- .../learn/python/learn/estimators/_sklearn.py | 4 +- .../learn/estimators/composable_model.py | 17 ++- .../python/learn/estimators/constants.py | 8 +- .../learn/python/learn/estimators/debug.py | 14 +- .../learn/python/learn/estimators/dnn.py | 19 ++- .../learn/estimators/dnn_linear_combined.py | 19 ++- .../learn/estimators/dynamic_rnn_estimator.py | 13 +- .../python/learn/estimators/estimator.py | 27 +++- .../learn/estimators/estimator_test_utils.py | 7 +- .../learn/python/learn/estimators/head.py | 20 ++- .../learn/python/learn/estimators/kmeans.py | 9 +- .../learn/python/learn/estimators/linear.py | 19 ++- .../learn/estimators/logistic_regressor.py | 10 +- .../python/learn/estimators/metric_key.py | 10 +- .../learn/python/learn/estimators/model_fn.py | 22 ++- .../python/learn/estimators/prediction_key.py | 8 +- .../python/learn/estimators/rnn_common.py | 7 +- .../python/learn/estimators/run_config.py | 19 ++- .../estimators/state_saving_rnn_estimator.py | 13 +- .../learn/python/learn/estimators/svm.py | 11 +- .../learn/estimators/tensor_signature.py | 11 +- .../python/learn/estimators/test_data.py | 7 +- .../contrib/learn/python/learn/evaluable.py | 11 +- 
.../contrib/learn/python/learn/experiment.py | 24 +-- .../learn/python/learn/export_strategy.py | 14 +- .../learn/python/learn/graph_actions.py | 8 +- .../learn/python/learn/learn_io/__init__.py | 7 +- .../learn/python/learn/learn_io/dask_io.py | 11 +- .../python/learn/learn_io/data_feeder.py | 29 +++- .../python/learn/learn_io/generator_io.py | 9 +- .../learn/python/learn/learn_io/graph_io.py | 16 +- .../learn/python/learn/learn_io/numpy_io.py | 9 +- .../learn/python/learn/learn_io/pandas_io.py | 12 +- .../learn/python/learn/learn_runner.py | 10 +- .../learn/python/learn/learn_runner_lib.py | 6 +- .../contrib/learn/python/learn/metric_spec.py | 13 +- .../contrib/learn/python/learn/models.py | 14 +- .../learn/python/learn/monitored_session.py | 6 +- .../contrib/learn/python/learn/monitors.py | 68 ++++++++- .../learn/python/learn/ops/__init__.py | 7 +- .../learn/python/learn/ops/embeddings_ops.py | 6 +- .../learn/python/learn/ops/losses_ops.py | 7 +- .../learn/python/learn/ops/seq2seq_ops.py | 12 +- .../python/learn/preprocessing/__init__.py | 7 +- .../python/learn/preprocessing/categorical.py | 15 +- .../preprocessing/categorical_vocabulary.py | 13 +- .../learn/python/learn/preprocessing/text.py | 26 +++- .../learn/python/learn/session_run_hook.py | 6 +- .../python/learn/summary_writer_cache.py | 5 +- .../contrib/learn/python/learn/trainable.py | 9 +- .../learn/python/learn/utils/__init__.py | 7 +- .../learn/python/learn/utils/export.py | 9 +- .../contrib/learn/python/learn/utils/gc.py | 13 +- .../python/learn/utils/input_fn_utils.py | 16 +- .../python/learn/utils/inspect_checkpoint.py | 2 +- .../learn/utils/saved_model_export_utils.py | 30 +++- tensorflow/python/util/decorator_utils.py | 2 +- 70 files changed, 945 insertions(+), 111 deletions(-) create mode 100644 tensorflow/contrib/learn/README.md diff --git a/tensorflow/contrib/framework/python/framework/experimental_test.py b/tensorflow/contrib/framework/python/framework/experimental_test.py index 
8e54e09e04..cfdc7df7d8 100644 --- a/tensorflow/contrib/framework/python/framework/experimental_test.py +++ b/tensorflow/contrib/framework/python/framework/experimental_test.py @@ -49,7 +49,6 @@ class ExperimentalTest(test.TestCase): "\nTHIS FUNCTION IS EXPERIMENTAL. It may change or " "be removed at any time, and without warning." "\n" - "\n" "\nArgs:" "\n arg0: Arg 0." "\n arg1: Arg 1." diff --git a/tensorflow/contrib/learn/README.md b/tensorflow/contrib/learn/README.md new file mode 100644 index 0000000000..d516bffc5e --- /dev/null +++ b/tensorflow/contrib/learn/README.md @@ -0,0 +1,143 @@ +EVERYTHING IN THIS DIRECTORY IS DEPRECATED. + +Using functions or classes will result in warnings. + +Instructions for converting to current alternatives are included in the +warnings. A high-level overview is below. + +## Canned Estimators + +Many canned estimators (subclasses of `Estimator`) have equivalents in core: +`DNNClassifier`, `DNNRegressor`, `DNNEstimator`, `LinearClassifier`, +`LinearRegressor`, `DNNLinearCombinedClassifier` and +`DNNLinearCombinedRegressor`. They are exposed under `tf.estimator`. +`DNNEstimator`, `LinearEstimator` and `DNNLinearCombinedEstimator` +are exposed under `tf.contrib.estimator`. + +To migrate to the new api, users need to take the following steps: + +* Replace `tf.contrib.learn` with `tf.estimator`. +* If you subclass any of the estimators, stop doing that. You should be able to + write a factory method that returns a canned estimator instead. If this is not + possible (if you override methods from the canned estimator), consider writing + a custom estimator instead. See `tf.estimator.Estimator`. +* Set `loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE` to preserve loss + reduction as the average over batch. +* Some optimizer-related arguments are no longer passed in the estimator + constructor. Instead, we provide methods that perform the same job by wrapping + an optimizer. 
Specifically: + * `gradient_clip_norm`: Use `tf.contrib.estimator.clip_gradients_by_norm` + * `embedding_lr_multipliers`: Not supported. + Other arguments: + * `input_layer_min_slice_size`: Replaced by `input_layer_partitioner` + * `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. + * `feature_engineering_fn`: Not supported. You can call your + `feature_engineering_fn` inside your input_fn: + ```python + def new_input_fn(): + features, labels = old_input_fn() + return feature_engineering_fn(features, labels) + ``` +* Use `tf.reshape` to reshape labels in your `input_fn`. `tf.estimator` + classifiers and regressors expect labels as a 2D Tensor of shape + `[batch_size, 1]`, or `[batch_size, n_labels]`. In contrast, + `tf.contrib.learn` classifiers and regressors supported labels with shape + `[batch_size]`. +* If you pass custom metrics from the `evaluate()` method call, use + `tf.contrib.estimator.add_metrics`. +* Replace your `serving_input_fn` with a `serving_input_receiver_fn`. + Note this should be entirely distinct from your training `input_fn`, so if you + previously had one `input_fn` with different "modes", you should now factor + that apart. Where the former returned either a simple `(features, labels)` + tuple or `InputFnOps`, you should now return a `ServingInputReceiver`. + If you were generating your `serving_input_fn` using the + `build_parsing_serving_input_fn` helper, you can simply drop in the + replacement `build_parsing_serving_input_receiver_fn`. + +Some remaining estimators/classes: + +* `DynamicRnnEstimator`: Consider a custom `model_fn`. +* `KMeansClustering`: Use `tf.contrib.factorization.KMeansClustering`. +* `LogisticRegressor`: Not supported. Instead, use `binary_classification_head` + with a custom `model_fn`, or with `DNNEstimator`. +* `StateSavingRnnEstimator`: Consider a custom `model_fn`. +* SVM: Consider a custom `model_fn`. 
+* `LinearComposableModel` and `DNNComposableModel`: Not supported. + Consider `tf.contrib.estimator.DNNEstimator`, or write a custom model_fn. +* `MetricSpec`: Deprecated. For adding custom metrics to canned Estimators, use + `tf.contrib.estimator.add_metrics`. + +## Estimator +`tf.contrib.learn.Estimator` is migrated to `tf.estimator.Estimator`. + +To migrate, users need to take the following steps: + +* Replace `tf.contrib.learn.Estimator` with `tf.estimator.Estimator`. +* If you pass a `config` argument to `Estimator`, this must be + `tf.estimator.RunConfig`. You may need to edit your code accordingly. +* Edit your `model_fn` to return `tf.estimator.EstimatorSpec`. Refer to + `EstimatorSpec` for documentation of specific fields. +* If your `model_fn` uses the `mode` argument, use `tf.estimator.ModeKeys`. + +Some related classes: +* `Evaluable`, `Trainable`: Not supported, merged into `tf.estimator.Estimator`. +* ExportStrategy: Replaced by `tf.estimator.Exporter`. + +## Head/MultiHead +These classes are now supported under `tf.contrib.estimator`, e.g. +`tf.contrib.estimator.multi_class_head` and `tf.contrib.estimator.multi_head`. + +Some differences: + +* `multi_class_head`: If you use `tf.contrib.learn.multi_class_head` with + `n_classes=2`, switch to `tf.contrib.estimator.binary_classification_head`. +* `loss_only_head`: Not supported. +* `poisson_regression_head`: Not supported (yet). +* `binary_svm_head`: Not supported (yet). +* `no_op_train_fn`: Replace it with `tf.no_op`. + +Some arguments are renamed, please refer to documentation. In addition: + +* `loss_fn`: Supported for `multi_label_head`. If you need it for other heads, + please open an issue. +* `metric_class_ids`: Not supported (yet). +* `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. +* `label_name`: Not needed in `tf.estimator`. If you don’t use `multi_head`, + drop this argument. 
If you use `multi_head`, refer to + `tf.contrib.estimator.multi_head` documentation. + +## Experiment Class - Distributed Training Tooling + +Switch to `tf.estimator.train_and_evaluate`. Some differences: + +* Most of the constructor arguments, like `train_input_fn`, `eval_input_fn`, + should be wrapped into `tf.estimator.TrainSpec` and `tf.estimator.EvalSpec`. +* Remove the `experiment_fn`. Instead, create the `Estimator`, + `train_spec` and `eval_spec`, then call `tf.estimator.train_and_evaluate` + directly. +* Inside `tf.estimator.EvalSpec`, the `exporter` field is the replacement + for `export_strategy`. To be precise, `tf.estimator.LatestExporter` is the + replacement for `tf.contrib.learn.make_export_strategy`. If you want to export + only at the end of training use `tf.estimator.FinalExporter`. +* If the `TF_CONFIG` environment variable is constructed manually, please read + the `train_and_evaluate` documentation for the new requirements (in + particular, the chief node and evaluator node). + +## Other Classes and Functions + +* `tf.contrib.learn.datasets` is deprecated. We are adding ready to use datasets + to tensorflow/models. Many smaller datasets are available from other sources, + such as scikits.learn. Some Python processing may have to be written, but this + is straightforward to implement using the standard modules. +* `tf.contrib.learn.preprocessing`: Deprecated. The python-only preprocessing + functions are not a good fit for TensorFlow. Please use `tf.data`, and + consider tensorflow/transform for more complex use cases. +* `tf.contrib.learn.models`: Not supported, use canned estimators instead. +* `tf.contrib.learn.monitors`: Implement `SessionRunHook` instead. Hook + implementations are in `tf.train`. +* `tf.contrib.learn.learn_io`: Use the methods in `tf.estimator.inputs`, such as + `tf.estimator.inputs.numpy_input_fn`. Some utility functions have no + equivalent, we encourage the use of `tf.data`.
+ diff --git a/tensorflow/contrib/learn/__init__.py b/tensorflow/contrib/learn/__init__.py index 3698af027e..79bd73faaf 100644 --- a/tensorflow/contrib/learn/__init__.py +++ b/tensorflow/contrib/learn/__init__.py @@ -13,8 +13,11 @@ # limitations under the License. # ============================================================================== -# TODO(ptucker,ipolosukhin): Improve descriptions. -"""High level API for learning. +"""High level API for learning (DEPRECATED). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. See the @{$python/contrib.learn} guide. diff --git a/tensorflow/contrib/learn/python/__init__.py b/tensorflow/contrib/learn/python/__init__.py index bbebd5ab97..df23aeb2c4 100644 --- a/tensorflow/contrib/learn/python/__init__.py +++ b/tensorflow/contrib/learn/python/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py index cdc67c77d5..76e0e8ac8f 100644 --- a/tensorflow/contrib/learn/python/learn/__init__.py +++ b/tensorflow/contrib/learn/python/learn/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py index 2284ec46e9..fed1c44d19 100644 --- a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py +++ b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py @@ -12,20 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Some common SessionRunHook classes.""" +"""Some common SessionRunHook classes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions.
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.training import basic_session_run_hooks +from tensorflow.python.util.deprecation import deprecated_alias # pylint: disable=invalid-name -LoggingTensorHook = basic_session_run_hooks.LoggingTensorHook -StopAtStepHook = basic_session_run_hooks.StopAtStepHook -CheckpointSaverHook = basic_session_run_hooks.CheckpointSaverHook -StepCounterHook = basic_session_run_hooks.StepCounterHook -NanLossDuringTrainingError = basic_session_run_hooks.NanLossDuringTrainingError -NanTensorHook = basic_session_run_hooks.NanTensorHook -SummarySaverHook = basic_session_run_hooks.SummarySaverHook +LoggingTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.LoggingTensorHook', + 'tf.train.LoggingTensorHook', + basic_session_run_hooks.LoggingTensorHook) +StopAtStepHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StopAtStepHook', + 'tf.train.StopAtStepHook', + basic_session_run_hooks.StopAtStepHook) +CheckpointSaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.CheckpointSaverHook', + 'tf.train.CheckpointSaverHook', + basic_session_run_hooks.CheckpointSaverHook) +StepCounterHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StepCounterHook', + 'tf.train.StepCounterHook', + basic_session_run_hooks.StepCounterHook) +NanLossDuringTrainingError = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanLossDuringTrainingError', + 'tf.train.NanLossDuringTrainingError', + basic_session_run_hooks.NanLossDuringTrainingError) +NanTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanTensorHook', + 'tf.train.NanTensorHook', + basic_session_run_hooks.NanTensorHook) +SummarySaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.SummarySaverHook', + 'tf.train.SummarySaverHook', + basic_session_run_hooks.SummarySaverHook) # pylint: 
enable=invalid-name diff --git a/tensorflow/contrib/learn/python/learn/datasets/__init__.py b/tensorflow/contrib/learn/python/learn/datasets/__init__.py index 7240b0de14..3c34712ac8 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/__init__.py +++ b/tensorflow/contrib/learn/python/learn/datasets/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Dataset utilities and synthetic/reference datasets.""" +"""Dataset utilities and synthetic/reference datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.contrib.learn.python.learn.datasets import mnist from tensorflow.contrib.learn.python.learn.datasets import synthetic from tensorflow.contrib.learn.python.learn.datasets import text_datasets +from tensorflow.python.util.deprecation import deprecated # Export load_iris and load_boston. load_iris = base.load_iris @@ -51,6 +57,7 @@ SYNTHETIC = { } +@deprecated(None, 'Please use tf.data.') def load_dataset(name, size='small', test_with_fake_data=False): """Loads dataset by name. @@ -73,8 +80,9 @@ def load_dataset(name, size='small', test_with_fake_data=False): return DATASETS[name]() +@deprecated(None, 'Please use tf.data.') def make_dataset(name, n_samples=100, noise=None, seed=42, *args, **kwargs): - """Creates binary synthetic datasets + """Creates binary synthetic datasets. 
Args: name: str, name of the dataset to generate diff --git a/tensorflow/contrib/learn/python/learn/datasets/base.py b/tensorflow/contrib/learn/python/learn/datasets/base.py index ca720ae5ed..3b5c9b97c0 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/base.py +++ b/tensorflow/contrib/learn/python/learn/datasets/base.py @@ -12,7 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base utilities for loading datasets.""" + +"""Base utilities for loading datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -29,11 +35,14 @@ import numpy as np from six.moves import urllib from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated + Dataset = collections.namedtuple('Dataset', ['data', 'target']) Datasets = collections.namedtuple('Datasets', ['train', 'validation', 'test']) +@deprecated(None, 'Use tf.data instead.') def load_csv_with_header(filename, target_dtype, features_dtype, @@ -53,6 +62,7 @@ def load_csv_with_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def load_csv_without_header(filename, target_dtype, features_dtype, @@ -70,6 +80,7 @@ def load_csv_without_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def shrink_csv(filename, ratio): """Create a smaller dataset of only 1/ratio of original data.""" filename_small = filename.replace('.', '_small.') @@ -84,6 +95,7 @@ def shrink_csv(filename, ratio): i += 1 +@deprecated(None, 'Use scikits.learn.datasets.') def load_iris(data_path=None): """Load Iris dataset. 
@@ -100,6 +112,7 @@ def load_iris(data_path=None): data_path, target_dtype=np.int, features_dtype=np.float) +@deprecated(None, 'Use scikits.learn.datasets.') def load_boston(data_path=None): """Load Boston housing dataset. @@ -116,7 +129,12 @@ def load_boston(data_path=None): data_path, target_dtype=np.float, features_dtype=np.float) -def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): +@deprecated(None, 'Use the retry module or similar alternatives.') +def retry(initial_delay, + max_delay, + factor=2.0, + jitter=0.25, + is_retriable=None): """Simple decorator for wrapping retriable functions. Args: @@ -152,7 +170,7 @@ def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): for delay in delays(): try: return fn(*args, **kwargs) - except Exception as e: # pylint: disable=broad-except) + except Exception as e: # pylint: disable=broad-except if is_retriable is None: continue @@ -176,11 +194,13 @@ def _is_retriable(e): return isinstance(e, IOError) and e.errno in _RETRIABLE_ERRNOS +@deprecated(None, 'Please use urllib or similar directly.') @retry(initial_delay=1.0, max_delay=16.0, is_retriable=_is_retriable) def urlretrieve_with_retry(url, filename=None): return urllib.request.urlretrieve(url, filename) +@deprecated(None, 'Please write your own downloading logic.') def maybe_download(filename, work_directory, source_url): """Download the data from source url, unless it's already here. diff --git a/tensorflow/contrib/learn/python/learn/datasets/mnist.py b/tensorflow/contrib/learn/python/learn/datasets/mnist.py index 37f9175015..abbb44c2f5 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/mnist.py +++ b/tensorflow/contrib/learn/python/learn/datasets/mnist.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Functions for downloading and reading MNIST data.""" +"""Functions for downloading and reading MNIST data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated # CVDF mirror of http://yann.lecun.com/exdb/mnist/ DEFAULT_SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/' @@ -37,6 +43,7 @@ def _read32(bytestream): return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_images(f): """Extract the images into a 4D uint8 numpy array [index, y, x, depth]. @@ -65,6 +72,7 @@ def extract_images(f): return data +@deprecated(None, 'Please use tf.one_hot on tensors.') def dense_to_one_hot(labels_dense, num_classes): """Convert class labels from scalars to one-hot vectors.""" num_labels = labels_dense.shape[0] @@ -74,6 +82,7 @@ def dense_to_one_hot(labels_dense, num_classes): return labels_one_hot +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_labels(f, one_hot=False, num_classes=10): """Extract the labels into a 1D uint8 numpy array [index]. @@ -103,7 +112,15 @@ def extract_labels(f, one_hot=False, num_classes=10): class DataSet(object): + """Container class for a dataset (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def __init__(self, images, labels, @@ -210,6 +227,8 @@ class DataSet(object): return self._images[start:end], self._labels[start:end] +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def read_data_sets(train_dir, fake_data=False, one_hot=False, @@ -275,5 +294,7 @@ def read_data_sets(train_dir, return base.Datasets(train=train, validation=validation, test=test) +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def load_mnist(train_dir='MNIST-data'): return read_data_sets(train_dir) diff --git a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py index 6e0ba38941..a4848fa64a 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Produce DBpedia datasets of a smaller size.""" +"""Produce DBpedia datasets of a smaller size (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py index 9a843168c2..6a0e3350b3 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py +++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Synthetic dataset generators.""" +"""Synthetic dataset generators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -21,8 +26,10 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.learn.python.learn.datasets.base import Dataset +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def circles(n_samples=100, noise=None, seed=None, @@ -93,6 +100,7 @@ def circles(n_samples=100, return Dataset(data=X[indices], target=y[indices]) +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def spirals(n_samples=100, noise=None, seed=None, diff --git a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py index 2596a2ecaf..ce94663017 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Text datasets.""" +"""Text datasets (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -26,10 +31,12 @@ import numpy as np from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated DBPEDIA_URL = 'https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz' +@deprecated(None, 'See contrib/learn/README.md') def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') @@ -41,6 +48,7 @@ def maybe_download_dbpedia(data_dir): tfile.extractall(data_dir) +@deprecated(None, 'See contrib/learn/README.md') def load_dbpedia(size='small', test_with_fake_data=False): """Get DBpedia datasets from CSV files.""" if not test_with_fake_data: diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index 4981750c94..3e64595f31 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""An estimator is a rule for calculating an estimate of a given quantity. +"""An estimator is a rule for calculating an estimate of a given quantity (deprecated). + +These classes are deprecated and replaced with `tf.estimator`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
# Estimators diff --git a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py index 15277415a1..1f0e4663d0 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== -"""sklearn cross-support.""" +"""sklearn cross-support (deprecated).""" from __future__ import absolute_import from __future__ import division @@ -132,6 +132,8 @@ class _TransformerMixin(): class NotFittedError(ValueError, AttributeError): """Exception class to raise if estimator is used before fitting. + USE OF THIS EXCEPTION IS DEPRECATED. + This class inherits from both ValueError and AttributeError to help with exception handling and backward compatibility. diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py index a02c726c74..1fa58271e2 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py +++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow composable models used as building blocks for estimators.""" +"""TensorFlow composable models used as building blocks for estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -34,6 +39,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated class _ComposableModel(object): @@ -46,6 +52,7 @@ class _ComposableModel(object): _ComposableModel and its subclasses are not part of the public tf.learn API. """ + @deprecated(None, "Please use model_fns in tf.estimator.") def __init__(self, num_label_columns, optimizer, @@ -141,6 +148,10 @@ class _ComposableModel(object): class LinearComposableModel(_ComposableModel): """A _ComposableModel that implements linear regression. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ @@ -252,6 +263,10 @@ class LinearComposableModel(_ComposableModel): class DNNComposableModel(_ComposableModel): """A _ComposableModel that implements a DNN. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ diff --git a/tensorflow/contrib/learn/python/learn/estimators/constants.py b/tensorflow/contrib/learn/python/learn/estimators/constants.py index fc69e81024..d2548946bc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/constants.py +++ b/tensorflow/contrib/learn/python/learn/estimators/constants.py @@ -13,9 +13,11 @@ # limitations under the License. # ============================================================================== -"""Constants regarding Estimators. +"""Constants regarding Estimators (deprecated). 
-This file is obsoleted in the move of Estimator to core. +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ from __future__ import absolute_import from __future__ import division @@ -25,6 +27,8 @@ from __future__ import print_function class ProblemType(object): """Enum-like values for the type of problem that the model solves. + THIS CLASS IS DEPRECATED. + These values are used when exporting the model to produce the appropriate signature function for serving. diff --git a/tensorflow/contrib/learn/python/learn/estimators/debug.py b/tensorflow/contrib/learn/python/learn/estimators/debug.py index 9d5f6c2bf9..24b067b7e3 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/debug.py +++ b/tensorflow/contrib/learn/python/learn/estimators/debug.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Debug estimators. +"""Debug estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Debug estimators are bias-only estimators that can be used for debugging and as simple baselines. @@ -118,6 +122,10 @@ def debug_model_fn(features, labels, mode, params, config=None): class DebugClassifier(estimator.Estimator): """A classifier for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -237,6 +245,10 @@ class DebugClassifier(estimator.Estimator): class DebugRegressor(estimator.Estimator): """A regressor for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py index c17b41c0f7..eabebb7e88 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Deep Neural Network estimators.""" +"""Deep Neural Network estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -212,6 +217,10 @@ def _dnn_model_fn(features, labels, mode, params, config=None): class DNNClassifier(estimator.Estimator): """A classifier for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -521,6 +530,10 @@ class DNNClassifier(estimator.Estimator): class DNNRegressor(estimator.Estimator): """A regressor for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -796,6 +809,10 @@ class DNNRegressor(estimator.Estimator): class DNNEstimator(estimator.Estimator): """A Estimator for TensorFlow DNN models with user specified _Head. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py index 7266122350..3d85533d92 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow estimators for Linear and DNN joined training models.""" +"""TensorFlow estimators for Linear and DNN joined training models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -372,6 +377,10 @@ def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): class DNNLinearCombinedEstimator(estimator.Estimator): """An estimator for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. @@ -490,6 +499,10 @@ class DNNLinearCombinedEstimator(estimator.Estimator): class DNNLinearCombinedClassifier(estimator.Estimator): """A classifier for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. 
@@ -832,6 +845,10 @@ class DNNLinearCombinedClassifier(estimator.Estimator): class DNNLinearCombinedRegressor(estimator.Estimator): """A regressor for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index 69440e823e..a703dc66e9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for Dynamic RNNs.""" +"""Estimator for Dynamic RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -540,6 +545,12 @@ def _get_dynamic_rnn_model_fn( class DynamicRnnEstimator(estimator.Estimator): + """Dynamically unrolled RNN (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 4b63e08ab3..5262e04e16 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base Estimator class.""" +"""Base Estimator class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -138,6 +143,7 @@ def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1): return df.input_builder, df.get_feed_dict_fn() +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input_fn(input_fn): """Creates `FeatureColumn` objects for inputs defined by `input_fn`. @@ -158,6 +164,7 @@ def infer_real_valued_columns_from_input_fn(input_fn): return layers.infer_real_valued_columns(features) +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input(x): """Creates `FeatureColumn` objects for inputs defined by input `x`. @@ -389,6 +396,10 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, trainable.Trainable): """Abstract BaseEstimator class to train and evaluate TensorFlow models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Users should not instantiate or subclass this class. Instead, use an `Estimator`. 
""" @@ -399,6 +410,8 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, # TODO(wicke): Remove this once launcher takes over config functionality _Config = run_config.RunConfig # pylint: disable=invalid-name + @deprecated(None, 'Please replace uses of any Estimator from tf.contrib.learn' + ' with an Estimator from tf.estimator.*') def __init__(self, model_dir=None, config=None): """Initializes a BaseEstimator instance. @@ -1074,6 +1087,10 @@ def _identity_feature_engineering_fn(features, labels): class Estimator(BaseEstimator): """Estimator class is the basic TensorFlow model trainer/evaluator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ def __init__(self, @@ -1458,8 +1475,14 @@ class Estimator(BaseEstimator): # For time of deprecation x,y from Estimator allow direct access. # pylint: disable=protected-access class SKCompat(sklearn.BaseEstimator): - """Scikit learn wrapper for TensorFlow Learn Estimator.""" + """Scikit learn wrapper for TensorFlow Learn Estimator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please switch to the Estimator interface.') def __init__(self, estimator): self._estimator = estimator diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py index fd47710e30..e4c31396ba 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Utils for Estimator.""" +"""Utils for Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 9b124b2c19..2b4b6eff39 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Abstractions for the head(s) of a model. +"""Abstractions for the head(s) of a model (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -47,11 +52,16 @@ from tensorflow.python.summary import summary from tensorflow.python.training import training from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated class Head(object): """Interface for the head/top of a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Given logits (or output of a hidden layer), a Head knows how to compute predictions, loss, default metric and export signature. 
It is meant to, @@ -177,6 +187,7 @@ class Head(object): raise NotImplementedError("Calling an abstract method.") +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -216,6 +227,7 @@ def regression_head(label_name=None, link_fn=(link_fn if link_fn is not None else array_ops.identity)) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def poisson_regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -254,6 +266,7 @@ def poisson_regression_head(label_name=None, # TODO(zakaria): Consider adding a _RegressionHead for logistic_regression +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_class_head(n_classes, label_name=None, weight_column_name=None, @@ -335,6 +348,7 @@ def multi_class_head(n_classes, label_keys=label_keys) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def binary_svm_head( label_name=None, weight_column_name=None, @@ -370,6 +384,7 @@ def binary_svm_head( thresholds=thresholds) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_label_head(n_classes, label_name=None, weight_column_name=None, @@ -430,6 +445,7 @@ def multi_label_head(n_classes, loss_fn=_wrap_custom_loss_fn(loss_fn) if loss_fn else None) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def loss_only_head(loss_fn, head_name=None): """Creates a Head that contains only loss terms. @@ -447,6 +463,7 @@ def loss_only_head(loss_fn, head_name=None): return _LossOnlyHead(loss_fn, head_name=head_name) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_head(heads, loss_weights=None): """Creates a MultiHead stemming from same logits/hidden layer. 
@@ -479,6 +496,7 @@ def multi_head(heads, loss_weights=None): return _MultiHead(heads, loss_merger=_weighted_loss_merger) +@deprecated(None, "Use 'lambda _: tf.no_op()'.") def no_op_train_fn(loss): del loss return control_flow_ops.no_op() diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py index 8f9d6fc318..66ebcfd1d8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py +++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementation of k-means clustering on top of `Estimator` API. +"""Implementation of k-means clustering on top of `Estimator` API (deprecated). This module is deprecated. Please use @{tf.contrib.factorization.KMeansClustering} instead of @@ -153,7 +153,12 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config): # TODO(agarwal,ands): support sharded input. class KMeansClustering(estimator.Estimator): - """An Estimator for K-Means clustering.""" + """An Estimator for K-Means clustering. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE COSINE_DISTANCE = clustering_ops.COSINE_DISTANCE RANDOM_INIT = clustering_ops.RANDOM_INIT diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index 37aa8b3396..64d7ecc68e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Linear Estimators.""" +"""Linear Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -305,6 +310,10 @@ class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook): class LinearClassifier(estimator.Estimator): """Linear classifier model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear model to classify instances into one of multiple possible classes. When number of possible classes is 2, this is binary classification. @@ -625,6 +634,10 @@ class LinearClassifier(estimator.Estimator): class LinearRegressor(estimator.Estimator): """Linear regressor model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear regression model to predict label value given observation of feature values. @@ -860,6 +873,10 @@ class LinearRegressor(estimator.Estimator): class LinearEstimator(estimator.Estimator): """Linear model with user specified head. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a generalized linear model to predict label value given observation of feature values. 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py index fb339160d5..3cbcc6e98d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py +++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Logistic regression (aka binary classifier) class. +"""Logistic regression (aka binary classifier) class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This defines some useful basic metrics for using logistic regression to classify a binary event (0 vs 1). @@ -75,6 +79,10 @@ def LogisticRegressor( # pylint: disable=invalid-name feature_engineering_fn=None): """Builds a logistic regression Estimator for binary classification. + THIS FUNCTION IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This method provides a basic Estimator with some additional metrics for custom binary classification models, including AUC, precision/recall and accuracy. diff --git a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py index 99388f116b..f264248e44 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License.
# ============================================================================== -"""Enum for metric keys.""" +"""Enum for metric keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function class MetricKey(object): - """Metric key strings.""" + """Metric key strings (deprecated).""" + LOSS = "loss" AUC = "auc" AUC_PR = "auc_precision_recall" diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 44e6c7c52d..dcb161180c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Classes and methods related to model_fn.""" +"""Classes and methods related to model_fn (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -37,10 +42,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.training import session_run_hook +from tensorflow.python.util.deprecation import deprecated class ModeKeys(object): - """Standard names for model modes. + """Standard names for model modes (deprecated). + + THIS CLASS IS DEPRECATED. 
The following standard keys are defined: @@ -65,8 +73,16 @@ class ModelFnOps( 'output_alternatives', 'training_chief_hooks', 'training_hooks', 'scaffold', 'mode' ])): - """Ops returned from a model_fn.""" + """Ops returned from a model_fn. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'When switching to tf.estimator.Estimator, use ' + 'tf.estimator.EstimatorSpec. You can use the `estimator_spec`' + ' method to create an equivalent one.') def __new__(cls, mode, predictions=None, diff --git a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py index f8d87b8914..6fd2fc9d59 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Enum for model prediction keys. +"""Enum for model prediction keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This file is obsoleted in the move of Estimator to core. 
""" @@ -22,6 +26,8 @@ from __future__ import print_function class PredictionKey(object): + """THIS CLASS IS DEPRECATED.""" + CLASSES = "classes" PROBABILITIES = "probabilities" LOGITS = "logits" diff --git a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py index 2752bc2d90..215022e5d9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py +++ b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Common operations for RNN Estimators.""" +"""Common operations for RNN Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index fd90fd1cc6..1d161093de 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Run Config.""" +"""Run Config (deprecated, use tf.estimator.RunConfig instead). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -29,11 +34,12 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as core_run_config from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.util.deprecation import deprecated # A list of the property names in RunConfig user allows to change. They will # not affect the execution framework, so when execution framework checks the -# `uid` of the RunConfig, it should be ingored. +# `uid` of the RunConfig, it should be ignored. _DEFAULT_UID_WHITE_LIST = [ 'tf_random_seed', 'save_summary_steps', @@ -47,6 +53,7 @@ _DEFAULT_UID_WHITE_LIST = [ class Environment(object): + """DEPRECATED CLASS.""" # For running general distributed training. CLOUD = 'cloud' # For running Google-internal distributed training. @@ -56,6 +63,7 @@ class Environment(object): class TaskType(object): + """DEPRECATED CLASS.""" MASTER = 'master' PS = 'ps' WORKER = 'worker' @@ -64,6 +72,8 @@ class TaskType(object): class ClusterConfig(object): """This class specifies the configurations for a distributed run. + THIS CLASS IS DEPRECATED. Use tf.estimator.RunConfig instead. + If you're using an `Estimator`, you should probably use the subclass RunConfig instead. """ @@ -211,10 +221,13 @@ class ClusterConfig(object): class RunConfig(ClusterConfig, core_run_config.RunConfig): """This class specifies the configurations for an `Estimator` run. - This class is the implementation of @{tf.estimator.RunConfig} interface. + This class is a deprecated implementation of @{tf.estimator.RunConfig} + interface. 
""" _USE_DEFAULT = 0 + @deprecated(None, 'When switching to tf.estimator.Estimator, use' + ' tf.estimator.RunConfig instead.') def __init__(self, master=None, num_cores=0, diff --git a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py index 0cea35e219..de78c72c3a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for State Saving RNNs.""" +"""Estimator for State Saving RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -528,6 +533,12 @@ def _get_rnn_model_fn(cell_type, class StateSavingRnnEstimator(estimator.Estimator): + """RNN with static unrolling and state saving (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py index 72920d73c0..3459997bab 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/svm.py +++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Support Vector Machine (SVM) Estimator.""" +"""Support Vector Machine (SVM) Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -36,6 +41,10 @@ def _as_iterable(preds, output): class SVM(estimator.Estimator): """Support Vector Machine (SVM) model for binary classification. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Currently, only linear SVMs are supported. For the underlying optimization problem, the `SDCAOptimizer` is used. For performance and convergence tuning, the num_loss_partitions parameter passed to `SDCAOptimizer` (see `__init__()` diff --git a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py index a120bc6cc3..71b5658dd1 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py +++ b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorSignature class and utilities.""" +"""TensorSignature class and utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -33,6 +38,10 @@ class TensorSignature(collections.namedtuple( "TensorSignature", ["dtype", "shape", "is_sparse"])): """Signature of the `Tensor` object. 
+ THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Useful to check compatibility of tensors. Example: diff --git a/tensorflow/contrib/learn/python/learn/estimators/test_data.py b/tensorflow/contrib/learn/python/learn/estimators/test_data.py index ed201bfc58..e4b057b4f5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/test_data.py +++ b/tensorflow/contrib/learn/python/learn/estimators/test_data.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Test data utilities.""" +"""Test data utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/evaluable.py b/tensorflow/contrib/learn/python/learn/evaluable.py index 8f6cd39864..10881ca885 100644 --- a/tensorflow/contrib/learn/python/learn/evaluable.py +++ b/tensorflow/contrib/learn/python/learn/evaluable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Evaluable` interface.""" +"""`Evaluable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,10 @@ import abc class Evaluable(object): """Interface for objects that are evaluatable by, e.g., `Experiment`. 
+ + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 331bc11549..9a7c4cd685 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experiment class collecting information needed for a single training run.""" +"""Experiment class collecting information for a single training run (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -25,7 +30,6 @@ import os import time from tensorflow.contrib.framework import deprecated -from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework.python.framework import experimental from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import export_strategy @@ -118,6 +122,10 @@ class _EvalAndExportListener(basic_session_run_hooks.CheckpointSaverListener): class Experiment(object): """Experiment is a class containing all information needed to train a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ After an experiment is created (by passing an Estimator and inputs for training and evaluation), an Experiment instance knows how to invoke training and eval loops in a sensible fashion for distributed training. @@ -125,16 +133,8 @@ class Experiment(object): # TODO(ispir): remove delay_workers_by_global_step and make global step based # waiting as only behavior. - @deprecated_args( - "2016-10-23", - "local_eval_frequency is deprecated as local_run will be renamed to " - "train_and_evaluate. Use min_eval_frequency and call train_and_evaluate " - "instead. Note, however, that the default for min_eval_frequency is 1, " - "meaning models will be evaluated every time a new checkpoint is " - "available. In contrast, the default for local_eval_frequency is None, " - "resulting in evaluation occurring only after training has completed. " - "min_eval_frequency is ignored when calling the deprecated local_run.", - "local_eval_frequency") + @deprecated(None, "Please switch to tf.estimator.train_and_evaluate. You will" + " also have to convert to a tf.estimator.Estimator.") def __init__(self, estimator, train_input_fn, diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py index 55a8b82431..075cab536e 100644 --- a/tensorflow/contrib/learn/python/learn/export_strategy.py +++ b/tensorflow/contrib/learn/python/learn/export_strategy.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""ExportStrategy class represents different flavors of model export.""" +"""ExportStrategy class represents different flavors of model export (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,7 @@ from __future__ import print_function import collections from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated __all__ = ['ExportStrategy'] @@ -30,6 +36,10 @@ class ExportStrategy( ['name', 'export_fn', 'strip_default_attrs'])): """A class representing a type of model export. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Typically constructed by a utility function specific to the exporter, such as `saved_model_export_utils.make_export_strategy()`. @@ -56,6 +66,8 @@ class ExportStrategy( forward compatibility of the resulting `SavedModel`. """ + @deprecated(None, 'Please switch to tf.estimator.train_and_evaluate, and use ' + 'tf.estimator.Exporter.') def __new__(cls, name, export_fn, strip_default_attrs=None): return super(ExportStrategy, cls).__new__( cls, name, export_fn, strip_default_attrs) diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py index 98365c05f6..a997fab723 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level operations on graphs.""" +"""High level operations on graphs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -68,6 +73,7 @@ def clear_summary_writers(): return summary_io.SummaryWriterCache.clear() +@deprecated(None, 'Use `SummaryWriterCache.get` directly.') def get_summary_writer(logdir): """Returns single SummaryWriter per logdir in current run. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py index 06c3782a47..8b133a4440 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tools to allow different io formats.""" +"""Tools to allow different io formats (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py index 7d666391ce..e0a1948d95 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Methods to allow dask.DataFrame.""" +"""Methods to allow dask.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,8 @@ from __future__ import print_function import numpy as np +from tensorflow.python.util.deprecation import deprecated + try: # pylint: disable=g-import-not-at-top import dask.dataframe as dd @@ -60,6 +67,7 @@ def _construct_dask_df_with_divisions(df): return dd.Series(merge(dsk, df.dask), name, df.name, divisions) +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_data(data): """Extract data from dask.Series or dask.DataFrame for predictors. @@ -81,6 +89,7 @@ def extract_dask_data(data): return data +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_labels(labels): """Extract data from dask.Series or dask.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 96be8b1bc4..c45b1d1864 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementations of different data feeders to provide data for TF trainer.""" +"""Implementations of different data feeders to provide data for TF trainer (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" # TODO(ipolosukhin): Replace this module with feed-dict queue runners & queues. 
@@ -31,6 +36,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels @@ -101,6 +107,7 @@ def _is_iterable(x): return hasattr(x, 'next') or hasattr(x, '__next__') +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_train_data_feeder(x, y, n_classes, @@ -188,6 +195,7 @@ def _batch_data(x, batch_size=None): yield np.matrix(chunk) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_predict_data_feeder(x, batch_size=None): """Returns an iterable for feeding into predict step. @@ -219,6 +227,7 @@ def setup_predict_data_feeder(x, batch_size=None): return [x] +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_processor_data_feeder(x): """Sets up processor iterable. @@ -233,6 +242,7 @@ def setup_processor_data_feeder(x): return x +@deprecated(None, 'Please convert numpy dtypes explicitly.') def check_array(array, dtype): """Checks array on dtype and converts it if different. @@ -275,8 +285,14 @@ def _check_dtype(dtype): class DataFeeder(object): - """Data feeder is an example class to sample data for TF trainer.""" + """Data feeder is an example class to sample data for TF trainer. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, x, y, @@ -563,6 +579,10 @@ class DataFeeder(object): class StreamingDataFeeder(DataFeeder): """Data feeder for TF trainer that reads data from iterator. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Streaming data feeder allows to read data as it comes it from disk or somewhere else. It's custom to have this iterators rotate infinetly over the dataset, to allow control of how much to learn on the trainer side. @@ -771,11 +791,16 @@ class StreamingDataFeeder(DataFeeder): class DaskDataFeeder(object): """Data feeder for that reads data from dask.Series and dask.DataFrame. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Numpy arrays can be serialized to disk and it's possible to do random seeks into them. DaskDataFeeder will remove requirement to have full dataset in the memory and still do random seeks for sampling of batches. """ + @deprecated(None, 'Please feed input to tf.data to support dask.') def __init__(self, x, y, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py index 884faf8335..f8aaa0c9e3 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to allow generator of dict with numpy arrays.""" +"""Methods to allow generator of dict with numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -23,8 +28,10 @@ from types import FunctionType from types import GeneratorType from tensorflow.python.estimator.inputs.queues.feeding_functions import _enqueue_data as enqueue_data +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.data.') def generator_input_fn(x, target_key=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py index 3a46c23968..9e816f54b6 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to read data in the graph.""" +"""Methods to read data in the graph (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -34,11 +39,13 @@ from tensorflow.python.platform import gfile from tensorflow.python.summary import summary from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner +from tensorflow.python.util.deprecation import deprecated # Default name for key in the feature dict. 
KEY_FEATURE_NAME = '__key__' +@deprecated(None, 'Use tf.data.') def read_batch_examples(file_pattern, batch_size, reader, @@ -106,6 +113,7 @@ def read_batch_examples(file_pattern, return examples +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples(file_pattern, batch_size, reader, @@ -175,6 +183,7 @@ def read_keyed_batch_examples(file_pattern, seed=seed) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples_shared_queue(file_pattern, batch_size, reader, @@ -452,6 +461,7 @@ def _read_keyed_batch_examples_helper(file_pattern, return queued_examples_with_keys +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features(file_pattern, batch_size, features, @@ -540,6 +550,7 @@ def read_keyed_batch_features(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features_shared_queue(file_pattern, batch_size, features, @@ -620,6 +631,7 @@ def read_keyed_batch_features_shared_queue(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def queue_parsed_features(parsed_features, keys=None, feature_queue_capacity=100, @@ -742,6 +754,7 @@ def queue_parsed_features(parsed_features, return dequeued_keys, dequeued_parsed_features +@deprecated(None, 'Use tf.data.') def read_batch_features(file_pattern, batch_size, features, @@ -821,6 +834,7 @@ def read_batch_features(file_pattern, return features +@deprecated(None, 'Use tf.data.') def read_batch_record_features(file_pattern, batch_size, features, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py index 692438807f..29552d24f1 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py @@ -12,15 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Methods to allow dict of numpy arrays.""" +"""Methods to allow dict of numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.numpy_io import numpy_input_fn as core_numpy_input_fn +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Use tf.estimator.inputs.numpy_input_fn.') def numpy_input_fn(x, y=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py index ede7558eaf..b4ef055f5a 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py @@ -13,13 +13,19 @@ # limitations under the License. # ============================================================================== -"""Methods to allow pandas.DataFrame.""" +"""Methods to allow pandas.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.pandas_io import pandas_input_fn as core_pandas_input_fn +from tensorflow.python.util.deprecation import deprecated try: # pylint: disable=g-import-not-at-top @@ -47,6 +53,7 @@ PANDAS_DTYPES = { } +@deprecated(None, 'Please use tf.estimator.inputs.pandas_input_fn') def pandas_input_fn(x, y=None, batch_size=128, @@ -66,6 +73,7 @@ def pandas_input_fn(x, target_column=target_column) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_data(data): """Extract data from pandas.DataFrame for predictors. @@ -96,6 +104,7 @@ def extract_pandas_data(data): 'float, or bool. Found: ' + ', '.join(error_report)) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_matrix(data): """Extracts numpy matrix from pandas DataFrame. @@ -111,6 +120,7 @@ def extract_pandas_matrix(data): return data.as_matrix() +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_labels(labels): """Extract data from pandas.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner.py b/tensorflow/contrib/learn/python/learn/learn_runner.py index 2af723a0d6..d719a3e488 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Runs an Experiment.""" +"""Runs an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ from tensorflow.contrib.learn.python.learn.estimators import run_config as run_c from tensorflow.contrib.learn.python.learn.experiment import Experiment from tensorflow.contrib.training.python.training import hparam as hparam_lib from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # TODO(xiejw): Refactor the learn_runner to make code reusable. @@ -99,6 +105,7 @@ def _wrapped_experiment_fn_with_uid_check(experiment_fn, require_hparams=False): return wrapped_experiment_fn +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def run(experiment_fn, output_dir=None, schedule=None, run_config=None, hparams=None): """Make and run an experiment. @@ -218,6 +225,7 @@ def run(experiment_fn, output_dir=None, schedule=None, run_config=None, return _execute_schedule(experiment, schedule) +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def tune(experiment_fn, tuner): """Tune an experiment with hyper-parameters. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py index 7d9b1c7716..ba2d067787 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities to run and tune an Experiment. +"""Utilities to run and tune an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
@@run @@tune diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py index 6440bc204b..97220365d5 100644 --- a/tensorflow/contrib/learn/python/learn/metric_spec.py +++ b/tensorflow/contrib/learn/python/learn/metric_spec.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""The metric spec class to flexibly connect models and metrics.""" +"""The metric spec class to flexibly connect models and metrics (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ import six from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated def _assert_named_args(sentinel): @@ -223,6 +229,10 @@ def _adapt_metric_fn( class MetricSpec(object): """MetricSpec connects a model to metric functions. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + The MetricSpec class contains all information necessary to connect the output of a `model_fn` to the metrics (usually, streaming metrics) that are used in evaluation. 
@@ -284,6 +294,7 @@ class MetricSpec(object): """ + @deprecated(None, 'Use tf.estimator.EstimatorSpec.eval_metric_ops.') def __init__(self, metric_fn, prediction_key=None, diff --git a/tensorflow/contrib/learn/python/learn/models.py b/tensorflow/contrib/learn/python/learn/models.py index 4283240d01..bd4bbf9f8c 100644 --- a/tensorflow/contrib/learn/python/learn/models.py +++ b/tensorflow/contrib/learn/python/learn/models.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Various high level TF models.""" +"""Various high level TF models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -28,8 +33,10 @@ from tensorflow.python.ops import array_ops as array_ops_ from tensorflow.python.ops import init_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using a tf.estimator.LinearRegressor') def linear_regression_zero_init(x, y): """Linear regression subgraph with zero-value initial weights and bias. @@ -43,6 +50,7 @@ def linear_regression_zero_init(x, y): return linear_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.LinearClassifier') def logistic_regression_zero_init(x, y): """Logistic regression subgraph with zero-value initial weights and bias. 
@@ -56,6 +64,7 @@ def logistic_regression_zero_init(x, y): return logistic_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.') def linear_regression(x, y, init_mean=None, init_stddev=1.0): """Creates linear regression TensorFlow subgraph. @@ -107,6 +116,7 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0): return losses_ops.mean_squared_error_regressor(x, y, weights, bias) +@deprecated(None, 'Consider using a class from tf.estimator.') def logistic_regression(x, y, class_weight=None, @@ -203,6 +213,7 @@ def _reverse_seq(input_seq, lengths): return result +@deprecated(None, 'Please consider `tf.nn.bidirectional_dynamic_rnn`.') def bidirectional_rnn(cell_fw, cell_bw, inputs, @@ -283,6 +294,7 @@ def bidirectional_rnn(cell_fw, # End of TensorFlow 0.7 +@deprecated(None, 'Please consider tensorflow/tensor2tensor.') def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, target_predictor_fn, sequence_length, initial_state, attn_length, attn_size, attn_vec_size): diff --git a/tensorflow/contrib/learn/python/learn/monitored_session.py b/tensorflow/contrib/learn/python/learn/monitored_session.py index 22602e9f69..ac0433f177 100644 --- a/tensorflow/contrib/learn/python/learn/monitored_session.py +++ b/tensorflow/contrib/learn/python/learn/monitored_session.py @@ -13,7 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A wrapper of Session API which runs hooks.""" +"""A wrapper of Session API which runs hooks (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 9457a73ecf..77f7c73d54 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Monitors instrument the training process. +"""Monitors instrument the training process (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. @@get_default_monitors @@BaseMonitor @@ -59,6 +63,10 @@ from tensorflow.python.util import tf_inspect class BaseMonitor(object): """Base class for Monitors. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Defines basic interfaces of Monitors. Monitors can either be run on all workers or, more commonly, restricted to run exclusively on the elected chief worker. @@ -229,6 +237,10 @@ def _extract_output(outputs, request): class EveryN(BaseMonitor): """Base class for monitors that execute callbacks every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This class adds three new callbacks: - every_n_step_begin - every_n_step_end @@ -418,6 +430,10 @@ class StopAtStep(BaseMonitor): class PrintTensor(EveryN): """Prints given tensors every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ This is an `EveryN` monitor and has consistent semantic for `every_n` and `first_n`. @@ -455,9 +471,12 @@ class PrintTensor(EveryN): class LoggingTrainable(EveryN): """Writes trainable variable values into log every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Write the tensors in trainable variables `every_n` steps, starting with the `first_n`th step. - """ def __init__(self, scope=None, every_n=100, first_n=1): @@ -493,7 +512,12 @@ class LoggingTrainable(EveryN): class SummarySaver(EveryN): - """Saves summaries every N steps.""" + """Saves summaries every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, summary_op, @@ -554,6 +578,10 @@ class SummarySaver(EveryN): class ValidationMonitor(EveryN): """Runs evaluation of a given estimator, at most every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note that the evaluation is done based on the saved checkpoint, which will usually be older than the current step. @@ -756,6 +784,10 @@ class ValidationMonitor(EveryN): class CaptureVariable(EveryN): """Captures a variable's values into a collection. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This monitor is useful for unit testing. You should exercise caution when using this monitor in production, since it never discards values. 
@@ -794,6 +826,7 @@ class CaptureVariable(EveryN): self._var_values[step] = _extract_output(outputs, self._var_name) +@deprecation.deprecated(None, "Use tf.train.MonitoredTrainingSession.") def get_default_monitors(loss_op=None, summary_op=None, save_summary_steps=100, @@ -828,6 +861,10 @@ def get_default_monitors(loss_op=None, class GraphDump(BaseMonitor): """Dumps almost all tensors in the graph at every step. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note, this is very expensive, prefer `PrintTensor` in production. """ @@ -917,7 +954,12 @@ class GraphDump(BaseMonitor): class ExportMonitor(EveryN): - """Monitor that exports Estimator every N steps.""" + """Monitor that exports Estimator every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ @deprecation.deprecated("2017-03-25", "ExportMonitor is deprecated. Please pass an " @@ -1040,7 +1082,12 @@ class ExportMonitor(EveryN): class CheckpointSaver(BaseMonitor): - """Saves checkpoints every N steps or N seconds.""" + """Saves checkpoints every N steps or N seconds. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, checkpoint_dir, @@ -1125,7 +1172,12 @@ class CheckpointSaver(BaseMonitor): class StepCounter(EveryN): - """Steps per second monitor.""" + """Steps per second monitor. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None): super(StepCounter, self).__init__(every_n_steps=every_n_steps) @@ -1165,6 +1217,10 @@ class NanLossDuringTrainingError(RuntimeError): class NanLoss(EveryN): """NaN Loss monitor. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Monitors loss and stops training if loss is NaN. Can either fail with exception or just stop training. """ diff --git a/tensorflow/contrib/learn/python/learn/ops/__init__.py b/tensorflow/contrib/learn/python/learn/ops/__init__.py index 33962e34cc..efb1f47cf5 100644 --- a/tensorflow/contrib/learn/python/learn/ops/__init__.py +++ b/tensorflow/contrib/learn/python/learn/ops/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Various TensorFlow Ops.""" +"""Various TensorFlow Ops (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index fa3b7323e3..b3b067b8e1 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops to work with embeddings. +"""TensorFlow Ops to work with embeddings (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
Note: categorical variables are handled via embeddings in many cases. For example, in case of words. diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index b040ab3bb6..92976d1539 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for loss computation.""" +"""TensorFlow Ops for loss computation (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py index 45727faab4..aa37cb4a76 100644 --- a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for Sequence to Sequence models.""" +"""TensorFlow Ops for Sequence to Sequence models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -26,8 +31,10 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): """Returns predictions and loss for sequence of predictions. @@ -57,6 +64,7 @@ def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): return array_ops.stack(predictions, axis=1), loss +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): """Processes inputs for Sequence to Sequence models. @@ -87,6 +95,7 @@ def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): return in_x, in_y, out_y +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): """RNN Decoder that creates training and sampling sub-graphs. @@ -123,6 +132,7 @@ def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): return outputs, states, sampling_outputs, sampling_states +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_seq2seq(encoder_inputs, decoder_inputs, encoder_cell, diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py index 7bcc177d4e..e8c6e1acf8 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Preprocessing tools useful for building models.""" +"""Preprocessing tools useful for building models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py index 154739d497..faba3b2025 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements preprocessing transformers for categorical variables.""" +"""Implements preprocessing transformers for categorical variables (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,8 @@ from __future__ import print_function import math import numpy as np +from tensorflow.python.util.deprecation import deprecated + # pylint: disable=g-bad-import-order from . import categorical_vocabulary from ..learn_io.data_feeder import setup_processor_data_feeder @@ -31,10 +38,16 @@ from ..learn_io.data_feeder import setup_processor_data_feeder class CategoricalProcessor(object): """Maps documents to sequences of word ids. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ As a common convention, Nan values are handled as unknown tokens. Both float('nan') and np.nan are accepted. """ + @deprecated(None, 'Please use tensorflow/transform or tf.data for sequence ' + 'processing.') def __init__(self, min_frequency=0, share=False, vocabularies=None): """Initializes a CategoricalProcessor instance. diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py index 5709955c49..3ac370a6ab 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""Categorical vocabulary classes to map categories to indexes. +"""Categorical vocabulary classes to map categories to indexes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Can be used for categorical variables, sparse variables and words. """ @@ -25,14 +29,21 @@ from __future__ import print_function import collections import six +from tensorflow.python.util.deprecation import deprecated + class CategoricalVocabulary(object): """Categorical variables vocabulary class. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Accumulates and provides mapping from classes to indexes. Can be easily used for words. 
""" + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, unknown_token="", support_reverse=True): self._unknown_token = unknown_token self._mapping = {unknown_token: 0} diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/text.py b/tensorflow/contrib/learn/python/learn/preprocessing/text.py index 3af2074c2a..f2b6776be7 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/text.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/text.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements a number of text preprocessing utilities.""" +"""Implements a number of text preprocessing utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -24,6 +29,7 @@ import numpy as np import six from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated from .categorical_vocabulary import CategoricalVocabulary # pylint: disable=g-bad-import-order @@ -38,6 +44,7 @@ TOKENIZER_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", re.UNICODE) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def tokenizer(iterator): """Tokenizer generator. @@ -51,9 +58,16 @@ def tokenizer(iterator): yield TOKENIZER_RE.findall(value) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') class ByteProcessor(object): - """Maps documents into sequence of ids for bytes.""" + """Maps documents into sequence of ids for bytes. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length): self.max_document_length = max_document_length @@ -108,8 +122,14 @@ class ByteProcessor(object): class VocabularyProcessor(object): - """Maps documents to sequences of word ids.""" + """Maps documents to sequences of word ids. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length, min_frequency=0, diff --git a/tensorflow/contrib/learn/python/learn/session_run_hook.py b/tensorflow/contrib/learn/python/learn/session_run_hook.py index a8ba2be972..87edc9b720 100644 --- a/tensorflow/contrib/learn/python/learn/session_run_hook.py +++ b/tensorflow/contrib/learn/python/learn/session_run_hook.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""This file is deprecated. Use tensorflow.python.training.session_run_hook.""" +"""This file is deprecated. Use `tensorflow.python.training.session_run_hook`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py index 919d415c30..d663cf5fb7 100644 --- a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py +++ b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Wrapper for a Session-like object that handles threads and recovery. +"""Wrapper for a Session-like object that handles threads and recovery (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. Based on an original design of Illia Polosukhin. """ diff --git a/tensorflow/contrib/learn/python/learn/trainable.py b/tensorflow/contrib/learn/python/learn/trainable.py index 429b6040be..a1a3f20dcd 100644 --- a/tensorflow/contrib/learn/python/learn/trainable.py +++ b/tensorflow/contrib/learn/python/learn/trainable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Trainable` interface.""" +"""`Trainable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,8 @@ import abc class Trainable(object): """Interface for objects that are trainable by, e.g., `Experiment`. + + THIS CLASS IS DEPRECATED. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/utils/__init__.py b/tensorflow/contrib/learn/python/learn/utils/__init__.py index 48978d0ac3..66d8dc6fd4 100644 --- a/tensorflow/contrib/learn/python/learn/utils/__init__.py +++ b/tensorflow/contrib/learn/python/learn/utils/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Learn Utils.""" +"""TensorFlow Learn Utils (deprecated). + +This module and all its submodules are deprecated. 
See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py index cb34cb1d26..3eacac7a3d 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export.py +++ b/tensorflow/contrib/learn/python/learn/utils/export.py @@ -13,14 +13,18 @@ # limitations under the License. # ============================================================================== -"""Export utilities.""" +"""Export utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.framework import deprecated -from tensorflow.python.training import training_util from tensorflow.contrib.session_bundle import exporter from tensorflow.contrib.session_bundle import gc from tensorflow.python.client import session as tf_session @@ -32,6 +36,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import saver as tf_saver +from tensorflow.python.training import training_util @deprecated('2017-03-25', 'Please use Estimator.export_savedmodel() instead.') diff --git a/tensorflow/contrib/learn/python/learn/utils/gc.py b/tensorflow/contrib/learn/python/learn/utils/gc.py index 226915987a..916aecbea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/gc.py +++ b/tensorflow/contrib/learn/python/learn/utils/gc.py @@ -13,7 +13,11 @@ # limitations under the License. 
# ============================================================================== -r"""System for specifying garbage collection (GC) of path based data. +r"""System for specifying garbage collection (GC) of path based data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This framework allows for GC of data specified by path names, for example files on disk. gc.Path objects each represent a single item stored at a path and may @@ -73,10 +77,12 @@ import os from tensorflow.python.platform import gfile from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated Path = collections.namedtuple('Path', 'path export_version') +@deprecated(None, 'Please implement your own file management or use Saver.') def largest_export_versions(n): """Creates a filter that keeps the largest n export versions. @@ -97,6 +103,7 @@ def largest_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def one_of_every_n_export_versions(n): """Creates a filter that keeps one of every n export versions. @@ -128,6 +135,7 @@ def one_of_every_n_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def mod_export_version(n): """Creates a filter that keeps every export that is a multiple of n. @@ -146,6 +154,7 @@ def mod_export_version(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def union(lf, rf): """Creates a filter that keeps the union of two filters. @@ -163,6 +172,7 @@ def union(lf, rf): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def negation(f): """Negate a filter. 
@@ -179,6 +189,7 @@ def negation(f): return keep +@deprecated(None, 'Please implement your own file name management.') def get_paths(base_dir, parser): """Gets a list of Paths in a given directory. diff --git a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py index b2521933e5..b92eb9fea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities for creating input_fns. +"""Utilities for creating input_fns (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Contents of this file are moved to tensorflow/python/estimator/export.py. InputFnOps is renamed to ServingInputReceiver. @@ -32,13 +36,17 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.util.deprecation import deprecated class InputFnOps(collections.namedtuple('InputFnOps', ['features', 'labels', 'default_inputs'])): - """A return type for an input_fn. + """A return type for an input_fn (deprecated). + + THIS CLASS IS DEPRECATED. Please use tf.estimator.export.ServingInputReceiver + instead. This return type is currently only supported for serving input_fn. Training and eval input_fn should return a `(features, labels)` tuple. 
@@ -56,6 +64,8 @@ class InputFnOps(collections.namedtuple('InputFnOps', """ +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_parsing_serving_input_receiver_fn.') def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): """Build an input_fn appropriate for serving, expecting fed tf.Examples. @@ -84,6 +94,8 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): return input_fn +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_raw_serving_input_receiver_fn.') def build_default_serving_input_fn(features, default_batch_size=None): """Build an input_fn appropriate for serving, expecting feature Tensors. diff --git a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py index 6a63fb545a..6dbaa15f83 100644 --- a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py +++ b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A simple script for inspect checkpoint files.""" +"""A simple script for inspect checkpoint files (deprecated).""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 1593380007..213619a187 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities supporting export to SavedModel. 
+"""Utilities supporting export to SavedModel (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Some contents of this file are moved to tensorflow/python/estimator/export.py: @@ -52,8 +56,9 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.summary import summary_iterator from tensorflow.python.training import saver - from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated + # A key for use in the input_alternatives dict indicating the default input. # This is the input that will be expected when a serving request does not @@ -77,6 +82,7 @@ FEATURES_INPUT_ALTERNATIVE_KEY = 'features_input_alternative' _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY = 'default_output_alternative' +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_standardized_signature_def(input_tensors, output_tensors, problem_type): """Build a SignatureDef using problem type and input and output Tensors. @@ -156,6 +162,7 @@ def _is_regression_problem(problem_type, input_tensors, output_tensors): len(input_tensors) == 1 and len(output_tensors) == 1) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_input_alternatives(input_ops): """Obtain all input alternatives using the input_fn output and heuristics.""" input_alternatives = {} @@ -181,6 +188,7 @@ def get_input_alternatives(input_ops): return input_alternatives, features +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): """Obtain all output alternatives using the model_fn output and heuristics. 
@@ -246,6 +254,7 @@ def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): sorted(output_alternatives.keys()))) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_all_signature_defs(input_alternatives, output_alternatives, actual_default_output_alternative_key): """Build `SignatureDef`s from all pairs of input and output alternatives.""" @@ -279,6 +288,7 @@ def build_all_signature_defs(input_alternatives, output_alternatives, MAX_DIRECTORY_CREATION_ATTEMPTS = 10 +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_timestamped_export_dir(export_dir_base): """Builds a path to a new subdirectory within the base directory. @@ -317,6 +327,7 @@ def get_timestamped_export_dir(export_dir_base): '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_temp_export_dir(timestamped_export_dir): """Builds a directory name based on the argument but starting with 'temp-'. @@ -344,6 +355,7 @@ def _export_version_parser(path): return path._replace(export_version=int(filename)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_most_recent_export(export_dir_base): """Locate the most recent SavedModel export in a directory of many exports. @@ -363,6 +375,7 @@ def get_most_recent_export(export_dir_base): return next(iter(results or []), None) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def garbage_collect_exports(export_dir_base, exports_to_keep): """Deletes older exports, retaining only a given number of the most recent. 
@@ -387,6 +400,7 @@ def garbage_collect_exports(export_dir_base, exports_to_keep): logging.warn('Can not delete %s recursively: %s', p.path, e) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_export_strategy(serving_input_fn, default_output_alternative_key=None, assets_extra=None, @@ -469,6 +483,8 @@ def make_export_strategy(serving_input_fn, return export_strategy.ExportStrategy('Servo', export_fn, strip_default_attrs) +@deprecated(None, + 'Use tf.estimator.export.build_parsing_serving_input_receiver_fn') def make_parsing_export_strategy(feature_columns, default_output_alternative_key=None, assets_extra=None, @@ -555,8 +571,14 @@ def _default_compare_fn(curr_best_eval_result, cand_eval_result): class BestModelSelector(object): - """A helper that keeps track of export selection candidates.""" + """A helper that keeps track of export selection candidates. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def __init__(self, event_file_pattern=None, compare_fn=None): """Constructor of this class. @@ -622,6 +644,7 @@ class BestModelSelector(object): return best_eval_result +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_best_model_export_strategy( serving_input_fn, exports_to_keep=1, @@ -707,6 +730,7 @@ def make_best_model_export_strategy( # TODO(b/67013778): Revisit this approach when corresponding changes to # TF Core are finalized. 
+@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def extend_export_strategy(base_export_strategy, post_export_fn, post_export_name=None): diff --git a/tensorflow/python/util/decorator_utils.py b/tensorflow/python/util/decorator_utils.py index df259c7f7c..7b4363c0e4 100644 --- a/tensorflow/python/util/decorator_utils.py +++ b/tensorflow/python/util/decorator_utils.py @@ -82,7 +82,7 @@ def add_notice_to_docstring( lines = _normalize_docstring(doc).splitlines() lines[0] += ' ' + suffix_str - notice = [''] + notice + [instructions] + notice = [''] + notice + ([instructions] if instructions else []) if len(lines) > 1: # Make sure that we keep our distance from the main body -- GitLab From 29bc0d92967d8853c872ba7f736462f1ea2fbd81 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 16:24:54 -0800 Subject: [PATCH 111/884] [XLA] In HloEvaluator, fix an issue for HandleAbs to handle complex numbers more correctly: - abs([complex numbers]) would yield floats. However since the specialization for HandleAbs is based on the return type (float), we'd CHECK fail due to float != complex when accessing the elements of the operand (complex). - enable unary_op_test for interpreter. 
PiperOrigin-RevId: 187099576 --- .../compiler/xla/service/hlo_evaluator.cc | 32 +++++++++++++++++-- tensorflow/compiler/xla/tests/BUILD | 1 + 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index fd06b19144..cf8b35908f 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -57,6 +57,12 @@ struct is_complex_t : public std::false_type {}; template <> struct is_complex_t : public std::true_type {}; +template +struct is_complex64_t : public std::false_type {}; + +template <> +struct is_complex64_t : public std::true_type {}; + template StatusOr> Compare(const Shape& shape, HloOpcode opcode, const Literal& lhs_literal, @@ -248,17 +254,37 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { template < typename NativeT, - typename std::enable_if::value || - is_complex_t::value>::type* = nullptr> + typename std::enable_if::value>::type* = nullptr> Status HandleAbs(HloInstruction* abs) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], - ElementWiseUnaryOp(abs, [](ElementwiseT elem_operand) { + ElementWiseUnaryOp(abs, [](NativeT elem_operand) { return std::abs(elem_operand); })); return Status::OK(); } + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleAbs(HloInstruction* abs) { + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(abs->operand(0)); + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[abs], + (ElementWiseUnaryOpImpl( + abs, [](NativeT elem_operand) { return std::abs(elem_operand); }, + operand_literal))); + + return Status::OK(); + } + Status HandleAbs(HloInstruction* abs) override { + // If the operand is of C64 type, the return type of abs will be F32. + // However, ElementwiseT would still be the return type, F32, and thus + // specifying the ElementwiseT explicitly as C64 is needed below. 
+ if (abs->operand(0)->shape().element_type() == C64) { + return HandleAbs(abs); + } return HandleAbs(abs); } diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 33fde9737d..f3ecfc1604 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -494,6 +494,7 @@ xla_test( xla_test( name = "unary_op_test", srcs = ["unary_op_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From e37a7ae2277a2a2f7b50ad5ef361e41c30edeb41 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 17:01:24 -0800 Subject: [PATCH 112/884] Only link the swapping code when compiling TensorFlow with CUDA support. PiperOrigin-RevId: 187104273 --- tensorflow/core/grappler/optimizers/BUILD | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 908e58bcc7..a52d1c8df2 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -3,6 +3,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") filegroup( name = "all_files", @@ -319,8 +320,6 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":gpu_swapping_kernels", - ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", @@ -336,7 +335,10 @@ cc_library( "//tensorflow/core/grappler/costs:graph_properties", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/utils:traversal", - ], + ] + if_cuda([ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", + ]), ) tf_cc_test_gpu( -- GitLab From 49d4e9233cebdff001ffcc2e3d703e815ba0a881 Mon 
Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:04:09 -0800 Subject: [PATCH 113/884] Consolidate the builtin function overrides into a single module, and use a generic `dynamic_builtin` function to dispatch between implementations. Use the generic dispatcher in the generated code. PiperOrigin-RevId: 187104685 --- .../py2tf/converters/builtin_functions.py | 13 ++++--- tensorflow/contrib/py2tf/utils/BUILD | 12 +----- tensorflow/contrib/py2tf/utils/__init__.py | 4 +- .../py2tf/utils/{printing.py => builtins.py} | 32 +++++++++++++-- .../{printing_test.py => builtins_test.py} | 39 +++++++++++++++---- tensorflow/contrib/py2tf/utils/misc.py | 13 ------- tensorflow/contrib/py2tf/utils/misc_test.py | 27 +------------ 7 files changed, 72 insertions(+), 68 deletions(-) rename tensorflow/contrib/py2tf/utils/{printing.py => builtins.py} (62%) rename tensorflow/contrib/py2tf/utils/{printing_test.py => builtins_test.py} (56%) diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions.py b/tensorflow/contrib/py2tf/converters/builtin_functions.py index e69038aced..b5aa9756da 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions.py +++ b/tensorflow/contrib/py2tf/converters/builtin_functions.py @@ -36,23 +36,24 @@ class BuiltinFunctionTransformer(transformer.Base): # pylint:disable=invalid-name - def _convert_len(self, node): + def _convert_builtin(self, node): template = """ - py2tf_utils.dynamic_len(args) + py2tf_utils.dynamic_builtin(func, args) """ - return templates.replace(template, args=node.args)[0].value + return templates.replace(template, func=node.func, args=node.args)[0].value def _convert_print(self, node): template = """ - py2tf_utils.call_print(args) + py2tf_utils.dynamic_print(args) """ return templates.replace(template, args=node.args)[0].value def visit_Call(self, node): self.generic_visit(node) # TODO(mdan): This won't work if the function was hidden. 
- if isinstance(node.func, gast.Name) and node.func.id == 'len': - return self._convert_len(node) + if isinstance(node.func, gast.Name) and node.func.id in ('len',): + return self._convert_builtin(node) + # Print needs to be handled separately because it can be read as statement. if isinstance(node.func, gast.Name) and node.func.id == 'print': return self._convert_print(node) return node diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index c2fdd40707..2086a9ef60 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -20,10 +20,10 @@ py_library( name = "utils", srcs = [ "__init__.py", + "builtins.py", "context_managers.py", "misc.py", "multiple_dispatch.py", - "printing.py", "py_func.py", "tensor_list.py", "type_check.py", @@ -76,16 +76,6 @@ py_test( ], ) -py_test( - name = "printing_test", - srcs = ["printing_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:client_testlib", - ], -) - py_test( name = "type_check_test", srcs = ["type_check_test.py"], diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index d931322bf3..19bf2272bc 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -18,11 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf.utils.builtins import dynamic_builtin +from tensorflow.contrib.py2tf.utils.builtins import dynamic_print from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_on_returns from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while -from tensorflow.contrib.py2tf.utils.printing import call_print 
from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/printing.py b/tensorflow/contrib/py2tf/utils/builtins.py similarity index 62% rename from tensorflow/contrib/py2tf/utils/printing.py rename to tensorflow/contrib/py2tf/utils/builtins.py index 95a62bd80b..0a50b80b60 100644 --- a/tensorflow/contrib/py2tf/utils/printing.py +++ b/tensorflow/contrib/py2tf/utils/builtins.py @@ -12,14 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow printing support utilities.""" +"""Builtin conversion utilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.util import tf_inspect + + +def dynamic_builtin(f, *args, **kwargs): + """Converts a builtin function call inline.""" + if not tf_inspect.isbuiltin(f): + return f(*args, **kwargs) + + if f is len: + return dynamic_len(*args, **kwargs) + + raise NotImplementedError('The "%s" builtin is not yet supported.' % f) + + +def dynamic_len(list_or_tensor): + """Implementation of len using dynamic dispatch.""" + if tensor_util.is_tensor(list_or_tensor): + shape = list_or_tensor.shape + if not shape: + raise ValueError( + 'len requires non-zero rank for tensor "%s"' % list_or_tensor) + return array_ops.shape(list_or_tensor)[0] + + return len(list_or_tensor) def is_tf_print_compatible(value): @@ -30,8 +56,8 @@ def is_tf_print_compatible(value): return False -def call_print(*values): - """Compiled counterpart of the print builtin. 
+def dynamic_print(*values): + """Implementation of print using dynamic dispatch. The function attempts to use tf.Print if all the values are compatible. Otherwise, it will fall back to py_func. diff --git a/tensorflow/contrib/py2tf/utils/printing_test.py b/tensorflow/contrib/py2tf/utils/builtins_test.py similarity index 56% rename from tensorflow/contrib/py2tf/utils/printing_test.py rename to tensorflow/contrib/py2tf/utils/builtins_test.py index 2070deb304..19a72c63ec 100644 --- a/tensorflow/contrib/py2tf/utils/printing_test.py +++ b/tensorflow/contrib/py2tf/utils/builtins_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for printing module.""" +"""Tests for builtins module.""" from __future__ import absolute_import from __future__ import division @@ -22,28 +22,53 @@ import sys import six -from tensorflow.contrib.py2tf.utils import printing +from tensorflow.contrib.py2tf.utils import builtins +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): +class BuiltinsTest(test.TestCase): - def test_call_print_tf(self): + def test_dynamic_len_tf_scalar(self): + a = constant_op.constant(1) + + with self.assertRaises(ValueError): + with self.test_session() as sess: + sess.run(builtins.dynamic_builtin(len, a)) + + def test_dynamic_len_tf_array(self): + a = constant_op.constant([1, 2, 3]) + + with self.test_session() as sess: + self.assertEqual(3, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_tf_matrix(self): + a = constant_op.constant([[1, 2], [3, 4]]) + + with self.test_session() as sess: + self.assertEqual(2, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_py_list(self): + a = [3] * 5 + + self.assertEqual(5, builtins.dynamic_builtin(len, a)) + + def 
test_dynamic_print_tf(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', 1)) + sess.run(builtins.dynamic_print('test message', 1)) self.assertEqual(out_capturer.getvalue(), 'test message 1\n') finally: sys.stdout = sys.__stdout__ - def test_call_print_py_func(self): + def test_dynamic_print_complex(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', [1, 2])) + sess.run(builtins.dynamic_print('test message', [1, 2])) self.assertEqual(out_capturer.getvalue(), 'test message [1, 2]\n') finally: sys.stdout = sys.__stdout__ diff --git a/tensorflow/contrib/py2tf/utils/misc.py b/tensorflow/contrib/py2tf/utils/misc.py index 7548048388..1b06caf0bd 100644 --- a/tensorflow/contrib/py2tf/utils/misc.py +++ b/tensorflow/contrib/py2tf/utils/misc.py @@ -19,22 +19,9 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops -def dynamic_len(list_or_tensor): - """Implementation of len using dynamic dispatch.""" - if tensor_util.is_tensor(list_or_tensor): - shape = list_or_tensor.shape - if not shape: - raise ValueError( - 'len requires non-zero rank for tensor "%s"' % list_or_tensor) - return array_ops.shape(list_or_tensor)[0] - - return len(list_or_tensor) - - def alias_tensors(*args): """Wrap any Tensor arguments with an identity op. 
diff --git a/tensorflow/contrib/py2tf/utils/misc_test.py b/tensorflow/contrib/py2tf/utils/misc_test.py index ec88e7cb74..8aedd4cd64 100644 --- a/tensorflow/contrib/py2tf/utils/misc_test.py +++ b/tensorflow/contrib/py2tf/utils/misc_test.py @@ -19,37 +19,12 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.python.framework.constant_op import constant from tensorflow.python.ops.variables import Variable from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): - - def test_dynamic_len_tf_scalar(self): - a = constant(1) - - with self.assertRaises(ValueError): - with self.test_session() as sess: - sess.run(dynamic_len(a)) - - def test_dynamic_len_tf_array(self): - a = constant([1, 2, 3]) - - with self.test_session() as sess: - self.assertEqual(3, sess.run(dynamic_len(a))) - - def test_dynamic_len_tf_matrix(self): - a = constant([[1, 2], [3, 4]]) - - with self.test_session() as sess: - self.assertEqual(2, sess.run(dynamic_len(a))) - - def test_dynamic_len_py_list(self): - a = [3] * 5 - - self.assertEqual(5, dynamic_len(a)) +class MiscTest(test.TestCase): def test_alias_single_tensor(self): a = constant(1) -- GitLab From c7c8f4e82ede4fec5b21f9acd61bcc221d87efdc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:27:20 -0800 Subject: [PATCH 114/884] Fix buffer assignment for conditional instruction. 
PiperOrigin-RevId: 187107432 --- .../compiler/xla/service/buffer_assignment.cc | 358 +++++++++--------- .../compiler/xla/service/copy_insertion.cc | 72 +++- 2 files changed, 241 insertions(+), 189 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index b1e693da9d..d44d3d71d9 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -48,6 +48,183 @@ using ::tensorflow::strings::HumanReadableNumBytes; using ::tensorflow::strings::Printf; using ::tensorflow::strings::StrAppend; +namespace { + +template +string ColocatedBufferSetsToString(const T& container, const char* title) { + string result; + StrAppend(&result, title, "\n"); + for (const auto& it : container) { + StrAppend(&result, "\t", it->ToString(), "\n"); + } + return result; +} + +// Walk the call graph of the HLO module and place each computation into either +// thread_local_computations or global_computations depending upon whether the +// computation requires thread-local allocations or global allocations. The +// elements in thread_local_computations and global_computations are in post +// order (if computation A has an instruction which calls computation B, then A +// will appear after B in the vector). +Status GatherComputationsByAllocationType( + const HloModule* module, + std::vector* thread_local_computations, + std::vector* global_computations) { + // Create a worklist of computations paired with whether the allocation must + // be thread-local. + std::deque> worklist; + worklist.push_back(std::make_pair(module->entry_computation(), + /*is_thread_local*/ false)); + + // Sets for quickly checking membership. Computations are returned in vectors + // for stable iteration. 
+ FlatSet thread_local_set; + FlatSet global_set; + + while (!worklist.empty()) { + auto worklist_front = worklist.front(); + worklist.pop_front(); + const HloComputation* computation = worklist_front.first; + bool is_thread_local = worklist_front.second; + bool in_thread_local_set = thread_local_set.count(computation) > 0; + bool in_global_set = global_set.count(computation) > 0; + + // If the computation has already been added to the respective set, then + // nothing to do. + if ((is_thread_local && in_thread_local_set) || + (!is_thread_local && in_global_set)) { + continue; + } + + // If the computation has already been added to the other set this is an + // error condition because the global call to the computation (eg, + // while/call) may return a reference to one of the thread-local buffers to + // the calling computation which will become a dangling reference when the + // thread-local is deallocated with the call return. + if ((is_thread_local && in_global_set) || + (!is_thread_local && in_thread_local_set)) { + return InvalidArgument( + "computation %s has conflicting allocation requirements (global " + "and thread-local)", + computation->name().c_str()); + } + + if (is_thread_local) { + thread_local_set.insert(computation); + } else { + global_set.insert(computation); + } + + for (auto* instruction : computation->instructions()) { + for (HloComputation* subcomputation : + instruction->called_computations()) { + switch (instruction->opcode()) { + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kWhile: + // Call and while must be called from a computation with global + // allocations as they may return references to buffers inside the + // called computation which cannot be thread-local. 
+ if (is_thread_local) { + return InvalidArgument( + "computation %s cannot contain call/while op because it " + "requires thread-local buffer allocations", + computation->name().c_str()); + } + worklist.push_back(std::make_pair(subcomputation, + false)); // Not thread local. + break; + case HloOpcode::kMap: + case HloOpcode::kReduce: + case HloOpcode::kReduceWindow: + case HloOpcode::kSelectAndScatter: + case HloOpcode::kFusion: + // Map/reduce etc computations are always thread-local. + worklist.push_back(std::make_pair(subcomputation, + true)); // Thread local. + break; + default: + return InternalError( + "Unexpected calling opcode: %s", + HloOpcodeString(instruction->opcode()).c_str()); + } + } + } + } + + // Add the computations to the vectors in post order. + for (auto* computation : module->MakeComputationPostOrder()) { + if (thread_local_set.count(computation) > 0) { + thread_local_computations->push_back(computation); + } else if (global_set.count(computation) > 0) { + global_computations->push_back(computation); + } + // If the computation is not reachable from the entry computation, then it + // will not appear in either thread_local_set or global_set. We don't bother + // assigning buffers for these. + } + return Status::OK(); +} + +// Checks that points-to set of 'instruction' is unambiguous and distinct +// (ensured by CopyInsertion), then adds the buffer from the points-to set at +// 'index' to 'colocated_set'. +const LogicalBuffer* AddBufferToColocatedSet( + const HloInstruction* instruction, const ShapeIndex& index, + const TuplePointsToAnalysis& points_to_analysis, + std::vector* colocated_set) { + // CopyInsertion ensures root points-to set is unambiguous and distinct. 
+ const auto& points_to = points_to_analysis.GetPointsToSet(instruction); + DCHECK(!points_to.IsAmbiguous()); + colocated_set->push_back(points_to.element(index)[0]); + return colocated_set->back(); +} + +// Given the interference map of a graph (the list of interfering node indices +// for each node), perform graph coloring such that interfering nodes are +// assigned to different colors. Returns the assigned color of the nodes, where +// the colors are represented as integer values [0, color_count). +std::vector ColorInterferenceGraph( + const std::vector>& interference_map) { + const int64 node_count = interference_map.size(); + + // Sort the nodes such that we assign nodes with more interference first. This + // relies on the common heuristic of assigning the most constrained node + // first, but it would be good to investigate other ordering heuristics too. + std::vector nodes(node_count); + std::iota(nodes.begin(), nodes.end(), 0); + std::sort(nodes.begin(), nodes.end(), + [&interference_map](const int64 i, const int64 j) { + return interference_map[i].size() > interference_map[j].size(); + }); + + const int64 kColorUnassigned = -1; + std::vector assigned_colors(node_count, kColorUnassigned); + for (int64 node : nodes) { + // Mark the colors that are already assigned to the neighbors. + std::vector available_colors(node_count, true); + for (int64 neighbor : interference_map[node]) { + int64 color = assigned_colors[neighbor]; + if (color != kColorUnassigned) { + available_colors[color] = false; + } + } + + // Find the color that is not yet assigned to the neighbors. 
+ int64 color = kColorUnassigned; + for (color = 0; color < available_colors.size(); ++color) { + if (available_colors[color]) { + break; + } + } + CHECK_NE(color, kColorUnassigned); + assigned_colors[node] = color; + } + return assigned_colors; +} + +} // namespace + size_t BufferAllocation::Slice::Hasher::operator()(Slice s) const { uint64 h = std::hash()(s.index()); h = tensorflow::Hash64Combine(h, std::hash()(s.offset())); @@ -523,116 +700,6 @@ BufferAssignmentProto BufferAssignment::ToProto() const { return proto; } -namespace { - -// Walk the call graph of the HLO module and place each computation into either -// thread_local_computations or global_computations depending upon whether the -// computation requires thread-local allocations or global allocations. The -// elements in thread_local_computations and global_computations are in post -// order (if computation A has an instruction which calls computation B, then A -// will appear after B in the vector). -Status GatherComputationsByAllocationType( - const HloModule* module, - std::vector* thread_local_computations, - std::vector* global_computations) { - // Create a worklist of computations paired with whether the allocation must - // be thread-local. - std::deque> worklist; - worklist.push_back(std::make_pair(module->entry_computation(), - /*is_thread_local*/ false)); - - // Sets for quickly checking membership. Computations are returned in vectors - // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; - - while (!worklist.empty()) { - auto worklist_front = worklist.front(); - worklist.pop_front(); - const HloComputation* computation = worklist_front.first; - bool is_thread_local = worklist_front.second; - bool in_thread_local_set = thread_local_set.count(computation) > 0; - bool in_global_set = global_set.count(computation) > 0; - - // If the computation has already been added to the respective set, then - // nothing to do. 
- if ((is_thread_local && in_thread_local_set) || - (!is_thread_local && in_global_set)) { - continue; - } - - // If the computation has already been added to the other set this is an - // error condition because the global call to the computation (eg, - // while/call) may return a reference to one of the thread-local buffers to - // the calling computation which will become a dangling reference when the - // thread-local is deallocated with the call return. - if ((is_thread_local && in_global_set) || - (!is_thread_local && in_thread_local_set)) { - return InvalidArgument( - "computation %s has conflicting allocation requirements (global " - "and thread-local)", - computation->name().c_str()); - } - - if (is_thread_local) { - thread_local_set.insert(computation); - } else { - global_set.insert(computation); - } - - for (auto* instruction : computation->instructions()) { - for (HloComputation* subcomputation : - instruction->called_computations()) { - switch (instruction->opcode()) { - case HloOpcode::kCall: - case HloOpcode::kConditional: - case HloOpcode::kWhile: - // Call and while must be called from a computation with global - // allocations as they may return references to buffers inside the - // called computation which cannot be thread-local. - if (is_thread_local) { - return InvalidArgument( - "computation %s cannot contain call/while op because it " - "requires thread-local buffer allocations", - computation->name().c_str()); - } - worklist.push_back(std::make_pair(subcomputation, - false)); // Not thread local. - break; - case HloOpcode::kMap: - case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: - case HloOpcode::kSelectAndScatter: - case HloOpcode::kFusion: - // Map/reduce etc computations are always thread-local. - worklist.push_back(std::make_pair(subcomputation, - true)); // Thread local. 
- break; - default: - return InternalError( - "Unexpected calling opcode: %s", - HloOpcodeString(instruction->opcode()).c_str()); - } - } - } - } - - // Add the computations to the vectors in post order. - for (auto* computation : module->MakeComputationPostOrder()) { - if (thread_local_set.count(computation) > 0) { - thread_local_computations->push_back(computation); - } else if (global_set.count(computation) > 0) { - global_computations->push_back(computation); - } - // If the computation is not reachable from the entry computation, then it - // will not appear in either thread_local_set or global_set. We don't bother - // assigning buffers for these. - } - return Status::OK(); -} - -} // namespace - /* static */ StatusOr> BufferAssigner::Run( const HloModule* module, std::unique_ptr hlo_ordering, @@ -1085,7 +1152,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( if (colocated_set.empty()) { return; } - + VLOG(5) << ColocatedBufferSetsToString(colocated_set, + "Adding colocated buffer set"); // Find existing sets that overlap with at least one buffer from the // colocated_set. The resulting 'overlap_set_indices' will have at most // colocated_buffer_sets->size() entries, and will be in increasing order. 
@@ -1093,6 +1161,10 @@ void BufferAssigner::AddSetToColocatedBufferSets( for (size_t index = 0; index < colocated_buffer_sets->size(); ++index) { for (const LogicalBuffer* buffer : colocated_set) { if ((*colocated_buffer_sets)[index].count(buffer) > 0) { + VLOG(5) << "Found overlap with existing set on buffer " + << buffer->ToString() << "\n" + << ColocatedBufferSetsToString((*colocated_buffer_sets)[index], + "Overlapping set"); overlap_set_indices.push_back(index); break; } @@ -1104,6 +1176,7 @@ void BufferAssigner::AddSetToColocatedBufferSets( colocated_buffer_sets->emplace_back(); colocated_buffer_sets->back().insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << "No overlap found, new group created"; return; } @@ -1115,6 +1188,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( first->insert(overlap_set.begin(), overlap_set.end()); } first->insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << ColocatedBufferSetsToString( + *first, "Result of the colocated buffer set merging"); // Remove overlap sets that we just merged. The offset accounts for the fact // that as elements are erased, the indices need to be adjusted. Keep in mind @@ -1125,67 +1200,6 @@ void BufferAssigner::AddSetToColocatedBufferSets( } } -namespace { - -// Checks that points-to set of 'instruction' is unambiguous and distinct -// (ensured by CopyInsertion), then adds the buffer from the points-to set at -// 'index' to 'colocated_set'. -const LogicalBuffer* AddBufferToColocatedSet( - const HloInstruction* instruction, const ShapeIndex& index, - const TuplePointsToAnalysis& points_to_analysis, - std::vector* colocated_set) { - // CopyInsertion ensures root points-to set is unambiguous and distinct. 
- const auto& points_to = points_to_analysis.GetPointsToSet(instruction); - DCHECK(!points_to.IsAmbiguous()); - colocated_set->push_back(points_to.element(index)[0]); - return colocated_set->back(); -} - -// Given the interference map of a graph (the list of interfering node indices -// for each node), perform graph coloring such that interfering nodes are -// assigned to different colors. Returns the assigned color of the nodes, where -// the colors are represented as integer values [0, color_count). -std::vector ColorInterferenceGraph( - const std::vector>& interference_map) { - const int64 node_count = interference_map.size(); - - // Sort the nodes such that we assign nodes with more interference first. This - // relies on the common heuristic of assigning the most constrained node - // first, but it would be good to investigate other ordering heuristics too. - std::vector nodes(node_count); - std::iota(nodes.begin(), nodes.end(), 0); - std::sort(nodes.begin(), nodes.end(), - [&interference_map](const int64 i, const int64 j) { - return interference_map[i].size() > interference_map[j].size(); - }); - - const int64 kColorUnassigned = -1; - std::vector assigned_colors(node_count, kColorUnassigned); - for (int64 node : nodes) { - // Mark the colors that are already assigned to the neighbors. - std::vector available_colors(node_count, true); - for (int64 neighbor : interference_map[node]) { - int64 color = assigned_colors[neighbor]; - if (color != kColorUnassigned) { - available_colors[color] = false; - } - } - - // Find the color that is not yet assigned to the neighbors. 
- int64 color = kColorUnassigned; - for (color = 0; color < available_colors.size(); ++color) { - if (available_colors[color]) { - break; - } - } - CHECK_NE(color, kColorUnassigned); - assigned_colors[node] = color; - } - return assigned_colors; -} - -} // namespace - std::vector BufferAssigner::MergeColocatedBufferSets( const std::vector& colocated_buffer_sets, diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index cc195879a6..df73c28597 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -58,6 +58,45 @@ bool ValueIsReadOnly(const HloValue& value) { return IsConstantValue(value) || IsEntryParameterValue(value); } +// Data structure describing the action which should be taken on parts of a +// computation buffers, with respect to the adding of special case copies. +struct SpecialCaseCopyPolicy { + // Insert a copy if the same buffer is found at multiple indices within the + // output tuple. + bool copy_root_replicated_buffers = false; + // If true, insert a copy if a buffer coming from a constant or a parameter + // is found wihtin the output tuple. + bool copy_parameters_and_constants = false; +}; + +SpecialCaseCopyPolicy GetSpecialCaseCopyPolicy(const CallGraphNode& node, + HloModule* module, + HloComputation* computation) { + SpecialCaseCopyPolicy policy; + if (computation == module->entry_computation()) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + for (const CallSite& site : node.caller_callsites()) { + // The kWhile instruction does not have an handling here, as the + // AddCopiesForWhile() API takes care of adding its own copies. 
+ if (site.instruction()->opcode() == HloOpcode::kConditional) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + } + return policy; +} + +bool ShouldCopyRootValue(const HloValue& value, + const SpecialCaseCopyPolicy& policy) { + if (policy.copy_parameters_and_constants) { + return IsConstantValue(value) || + value.defining_instruction()->opcode() == HloOpcode::kParameter; + } + return false; +} + // Deep copy the given instructions 'from' and 'to' at the ShapeIndexes given in // 'indices_to_copy'. Add control edges from the respective kCopy instructions // in deep copy of 'from' to the respective kCopy instruction in the deep copy @@ -957,7 +996,8 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { } TF_RET_CHECK(node.context() == CallContext::kSequential); - const bool is_entry = computation == module->entry_computation(); + SpecialCaseCopyPolicy policy = + GetSpecialCaseCopyPolicy(node, module, computation); HloInstruction* root = computation->root_instruction(); // Mark nondistinct/ambiguous indices. @@ -970,27 +1010,26 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { for (const HloBuffer* buffer : buffers_at_index) { buffer_seen_before |= !seen.insert(buffer).second; } - if (buffers_at_index.size() > 1 || (buffer_seen_before && is_entry)) { - VLOG(2) << "Index " << index << " of root of computation " + if (buffers_at_index.size() > 1 || + (buffer_seen_before && policy.copy_root_replicated_buffers)) { + VLOG(2) << "Index " << index << " of computation " << computation->name() << " (" << root->name() << ") has ambiguous or non-distinct buffer. Copying."; add_index_to_copy(root, index); } }); - // For entry instructions, mark any parameter or constant values. 
- if (is_entry) { - for (const auto& pair : - alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { - const ShapeIndex& index = pair.first; - const HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (ValueIsReadOnly(*value)) { - VLOG(2) << "Root of entry computation (" << root->name() - << ") has constant or entry parameter value at index " - << index << ". Copying."; - add_index_to_copy(root, index); - } + for (const auto& pair : + alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (ShouldCopyRootValue(*value, policy)) { + VLOG(2) << "Root of (" << root->name() << ") of computation(" + << computation->name() + << ") has constant or parameter value at index " << index + << ". Copying."; + add_index_to_copy(root, index); } } } @@ -1012,7 +1051,6 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { instruction->parent()->set_root_instruction(deep_copy); } } - return Status::OK(); } -- GitLab From 73b11c4cff53cff0710019a276d41a397c180089 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 26 Feb 2018 17:38:21 -0800 Subject: [PATCH 115/884] Local FP16 conversion to workaround TRT issue --- .../contrib/tensorrt/convert/convert_nodes.cc | 322 ++++++++++++++---- 1 file changed, 256 insertions(+), 66 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index e557db90e1..d9377ba597 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -117,6 +117,18 @@ static std::vector> CreateSamePadding( return padding; } +string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { + size_t last_scope_separator = 0; + for (size_t i = 0; i < std::min(op_name_a.size(), 
op_name_b.size()); ++i) { + if (op_name_a[i] != op_name_b[i]) { + break; + } else if (op_name_a[i] == '/') { + last_scope_separator = i + 1; + } + } + return op_name_a.substr(0, last_scope_separator); +} + class TRT_ShapedWeights { public: TRT_ShapedWeights(tensorflow::DataType type, const void* values, @@ -325,12 +337,21 @@ void reorder_ck_to_kc(TRT_ShapedWeights const& iweights, nvinfer1::DimsHW istrides = {1, k}; nvinfer1::DimsHW ostrides = {c, 1}; switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: + case tensorflow::DataType::DT_FLOAT: { reorder2({k, c}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; + } + case tensorflow::DataType::DT_HALF: { + reorder2( + {k, c}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } default: LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; } @@ -356,12 +377,22 @@ void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1}; switch (iweights.type_) { - case tensorflow::DataType::DT_FLOAT: + case tensorflow::DataType::DT_FLOAT: { Reorder4({k, c, r, s}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; + } + case tensorflow::DataType::DT_HALF: { + Reorder4( + {k, c, r, s}, static_cast(iweights.GetValues()), + istrides, + static_cast(const_cast(oweights->GetValues())), + ostrides); + break; + } + default: LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; } @@ -395,6 +426,7 @@ class Converter { nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; tensorflow::trt::TRTWeightStore* weight_store_; + bool fp16_; void register_op_converters(); std::vector get_inputs( const tensorflow::NodeDef& node_def) { @@ -430,8 +462,8 @@ class Converter { public: explicit 
Converter(nvinfer1::INetworkDefinition* trt_network, - tensorflow::trt::TRTWeightStore* ws) - : trt_network_(trt_network), weight_store_(ws) { + tensorflow::trt::TRTWeightStore* ws, bool fp16) + : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { this->register_op_converters(); } tensorflow::trt::TRTWeightStore* weight_store() { return weight_store_; } @@ -444,7 +476,7 @@ class Converter { weights.SetValues(weight_store_->store_.back().data()); return weights; } - + bool isFP16() { return fp16_; }; TRT_ShapedWeights get_temp_weights_like(const TRT_ShapedWeights& weights) { return this->get_temp_weights(weights.type_, weights.shape_); } @@ -529,7 +561,7 @@ struct LambdaFactory { switch (op) { case OP_CATEGORY::RSQRT: { VLOG(2) << "RSQRT GETS DONE"; - return [](T t) -> T { return 1.0 / std::sqrt(t); }; + return [](T t) -> T { return 1.0 / sqrt(t); }; } case OP_CATEGORY::NEG: return [](T t) -> T { return -t; }; @@ -615,6 +647,22 @@ struct LambdaFactory { } }; +template <> +std::function LambdaFactory::unary() { + switch (op) { + case OP_CATEGORY::RSQRT: { + VLOG(2) << "RSQRT GETS DONE"; + return [](Eigen::half t) -> Eigen::half { + return Eigen::half(1.0 / sqrt(float(t))); + }; + } + case OP_CATEGORY::NEG: + return [](Eigen::half t) -> Eigen::half { return -t; }; + default: + VLOG(2) << "Not supported op for unary: " << static_cast(op); + return nullptr; + } +} tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights, LambdaFactory unary_op) { @@ -626,6 +674,14 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights, std::transform(inp, inp + iweights.count(), oup, unary_op.unary()); break; } + case tensorflow::DataType::DT_HALF: { + auto inp = static_cast(iweights.GetValues()); + auto oup = + static_cast(const_cast(oweights->GetValues())); + std::transform(inp, inp + iweights.count(), oup, + unary_op.unary()); + break; + } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ 
-669,6 +725,32 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, } break; } + case tensorflow::DataType::DT_HALF: { + auto inp_l = static_cast(iweights_l.GetValues()); + auto inp_r = static_cast(iweights_r.GetValues()); + auto oup = + static_cast(const_cast(oweights->GetValues())); + + if (iweights_l.count() != iweights_r.count()) { + // We only supports broadcast of RankZero + if (iweights_l.count() == 1) { + VLOG(2) << "I bet it is not working!" << (*inp_l); + std::transform(inp_r, inp_r + iweights_r.count(), oup, + binary_op.broadcast_l(*inp_l)); + } else if (iweights_r.count() == 1) { + VLOG(2) << "I bet it is not working!" << (*inp_r); + std::transform(inp_l, inp_l + iweights_l.count(), oup, + binary_op.broadcast_r(*inp_r)); + } else { + return tensorflow::errors::Unimplemented( + "Binary op with non-rankZero broadcast not supported"); + } + } else { + std::transform(inp_l, inp_l + iweights_l.count(), inp_r, oup, + binary_op.binary()); + } + break; + } default: return tensorflow::errors::Unimplemented( "Data type not supported: " + @@ -1317,16 +1399,33 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - size_t lenData = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) lenData *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(lenData)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.float_val().begin(), - weights_tensor.float_val() - .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), lenData); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + if (ctx.isFP16()) { + auto dtypeNew = tensorflow::DataType::DT_HALF; + size_t lenData = tensorflow::DataTypeSize(dtypeNew); + for (int i = 0; i < scalar_shape.nbDims; i++) + lenData *= scalar_shape.d[i]; + 
ctx.weight_store()->store_.push_back(std::vector(lenData)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + auto half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + half_tensor.device(defd) = + tensor.flat().template cast(); + memcpy(dst, half_tensor.data(), lenData); // store into weight store + weights = TRT_ShapedWeights(dtypeNew, dst, scalar_shape); + } else { + size_t lenData = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + lenData *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(lenData)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.float_val().begin(), + weights_tensor.float_val() + .end()); // make a local copy first to flatten + memcpy(dst, tensor_data.data(), lenData); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + } // LOG(INFO) << " add: " << weights_tensor.float_val().data(); // LOG(INFO) << " value: " << (*weights_tensor.float_val().data()); @@ -1362,18 +1461,61 @@ tensorflow::Status ConvertConst(Converter& ctx, scalar_shape.type[i] = nvinfer1::DimensionType::kSPATIAL; } } - size_t lenData = tensorflow::DataTypeSize(dtype); - for (int i = 0; i < scalar_shape.nbDims; i++) lenData *= scalar_shape.d[i]; - size_t lenTensor = weights_tensor.int_val_size() * sizeof(int32); - lenData = std::max(lenData, lenTensor); - ctx.weight_store()->store_.push_back(std::vector(lenData)); - void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); - std::vector tensor_data( - weights_tensor.int_val().begin(), - weights_tensor.int_val().end()); // make a local copy first to flatten - // doesn't have to be contigous - memcpy(dst, tensor_data.data(), lenTensor); // store into weight store - weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + if (ctx.isFP16()) { + auto dtypeNew = 
tensorflow::DataType::DT_HALF; + size_t lenData = tensorflow::DataTypeSize(dtypeNew); + for (int i = 0; i < scalar_shape.nbDims; i++) + lenData *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(lenData)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); + TTypes::Flat half_tensor = temp_tensor.flat(); + Eigen::DefaultDevice defd; + switch (dtype) { + case (tensorflow::DT_INT32): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT16): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_INT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + case (tensorflow::DT_UINT8): { + half_tensor.device(defd) = + tensor.flat().template cast(); + break; + } + default: + return tensorflow::errors::InvalidArgument( + "Datatype " + tensorflow::DataTypeString(dtype) + + " for FP16 conversion"); + break; + }; + memcpy(dst, half_tensor.data(), lenData); // store into weight store + weights = TRT_ShapedWeights(dtypeNew, dst, scalar_shape); + } else { + size_t lenData = tensorflow::DataTypeSize(dtype); + for (int i = 0; i < scalar_shape.nbDims; i++) + lenData *= scalar_shape.d[i]; + size_t lenTensor = weights_tensor.int_val_size() * sizeof(int32); + lenData = std::max(lenData, lenTensor); + ctx.weight_store()->store_.push_back(std::vector(lenData)); + void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); + std::vector tensor_data( + weights_tensor.int_val().begin(), + weights_tensor.int_val() + .end()); // make a local copy first to flatten + // doesn't have to be contigous + memcpy(dst, tensor_data.data(), lenTensor); // store into weight store + weights = TRT_ShapedWeights(dtype, dst, scalar_shape); + } } else if (!weights_tensor.tensor_content().empty()) { VLOG(2) << "TENSOR!!!" 
<< node_def.name(); const auto& content = weights_tensor.tensor_content(); @@ -1757,29 +1899,81 @@ tensorflow::Status ConvertFusedBatchNorm( TRT_ShapedWeights combined_offset_weights = ctx.get_temp_weights_like(offset_weights); size_t nweight = scale_weights.count(); - if (scale_weights.type_ != tensorflow::DataType::DT_FLOAT || - offset_weights.type_ != tensorflow::DataType::DT_FLOAT || - mean_weights.type_ != tensorflow::DataType::DT_FLOAT || - variance_weights.type_ != tensorflow::DataType::DT_FLOAT) { - return tensorflow::errors::Unimplemented( - "only float32 weights data type is supported, at " + node_def.name()); - } - for (size_t i = 0; i < nweight; ++i) { - float scale = (static_cast(scale_weights.GetValues()))[i]; - float offset = (static_cast(offset_weights.GetValues()))[i]; - float mean = (static_cast(mean_weights.GetValues()))[i]; - float variance = - (static_cast(variance_weights.GetValues()))[i]; - float& combined_scale_ref = const_cast( - static_cast(combined_scale_weights.GetValues()))[i]; - float& combined_offset_ref = const_cast( - static_cast(combined_offset_weights.GetValues()))[i]; - combined_scale_ref = scale / sqrtf(variance + epsilon); - combined_offset_ref = offset - mean * combined_scale_ref; + if ((scale_weights.type_ == offset_weights.type_) && + (mean_weights.type_ == variance_weights.type_) && + (scale_weights.type_ == variance_weights.type_)) { + if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && + (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { + return tensorflow::errors::Unimplemented( + "only float32 weights data type is supported, at " + node_def.name() + + " " + tensorflow::DataTypeString(scale_weights.type_)); + } + if (scale_weights.type_ == tensorflow::DT_FLOAT) { + for (size_t i = 0; i < nweight; ++i) { + float scale = (static_cast(scale_weights.GetValues()))[i]; + float offset = + (static_cast(offset_weights.GetValues()))[i]; + float mean = (static_cast(mean_weights.GetValues()))[i]; + float variance 
= + (static_cast(variance_weights.GetValues()))[i]; + float& combined_scale_ref = const_cast( + static_cast(combined_scale_weights.GetValues()))[i]; + float& combined_offset_ref = const_cast( + static_cast(combined_offset_weights.GetValues()))[i]; + combined_scale_ref = scale / sqrtf(variance + epsilon); + combined_offset_ref = offset - mean * combined_scale_ref; + } + } else { + const Eigen::half* scale_vals = + (static_cast(scale_weights.GetValues())); + const Eigen::half* off_vals = + (static_cast(offset_weights.GetValues())); + const Eigen::half* mean_vals = + (static_cast(mean_weights.GetValues())); + const Eigen::half* variance_vals = + (static_cast(variance_weights.GetValues())); + Eigen::half* comb_scale_vals = const_cast( + static_cast(combined_scale_weights.GetValues())); + Eigen::half* comb_off_vals = const_cast( + static_cast(combined_offset_weights.GetValues())); + for (size_t i = 0; i < nweight; ++i) { + float scale(scale_vals[i]); + float offset(off_vals[i]); + float mean(mean_vals[i]); + float variance(variance_vals[i]); + float combined_scale_ref = scale / sqrtf(variance + epsilon); + comb_scale_vals[i] = Eigen::half(combined_scale_ref); + float combined_offset_ref = offset - mean * combined_scale_ref; + comb_off_vals[i] = Eigen::half(combined_offset_ref); + } + } } + // if (scale_weights.type_ != tensorflow::DataType::DT_FLOAT || + // offset_weights.type_ != tensorflow::DataType::DT_FLOAT || + // mean_weights.type_ != tensorflow::DataType::DT_FLOAT || + // variance_weights.type_ != tensorflow::DataType::DT_FLOAT) { + // return tensorflow::errors::Unimplemented( + // "only float32 weights data type is supported, at " + + // node_def.name()); + // } + // for (size_t i = 0; i < nweight; ++i) { + // float scale = (static_cast(scale_weights.GetValues()))[i]; + // float offset = (static_cast(offset_weights.GetValues()))[i]; float mean = (static_cast(mean_weights.GetValues()))[i]; float variance = + // (static_cast(variance_weights.GetValues()))[i]; + // 
float& combined_scale_ref = const_cast( + // static_cast(combined_scale_weights.GetValues()))[i]; + // float& combined_offset_ref = const_cast( + // static_cast(combined_offset_weights.GetValues()))[i]; + // combined_scale_ref = scale / sqrtf(variance + epsilon); + // combined_offset_ref = offset - mean * combined_scale_ref; + // } nvinfer1::IScaleLayer* layer = ctx.network()->addScale( *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, - combined_offset_weights, combined_scale_weights, dummy_power_weights); + combined_offset_weights.GetWeightsForTRT(), + combined_scale_weights.GetWeightsForTRT(), + dummy_power_weights.GetWeightsForTRT()); nvinfer1::ITensor* output_tensor = layer->getOutput(0); outputs->push_back(TRT_TensorOrWeights(output_tensor)); return tensorflow::Status::OK(); @@ -2065,10 +2259,18 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // topological order is needed to build TRT network VLOG(2) << "BUILDING 1"; static int static_id = 0; - string calib_op_name = - tensorflow::strings::StrCat("my_trt_calib_op_", static_id); - string engine_name = tensorflow::strings::StrCat("my_trt_op", static_id); - + string subgraph_name_scope; + if (!order.empty()) { + subgraph_name_scope = order.front()->name(); + } + for (const tensorflow::Node* node : order) { + subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); + } + // TODO(sami,ben,jie): proper naming! 
+ string calib_op_name = tensorflow::strings::StrCat( + subgraph_name_scope, "my_trt_calib_op_", static_id); + string engine_name = + tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op", static_id); static_id++; VLOG(2) << "BUILDING 2"; auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); @@ -2098,7 +2300,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto weight_rmgr = trt_rmgr->getManager("WeightStore"); auto ws = new tensorflow::trt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network, ws); + Converter converter(op_res->network, ws, s.precision_mode == 1); VLOG(2) << "BUILDING 5"; std::vector input_names; @@ -2257,18 +2459,6 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { return tensorflow::Status::OK(); } -string GetCommonNameScope(const string& op_name_a, const string& op_name_b) { - size_t last_scope_separator = 0; - for (size_t i = 0; i < std::min(op_name_a.size(), op_name_b.size()); ++i) { - if (op_name_a[i] != op_name_b[i]) { - break; - } else if (op_name_a[i] == '/') { - last_scope_separator = i + 1; - } - } - return op_name_a.substr(0, last_scope_separator); -} - tensorflow::Status ConvertSubGraphToTensorRTNodeDef( tensorrt::convert::SubGraphParams& s) { // Visit nodes in reverse topological order and construct the TRT network. @@ -2319,7 +2509,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); // Build the network - Converter converter(trt_network.get(), ws); + Converter converter(trt_network.get(), ws, s.precision_mode == 1); std::vector input_names; std::vector input_dtypes; -- GitLab From dedace82ecf34c7906647361a811c8bf99f13da7 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 17:55:31 -0800 Subject: [PATCH 116/884] [XLA::Interpreter] Add support for kConditional to HloEvaluator. 
Also enable xla/tests/conditional_tests to run on interpreter. PiperOrigin-RevId: 187110438 --- .../compiler/xla/service/hlo_evaluator.cc | 28 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 1 + 3 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index cf8b35908f..afbfdac05e 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2491,6 +2491,34 @@ Status HloEvaluator::HandleCall(HloInstruction* call) { return Status::OK(); } +Status HloEvaluator::HandleConditional(HloInstruction* conditional) { + const auto& pred = GetEvaluatedLiteralFor(conditional->operand(0)); + const auto& true_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(1)); + const auto& false_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(2)); + + auto* true_computation = conditional->true_computation(); + auto* false_computation = conditional->false_computation(); + + auto result = Literal::CreateFromShape(conditional->shape()); + HloEvaluator embedded_evaluator; + if (pred.Get({})) { + result = embedded_evaluator + .Evaluate(*true_computation, + {&true_computation_arg}) + .ConsumeValueOrDie(); + } else { + result = embedded_evaluator + .Evaluate(*false_computation, + {&false_computation_arg}) + .ConsumeValueOrDie(); + } + + evaluated_[conditional] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index c65d9915e3..fc82011630 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status 
HandleCopy(HloInstruction* copy) override; + Status HandleConditional(HloInstruction* conditional) override; + Status HandleCall(HloInstruction* call) override; private: diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index f3ecfc1604..19b3dfae4e 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -478,6 +478,7 @@ xla_test( xla_test( name = "conditional_test", srcs = ["conditional_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From 4aa3d3ce252a9af2e09cdbd5460262ccb5378a3a Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 17:56:15 -0800 Subject: [PATCH 117/884] Support configurable stats publishers in the grpc server. PiperOrigin-RevId: 187110497 --- .../distributed_runtime/rpc/grpc_server_lib.cc | 15 ++++++++++++--- .../distributed_runtime/rpc/grpc_server_lib.h | 6 ++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index c4ac92d809..a6f4be3eaf 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -106,7 +106,8 @@ GrpcServer::~GrpcServer() { Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, - const WorkerCreationFunction& worker_func) { + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory) { mutex_lock l(mu_); CHECK_EQ(state_, NEW); master_env_.env = env_; @@ -218,7 +219,7 @@ Status GrpcServer::Init( master_env_.ops = OpRegistry::Global(); master_env_.worker_cache = worker_cache; master_env_.master_session_factory = - [config]( + [config, stats_factory]( SessionOptions options, const MasterEnv* env, std::unique_ptr>> 
remote_devs, std::unique_ptr worker_cache, @@ -226,7 +227,7 @@ Status GrpcServer::Init( options.config.MergeFrom(config); return new MasterSession(options, env, std::move(remote_devs), std::move(worker_cache), std::move(device_set), - CreateNoOpStatsPublisher); + stats_factory); }; master_env_.worker_cache_factory = [this](const WorkerCacheFactoryOptions& options, @@ -241,6 +242,14 @@ Status GrpcServer::Init( return Status::OK(); } +Status GrpcServer::Init( + ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func) { + return Init(std::move(service_func), rendezvous_mgr_func, worker_func, + CreateNoOpStatsPublisher); +} + Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func) { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h index 8b12ac1461..7c2f06f618 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h @@ -22,6 +22,7 @@ limitations under the License. 
#include "grpc++/security/credentials.h" #include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/stats_publisher_interface.h" #include "tensorflow/core/distributed_runtime/master_env.h" #include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h" @@ -68,6 +69,11 @@ class GrpcServer : public ServerInterface { const string target() const override; protected: + Status Init(ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory); + Status Init(ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, const WorkerCreationFunction& worker_func); -- GitLab From 19f18e377d8ee2f624406527b21444128da344df Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 26 Feb 2018 18:04:55 -0800 Subject: [PATCH 118/884] Modify retrain script to output TFLite compatible quantized models. -Also fix flaky input name selection introduced by last PR. -Also rely on tf.contrib.quantize to do graph transformations. -Also, update retrain script to use new float mobilenet_v1 and quantized mobilenet_v1 models. 
PiperOrigin-RevId: 187111533 --- .../examples/image_retraining/retrain.py | 317 +++++++++++------- .../examples/image_retraining/retrain_test.py | 44 ++- 2 files changed, 229 insertions(+), 132 deletions(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 25e09fecbf..99a71206ac 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -75,13 +75,16 @@ python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` -Run quantized version of mobilenet: +Run mobilenet, instrumented for quantization: ```bash python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized + --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quant ``` +These instrumented models can be converted to fully quantized mobile models via +TensorFlow Lite. + There are 32 different Mobilenet models to choose from, with a variety of file size and latency options. The first number can be '1.0', '0.75', '0.50', or '0.25' to control the size, and the second controls the input image size, either @@ -121,7 +124,6 @@ import numpy as np from six.moves import urllib import tensorflow as tf -from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import gfile @@ -135,6 +137,9 @@ FLAGS = None # need to update these to reflect the values in the network you're using. MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1 # ~134M +# The location where variable checkpoints will be stored. +CHECKPOINT_NAME = '/tmp/_retrain_checkpoint' + def create_image_lists(image_dir, testing_percentage, validation_percentage): """Builds a list of training images from the file system. 
@@ -745,9 +750,9 @@ def variable_summaries(var): tf.summary.histogram('histogram', var) -def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size, quantize_layer): - """Adds a new softmax and fully-connected layer for training. +def add_final_retrain_ops(class_count, final_tensor_name, bottleneck_tensor, + bottleneck_tensor_size, quantize_layer, is_training): + """Adds a new softmax and fully-connected layer for training and eval. We need to retrain the top layer to identify our new classes, so this function adds the right operations to the graph, along with some variables to hold the @@ -763,7 +768,9 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, bottleneck_tensor: The output of the main CNN graph. bottleneck_tensor_size: How many entries in the bottleneck vector. quantize_layer: Boolean, specifying whether the newly added layer should be - quantized. + instrumented for quantization. + is_training: Boolean, specifying whether the newly added layer is for + training or eval. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -778,50 +785,41 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, ground_truth_input = tf.placeholder( tf.int64, [None], name='GroundTruthInput') - # Organizing the following ops as `final_training_ops` so they're easier - # to see in TensorBoard - layer_name = 'final_training_ops' + # Organizing the following ops so they are easier to see in TensorBoard. 
+ layer_name = 'final_retrain_ops' with tf.name_scope(layer_name): with tf.name_scope('weights'): initial_value = tf.truncated_normal( [bottleneck_tensor_size, class_count], stddev=0.001) layer_weights = tf.Variable(initial_value, name='final_weights') - if quantize_layer: - quantized_layer_weights = quant_ops.MovingAvgQuantize( - layer_weights, is_training=True) - variable_summaries(quantized_layer_weights) - variable_summaries(layer_weights) + with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') - if quantize_layer: - quantized_layer_biases = quant_ops.MovingAvgQuantize( - layer_biases, is_training=True) - variable_summaries(quantized_layer_biases) - variable_summaries(layer_biases) with tf.name_scope('Wx_plus_b'): - if quantize_layer: - logits = tf.matmul(bottleneck_input, - quantized_layer_weights) + quantized_layer_biases - logits = quant_ops.MovingAvgQuantize( - logits, - init_min=-32.0, - init_max=32.0, - is_training=True, - num_bits=8, - narrow_range=False, - ema_decay=0.5) - tf.summary.histogram('pre_activations', logits) - else: - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) + logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases + tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) + # The tf.contrib.quantize functions rewrite the graph in place for + # quantization. The imported model graph has already been rewritten, so upon + # calling these rewrites, only the newly added final layer will be + # transformed. + if quantize_layer: + if is_training: + tf.contrib.quantize.create_training_graph() + else: + tf.contrib.quantize.create_eval_graph() + tf.summary.histogram('activations', final_tensor) + # If this is an eval graph, we don't need to add loss ops or an optimizer. 
+ if not is_training: + return None, None, bottleneck_input, ground_truth_input, final_tensor + with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) @@ -857,13 +855,91 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): return evaluation_step, prediction -def save_graph_to_file(sess, graph, graph_file_name): +def run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor): + """Runs a final evaluation on an eval graph using the test data set. + + Args: + sess: Session for the train graph. + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + image_lists: Dictionary of training images for each label. + jpeg_data_tensor: The layer to feed jpeg image data into. + decoded_image_tensor: The output of decoding and resizing the image. + resized_image_tensor: The input node of the recognition graph. + bottleneck_tensor: The bottleneck output layer of the CNN graph. 
+ """ + (sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) = build_eval_session(model_info, class_count) + + test_bottlenecks, test_ground_truth, test_filenames = ( + get_random_cached_bottlenecks(sess, image_lists, FLAGS.test_batch_size, + 'testing', FLAGS.bottleneck_dir, + FLAGS.image_dir, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor, FLAGS.architecture)) + test_accuracy, predictions = sess.run( + [evaluation_step, prediction], + feed_dict={ + bottleneck_input: test_bottlenecks, + ground_truth_input: test_ground_truth + }) + tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % + (test_accuracy * 100, len(test_bottlenecks))) + + if FLAGS.print_misclassified_test_images: + tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') + for i, test_filename in enumerate(test_filenames): + if predictions[i] != test_ground_truth[i]: + tf.logging.info('%70s %s' % (test_filename, + list(image_lists.keys())[predictions[i]])) + + +def build_eval_session(model_info, class_count): + """Builds an restored eval session without train operations for exporting. + + Args: + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + + Returns: + Eval session containing the restored eval graph. + The bottleneck input, ground truth, eval step, and prediction tensors. + """ + # If quantized, we need to create the correct eval graph for exporting. + eval_graph, bottleneck_tensor, _ = create_model_graph(model_info) + + eval_sess = tf.Session(graph=eval_graph) + with eval_graph.as_default(): + # Add the new layer for exporting. + (_, _, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + False) + + # Now we need to restore the values from the training graph to the eval + # graph. 
+ tf.train.Saver().restore(eval_sess, CHECKPOINT_NAME) + + evaluation_step, prediction = add_evaluation_step(final_tensor, + ground_truth_input) + + return (eval_sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) + + +def save_graph_to_file(graph, graph_file_name, model_info, class_count): + """Saves a graph to file, creating a valid quantized one if necessary.""" + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + output_graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) - return def prepare_file_system(): @@ -916,11 +992,10 @@ def create_model_info(architecture): return None version_string = parts[1] if (version_string != '1.0' and version_string != '0.75' and - version_string != '0.50' and version_string != '0.25'): + version_string != '0.5' and version_string != '0.25'): tf.logging.error( - """"The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25', - but found '%s' for architecture '%s'""", - version_string, architecture) + """"The Mobilenet version should be '1.0', '0.75', '0.5', or '0.25', + but found '%s' for architecture '%s'""", version_string, architecture) return None size_string = parts[2] if (size_string != '224' and size_string != '192' and @@ -933,35 +1008,26 @@ def create_model_info(architecture): if len(parts) == 3: is_quantized = False else: - if parts[3] != 'quantized': + if parts[3] != 'quant': tf.logging.error( "Couldn't understand architecture suffix '%s' for '%s'", parts[3], architecture) return None is_quantized = True + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/' + model_name = 'mobilenet_v1_' + version_string + '_' + size_string if is_quantized: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_quantized_frozen.tgz' 
- bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'Placeholder:0' - model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + - '_quantized_frozen') - model_base_name = 'quantized_frozen_graph.pb' - - else: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'input:0' - model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string - model_base_name = 'frozen_graph.pb' + model_name += '_quant' + data_url += model_name + '.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'input:0' + model_file_name = model_name + '_frozen.pb' bottleneck_tensor_size = 1001 input_width = int(size_string) input_height = int(size_string) input_depth = 3 - model_file_name = os.path.join(model_dir_name, model_base_name) input_mean = 127.5 input_std = 127.5 else: @@ -1011,43 +1077,45 @@ def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, return jpeg_data, mul_image -def export_model(sess, architecture, saved_model_dir): +def export_model(model_info, class_count, saved_model_dir): """Exports model for serving. Args: - sess: Current active TensorFlow Session. - architecture: Model architecture. + model_info: The modelinfo for the current model. + class_count: The number of classes. saved_model_dir: Directory in which to save exported model and variables. 
""" - if architecture == 'inception_v3': - input_tensor = 'DecodeJpeg/contents:0' - elif architecture.startswith('mobilenet_'): - input_tensor = 'input:0' - else: - raise ValueError('Unknown architecture', architecture) - in_image = sess.graph.get_tensor_by_name(input_tensor) - inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} - - out_classes = sess.graph.get_tensor_by_name('final_result:0') - outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)} + # The SavedModel should hold the eval graph. + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + with graph.as_default(): + input_tensor = model_info['resized_input_tensor_name'] + in_image = sess.graph.get_tensor_by_name(input_tensor) + inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} + + out_classes = sess.graph.get_tensor_by_name('final_result:0') + outputs = { + 'prediction': tf.saved_model.utils.build_tensor_info(out_classes) + } - signature = tf.saved_model.signature_def_utils.build_signature_def( - inputs=inputs, - outputs=outputs, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) + signature = tf.saved_model.signature_def_utils.build_signature_def( + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) - legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') + legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') - # Save out the SavedModel. - builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature - }, - legacy_init_op=legacy_init_op) - builder.save() + # Save out the SavedModel. 
+ builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants. + DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature + }, + legacy_init_op=legacy_init_op) + builder.save() def main(_): @@ -1064,11 +1132,6 @@ def main(_): tf.logging.error('Did not recognize architecture flag') return -1 - # Set up the pre-trained graph. - maybe_download_and_extract(model_info['data_url']) - graph, bottleneck_tensor, resized_image_tensor = ( - create_model_graph(model_info)) - # Look at the folder structure, and create lists of all the images. image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage, FLAGS.validation_percentage) @@ -1087,6 +1150,19 @@ def main(_): FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, FLAGS.random_brightness) + # Set up the pre-trained graph. + maybe_download_and_extract(model_info['data_url']) + graph, bottleneck_tensor, resized_image_tensor = ( + create_model_graph(model_info)) + + # Add the new layer that we'll be training. + with graph.as_default(): + (train_step, cross_entropy, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + True) + with tf.Session(graph=graph) as sess: # Set up the image decoding sub-graph. jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( @@ -1110,15 +1186,8 @@ def main(_): decoded_image_tensor, resized_image_tensor, bottleneck_tensor, FLAGS.architecture) - # Add the new layer that we'll be training. 
- (train_step, cross_entropy, bottleneck_input, ground_truth_input, - final_tensor) = add_final_training_ops( - len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer']) - # Create the operations we need to evaluate the accuracy of our new layer. - evaluation_step, prediction = add_evaluation_step( - final_tensor, ground_truth_input) + evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input) # Merge all the summaries and write them out to the summaries_dir merged = tf.summary.merge_all() @@ -1128,6 +1197,10 @@ def main(_): validation_writer = tf.summary.FileWriter( FLAGS.summaries_dir + '/validation') + # Create a train saver that is used to restore values into an eval graph + # when exporting models. + train_saver = tf.train.Saver() + # Set up all our weights to their initial default values. init = tf.global_variables_initializer() sess.run(init) @@ -1168,6 +1241,9 @@ def main(_): (datetime.now(), i, train_accuracy * 100)) tf.logging.info('%s: Step %d: Cross entropy = %f' % (datetime.now(), i, cross_entropy_value)) + # TODO(suharshs): Make this use an eval graph, to avoid quantization + # moving averages being updated by the validation set, though in + # practice this makes a negligible difference. validation_bottlenecks, validation_ground_truth, _ = ( get_random_cached_bottlenecks( sess, image_lists, FLAGS.validation_batch_size, 'validation', @@ -1190,42 +1266,32 @@ def main(_): if (intermediate_frequency > 0 and (i % intermediate_frequency == 0) and i > 0): + # If we want to do an intermediate save, save a checkpoint of the train + # graph, to restore into the eval graph. 
+ train_saver.save(sess, CHECKPOINT_NAME) intermediate_file_name = (FLAGS.intermediate_output_graphs_dir + 'intermediate_' + str(i) + '.pb') tf.logging.info('Save intermediate result to : ' + intermediate_file_name) - save_graph_to_file(sess, graph, intermediate_file_name) + save_graph_to_file(graph, intermediate_file_name, model_info, + class_count) + + # After training is complete, force one last save of the train checkpoint. + train_saver.save(sess, CHECKPOINT_NAME) # We've completed all our training, so run a final test evaluation on # some new images we haven't used before. - test_bottlenecks, test_ground_truth, test_filenames = ( - get_random_cached_bottlenecks( - sess, image_lists, FLAGS.test_batch_size, 'testing', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture)) - test_accuracy, predictions = sess.run( - [evaluation_step, prediction], - feed_dict={bottleneck_input: test_bottlenecks, - ground_truth_input: test_ground_truth}) - tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % - (test_accuracy * 100, len(test_bottlenecks))) - - if FLAGS.print_misclassified_test_images: - tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') - for i, test_filename in enumerate(test_filenames): - if predictions[i] != test_ground_truth[i]: - tf.logging.info('%70s %s' % - (test_filename, - list(image_lists.keys())[predictions[i]])) + run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor) # Write out the trained graph and labels with the weights stored as # constants. 
+ save_graph_to_file(graph, FLAGS.output_graph, model_info, class_count) with gfile.FastGFile(FLAGS.output_labels, 'w') as f: f.write('\n'.join(image_lists.keys()) + '\n') - export_model(sess, FLAGS.architecture, FLAGS.saved_model_dir) + export_model(model_info, class_count, FLAGS.saved_model_dir) if __name__ == '__main__': @@ -1406,8 +1472,9 @@ if __name__ == '__main__': form 'mobilenet__[_quantized]'. For example, 'mobilenet_1.0_224' will pick a model that is 17 MB in size and takes 224 pixel input images, while 'mobilenet_0.25_128_quantized' will choose a much - less accurate, but smaller and faster network that's 920 KB on disk and - takes 128x128 images. See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html + smaller and less accurate model, taking 128x128 images, and instrumented + for eventual quantization via TensorFlow Lite. + See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html for more information on Mobilenet.\ """) parser.add_argument( diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index 8b8dd45fd7..fb7324c58a 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -67,22 +67,52 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortResult:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOps(self, flags_mock): + def testAddFinalRetrainOps(self, flags_mock): with tf.Graph().as_default(): with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) + # Test creating final training op without quantization. 
+ retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, False, + False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOpsQuantized(self, flags_mock): - with tf.Graph().as_default(): + def testAddFinalRetrainOpsQuantized(self, flags_mock): + # Ensure that the training and eval graph for quantized models are correctly + # created. + with tf.Graph().as_default() as g: + with tf.Session() as sess: + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization, set is_training to + # true. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, True) + self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + # Ensure that the inputs of each FakeQuant operations has 2 Assign + # operations in the training graph (Assign[Min,Max]Last, + # Assign[Min,Max]Ema) + self.assertEqual(2, + len([i for i in op.inputs if 'Assign' in i.name])) + self.assertEqual(found_fake_quant, 2) + with tf.Graph().as_default() as g: with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) + # Test creating final training op with quantization, set is_training to + # false. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + for i in op.inputs: + # Ensure that no operations are Assign operation since this is the + # evaluation graph. 
+ self.assertTrue('Assign' not in i.name) + self.assertEqual(found_fake_quant, 2) def testAddEvaluationStep(self): with tf.Graph().as_default(): -- GitLab From 60a4b676df017b4ac51ca84a5e5e3a998912cebc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 18:05:59 -0800 Subject: [PATCH 119/884] Remove old implementation of the adaptive shared batcher, the in flight batches implementation delivers similar performance but is simpler and requires less tuning. PiperOrigin-RevId: 187111685 --- .../adaptive_shared_batch_scheduler.h | 172 +----- .../adaptive_shared_batch_scheduler_test.cc | 488 +++++------------- 2 files changed, 140 insertions(+), 520 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 25c5f9cf42..661ed239d3 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -50,43 +50,26 @@ class ASBSQueue; // track of a number of queues (one per model or model version) which are // continuously enqueuing requests. The scheduler groups the requests into // batches which it periodically sends off for processing (see -// shared_batch_scheduler.h for more details). The AdaptiveSharedBatchScheduler -// prioritizes batches by age (i.e. the batch's oldest request) irrespective of -// queue or batch size. +// shared_batch_scheduler.h for more details). AdaptiveSharedBatchScheduler +// (ASBS) prioritizes batches by age (i.e. the batch's oldest request) +// irrespective of queue or batch size. // -// The scheduling decision currently exists in two flavors, controlled by the -// option use_in_flight_batches_implementation. It is expected that setting this -// option to true will give universally better results; after a period of -// testing to confirm, the old implementation will be removed. 
-// -// If use_in_flight_batches_implementation is set to true, the scheduler -// limits the number of batches which can be processed concurrently. If a new -// batch is created, and the number of in flight batches is below the limit, -// the next (i.e. oldest) batch is immediately scheduled. Similarly, when a -// batch finishes processing, the limit is rechecked, and another batch may be -// scheduled. To avoid the need to carefully tune the limit for workload, -// model type, platform, etc, it is dynamically adjusted in order to provide the -// lowest latency. -// -// If use_in_flight_batches_implementation is set to false, the scheduler will -// process the oldest batch at an adjustable rate, regardless of batch size. -// The user can provide feedback to help set this rate to achieve some goal -// (i.e. minimize overall latency, limit cpu usage, etc). The rate (or rather, -// the corresponding period) is adjusted each time a batch is processed, using -// an exponentially weighted moving average to smooth noisy feedback: -// ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N -// period *= (1 + K * emwa_feedback) +// ASBS tries to keep the system busy by maintaining an adjustable number of +// concurrently processed batches. If a new batch is created, and the number of +// in flight batches is below the target, the next (i.e. oldest) batch is +// immediately scheduled. Similarly, when a batch finishes processing, the +// target is rechecked, and another batch may be scheduled. To avoid the need +// to carefully tune the target for workload, model type, platform, etc, it is +// dynamically adjusted in order to provide the lowest average latency. // // Some potential use cases: // Hardware Accelerators (GPUs & TPUs) - If some phase of batch processing // involves serial processing by a device, from a latency perspective it is // desirable to keep the device evenly loaded, avoiding the need to wait for // the device to process prior batches. 
-// feedback = num_pending_on_device() - desired_pending. // CPU utilization - If the batch processing is cpu dominated, you can reap // latency gains when underutilized by increasing the processing rate, but // back the rate off when the load increases to avoid overload. -// feedback = cpu_rate() - desired_cpu_rate. template class AdaptiveSharedBatchScheduler @@ -101,13 +84,17 @@ class AdaptiveSharedBatchScheduler struct Options { // The name to use for the pool of batch threads. string thread_pool_name = {"batch_threads"}; - // Number of batch processing threads; equivalently the maximum number of - // concurrently running batches. + // Number of batch processing threads - the maximum value of + // in_flight_batches_limit_. It is recommended that this value be set by + // running the system under load, observing the learned value for + // in_flight_batches_limit_, and setting this maximum to ~ 2x the value. + // Under low load, in_flight_batches_limit_ has no substantial effect on + // latency and therefore undergoes a random walk. Unreasonably large values + // for num_batch_threads allows for large in_flight_batches_limit_, which + // will harm latency for some time once load increases again. int64 num_batch_threads = port::NumSchedulableCPUs(); // The environment to use (typically only overridden by test code). Env* env = Env::Default(); - // Which implementation to use (described in class comments above). - bool use_in_flight_batches_implementation = false; // Initial limit for number of batches being concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of // 3.2 results in an actual cap of 3 80% of the time, and 4 20% of the time. @@ -116,28 +103,6 @@ class AdaptiveSharedBatchScheduler // numbers will give less noisy latency measurements, but will be less // responsive to changes in workload. 
int64 batches_to_average_over = 1000; - - // TODO(kte): remove the rate based implementation and corresponding options - // below once testing confirms the superiority of the in flight batches - // implementation. - // Initial batch scheduling period in microseconds. Will be altered for - // non-zero rate_feedback. - double initial_scheduling_period_micros = 500; - // Minimum batch scheduling period in microseconds. Recommend setting this - // value greater than 0, otherwise it may take a while to recover from a - // sustained time of negative scheduling_period_feedback (which may occur - // under low load). - double min_scheduling_period_micros = 100; - // Maximum batch scheduling period in microseconds. - double max_scheduling_period_micros = 10000; - // Feedback function used to modify the scheduling period each time a batch - // is scheduled. Should return values roughly O(1), with positive values - // resulting in an increased period. - std::function scheduling_period_feedback{[] { return 0.; }}; - // To handle potentially noisy scheduling_period_feedback, the period is - // adjusted using an exponentially weighted moving average over the previous - // feedback_smoothing_batches batches. Must be greater than 0. - int64 feedback_smoothing_batches = 10; }; // Ownership is shared between the caller of Create() and any queues created @@ -171,17 +136,11 @@ class AdaptiveSharedBatchScheduler explicit AdaptiveSharedBatchScheduler(const Options& options); - // Batch scheduling function which runs every scheduling_period_ microseconds. - // Only used when options_.use_in_flight_batches_implementation == false. - void ProcessOneBatch(); - // Tracks processing latency and adjusts in_flight_batches_limit to minimize. - // Only used when options_.use_in_flight_batches_implementation == true. void CallbackWrapper(const internal::ASBSBatch* batch, BatchProcessor callback); // Schedules batch if in_flight_batches_limit_ is not met. 
- // Only used when options_.use_in_flight_batches_implementation == true. void MaybeScheduleNextBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_); // Notifies scheduler of non-empty batch which is eligible for processing. @@ -212,41 +171,22 @@ class AdaptiveSharedBatchScheduler mutex mu_; - // Responsible for running ProcessOneBatch. PeriodicFunction was used in order - // to check for deletion so that the thread can be shut down. - // Only used when options_.use_in_flight_batches_implementation == false. - std::unique_ptr scheduling_thread_; - // Responsible for running the batch processing callbacks. std::unique_ptr batch_thread_pool_; - // Time interval in microseconds between successive ProcessOneBatch calls. - // Only used when options_.use_in_flight_batches_implementation == false. - double scheduling_period_; - - // Exponentially weighted moving average of - // options_.scheduling_period_feedback() evaluated in each ProcessOneBatch - // call. - // Only used when options_.use_in_flight_batches_implementation == false. - double ewma_feedback_ = 0; - // Limit on number of batches which can be concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of 3.2 // results in an actual cap of 3 80% of the time, and 4 20% of the time. - // Only used when options_.use_in_flight_batches_implementation == true. double in_flight_batches_limit_ GUARDED_BY(mu_); // Number of batches currently being processed. - // Only used when options_.use_in_flight_batches_implementation == true. int64 in_flight_batches_ GUARDED_BY(mu_) = 0; // RNG engine and distribution. - // Only used when options_.use_in_flight_batches_implementation == true. std::default_random_engine rand_engine_; std::uniform_real_distribution rand_double_; // Fields controlling the dynamic adjustment of in_flight_batches_limit_. - // Only used when options_.use_in_flight_batches_implementation == true. // Number of batches since the last in_flight_batches_limit_ adjustment. 
int64 batch_count_ GUARDED_BY(mu_) = 0; // Sum of processing latency for batches counted by batch_count_. @@ -348,32 +288,6 @@ Status AdaptiveSharedBatchScheduler::Create( return errors::InvalidArgument("num_batch_threads must be positive; was ", options.num_batch_threads); } - if (options.min_scheduling_period_micros < 0) { - return errors::InvalidArgument( - "min_scheduling_period_micros must be >= 0; was ", - options.min_scheduling_period_micros); - } - if (options.min_scheduling_period_micros > - options.initial_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be >= min_scheduling_period_micros (", - options.min_scheduling_period_micros, ")"); - } - if (options.initial_scheduling_period_micros > - options.max_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be <= max_scheduling_period_micros (", - options.max_scheduling_period_micros, ")"); - } - if (options.feedback_smoothing_batches < 1) { - return errors::InvalidArgument( - "feedback_smoothing_batches must be positive; was ", - options.feedback_smoothing_batches); - } if (options.initial_in_flight_batches_limit > options.num_batch_threads) { return errors::InvalidArgument( "initial_in_flight_batches_limit (", @@ -401,20 +315,12 @@ template AdaptiveSharedBatchScheduler::AdaptiveSharedBatchScheduler( const Options& options) : options_(options), - scheduling_period_(options.initial_scheduling_period_micros), in_flight_batches_limit_(options.initial_in_flight_batches_limit), rand_double_(0.0, 1.0) { std::random_device device; rand_engine_.seed(device()); - PeriodicFunction::Options opts; - opts.thread_name_prefix = "scheduling_thread"; - opts.env = GetEnv(); batch_thread_pool_.reset(new thread::ThreadPool( GetEnv(), options.thread_pool_name, options.num_batch_threads)); - if 
(!options.use_in_flight_batches_implementation) { - scheduling_thread_.reset( - new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts)); - } } template @@ -443,9 +349,7 @@ void AdaptiveSharedBatchScheduler::AddBatch( const internal::ASBSBatch* batch) { mutex_lock l(mu_); batches_.push(batch); - if (options_.use_in_flight_batches_implementation) { - MaybeScheduleNextBatch(); - } + MaybeScheduleNextBatch(); } template @@ -523,44 +427,6 @@ void AdaptiveSharedBatchScheduler::CallbackWrapper( MaybeScheduleNextBatch(); } -template -void AdaptiveSharedBatchScheduler::ProcessOneBatch() { - static const double kFeedbackMultiplier = .001; - const internal::ASBSBatch* batch = nullptr; - BatchProcessor callback; - const int64 start_time_micros = GetEnv()->NowMicros(); - { - mutex_lock l(mu_); - if (!batches_.empty()) { - batch = batches_.top(); - batches_.pop(); - callback = queues_and_callbacks_[batch->queue()]; - } - } - if (batch != nullptr) { - double feedback = options_.scheduling_period_feedback(); - const int64 N = options_.feedback_smoothing_batches; - ewma_feedback_ = ((N - 1) * ewma_feedback_ + feedback) / N; - scheduling_period_ *= (1 + kFeedbackMultiplier * ewma_feedback_); - if (scheduling_period_ < options_.min_scheduling_period_micros) { - scheduling_period_ = options_.min_scheduling_period_micros; - } else if (scheduling_period_ > options_.max_scheduling_period_micros) { - scheduling_period_ = options_.max_scheduling_period_micros; - } - // Queue may destroy itself after ReleaseBatch is called. 
- batch->queue()->ReleaseBatch(batch); - batch_thread_pool_->Schedule([callback, batch] { - callback(std::unique_ptr>( - const_cast*>(batch))); - }); - } - const int64 sleep_time = - scheduling_period_ - (GetEnv()->NowMicros() - start_time_micros); - if (sleep_time > 0) { - GetEnv()->SleepForMicroseconds(sleep_time); - } -} - template bool AdaptiveSharedBatchScheduler::BatchCompare::operator()( const internal::ASBSBatch* a, diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc index 8ae8ca02ec..109234287e 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc @@ -64,59 +64,6 @@ std::unique_ptr CreateFakeClockAdvancerThread( })); } -TEST(AdaptiveSharedBatchSchedulerTest, Basic) { - for (const bool delete_scheduler_early : {false, true}) { - for (const bool delete_queue_1_early : {false, true}) { - int queue_0_tasks = 0; - auto queue_0_callback = - [&queue_0_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - }; - int queue_1_tasks = 0; - auto queue_1_callback = - [&queue_1_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - }; - { - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create({}, &scheduler)); - - // Create two queues. - std::unique_ptr> queue_0; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_0_callback, &queue_0)); - std::unique_ptr> queue_1; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_1_callback, &queue_1)); - - if (delete_scheduler_early) { - // Delete our copy of the scheduler. 
The queues should keep it alive - // under the covers. - scheduler = nullptr; - } - // Submit tasks to the two queues, and (optionally) remove the queues. - TF_ASSERT_OK(ScheduleTask(1, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(2, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(3, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(4, queue_1.get())); - if (delete_queue_1_early) { - queue_1 = nullptr; - } - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - } - EXPECT_EQ(queue_0_tasks, 9); - EXPECT_EQ(queue_1_tasks, 6); - } - } -} - TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { using Scheduler = AdaptiveSharedBatchScheduler; std::shared_ptr scheduler; @@ -124,24 +71,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { options.num_batch_threads = 0; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1000; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 100; - options.max_scheduling_period_micros = 50; - options.initial_scheduling_period_micros = 75; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.feedback_smoothing_batches = 0; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); options.initial_in_flight_batches_limit = 0.5; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); @@ -153,301 +82,8 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); } 
-TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue_0; - std::unique_ptr> queue_1; - int queue_0_tasks = 0; - int queue_1_tasks = 0; - auto queue_0_callback = [&queue_0_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - auto queue_1_callback = [&queue_1_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 0; - // Queue must have max_enqueued_batchs > 1. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0).ok()); - queue_options.max_enqueued_batches = 2; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0)); - EXPECT_EQ(10, queue_0->max_task_size()); - queue_options.max_batch_size = 0; - // Queue must have max_batch_size > 0. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1).ok()); - queue_options.max_batch_size = 2; - queue_options.max_enqueued_batches = 1; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1)); - - // Wait for scheduling_thread to sleep. 
- env.BlockUntilThreadsAsleep(1); - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(15, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - env.AdvanceByMicroseconds(1); - - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(3, queue_1.get()).ok()); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - env.AdvanceByMicroseconds(1); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(1, queue_1.get()).ok()); - - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(6, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(4, queue_0.get())); - - // Batches should be processed in order from oldest to newest. - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 0); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 2); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 19); - EXPECT_EQ(queue_1_tasks, 2); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, RateFeedback) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.min_scheduling_period_micros = 200; - options.max_scheduling_period_micros = 2000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 1; - std::shared_ptr> scheduler; 
- TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 6 batches. - for (int i = 0; i < 6; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -500; - env.AdvanceByMicroseconds(994); - env.BlockUntilThreadsAsleep(2); // scheduling period = 500 usec. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(500); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 901); - feedback = 0; - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 902); - feedback = 10000; // large feedback should hit max_scheduling_period. - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 2000 usec. - EXPECT_EQ(scheduled_items, 903); - feedback = -10000; // large feedback should hit min_scheduling_period. - env.AdvanceByMicroseconds(1999); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 903); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); // scheduling period = 200 usec. 
- EXPECT_EQ(scheduled_items, 904); - env.AdvanceByMicroseconds(200); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 905); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, FeedbackSmoothing) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 3; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 4 batches. - for (int i = 0; i < 4; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -300; - env.AdvanceByMicroseconds(996); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 100, scheduling_period = 900. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(899); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 167, scheduling_period = 750. 
- EXPECT_EQ(scheduled_items, 901); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 901); - feedback = 1000 / 3.; - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // emwa_feedback = 0, scheduling_period = 750. - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 903); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 10; - TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 3 tasks. 
- EXPECT_EQ(queue->NumEnqueuedTasks(), 0); - EXPECT_EQ(queue->SchedulingCapacity(), 100); - TF_ASSERT_OK(ScheduleTask(5, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 1); - EXPECT_EQ(queue->SchedulingCapacity(), 95); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(6, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 2); - EXPECT_EQ(queue->SchedulingCapacity(), 84); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(1, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 3); - EXPECT_EQ(queue->SchedulingCapacity(), 83); - - env.AdvanceByMicroseconds(998); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 5); - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 7); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { +TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimit) { AdaptiveSharedBatchScheduler::Options options; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1000; mutex mu; @@ -476,7 +112,7 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { std::unique_ptr> queue; TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - // Enqueue 3 batches. + // Enqueue 3 tasks, should result in 3 batches. 
for (int i = 0; i < 3; i++) { TF_ASSERT_OK(ScheduleTask(100, queue.get())); } @@ -490,7 +126,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { { AdaptiveSharedBatchScheduler::Options options; options.env = &env; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1; auto queue_callback = [&env](std::unique_ptr> batch) { @@ -544,6 +179,125 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { } stop_teardown.Notify(); } + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteQueue) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::unique_ptr queue_deleter; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete queue, should be kept alive until empty. + queue_deleter.reset(Env::Default()->StartThread( + {}, "QueueDeleterThread", [&queue, &mu, &processed_batches] { + queue.reset(); + mutex_lock l(mu); + EXPECT_EQ(processed_batches, 2); + })); + // Give queue_deleter thread time to delete queue. 
+ Env::Default()->SleepForMicroseconds(1000); + finish_processing.Notify(); +} + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteScheduler) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete scheduler, should be kept alive until queues are empty. + scheduler.reset(); + finish_processing.Notify(); + while (true) { + mutex_lock l(mu); + if (processed_batches == 2) break; + } +} + +TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + mu.lock(); + int batch_num = ++processed_batches; + mu.unlock(); + if (batch_num == 1) { + finish_processing.WaitForNotification(); + } + }; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. 
+ for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // First batch was immediately processed, no longer counts as enqueued. + EXPECT_EQ(queue->NumEnqueuedTasks(), 1); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 900); + // Enqueue 2 more tasks, should fall in same batch. + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + TF_ASSERT_OK(ScheduleTask(200, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 3); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 600); + // Enqueue 1 more task, should create new batch. + TF_ASSERT_OK(ScheduleTask(700, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 4); + EXPECT_EQ(queue->SchedulingCapacity(), 8 * 1000 + 300); + finish_processing.Notify(); +} } // namespace anonymous } // namespace serving } // namespace tensorflow -- GitLab From 8a2d00f57c8bce6be7550dc447036b62567d1d82 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Mon, 26 Feb 2018 18:32:36 -0800 Subject: [PATCH 120/884] Fix bad wrong jpeg/nasm mirror (#17277) --- tensorflow/workspace.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 85f423f236..278a225f76 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -215,6 +215,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): urls = [ "https://mirror.bazel.build/www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2", "http://pkgs.fedoraproject.org/repo/pkgs/nasm/nasm-2.12.02.tar.bz2/d15843c3fb7db39af80571ee27ec6fad/nasm-2.12.02.tar.bz2", + "http://www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2", ], sha256 = "00b0891c678c065446ca59bcee64719d0096d54d6886e6e472aeee2e170ae324", strip_prefix = "nasm-2.12.02", @@ -226,7 +227,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): urls = [ "https://mirror.bazel.build/github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", 
"https://github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.tar.gz", - "http://www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2", ], sha256 = "c15a9607892113946379ccea3ca8b85018301b200754f209453ab21674268e77", strip_prefix = "libjpeg-turbo-1.5.1", -- GitLab From 4a9d929868c57d742512d65634cceada8c11c6ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 19:46:27 -0800 Subject: [PATCH 121/884] Make sure rounding and handling of denormals in Grappler is the same as in TensorFlow. Enable constant folding for more types, particularly on GPUs. PiperOrigin-RevId: 187120456 --- tensorflow/core/grappler/op_types.cc | 6 +- .../grappler/optimizers/constant_folding.cc | 96 ++++++++++++------- tensorflow/core/kernels/constant_op.cc | 11 +++ 3 files changed, 74 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index e225e99a9e..9b3755ddce 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -354,7 +354,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { return false; } const OpDef* op_def = nullptr; - Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); + const string& op_name = node.op(); + Status status = OpRegistry::Global()->LookUpOpDef(op_name, &op_def); if (!status.ok()) { return false; } @@ -368,7 +369,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { } } // Some nodes do in-place updates on regular tensor inputs. 
- if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace")) { + if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace") || + StringPiece(op_name).starts_with("Inplace")) { return false; } return true; diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 10ca7dcce0..a5417aaa51 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -35,7 +35,9 @@ limitations under the License. #include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/setround.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/bcast.h" @@ -51,7 +53,14 @@ class EigenThreadPoolWrapper : public Eigen::ThreadPoolInterface { explicit EigenThreadPoolWrapper(thread::ThreadPool* pool) : pool_(pool) {} ~EigenThreadPoolWrapper() override {} void Schedule(std::function fn) override { - pool_->Schedule(std::move(fn)); + auto wrapped = [=]() { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + fn(); + }; + pool_->Schedule(std::move(wrapped)); } int NumThreads() const override { return pool_->NumThreads(); } int CurrentThreadId() const override { return pool_->CurrentThreadId(); } @@ -292,16 +301,16 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // graph. 
const int node_count = graph_->node_size(); for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_->mutable_node(i); - const string op = node.op(); + NodeDef* node = graph_->mutable_node(i); + const string op = node->op(); if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") { continue; } const std::vector& output = - properties.GetOutputProperties(node.name()); + properties.GetOutputProperties(node->name()); const std::vector& input = - properties.GetInputProperties(node.name()); + properties.GetInputProperties(node->name()); if (input.empty() || output.empty()) { continue; } @@ -328,35 +337,35 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // could have multiple outputs). if (op == "Shape" || op == "Size" || op == "Rank") { // Replace the node with the corresponding constant. - node.set_op("Const"); - node.clear_attr(); - (*node.mutable_attr())["dtype"].set_type(type); + node->set_op("Const"); + node->clear_attr(); + (*node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( - (*node.mutable_attr())["value"].mutable_tensor()); + (*node->mutable_attr())["value"].mutable_tensor()); // Turn the data input into a control dependency: this is needed to // ensure that the constant value will only be run in the // cases where the shape/rank/size would have been run in // the original graph. 
Additional inputs are extra control string ctrl_dep = - AddControlDependency(node.input(0), graph_, node_map_.get()); - node.set_input(0, ctrl_dep); - node_map_->AddOutput(NodeName(ctrl_dep), node.name()); + AddControlDependency(node->input(0), graph_, node_map_.get()); + node->set_input(0, ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), node->name()); } else { - auto outputs = node_map_->GetOutputs(node.name()); + auto outputs = node_map_->GetOutputs(node->name()); for (const auto& output : outputs) { for (int k = 0; k < output->input_size(); ++k) { int port; string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port == j) { + if (node_name == node->name() && port == j) { // Create a const node as ShapeN's output if not already. const string const_name = - OptimizedNodeName(node, strings::StrCat("-matshapes-", j)); + OptimizedNodeName(*node, strings::StrCat("-matshapes-", j)); if (node_map_->GetNode(const_name) == nullptr) { NodeDef* added_node = graph_->add_node(); added_node->set_name(const_name); added_node->set_op("Const"); - added_node->set_device(node.device()); + added_node->set_device(node->device()); node_map_->AddNode(added_node->name(), added_node); (*added_node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( @@ -364,7 +373,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We add a control dependency to the original ShapeN node, // so that the node will only be run if all inputs of the // original ShapeN node are run. 
- string ctrl_dep = AddControlDependency(node.name(), graph_, + string ctrl_dep = AddControlDependency(node->name(), graph_, node_map_.get()); *added_node->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); @@ -679,7 +688,7 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { nodes_whitelist_.find(node.name()) == nodes_whitelist_.end()) { return false; } - // Skip control flow nodes, they can't be folded + // Skip control flow nodes, they can't be folded. if (ModifiesFrameInfo(node)) { return false; } @@ -688,12 +697,16 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Skips ops that don't benefit from folding. - const string& op = node.op(); + // Don't fold stateful ops such as TruncatedNormal. + if (!IsFreeOfSideEffect(node)) { + return false; + } - if (op.find("Placeholder") == 0) { + // Skips ops that don't benefit from folding. + if (IsPlaceholder(node)) { return false; } + const string& op = node.op(); if (op.find("Save") != string::npos || op.find("Restore") != string::npos || op.find("Reader") != string::npos) { return false; @@ -705,16 +718,12 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Don't fold stateful ops such as TruncatedNormal. const OpDef* op_def = nullptr; Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); if (!status.ok()) { return false; } - if (op_def->is_stateful()) { - return false; - } - + // Don't fold ops without outputs. 
if (op_def->output_arg_size() == 0) { return false; } @@ -779,8 +788,11 @@ Status CreateConstantTensorAttrValue(DataType type, double value, SET_TENSOR_VAL_CASE(DT_FLOAT, float, float); SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double); SET_TENSOR_VAL_CASE(DT_INT64, int64, int64); + SET_TENSOR_VAL_CASE(DT_UINT64, int64, int64); SET_TENSOR_VAL_CASE(DT_INT32, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT32, int32, int); SET_TENSOR_VAL_CASE(DT_INT16, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT16, int32, int); SET_TENSOR_VAL_CASE(DT_INT8, int32, int); SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); @@ -843,10 +855,16 @@ Status ConstantFolding::CreateNodeDef(const string& name, POPULATE_TENSOR_PROTO(tensor, t, double, double); case DT_INT64: POPULATE_TENSOR_PROTO(tensor, t, int64, int64); + case DT_UINT64: + POPULATE_TENSOR_PROTO(tensor, t, uint64, int64); case DT_INT32: POPULATE_TENSOR_PROTO(tensor, t, int32, int); + case DT_UINT32: + POPULATE_TENSOR_PROTO(tensor, t, uint32, int); case DT_INT16: POPULATE_TENSOR_PROTO(tensor, t, int16, int); + case DT_UINT16: + POPULATE_TENSOR_PROTO(tensor, t, uint16, int); case DT_INT8: POPULATE_TENSOR_PROTO(tensor, t, int8, int); case DT_UINT8: @@ -1166,9 +1184,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { std::unordered_set processed_nodes; std::deque queue; for (int i = 0; i < graph_->node_size(); i++) { - auto node = graph_->mutable_node(i); - if (IsFoldable(*node)) { - queue.push_back(node); + if (IsFoldable(graph_->node(i))) { + queue.push_back(graph_->mutable_node(i)); } } while (!queue.empty()) { @@ -1203,8 +1220,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { int last = output->node_size() - 1; for (int i = output->node_size() - 1; i >= 0; --i) { const NodeDef& node = output->node(i); - auto outputs = node_map_->GetOutputs(node.name()); - if (outputs.empty()) { + auto fanout = node_map_->GetOutputs(node.name()); + if (fanout.empty()) { 
output->mutable_node()->SwapElements(i, last); last--; } @@ -1216,8 +1233,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { // If no fetch nodes is provided, we conservatively // keep all nodes in the original graph in case users need to fetch // their values. - auto outputs = node_map_->GetOutputs(node.name()); - if (!outputs.empty() || !has_fetch_ || + auto fanout = node_map_->GetOutputs(node.name()); + if (!fanout.empty() || !has_fetch_ || nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { auto added_node = output->add_node(); *added_node = node; @@ -1331,14 +1348,14 @@ bool ConstantFolding::IsOnes(const NodeDef& node) const { // IS_ONES_CASE(DT_HALF); IS_ONES_CASE(DT_FLOAT); IS_ONES_CASE(DT_DOUBLE); + IS_ONES_CASE(DT_COMPLEX64); + IS_ONES_CASE(DT_COMPLEX128); IS_ONES_CASE(DT_UINT8); IS_ONES_CASE(DT_INT8); IS_ONES_CASE(DT_UINT16); IS_ONES_CASE(DT_INT16); IS_ONES_CASE(DT_INT32); IS_ONES_CASE(DT_INT64); - IS_ONES_CASE(DT_COMPLEX64); - IS_ONES_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1362,14 +1379,14 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const { // IS_ZEROS_CASE(DT_HALF); IS_ZEROS_CASE(DT_FLOAT); IS_ZEROS_CASE(DT_DOUBLE); + IS_ZEROS_CASE(DT_COMPLEX64); + IS_ZEROS_CASE(DT_COMPLEX128); IS_ZEROS_CASE(DT_UINT8); IS_ZEROS_CASE(DT_INT8); IS_ZEROS_CASE(DT_UINT16); IS_ZEROS_CASE(DT_INT16); IS_ZEROS_CASE(DT_INT32); IS_ZEROS_CASE(DT_INT64); - IS_ZEROS_CASE(DT_COMPLEX64); - IS_ZEROS_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1869,6 +1886,11 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. 
+ port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index fdb03a5aae..312c1a41d3 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -105,7 +105,12 @@ REGISTER_KERNEL(GPU, int8); REGISTER_KERNEL(GPU, qint8); REGISTER_KERNEL(GPU, uint16); REGISTER_KERNEL(GPU, int16); +REGISTER_KERNEL(GPU, qint16); +REGISTER_KERNEL(GPU, quint16); +REGISTER_KERNEL(GPU, uint32); +REGISTER_KERNEL(GPU, qint32); REGISTER_KERNEL(GPU, int64); +REGISTER_KERNEL(GPU, uint64); REGISTER_KERNEL(GPU, complex64); REGISTER_KERNEL(GPU, complex128); REGISTER_KERNEL(GPU, bool); @@ -122,9 +127,15 @@ REGISTER_SYCL_KERNEL(SYCL, float); REGISTER_SYCL_KERNEL(SYCL, double); REGISTER_SYCL_KERNEL(SYCL, uint8); REGISTER_SYCL_KERNEL(SYCL, int8); +REGISTER_SYCL_KERNEL(SYCL, qint8); REGISTER_SYCL_KERNEL(SYCL, uint16); REGISTER_SYCL_KERNEL(SYCL, int16); +REGISTER_SYCL_KERNEL(SYCL, qint16); +REGISTER_SYCL_KERNEL(SYCL, quint16); +REGISTER_SYCL_KERNEL(SYCL, uint32); +REGISTER_SYCL_KERNEL(SYCL, qint32); REGISTER_SYCL_KERNEL(SYCL, int64); +REGISTER_SYCL_KERNEL(SYCL, uint64); REGISTER_SYCL_KERNEL(SYCL, bool); #undef REGISTER_SYCL_KERNEL #endif -- GitLab From 4774889094d3f1787a38cfbeb0670cb4fb6e24ff Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 26 Feb 2018 19:57:42 -0800 Subject: [PATCH 122/884] Fixes and simplification in the Keras training engine. 
- Explicitly disallow sample/class weighting in eager (it was never supported) - Remove tests for it (which were actually ignoring sample/class weights) - Make sample weight placeholders placeholder_with_default, and do not create all-ones numpy arrays to feed them when no sample weights are provided (this might lead to better performance) PiperOrigin-RevId: 187121215 --- .../python/keras/_impl/keras/backend.py | 11 +- .../python/keras/_impl/keras/callbacks.py | 20 +- .../keras/_impl/keras/engine/training.py | 151 +++--- .../_impl/keras/engine/training_eager.py | 17 +- .../_impl/keras/engine/training_eager_test.py | 436 ------------------ .../keras/_impl/keras/engine/training_test.py | 8 - 6 files changed, 110 insertions(+), 533 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index a2db05f6cf..2b75666b9e 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -2749,7 +2749,7 @@ class Function(object): self.updates_op = control_flow_ops.group(*updates_ops) self.name = name # additional tensor substitutions - self.feed_dict = session_kwargs.pop('feed_dict', {}) + self.feed_dict = session_kwargs.pop('feed_dict', None) # additional operations self.fetches = session_kwargs.pop('fetches', []) if not isinstance(self.fetches, list): @@ -2759,8 +2759,15 @@ class Function(object): def __call__(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') - feed_dict = self.feed_dict.copy() + + if self.feed_dict: + feed_dict = self.feed_dict.copy() + else: + feed_dict = {} + for tensor, value in zip(self.inputs, inputs): + if value is None: + continue if is_sparse(tensor): sparse_coo = value.tocoo() indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py b/tensorflow/python/keras/_impl/keras/callbacks.py index 
f6c4661425..deb1e8867d 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -778,16 +778,24 @@ class TensorBoard(Callback): while i < val_size: step = min(self.batch_size, val_size - i) batch_val = [] - batch_val.append(val_data[0][i:i + step]) - batch_val.append(val_data[1][i:i + step]) - batch_val.append(val_data[2][i:i + step]) + batch_val.append(val_data[0][i:i + step] + if val_data[0] is not None else None) + batch_val.append(val_data[1][i:i + step] + if val_data[1] is not None else None) + batch_val.append(val_data[2][i:i + step] + if val_data[2] is not None else None) if self.model.uses_learning_phase: # do not slice the learning phase - batch_val = [x[i:i + step] for x in val_data[:-1]] + batch_val = [x[i:i + step] if x is not None else None + for x in val_data[:-1]] batch_val.append(val_data[-1]) else: - batch_val = [x[i:i + step] for x in val_data] - feed_dict = dict(zip(tensors, batch_val)) + batch_val = [x[i:i + step] if x is not None else None + for x in val_data] + feed_dict = {} + for key, val in zip(tensors, batch_val): + if val is not None: + feed_dict[key] = val result = self.sess.run([self.merged], feed_dict=feed_dict) summary_str = result[0] self.writer.add_summary(summary_str, epoch) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 57451ad470..63bea08ac5 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -40,6 +40,7 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.layers.base import _DeferredTensor +from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging from 
tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.util.tf_export import tf_export @@ -225,9 +226,9 @@ def _check_array_lengths(inputs, targets, weights=None): # return a set with the variation between # different shapes, with None => 0 if x is None: - return {0} + return {} else: - return set([0 if y is None else y.shape[0] for y in x]) + return set([y.shape[0] for y in x if y is not None]) set_x = set_of_lengths(inputs) set_y = set_of_lengths(targets) @@ -259,7 +260,8 @@ def _check_array_lengths(inputs, targets, weights=None): def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): """Does validation on the compatibility of targets and loss functions. - This helps prevent users from using loss functions incorrectly. + This helps prevent users from using loss functions incorrectly. This check + is purely for UX purposes. Arguments: targets: list of Numpy arrays of targets. @@ -275,7 +277,7 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): losses.categorical_crossentropy } for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None: + if y is None or loss is None or tensor_util.is_tensor(y): continue if loss is losses.categorical_crossentropy: if y.shape[-1] == 1: @@ -507,10 +509,7 @@ def _standardize_weights(y, (existing_classes - existing_class_weight)) return weights else: - if sample_weight_mode is None: - return np.ones((y.shape[0],), dtype=K.floatx()) - else: - return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx()) + return None @tf_export('keras.models.Model', 'keras.Model') @@ -862,12 +861,12 @@ class Model(Network): sample_weights.append(None) else: if sample_weight_mode == 'temporal': - sample_weights.append( - K.placeholder(ndim=2, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [[1.]], shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: - 
sample_weights.append( - K.placeholder(ndim=1, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [1.], shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] @@ -1314,7 +1313,7 @@ class Model(Network): for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1424,7 +1423,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): + if ins and isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1518,7 +1517,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -2070,10 +2069,6 @@ class Model(Network): val_y, sample_weight=val_sample_weight, batch_size=batch_size) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights elif validation_split and 0. 
< validation_split < 1.: do_validation = True @@ -2085,36 +2080,34 @@ class Model(Network): y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at)) sample_weights, val_sample_weights = (slice_arrays( sample_weights, 0, split_at), slice_arrays(sample_weights, split_at)) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights - elif validation_steps: + val_x = [] + val_y = [] + val_sample_weights = [] do_validation = True - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = [0.] - - # Prepare input arrays and training function. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] - else: - ins = x + y + sample_weights # Prepare display labels. out_labels = self.metrics_names if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + if do_validation: + if any([w is not None for w in val_sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported' + ' when eager execution is enabled, for now.') callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + val_ins = val_x + val_y else: callback_metrics = copy.copy(out_labels) return training_eager.fit_loop( self, - ins, + x + y, out_labels=out_labels, batch_size=batch_size, epochs=epochs, @@ -2127,18 +2120,25 @@ class Model(Network): steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) else: + # Prepare input arrays and training function. 
+ if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() f = self.train_function if do_validation: - if context.in_graph_mode(): - self._make_test_function() - val_f = self.test_function - else: - val_f = None + self._make_test_function() + val_f = self.test_function callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + val_ins = val_x + val_y + val_sample_weights + [0] + else: + val_ins = val_x + val_y + val_sample_weights else: val_f = None callback_metrics = copy.copy(out_labels) @@ -2229,16 +2229,20 @@ class Model(Network): y, sample_weight=sample_weight, batch_size=batch_size) - # Prepare inputs, delegate logic to `_test_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] - else: - ins = x + y + sample_weights if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') return training_eager.test_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x + y, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_test_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights + self._make_test_function() f = self.test_function return self._test_loop( @@ -2276,16 +2280,16 @@ class Model(Network): 'argument.') x, _, _ = self._standardize_user_data(x) - # Prepare inputs, delegate logic to `_predict_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] 
- else: - ins = x - if context.in_eager_mode(): return training_eager.predict_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_predict_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() f = self.predict_function @@ -2327,20 +2331,26 @@ class Model(Network): and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. + Raises: + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight, class_weight=class_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] - else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.train_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.train_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() outputs = self.train_function(ins) @@ -2377,18 +2387,21 @@ class Model(Network): the display labels for the scalar outputs. Raises: - ValueError: in case of invalid arguments. + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] 
- else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.test_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.test_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights self._make_test_function() outputs = self.test_function(ins) @@ -2408,14 +2421,9 @@ class Model(Network): """ x, _, _ = self._standardize_user_data(x) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] - else: - ins = x - if context.in_eager_mode(): ins_batch_converted = [] - for ib in ins: + for ib in x: ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] @@ -2426,6 +2434,11 @@ class Model(Network): return outs if context.in_graph_mode(): + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() outputs = self.predict_function(ins) if len(outputs) == 1: @@ -2643,7 +2656,7 @@ class Model(Network): val_data = val_x + val_y + val_sample_weights if self.uses_learning_phase and not isinstance( K.learning_phase(), int): - val_data += [0.] 
+ val_data += [0] for cbk in callbacks: cbk.validation_data = val_data diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 282dd0dc0d..cdf189adef 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -139,6 +139,8 @@ def _model_loss(model, inputs, targets, training=False): model.output_names[i]) loss_metrics.append(K.mean(output_loss)) + # TODO(fchollet): support masking; in practice `_keras_mask` is never + # set in this context currently. mask = outs[i]._keras_mask # adapted from weighted_loss_fn if mask is not None: @@ -148,17 +150,7 @@ def _model_loss(model, inputs, targets, training=False): # to the number of unmasked samples. output_loss /= K.mean(mask) - # adapted from weighted_loss_fn - # apply sample weighting - if model.sample_weights: - # reduce score_array to same ndim as weight array - ndim = K.ndim(output_loss) - weight_ndim = K.ndim(model.sample_weights) - output_loss = K.mean(output_loss, axis=list(range(weight_ndim, ndim))) - output_loss *= model.sample_weights - output_loss /= K.mean(K.cast(K.not_equal(model.sample_weights, 0), - K.floatx())) - output_loss = K.mean(output_loss) + # TODO(fchollet): support sample weighting loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -231,7 +223,8 @@ def train_on_batch(model, ins): """ ins_batch_converted = [] for ib in ins: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) + if ib is not None: + ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] eager_model_outputs = [] for i in range(len(model.inputs)): diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 3d94b7537f..550b86a71d 100644 --- 
a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -24,9 +24,7 @@ import numpy as np from tensorflow.python.framework import ops from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils -from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.rmsprop import RMSPropOptimizer @@ -311,440 +309,6 @@ class TrainingTest(test.TestCase): optimizer='rms') -class LossWeightingTest(test.TestCase): - - def test_class_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_test = y_test.copy() - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) - test_ids = np.where(int_y_test == np.array(weighted_class))[0] - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - class_weight=class_weight, - validation_data=(x_train, y_train, sample_weight)) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight, - validation_split=0.1) - - model.train_on_batch( - x_train[:batch_size], y_train[:batch_size], class_weight=class_weight) - ref_score = model.evaluate(x_test, y_test, verbose=0) - score = model.evaluate( - x_test[test_ids, :], y_test[test_ids, :], verbose=0) - self.assertLess(score, ref_score) - - def test_sample_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(43) - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight, - validation_split=0.1) - model.train_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - model.test_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - - def test_temporal_sample_weights(self): - num_classes = 5 - weighted_class = 3 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - - np.random.seed(1337) - (_, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode='temporal') - - def test_class_weight_invalid_use_case(self): - num_classes = 5 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - class_weight = dict([(i, 1.) for i in range(num_classes)]) - - del class_weight[1] - with self.assertRaises(ValueError): - model.fit(x_train, y_train, - epochs=0, verbose=0, class_weight=class_weight) - - with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode=[]) - - # Build multi-output model - x = keras.Input((3,)) - y1 = keras.layers.Dense(4, name='1')(x) - y2 = keras.layers.Dense(4, name='2')(x) - model = keras.models.Model(x, [y1, y2]) - model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse') - x_np = np.random.random((10, 3)) - y_np = np.random.random((10, 4)) - w_np = np.random.random((10,)) - # This will work - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': w_np}) - # These will not - with self.assertRaises(ValueError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=[w_np]) - with self.assertRaises(TypeError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=w_np) - with self.assertRaises(ValueError): - bad_w_np = 
np.random.random((11,)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - - -class TestDynamicTrainability(test.TestCase): - - def test_trainable_warning(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3)) - model.trainable = False - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - model.trainable = True - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - - def test_trainable_argument(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3, trainable=False)) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - # test with nesting - inputs = keras.layers.Input(shape=(3,)) - output = model(inputs) - model = keras.models.Model(inputs, output) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - def test_layer_trainability_switch(self): - # with constructor argument, in 
Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, trainable=False, input_dim=1)) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Sequential - model = keras.models.Sequential() - layer = keras.layers.Dense(2, input_dim=1) - model.add(layer) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # with constructor argument, in Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2, trainable=False)(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Model - x = keras.layers.Input(shape=(1,)) - layer = keras.layers.Dense(2) - y = layer(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_model_trainability_switch(self): - # a non-trainable model has no trainable weights - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - model = keras.models.Model(x, y) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # same for Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=1)) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_nested_model_trainability(self): - - # a Sequential inside a Model - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = 
False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Sequential inside a Sequential - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Sequential - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - -class TestTrainingUtils(test.TestCase): - - def test_check_array_lengths(self): - keras.engine.training._check_array_lengths(None, None, None) - a_np = np.random.random((4, 3, 3)) - keras.engine.training._check_array_lengths(a_np, a_np, a_np) - keras.engine.training._check_array_lengths( - [a_np, a_np], [a_np, a_np], [a_np, a_np]) - 
keras.engine.training._check_array_lengths([None], [None], [None]) - - b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) - - def test_slice_arrays(self): - input_a = np.random.random((10, 3)) - slice_arrays(None) - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None, [1, 1], None, [1, 1]] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = None - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - - def test_fit_with_BatchNorm(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_dim=4)) - model.add(keras.layers.BatchNormalization()) - model.add(keras.layers.Activation('tanh')) - model.add(keras.layers.Dropout(0.2)) - - input_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 10)) - - model.compile(loss='binary_crossentropy', optimizer=RMSPropOptimizer(0.001)) - model.fit(input_a_np, output_b_np, epochs=1, batch_size=5, verbose=0) - - def test_fit_with_regularization(self): - model = keras.models.Sequential() - with self.assertRaises(ValueError): - model.add( - keras.layers.Dense(4, input_dim=3, - kernel_regularizer=keras.regularizers.l2(0.01), - activity_regularizer=keras.regularizers.l1(0.01))) - - if __name__ == '__main__': # Bazel sets these environment variables to very long 
paths. # Tempfile uses them to create long paths, and in turn multiprocessing diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index 9651eb9f14..6ca5941e9a 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -1045,16 +1045,8 @@ class TestTrainingUtils(test.TestCase): keras.engine.training._check_array_lengths([None], [None], [None]) b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) with self.assertRaises(ValueError): keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) def test_slice_arrays(self): input_a = np.random.random((10, 3)) -- GitLab From 6825af46c53e6ad0b1260e5a96a4ef46b7703e46 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 19:58:18 -0800 Subject: [PATCH 123/884] Fix bug in deserializing CondContexts. 
PiperOrigin-RevId: 187121244 --- tensorflow/python/ops/control_flow_ops.py | 11 ++++- tensorflow/python/training/saver_test.py | 49 ++++++++++++++++------- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 85944efbe8..fb9e2188d7 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1718,8 +1718,15 @@ class CondContext(ControlFlowContext): self._pivot = g.as_graph_element( ops.prepend_name_scope(context_def.pivot_name, import_scope)) self._branch = context_def.branch - super(CondContext, self).__init__( - values_def=context_def.values_def, import_scope=import_scope) + super(CondContext, self).__init__(values_def=context_def.values_def, + import_scope=import_scope) + # The predicate and pivot ops appear in self._values, but don't have self + # set as their control context. The __init__ call above will set self for + # all values, so manually override the predicate and pivot contexts here. + # pylint: disable=protected-access + self._pred.op._set_control_flow_context(self.outer_context) + self._pivot.op._set_control_flow_context(self.outer_context) + # pylint: enable=protected-access @property def pred(self): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index b366ed30f3..b758ceaab0 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2041,29 +2041,24 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) - def testNestedWhileLoops(self): - test_dir = self._get_test_dir("nested_whiles") + def _testWhileLoopAndGradientSerDes(self, outer_body_fn): + # Build a while loop with `outer_body_fn`, export it, and verify that it can + # be imported and the gradient can be built and run correctly. 
+ + test_dir = self._get_test_dir("nested_control_flow") filename = os.path.join(test_dir, "metafile") saver_ckpt = os.path.join(test_dir, "saver.ckpt") - # Create two simple nested while loops. + # Create while loop using `outer_body_fn`. with ops_lib.Graph().as_default(): - def body(i, x): - _, r = control_flow_ops.while_loop(lambda j, y: j < 3, - lambda j, y: (j + 1, y + x), - [0, 0]) - return i + 1, x + r - var = variables.Variable(0) var_name = var.name - - _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, outer_body_fn, [0, var]) output_name = output.name - init_op = variables.global_variables_initializer() - # Generate a MetaGraphDef containing the nested loops. + # Generate a MetaGraphDef containing the while loop. with session.Session() as sess: sess.run(init_op) sess.run(output) @@ -2071,8 +2066,8 @@ class MetaGraphTest(test.TestCase): saver.save(sess, saver_ckpt) saver.export_meta_graph(filename) - # Build and run the gradients of the nested while loop. We use this below - # to verify that the gradients are correct with an imported MetaGraphDef. + # Build and run the gradients of the while loop. We use this below to + # verify that the gradients are correct with an imported MetaGraphDef. grad = gradients_impl.gradients([output], [var]) with session.Session() as sess: sess.run(init_op) @@ -2096,6 +2091,30 @@ class MetaGraphTest(test.TestCase): actual_grad_value = sess.run(grad) self.assertEqual(expected_grad_value, actual_grad_value) + def testNestedWhileLoopsSerDes(self): + # Test two simple nested while loops. + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + self._testWhileLoopAndGradientSerDes(body) + + def testNestedControlFlowSerDes(self): + # Test while loop in a cond in a while loop. 
+ # pylint: disable=g-long-lambda + def body(i, x): + cond_result = control_flow_ops.cond( + i > 0, + lambda: control_flow_ops.while_loop( + lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0])[1], + lambda: x) + return i + 1, cond_result + # pylint: enable=g-long-lambda + self._testWhileLoopAndGradientSerDes(body) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From bac2cb076281a90902609cea5ee2b28c5d821657 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 20:21:07 -0800 Subject: [PATCH 124/884] Add helpers to stream data from the GCE VM to a Cloud TPU. PiperOrigin-RevId: 187122870 --- tensorflow/contrib/tpu/BUILD | 28 +++ tensorflow/contrib/tpu/python/tpu/datasets.py | 192 ++++++++++++++++++ .../contrib/tpu/python/tpu/datasets_test.py | 181 +++++++++++++++++ 3 files changed, 401 insertions(+) create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets.py create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets_test.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index c48e84ddfa..095b4821f1 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -163,6 +163,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":datasets", ":profiler", ":tpu_py", "//tensorflow/contrib/tpu/proto:topology_proto_py", @@ -181,6 +182,33 @@ py_library( ], ) +py_library( + name = "datasets", + srcs = [ + "python/tpu/datasets.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:function", + "//tensorflow/python:functional_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", + ], +) + +tf_py_test( + name = "datasets_test", + srcs = ["python/tpu/datasets_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + ":datasets", + ], + grpc_enabled = 
True, +) + tf_py_test( name = "tpu_test", size = "small", diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py new file mode 100644 index 0000000000..29aea98542 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -0,0 +1,192 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ====================================== +"""Library of Cloud TPU helper functions for data loading.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import functional_ops + + +def _TextLineDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TextLineDataset(filename, buffer_size=buffer_size) + return dataset + + +def _TFRecordDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TFRecordDataset(filename, buffer_size=buffer_size) + return dataset + + +_FILETYPE_MAP = { + 'tfrecord': 
_TFRecordDataset, + 'textline': _TextLineDataset, + 'text': _TextLineDataset, +} + + +def StreamingFilesDataset(files, + filetype=None, + file_reader_job=None, + worker_job=None, + num_epochs=None, + filename_shuffle_buffer_size=None, + num_parallel_reads=None, + batch_transfer_size=None, + sloppy=None): + """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM). + + Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read + files local to your GCE VM. In order to train using files stored on your local + VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset + helper to generate a dataset to feed your Cloud TPU with files from your GCE + VM. + + The resulting dataset may return an OutOfRangeError if there are no files + found as a result of the fileglob expansion. + + Note: StreamingFilesDataset assumes that the session is using a + TPUClusterResolver and has therefore a worker and a coordinator job. File + loading will be done on the coordinator job. + + Args: + files: A string glob to match files, or a `tf.data.Dataset` generating file + names. + filetype: A string (one of 'tfrecord', or 'textline') or a single-argument + TensorFlow function that when given a filename returns a dataset. + file_reader_job: An optional string that corresponds to the job that should + perform the file reads. + worker_job: An optional string that corresponds to the job that should + process the tensors (i.e. your GPU or TPU worker). + num_epochs: The number of epochs through the training set that should be + generated. By default, it will repeat infinitely. + filename_shuffle_buffer_size: An optional integer whose value controls the + shuffling of the file names. If you would like to read from the files in + the same order, set to 0 or False. + num_parallel_reads: An optional integer controlling the number of files to + read from concurrently. (Set to 1 for no parallelism.) 
+ batch_transfer_size: An optional integer controlling the batching used to + amortize the remote function invocation overhead. Set to a very large + number to increase throughput. Set to a very small number to reduce memory + consumption. Set to False to skip batching. + sloppy: (Optional.) If `True`, read input data as fast as possible, without + maintaining a deterministic order. Defaults to `False`. + Returns: + A `tf.data.Dataset` with an infinite stream of elements generated by a + parallel interleaving of the set of files matched (or generated) by `files` + with a type is the output of the dataset specified by `filetype`. + + Raises: + ValueError: if any argument is not of the expected type. + """ + if filetype is None: + filetype = 'tfrecord' + + if isinstance(filetype, str): + if filetype not in _FILETYPE_MAP: + raise ValueError('Unexpected filetype: %s' % filetype) + reader_fn = _FILETYPE_MAP[filetype] + elif callable(filetype): + reader_fn = filetype + else: + raise ValueError('filetype should be a string or a callable') + + file_reader_job = file_reader_job or 'coordinator' + + worker_job = worker_job or 'worker' + + if filename_shuffle_buffer_size is None: + filename_shuffle_buffer_size = 4096 + + num_parallel_reads = num_parallel_reads or 8 + + if batch_transfer_size is None: + batch_transfer_size = 1024 + + if sloppy is None: + sloppy = False + + with ops.device('/job:%s' % file_reader_job): + if isinstance(files, str): + source_dataset = dataset_ops.Dataset.list_files(files) + elif isinstance(files, dataset_ops.Dataset): + source_dataset = files + else: + raise ValueError('files was not a string or a dataset: %s' % files) + + if filename_shuffle_buffer_size: + source_dataset = source_dataset.shuffle( + buffer_size=filename_shuffle_buffer_size) + + # NOTE: We perform the `repeat` on the source dataset, because the output + # dataset does not currently have enough information to recreate an iterator + # over the source dataset when it reaches the end. 
+ source_dataset = source_dataset.repeat(num_epochs) + + source_dataset = source_dataset.apply( + interleave_ops.parallel_interleave( + reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy)) + + if batch_transfer_size: + # Note: we can safely call batch_and_drop_remainder because we have an + # infinite stream of TFRecords. + source_dataset = source_dataset.apply( + batching.batch_and_drop_remainder(batch_transfer_size)) + + source_dataset = source_dataset.prefetch(1) + + source_iterator = source_dataset.make_one_shot_iterator() + source_handle = source_iterator.string_handle() + + @function.Defun(dtypes.string) + def LoadingFunc(h): + remote_iterator = iterator_ops.Iterator.from_string_handle( + h, source_dataset.output_types, source_dataset.output_shapes) + return remote_iterator.get_next() + + def MapFn(unused_input): + return functional_ops.remote_call( + args=[source_handle], + Tout=[dtypes.string], + f=LoadingFunc, + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + + with ops.device('/job:%s' % worker_job): + # TODO(saeta,mrry): Switch to using _GeneratorDataset. + + # identity = lambda x: x + # dummy = constant_op.constant(0) + # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn, + # identity) + + output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn) + output_dataset = output_dataset.prefetch(1) + + if batch_transfer_size: + # Undo the batching used during the transfer. + output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1) + + return output_dataset diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py new file mode 100644 index 0000000000..2c40797792 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -0,0 +1,181 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TPU datasets tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.tpu.python.tpu import datasets +from tensorflow.core.protobuf import cluster_pb2 +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.lib.io import python_io +from tensorflow.python.platform import test +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat + +_NUM_FILES = 10 +_NUM_ENTRIES = 200 + + +class DatasetsTest(test.TestCase): + + def setUp(self): + super(DatasetsTest, self).setUp() + self._coord = server_lib.Server.create_local_server() + self._worker = server_lib.Server.create_local_server() + + self._cluster_def = cluster_pb2.ClusterDef() + worker_job = self._cluster_def.job.add() + worker_job.name = 'worker' + worker_job.tasks[0] = self._worker.target[len('grpc://'):] + coord_job = self._cluster_def.job.add() + coord_job.name = 'coordinator' + coord_job.tasks[0] = self._coord.target[len('grpc://'):] + + session_config = config_pb2.ConfigProto(cluster_def=self._cluster_def) + + self._sess = session.Session(self._worker.target, config=session_config) + 
+ def testTextLineDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'text_line.%d.txt' % i) + contents = [] + for j in range(_NUM_ENTRIES): + contents.append(compat.as_bytes('%d: %d' % (i, j))) + with open(filename, 'wb') as f: + f.write(b'\n'.join(contents)) + all_contents.extend(contents) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'text_line.*.txt'), filetype='text') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) + writer.close() + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'tf_record*'), filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDatasetFromDataset(self): + filenames = [] + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + filenames.append(filename) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) 
+ writer.close() + + filenames = dataset_ops.Dataset.from_tensor_slices(filenames) + + dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testArbitraryReaderFunc(self): + + def MakeRecord(i, j): + return compat.as_bytes('%04d-%04d' % (i, j)) + + record_bytes = len(MakeRecord(10, 200)) + + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'fixed_length.%d' % i) + with open(filename, 'wb') as f: + for j in range(_NUM_ENTRIES): + record = MakeRecord(i, j) + f.write(record) + all_contents.append(record) + + def FixedLengthFile(filename): + return readers.FixedLengthRecordDataset(filename, record_bytes) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'fixed_length*'), + filetype=FixedLengthFile) + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testUnexpectedFiletypeString(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype='foo') + + def testUnexpectedFiletypeType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype=3) + + def testUnexpectedFilesType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset(123, filetype='tfrecord') + + +if __name__ == '__main__': + test.main() -- GitLab From 
0bde713c06895b9ce2de61d6aea1bff5415ddcbc Mon Sep 17 00:00:00 2001 From: Nick Kreeger Date: Mon, 26 Feb 2018 21:11:36 -0800 Subject: [PATCH 125/884] Upgrade Jenkins/Docker build scripts to Bazel 0.11.0. (#17280) The 0.10.0 bazel has problems with static-linking on linux: https://github.com/bazelbuild/bazel/issues/4474. This PR bumps to the latest bazel that produces proper binaries w/o the linking issue. --- tensorflow/tools/ci_build/install/install_bazel.sh | 2 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/install/install_bazel.sh b/tensorflow/tools/ci_build/install/install_bazel.sh index 1df6a84d7c..3e27a94cf2 100755 --- a/tensorflow/tools/ci_build/install/install_bazel.sh +++ b/tensorflow/tools/ci_build/install/install_bazel.sh @@ -15,7 +15,7 @@ # ============================================================================== # Select bazel version. -BAZEL_VERSION="0.10.0" +BAZEL_VERSION="0.11.0" set +e local_bazel_ver=$(bazel version 2>&1 | grep -i label | awk '{print $3}') diff --git a/tensorflow/tools/docker/Dockerfile.devel b/tensorflow/tools/docker/Dockerfile.devel index d16761c367..22c73c3fe1 100644 --- a/tensorflow/tools/docker/Dockerfile.devel +++ b/tensorflow/tools/docker/Dockerfile.devel @@ -57,7 +57,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ >>/etc/bazel.bazelrc # Install the most recent bazel release. 
-ENV BAZEL_VERSION 0.8.0 +ENV BAZEL_VERSION 0.11.0 WORKDIR / RUN mkdir /bazel && \ cd /bazel && \ diff --git a/tensorflow/tools/docker/Dockerfile.devel-gpu b/tensorflow/tools/docker/Dockerfile.devel-gpu index 4ef37881bc..69ba340f92 100644 --- a/tensorflow/tools/docker/Dockerfile.devel-gpu +++ b/tensorflow/tools/docker/Dockerfile.devel-gpu @@ -66,7 +66,7 @@ RUN echo "startup --batch" >>/etc/bazel.bazelrc RUN echo "build --spawn_strategy=standalone --genrule_strategy=standalone" \ >>/etc/bazel.bazelrc # Install the most recent bazel release. -ENV BAZEL_VERSION 0.8.0 +ENV BAZEL_VERSION 0.11.0 WORKDIR / RUN mkdir /bazel && \ cd /bazel && \ -- GitLab From 50daa198f85f21f3295dd6e1ad2951f38cc6c825 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:09:30 -0800 Subject: [PATCH 126/884] Automated g4 rollback of changelist 187092622 PiperOrigin-RevId: 187125995 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +------------- tensorflow/c/eager/runtime.cc | 14 ++++---------- tensorflow/c/eager/runtime.h | 3 --- tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 13 insertions(+), 35 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 16a2a15072..e55cb672e9 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,7 +21,6 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index b233dd5b93..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. 
tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = tensorflow::KernelAndDevice::Init( - ndef, ctx->func_lib(device), &ctx->runner, kernel); + status->status = + tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 29944df4c2..3356054cd0 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,7 +31,6 @@ limitations under the License. #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -46,15 +45,7 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : thread_pool(new tensorflow::thread::ThreadPool( - opts.session_options.options.env, "EagerCompute", - opts.session_options.options.config - .inter_op_parallelism_threads() != 0 - ? 
opts.session_options.options.config - .inter_op_parallelism_threads() - : tensorflow::port::NumSchedulableCPUs())), - runner([this](std::function f) { thread_pool->Schedule(f); }), - policy(opts.policy), + : policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -63,9 +54,6 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} - const std::unique_ptr thread_pool; - std::function)> runner; - const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index b9618420f0..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,22 +255,17 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; - out->runner_ = nullptr; - out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = flib->device(); out->kernel_.reset(k); out->flib_ = flib; - out->runner_ = runner; - out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -301,11 +296,10 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - if (runner_ == nullptr) { - params.runner = &default_runner_; - } else { - params.runner = runner_; - } + // TODO(apassos): use a thread pool. 
+ std::function)> runner = + [](std::function f) { f(); }; + params.runner = &runner; OpKernelContext context(¶ms); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index fa5f839977..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,7 +169,6 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -189,8 +188,6 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; - std::function)>* runner_; - std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index ab0b535e1a..643153058c 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel); + Status s = + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &k)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); } } BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; 
KernelAndDevice kernel(nullptr); - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From b053b1006abdfcf1f790a729a412001ebbaf679f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:25:22 -0800 Subject: [PATCH 127/884] Improve error handling in strided_slice_op to fail more gracefully and return an error status instead of crashing. PiperOrigin-RevId: 187126888 --- tensorflow/core/kernels/strided_slice_op.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 7745effe2a..1e3e92a68a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -109,17 +109,27 @@ class StridedSliceOp : public OpKernel { if (is_identity) { VLOG(1) << "Strided slice identity "; Tensor tmp; - CHECK(tmp.CopyFrom(input, final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(input, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } // Optimization #2, slice is memory contiguous (only occurs in dim 0) if (slice_dim0 && IsDim0SliceAligned(input.shape(), begin[0], end[0])) { - CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true. + OP_REQUIRES(context, input.dims() >= 1, + errors::InvalidArgument( + "Input must have rank at least 1, got: ", input.dims())); + // Otherwise, is_identity should be true. 
VLOG(1) << "Strided slice dim 0: " << input.shape().DebugString(); + OP_REQUIRES( + context, begin[0] <= end[0], + errors::InvalidArgument("begin[0] (", begin[0], + ") must less or equal to end[0] (", end[0])); + Tensor slice = input.Slice(begin[0], end[0]); Tensor tmp; - CHECK(tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(slice, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } @@ -238,7 +248,8 @@ class StridedSliceGradOp : public OpKernel { if (processing_shape.dims() == 0) { auto in = context->input(4); - CHECK(result->CopyFrom(in, processing_shape)); + OP_REQUIRES(context, result->CopyFrom(in, processing_shape), + errors::Internal("Copy failed")); return; } -- GitLab From 4faee3942d9983e0c96091b32095cc0d9ff494e0 Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Tue, 27 Feb 2018 07:36:01 +0100 Subject: [PATCH 128/884] Fix some breakages in TensorFlow Windows build (#17271) * Fix configure.py * Add quantization_utils for building quantize_weights, quantize_nodes, round_weights Caused by https://github.com/tensorflow/tensorflow/pull/16121 --- configure.py | 8 ++++++-- tensorflow/core/kernels/BUILD | 16 ++++++++++++---- tensorflow/tools/graph_transforms/BUILD | 2 +- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/configure.py b/configure.py index 7d2e30cd8a..97f46757ee 100644 --- a/configure.py +++ b/configure.py @@ -250,7 +250,11 @@ def reset_tf_configure_bazelrc(workspace_path): if _TF_BAZELRC_FILENAME in l: continue f.write('%s\n' % l) - f.write('import %s\n' % _TF_BAZELRC) + if is_windows(): + tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/") + else: + tf_bazelrc_path = _TF_BAZELRC + f.write('import %s\n' % tf_bazelrc_path) def cleanup_makefile(): @@ -444,7 +448,7 @@ def check_bazel_version(min_version): if which('bazel') is None: print('Cannot find bazel. 
Please install bazel.') sys.exit(0) - curr_version = run_shell(['bazel', '--batch', 'version']) + curr_version = run_shell(['bazel', '--batch', '--bazelrc=/dev/null', 'version']) for line in curr_version.split('\n'): if 'Build label: ' in line: diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 3426cf6e40..78786de16b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -5128,7 +5128,6 @@ tf_kernel_library( srcs = [ "dequantize_op.cc", "meta_support.cc", - "quantization_utils.cc", "quantize_down_and_shrink_range.cc", "quantize_op.cc", "quantized_activation_ops.cc", @@ -5149,7 +5148,6 @@ tf_kernel_library( ], hdrs = [ "meta_support.h", - "quantization_utils.h", "reference_gemm.h", ], deps = [ @@ -5160,6 +5158,7 @@ tf_kernel_library( ":image_resizer_state", ":ops_util", ":pooling_ops", + ":quantization_utils", "//tensorflow/core:array_ops_op_lib", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -5706,6 +5705,16 @@ tf_kernel_library( ], ) +cc_library( + name = "quantization_utils", + srcs = ["quantization_utils.cc"], + hdrs = ["quantization_utils.h"], + deps = [ + "//tensorflow/core:framework", + "@gemmlowp", + ], +) + cc_library( name = "remote_fused_graph_execute_utils", srcs = [ @@ -6081,7 +6090,6 @@ cc_library( srcs = [ "cwise_ops_common.cc", "meta_support.cc", - "quantization_utils.cc", ], hdrs = [ "cwise_ops.h", @@ -6090,10 +6098,10 @@ cc_library( "cwise_ops_gpu_gradients.cu.h", "cwise_ops_gradients.h", "meta_support.h", - "quantization_utils.h", ], deps = [ ":bounds_check", + ":quantization_utils", "//tensorflow/core:framework", "//tensorflow/core:lib", "//third_party/eigen3", diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index ad3668fa02..4fe4fc3b13 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -134,8 +134,8 @@ cc_library( "//tensorflow/core:tensorflow", 
"//tensorflow/contrib/rnn:gru_ops_op_lib", "//tensorflow/contrib/rnn:lstm_ops_op_lib", + "//tensorflow/core/kernels:quantization_utils", ] + if_not_windows([ - "//tensorflow/core/kernels:quantized_ops", "//tensorflow/core/kernels:remote_fused_graph_rewriter_transform", "//tensorflow/core/kernels/hexagon:hexagon_rewriter_transform", ]), -- GitLab From e4b294e080dc5f339d1e639e1e9907b53461b754 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 01:02:36 -0800 Subject: [PATCH 129/884] Add documentation to Grappler RewriterConfig to give a short description for each of the optimizer on what they do. PiperOrigin-RevId: 187143156 --- tensorflow/core/protobuf/rewriter_config.proto | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 504ed5d819..875e4663db 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -30,12 +30,17 @@ message RewriterConfig { } // Optimize tensor layouts (default is ON) + // e.g. This will try to use NCHW layout on GPU which is faster. Toggle layout_optimizer = 1; // Fold constants (default is ON) + // Statically infer the value of tensors when possible, and materialize the + // result using constants. Toggle constant_folding = 3; // Arithmetic optimizations (default is ON) + // e.g. Simplify arithmetic ops; merge ops with same value (like constants). Toggle arithmetic_optimization = 7; // Control dependency optimizations (default is ON). + // Remove redundant control dependencies, which may enable other optimization. Toggle dependency_optimization = 8; // Loop optimizations (default is OFF). Toggle loop_optimization = 9; @@ -49,12 +54,20 @@ message RewriterConfig { NO_MEM_OPT = 1; // Driven by manual op-level annotations. MANUAL = 2; + // Driven by heuristics. The behavior of these heuristics is subject to // change. 
Currently includes an experimental recomputation and swapping // heuristics. Manual annotations are respected, but additional nodes are // selected automatically. + + // Swapping heuristic will move a tensor from the GPU to the CPU and move + // it back when needed to reduce peak memory usage. SWAPPING_HEURISTICS = 4; + // Recomputation heuristics will recompute ops (such as Relu activation) + // during backprop instead of storing them, reducing peak memory usage. RECOMPUTATION_HEURISTICS = 5; + // Scheduling will split big ops such as AddN and try to enforce a schedule + // of the new computations that decreases peak memory usage. SCHEDULING_HEURISTICS = 6; // Use any combination of swapping and recomputation heuristics. HEURISTICS = 3; -- GitLab From 7f25c9d127e8535170d0575c038fd42222887dd4 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 06:00:21 -0800 Subject: [PATCH 130/884] Enable dynamic function calls. These are compiled just in time by inserting a call to compile. 
PiperOrigin-RevId: 187165096 --- tensorflow/contrib/py2tf/__init__.py | 4 +- tensorflow/contrib/py2tf/converters/BUILD | 13 +-- .../contrib/py2tf/converters/call_trees.py | 76 +++++++------- .../py2tf/converters/call_trees_test.py | 16 +++ .../py2tf/converters/converter_test_base.py | 32 ++++-- tensorflow/contrib/py2tf/impl/api.py | 99 ++++++++++++++----- 6 files changed, 163 insertions(+), 77 deletions(-) diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/py2tf/__init__.py index 379fa7fd5c..6531183cb5 100644 --- a/tensorflow/contrib/py2tf/__init__.py +++ b/tensorflow/contrib/py2tf/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl.api import convert +from tensorflow.contrib.py2tf.impl.api import converted_call from tensorflow.contrib.py2tf.impl.api import graph_ready from tensorflow.contrib.py2tf.impl.api import to_code from tensorflow.contrib.py2tf.impl.api import to_graph @@ -30,7 +31,8 @@ from tensorflow.contrib.py2tf.pyct.transformer import PyFlowParseError from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'to_graph', 'to_code', 'convert', 'graph_ready', 'utils', 'PyFlowParseError' + 'to_graph', 'to_code', 'convert', 'graph_ready', 'converted_call', 'utils', + 'PyFlowParseError' ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 42baaaaba7..78f46bc05f 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -46,6 +46,7 @@ py_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":converters", + "//tensorflow/contrib/py2tf/pyct", "//tensorflow/contrib/py2tf/pyct/static_analysis", "//tensorflow/contrib/py2tf/utils", "@gast_archive//:gast", @@ -59,7 +60,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", 
"//tensorflow/python:client_testlib", ], ) @@ -70,7 +70,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -81,7 +80,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -92,7 +90,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/py2tf/impl", "//tensorflow/python:client_testlib", ], ) @@ -103,7 +101,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -114,7 +111,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -125,7 +121,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -136,7 +131,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -157,7 +151,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -168,7 +161,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -184,7 +176,6 @@ py_test( ], deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index 1050ba654c..f18f9f6086 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -27,6 +27,7 @@ import types import gast from tensorflow.contrib.py2tf.pyct import anno +from 
tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import templates from tensorflow.contrib.py2tf.pyct import transformer @@ -72,9 +73,8 @@ class CallTreeTransformer(transformer.Base): self.uncompiled_modules = uncompiled_modules self.nocompile_decorators = nocompile_decorators - # pylint:disable=invalid-name - def _resolve_name(self, node): + """Used to resolve decorator info.""" if isinstance(node, gast.Call): return self._resolve_name(node.func) if isinstance(node, gast.Name): @@ -99,7 +99,13 @@ class CallTreeTransformer(transformer.Base): (owner_type, node.attr)) return None + def _function_is_compilable(self, target_entity): + """Determines whether an entity can be compiled at all.""" + # TODO(mdan): This is just a placeholder. Implement. + return not isinstance(target_entity, types.BuiltinFunctionType) + def _should_compile(self, node, fqn): + """Determines whether an entity should be compiled in the context.""" for i in range(1, len(fqn)): if fqn[:i] in self.uncompiled_modules: return False @@ -141,33 +147,6 @@ class CallTreeTransformer(transformer.Base): return True - def _determine_function_owner(self, m): - # TODO(mdan): The parent type should be known at analysis. Use that instead. - if hasattr(m, 'im_class'): # Python 2 - return m.im_class - if hasattr(m, '__qualname__'): # Python 3 - # Object attributes: should be bound to "self". - if hasattr(m, '__self__'): - return type(m.__self__) - - # Class attributes: should have the owner name in their namespace. - qn = m.__qualname__.split('.') - if len(qn) < 2: - return None - owner_name, func_name = qn[-2:] - if func_name != m.__name__: - raise ValueError('Inconsistent names detected ' - '(__qualname__[1] = "%s", __name__ = "%s") for %s.' % - (func_name, m.__name__, m)) - if owner_name == '': - return None - if owner_name not in self.context.namespace: - raise ValueError( - 'Could not resolve name "%s" while analyzing %s. 
Namespace:\n%s' % - (owner_name, m, self.context.namespace)) - return self.context.namespace[owner_name] - return None - def _rename_compilable_function(self, node): assert anno.hasanno(node.func, 'live_val') assert anno.hasanno(node.func, 'fqn') @@ -182,7 +161,11 @@ class CallTreeTransformer(transformer.Base): target_fqn, live_entity=target_entity) do_rename = True else: - owner_type = self._determine_function_owner(target_entity) + if anno.hasanno(node.func, 'parent_type'): + owner_type = anno.getanno(node.func, 'parent_type') + else: + # Fallback - not reliable. + owner_type = inspect_utils.getmethodclass(target_entity) new_name, do_rename = self.context.namer.compiled_function_name( target_fqn, live_entity=target_entity, owner_type=owner_type) @@ -202,9 +185,32 @@ class CallTreeTransformer(transformer.Base): """ return templates.replace(template, func=node.func, original_args=node.args) - def _function_is_compilable(self, target_entity): - # TODO(mdan): This is just a placeholder. Implement. - return not isinstance(target_entity, types.BuiltinFunctionType) + def _converted_call(self, node): + """Inlines a dynamic conversion for a dynamic function.""" + # TODO(mdan): Pass information on the statically compiled functions. + # Having access to the statically compiled functions can help avoid + # unnecessary compilation. + # For example, this would lead to function `a` being compiled twice: + # + # def a(): + # v = b + # b() + # def b(): + # a() + # + # This is really a problem with recursive calls, which currently can + # only be gated by a static condition, and should be rare. + # TODO(mdan): It probably makes sense to use dynamic conversion every time. + # Before we could convert all the time though, we'd need a reasonable + # caching mechanism. 
+ template = """ + py2tf_api.converted_call(func, True, False, {}, original_args) + """ + call_expr = templates.replace( + template, func=node.func, original_args=node.args) + return call_expr[0].value + + # pylint:disable=invalid-name def visit_Expr(self, node): if isinstance(node.value, gast.Call): @@ -245,9 +251,9 @@ class CallTreeTransformer(transformer.Base): raise NotImplementedError('py_func with return values') else: if self.context.recursive: - raise NotImplementedError('Could not resolve target function.') + node = self._converted_call(node) else: - # TODO(mdan): Double check. Is this reachable code? + # Unresolved functions are allowed in non-recursive mode. pass return node diff --git a/tensorflow/contrib/py2tf/converters/call_trees_test.py b/tensorflow/contrib/py2tf/converters/call_trees_test.py index 777648dc0b..d482a9ef78 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees_test.py +++ b/tensorflow/contrib/py2tf/converters/call_trees_test.py @@ -47,6 +47,21 @@ class CallTreesTest(converter_test_base.TestCase): result.renamed_test_fn_1 = renamed_test_fn_1 self.assertEquals(3, result.test_fn_2(1)) + def test_dynamic_function(self): + + def test_fn_1(): + raise ValueError('This should be masked by the mock.') + + def test_fn_2(f): + return f() + 3 + + node = self.parse_and_analyze(test_fn_2, {}) + node = call_trees.transform(node, self.ctx, (), ()) + + with self.compiled(node) as result: + # 10 = 7 (from the mock) + 3 (from test_fn_2) + self.assertEquals(10, result.test_fn_2(test_fn_1)) + def test_simple_methods(self): class TestClass(object): @@ -59,6 +74,7 @@ class CallTreesTest(converter_test_base.TestCase): node = self.parse_and_analyze( TestClass.test_fn_2, {'TestClass': TestClass}, + namer=converter_test_base.FakeNoRenameNamer(), arg_types={'self': (TestClass.__name__, TestClass)}) node = call_trees.transform(node, self.ctx, (), ()) diff --git a/tensorflow/contrib/py2tf/converters/converter_test_base.py 
b/tensorflow/contrib/py2tf/converters/converter_test_base.py index afa5c2f96f..1f98d8469c 100644 --- a/tensorflow/contrib/py2tf/converters/converter_test_base.py +++ b/tensorflow/contrib/py2tf/converters/converter_test_base.py @@ -25,6 +25,7 @@ from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.pyct import compiler from tensorflow.contrib.py2tf.pyct import context from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import pretty_printer from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct.static_analysis import activity from tensorflow.contrib.py2tf.pyct.static_analysis import live_values @@ -52,26 +53,43 @@ class FakeNamer(object): return ('renamed_%s' % '_'.join(original_fqn)), True +class FakeNoRenameNamer(FakeNamer): + + def compiled_function_name(self, original_fqn, **_): + return str(original_fqn), False + + class TestCase(test.TestCase): """Base class for unit tests in this module. Contains relevant utilities.""" @contextlib.contextmanager def compiled(self, node, *symbols): - source = '' + source = None + + self.dynamic_calls = [] + def converted_call(*args): + """Mock version of api.converted_call.""" + self.dynamic_calls.append(args) + return 7 + try: result, source = compiler.ast_to_object(node) - result.tf = self.make_fake_tf(*symbols) + result.tf = self.make_fake_mod('fake_tf', *symbols) result.py2tf_utils = utils + result.py2tf_api = self.make_fake_mod('fake_api', converted_call) yield result except Exception: # pylint:disable=broad-except - print('Offending compiled code:\n%s' % source) + if source is None: + print('Offending AST:\n%s' % pretty_printer.fmt(node, color=False)) + else: + print('Offending compiled code:\n%s' % source) raise - def make_fake_tf(self, *symbols): - fake_tf = imp.new_module('fake_tf') + def make_fake_mod(self, name, *symbols): + fake_mod = imp.new_module(name) for s in symbols: - setattr(fake_tf, s.__name__, s) - return fake_tf + 
setattr(fake_mod, s.__name__, s) + return fake_mod def attach_namespace(self, module, **ns): for k, v in ns.items(): diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py index 29d2e038a7..48100aac32 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/py2tf/impl/api.py @@ -26,7 +26,9 @@ import six from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.impl import conversion from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.utils import builtins from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect @@ -110,28 +112,7 @@ def convert(recursive=False, verbose=False, arg_types=None): @wraps(f) def wrapper(*args, **kwargs): - """Wrapper that calls the compiled version of the wrapped function.""" - partial_types = () - arg_values = {} - arg_names = tf_inspect.getargspec(f)[0] - for name, arg in zip(arg_names, args): - arg_values[name] = arg - arg_class = arg.__class__ - # If arg_value_hints specifies any name, use that instead. - if name not in arg_types: - arg_types[name] = (arg_class.__name__, arg_class) - if name == 'self' and tf_inspect.isclass(arg_class): - # Annotated methods need to specify that their owner type is partial, - # otherwise other members they call will not be converted. - partial_types = (arg_class,) - wrapped = to_graph( - f, - recursive=recursive, - verbose=verbose, - arg_values=arg_values, - arg_types=arg_types, - partial_types=partial_types) - return wrapped(*args, **kwargs) + return converted_call(f, recursive, verbose, arg_types, *args, **kwargs) # Sometimes the decorator is just desugared, making it impossible to detect. # This attribute makes detection easier. 
@@ -141,6 +122,78 @@ def convert(recursive=False, verbose=False, arg_types=None): return decorator +def converted_call(f, recursive, verbose, arg_types, *args, **kwargs): + """Compiles a function call inline.""" + # TODO(mdan): This needs cleanup. + # In particular, we may want to avoid renaming functions altogether. + + if conversion.is_whitelisted_for_graph(f): + return f(*args, **kwargs) + + unknown_arg_value = object() # Sentinel for arguments of unknown value + + if tf_inspect.isbuiltin(f): + return builtins.dynamic_builtin(f, *args, **kwargs) + + if tf_inspect.isfunction(f) or tf_inspect.ismethod(f): + # Regular functions + target_entity = f + arg_map_target = f + effective_args = args + f_class = inspect_utils.getmethodclass(f) + + if f_class is not None: + partial_types = (f_class,) + else: + partial_types = () + + elif tf_inspect.isclass(f): + # Constructors + target_entity = f + arg_map_target = f.__init__ + effective_args = (unknown_arg_value,) + args + partial_types = () + + elif hasattr(f, '__call__') and hasattr(f, '__class__'): + # Callable objects + target_entity = f.__call__ + arg_map_target = f.__call__ + effective_args = (f,) + args + partial_types = (f.__class__,) + + else: + NotImplementedError('unknown callable type "%s"' % type(f)) + + arg_values = tf_inspect.getcallargs(arg_map_target, *args, **kwargs) + for name, arg in arg_values.items(): + if arg is unknown_arg_value: + continue + arg_class = arg.__class__ + # If arg_value_hints specifies any name, use that instead. + if name not in arg_types: + arg_types[name] = (arg_class.__name__, arg_class) + + # When called from within a decorator, this is the only indication that + # the function is a method - it appears that the decorator is applied + # before the method is bound. 
+ if not partial_types: + if 'self' in arg_values: + if tf_inspect.isclass(arg_values['self'].__class__): + partial_types = (arg_values['self'].__class__,) + elif 'cls' in arg_values: + if tf_inspect.isclass(arg_values['cls']): + partial_types = (arg_values['cls'],) + + converted_f = to_graph( + target_entity, + recursive=recursive, + verbose=verbose, + arg_values=arg_values, + arg_types=arg_types, + partial_types=partial_types) + return converted_f(*effective_args, **kwargs) + + def to_graph(e, recursive=True, verbose=False, @@ -189,7 +242,7 @@ def to_graph(e, # The compiled code should see everything the entry function saw. # TODO(mdan): This might not work well if the call tree spans modules? if tf_inspect.isfunction(e): - compiled_node.__dict__.update(six.get_function_globals(e)) + compiled_node.__dict__.update(inspect_utils.getnamespace(e)) compiled_fn = getattr(compiled_node, name) if verbose: -- GitLab From 0c47d9d9622724aabd41425aad482637b2245499 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 08:29:52 -0800 Subject: [PATCH 131/884] Tensorflow: adds additional debugging info to feed_dict failure condition. If you have a large feed dict, determining the type of each object can be difficult, and this additional debugging info helped me in such a case. PiperOrigin-RevId: 187179551 --- tensorflow/python/client/session.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index f3c4fecdc0..5737047c4b 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -1085,7 +1085,10 @@ class BaseSession(SessionInterface): if isinstance(subfeed_val, ops.Tensor): raise TypeError('The value of a feed cannot be a tf.Tensor object. ' 'Acceptable feed values include Python scalars, ' - 'strings, lists, numpy ndarrays, or TensorHandles.') + 'strings, lists, numpy ndarrays, or TensorHandles.' 
+ 'For reference, the tensor object was ' + + str(feed_val) + ' which was passed to the ' + 'feed with key ' + str(feed) + '.') subfeed_dtype = subfeed_t.dtype.as_numpy_dtype if isinstance(subfeed_val, int) and _convert_to_numpy_obj( -- GitLab From 67545cd70ebec13c18159d105b0ce17bbfc7ac44 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 27 Feb 2018 09:52:00 -0800 Subject: [PATCH 132/884] Uses the new automatic control dependencies code for functions. PiperOrigin-RevId: 187189552 --- tensorflow/python/eager/function.py | 73 ++++++++++++++--------- tensorflow/python/eager/function_test.py | 14 ++--- tensorflow/python/eager/graph_callable.py | 12 +++- 3 files changed, 61 insertions(+), 38 deletions(-) diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index b3317bd323..655eaf3a1e 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py @@ -36,6 +36,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes as dtypes_module from tensorflow.python.framework import errors from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.util import compat @@ -162,31 +163,15 @@ class CapturingGraph(ops.Graph): op_def=None, compute_shapes=True, compute_device=True): - # TODO(apassos) probably control flow has to be handled delicately here as - # in if a resource is accessed inside a control flow context we need the - # control dependency to point to something outside the context which is - # guaranteed to happen after the access. - # # TODO(apassos) this should do some form of alias analysis as ops which # forward the resources such as Identity and Switch can cause serialization # to fail. 
- resource_inputs = set() - control_inputs = set() for i, inp in enumerate(inputs): if inp.graph is not self: inputs[i] = capture_value(self.captures, inp, inp.dtype, inp.op.name) - inp = inputs[i] - if inp.dtype == dtypes_module.resource: - if inp.name in self._last_op_using_resource_tensor: - control_inputs.add(self._last_op_using_resource_tensor[inp.name]) - resource_inputs.add(inp.name) - with self.control_dependencies(list(control_inputs)): - op = super(CapturingGraph, self).create_op( - op_type, inputs, dtypes, input_types, name, attrs, op_def, - compute_shapes, compute_device) - for name in resource_inputs: - self._last_op_using_resource_tensor[name] = op - return op + return super(CapturingGraph, self).create_op( + op_type, inputs, dtypes, input_types, name, attrs, op_def, + compute_shapes, compute_device) # TODO(apassos): it'd be really nice if we could scope this registration. @@ -636,13 +621,15 @@ def _defun_internal(name, func, args, kwds): for collection in curr_graph.collections: tmp_graph.get_collection_ref(collection)[:] = curr_graph.get_collection( collection) - with tmp_graph.as_default(): + with tmp_graph.as_default(), AutomaticControlDependencies() as a: func_inputs = _get_defun_inputs(args) def convert(x): if x is None: return None - return ops.convert_to_tensor_or_indexed_slices(x) + x = ops.convert_to_tensor_or_indexed_slices(x) + x = a.mark_as_return(x) + return x with capture_tensors(captures): this_tape = tape.push_new_tape() @@ -887,7 +874,36 @@ class AutomaticControlDependencies(object): self._returned_tensors = set() def mark_as_return(self, tensor): + """Acts like identity but marks the `Tensor` as a return value. + + This will possibly return a copy of the `Tensor`. Usage: + + ``` + with AutomaticControlDependencies() as a: + ... + t = a.mark_as_return(t) + _ = ...(t...) # i.e. it's safe to use t here + ``` + + Args: + tensor: the `Tensor` to be marked + + Returns: + a copy of the `Tensor`. 
+ """ + if isinstance(tensor, ops.IndexedSlices): + values = array_ops.identity(tensor.values) + indices = array_ops.identity(tensor.indices) + self._returned_tensors.add(indices) + self._returned_tensors.add(values) + return ops.IndexedSlices(values, indices, dense_shape=tensor.dense_shape) + # We want to make the return values depend on the stateful operations, but + # we don't want to introduce a cycle, so we make the return value the result + # of a new identity operation that the stateful operations definitely don't + # depend on. + tensor = array_ops.identity(tensor) self._returned_tensors.add(tensor) + return tensor def __enter__(self): if context.in_eager_mode(): @@ -1008,7 +1024,8 @@ class AutomaticControlDependencies(object): for op in new_operations: control_inputs = set() # Ensure stateful ops run - if self._graph._registered_ops[op.type].is_stateful: # pylint: disable=protected-access + if (op.type not in self._graph._registered_ops # pylint: disable=protected-access + or self._graph._registered_ops[op.type].is_stateful): # pylint: disable=protected-access ops_which_must_run.add(op) # Ignore switches (they're handled separately) if op.type == "Switch" and op.inputs[0].dtype == dtypes_module.resource: @@ -1044,9 +1061,10 @@ class AutomaticControlDependencies(object): # Ensure all ops which must run do run for r in self._returned_tensors: - r.op._add_control_inputs( # pylint: disable=protected-access - [o for o in ops_which_must_run - if o._control_flow_context is r.op._control_flow_context]) # pylint: disable=protected-access + if ops_which_must_run: + r.op._add_control_inputs( # pylint: disable=protected-access + [o for o in ops_which_must_run + if o._control_flow_context is r.op._control_flow_context]) # pylint: disable=protected-access def automatic_control_dependencies(f): @@ -1066,8 +1084,7 @@ def automatic_control_dependencies(f): def wrapper(*args, **kwds): with AutomaticControlDependencies() as a: result = f(*args, **kwds) - for t in 
nest.flatten(result): - a.mark_as_return(t) - return result + result_flat = [a.mark_as_return(t) for t in nest.flatten(result)] + return nest.pack_sequence_as(result, result_flat) return tf_decorator.make_decorator(f, wrapper) diff --git a/tensorflow/python/eager/function_test.py b/tensorflow/python/eager/function_test.py index 431d9388c0..b9cde16867 100644 --- a/tensorflow/python/eager/function_test.py +++ b/tensorflow/python/eager/function_test.py @@ -606,7 +606,7 @@ class AutomaticControlDependenciesTest(test.TestCase): v.assign(v + 1) v.assign(2 * v) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(), 4.0) def testCondMustRun(self): @@ -626,7 +626,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0) self.assertAllEqual(val.eval(feed_dict={p: True}), 6.0) @@ -647,7 +647,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) one = constant_op.constant(1.0) - c.mark_as_return(one) + one = c.mark_as_return(one) one.eval(feed_dict={p: False}) self.assertAllEqual(v.read_value().eval(), 5.0) one.eval(feed_dict={p: True}) @@ -681,7 +681,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) with ops.name_scope('final'): val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False, q: False}), 3.0) self.assertAllEqual(val.eval(feed_dict={p: False, q: True}), 6.0) self.assertAllEqual(val.eval(feed_dict={p: True, q: True}), 7.0) @@ -703,7 +703,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 5.0) 
self.assertAllEqual(val.eval(feed_dict={p: True}), 5.0) @@ -724,7 +724,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 6.0) self.assertAllEqual(val.eval(feed_dict={p: True}), 12.0) @@ -745,7 +745,7 @@ class AutomaticControlDependenciesTest(test.TestCase): control_flow_ops.cond(p, true_fn, false_fn) v.assign(v * 2) val = v.read_value() - c.mark_as_return(val) + val = c.mark_as_return(val) self.assertAllEqual(val.eval(feed_dict={p: False}), 10.0) self.assertAllEqual(val.eval(feed_dict={p: True}), 20.0) diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index 62106bf0e2..623f3564ad 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -279,9 +279,12 @@ def _graph_callable_internal(func, shape_and_dtypes): # scope's view of which variables exist. variable_captures = _VariableCapturingScope() with variable_captures.initializing_scope(), function.capture_tensors( - captures): + captures), function.AutomaticControlDependencies() as a: func_outputs = func(*func_inputs) - outputs_list = nest.flatten(func_outputs) + outputs_list = nest.flatten(func_outputs) + for i, x in enumerate(outputs_list): + if x is not None: + outputs_list[i] = a.mark_as_return(x) if len(outputs_list) == 1 and outputs_list[0] is None: outputs_list = [] output_shapes = [x.shape for x in outputs_list] @@ -294,9 +297,12 @@ def _graph_callable_internal(func, shape_and_dtypes): # knows about all variables. 
tmp_graph.clear_resource_control_flow_state() with variable_captures.capturing_scope(), function.capture_tensors( - captures): + captures), function.AutomaticControlDependencies() as a: captured_outputs = func(*func_inputs) captured_outlist = nest.flatten(captured_outputs) + for i, x in enumerate(captured_outlist): + if x is not None: + captured_outlist[i] = a.mark_as_return(x) capturing_operations = tmp_graph.get_operations()[ len(initializing_operations):] -- GitLab From f62f168fc3d59e3f067423fc39b4f5c3bfe2527a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:05:22 -0800 Subject: [PATCH 133/884] Make crosstools ready for introduction of c++-link-nodeps-dynamic-library PiperOrigin-RevId: 187191730 --- third_party/gpus/crosstool/CROSSTOOL_clang.tpl | 7 +++++++ third_party/toolchains/gpus/crosstool/CROSSTOOL | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/third_party/gpus/crosstool/CROSSTOOL_clang.tpl b/third_party/gpus/crosstool/CROSSTOOL_clang.tpl index e4363d6045..2f09473ee2 100644 --- a/third_party/gpus/crosstool/CROSSTOOL_clang.tpl +++ b/third_party/gpus/crosstool/CROSSTOOL_clang.tpl @@ -49,6 +49,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-lstdc++" } @@ -75,6 +76,7 @@ toolchain { name: "alwayslink" flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,-no-as-needed" @@ -116,6 +118,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-Wl,-z,relro,-z,now" } @@ -161,6 +164,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { # Stamp the binary with a unique identifier. 
flag: "-Wl,--build-id=md5" @@ -176,6 +180,7 @@ toolchain { action: "c++-compile" action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag:"-no-canonical-prefixes" } @@ -199,6 +204,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-B/usr/bin/" } @@ -246,6 +252,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,--gc-sections" diff --git a/third_party/toolchains/gpus/crosstool/CROSSTOOL b/third_party/toolchains/gpus/crosstool/CROSSTOOL index a47e0c7cd7..16ee2f82c6 100644 --- a/third_party/toolchains/gpus/crosstool/CROSSTOOL +++ b/third_party/toolchains/gpus/crosstool/CROSSTOOL @@ -53,6 +53,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-lstdc++" } @@ -79,6 +80,7 @@ toolchain { name: "alwayslink" flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,-no-as-needed" @@ -120,6 +122,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-Wl,-z,relro,-z,now" } @@ -165,6 +168,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { # Stamp the binary with a unique identifier. 
flag: "-Wl,--build-id=md5" @@ -180,6 +184,7 @@ toolchain { action: "c++-compile" action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag:"-no-canonical-prefixes" } @@ -203,6 +208,7 @@ toolchain { flag_set { action: "c++-link-executable" action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" flag_group { flag: "-B/usr/bin/" } @@ -250,6 +256,7 @@ toolchain { } flag_set { action: "c++-link-dynamic-library" + action: "c++-link-nodeps-dynamic-library" action: "c++-link-executable" flag_group { flag: "-Wl,--gc-sections" -- GitLab From 0e5458fb95b0b146838a3c61de31bb9497c613ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:05:38 -0800 Subject: [PATCH 134/884] Implement partial constant folding of AddN and AccumulateNV2. Change AccumulateNV2 to AddN if all inputs are constant, since constant folding doesn't work for the fake node type. PiperOrigin-RevId: 187191772 --- .../grappler/optimizers/constant_folding.cc | 78 ++++++++++++ .../optimizers/constant_folding_test.cc | 115 ++++++++++++++++-- 2 files changed, 184 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index a5417aaa51..32c8a9b2f5 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1493,6 +1493,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE; for (int i = 0; i < output->node_size(); ++i) { NodeDef* node = output->mutable_node(i); + // Remove Shuffle or Reverse op over scalar values. 
if (use_shape_info && (IsShuffle(*node) || IsReverse(*node) || IsTranspose(*node))) { @@ -1839,6 +1840,83 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, std::swap(*node->mutable_input(parent_const_input), *op_child_node->mutable_input(non_const_leaf_input)); graph_modified_ = true; + continue; + } + + // Partial constant folding for associative operators: + // Split AddN/AccumulateNV2 to enable partial + // folding of ops when more than one but not all inputs are constant. + // For AddN and AccumulateNV2, we may furthermore reorder inputs, since + // addition is commutative. + // TODO(rmlarsen): Concat/Pack/ParallelConcat which are not commutative, so + // we have to preserve order and can only push consecutive runs of constant + // inputs into sub-nodes. + if (IsAggregate(*node) && IsCommutative(*node) && + NumNonControlInputs(*node) > 2) { + const int num_control_inputs = + node->input_size() - NumNonControlInputs(*node); + std::vector const_inputs; + std::vector nonconst_inputs; + for (int i = 0; i < node->input_size(); ++i) { + const string& input = node->input(i); + const NodeDef* input_node = node_map_->GetNode(NodeName(input)); + CHECK(input_node != nullptr) << input; + if (!IsControlInput(input) && IsReallyConstant(*input_node)) { + const_inputs.push_back(i); + } else { + // Non-const and control inputs. + nonconst_inputs.push_back(i); + } + } + // Promote AccumulateNV2 with all constant inputs to AddN, since it is + // a fake node that cannot be constant folded by itself. 
+ if (const_inputs.size() == NumNonControlInputs(*node) && + node->op() == "AccumulateNV2") { + node->set_op("AddN"); + node->mutable_attr()->erase("shape"); + graph_modified_ = true; + continue; + } + const string new_node_name = OptimizedNodeName( + *node, strings::StrCat("_partial_split_", const_inputs.size())); + if (1 < const_inputs.size() && + const_inputs.size() < NumNonControlInputs(*node) && + !node_map_->NodeExists(new_node_name)) { + NodeDef* added_node = output->add_node(); + *added_node = *node; + // Always use AddN for the constant node, since AccumulateNV2 is a fake + // node that cannot be constant folded, since it does not have a kernel. + added_node->set_op("AddN"); + added_node->mutable_attr()->erase("shape"); + added_node->set_name(new_node_name); + node_map_->AddNode(added_node->name(), added_node); + added_node->clear_input(); + for (int i : const_inputs) { + added_node->add_input(node->input(i)); + node_map_->UpdateOutput(NodeName(node->input(i)), node->name(), + added_node->name()); + } + + // Overwrite the first const input with the added node. + node->set_input(const_inputs[0], added_node->name()); + node_map_->AddOutput(added_node->name(), node->name()); + nonconst_inputs.push_back(const_inputs[0]); + // Compact the remaining inputs to the original node. 
+ std::sort(nonconst_inputs.begin(), nonconst_inputs.end()); + int idx = 0; + for (int i : nonconst_inputs) { + if (idx != i) { + node->set_input(idx, node->input(i)); + } + ++idx; + } + node->mutable_input()->DeleteSubrange(nonconst_inputs.size(), + const_inputs.size() - 1); + (*node->mutable_attr())["N"].set_i(node->input_size() - + num_control_inputs); + (*added_node->mutable_attr())["N"].set_i(const_inputs.size()); + graph_modified_ = true; + } } } diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index c6540192d7..3149e1d53e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -187,20 +187,21 @@ TEST_F(ConstantFoldingTest, NeutralElement) { Output bias_add2 = ops::BiasAdd(s.WithOpName("bias_add2"), zeros, bias); Output sub1 = ops::Sub(s.WithOpName("sub1"), x, zeros); Output sub2 = ops::Sub(s.WithOpName("sub2"), zeros, y); - Output addn = - ops::AddN(s.WithOpName("addn"), - {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1, - matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}); + Output concat = + ops::Concat(s.WithOpName("concat"), + {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1, + matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}, + 0); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - item.fetch = {"addn", "matmul3", "matmul4"}; + item.fetch = {"concat", "matmul3", "matmul4"}; ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(27, output.node_size()); + EXPECT_EQ(28, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); const string& name = node.name(); @@ -414,7 +415,6 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) { GraphDef output; Status status = 
optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - LOG(INFO) << output.DebugString(); EXPECT_EQ(15, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { @@ -1547,8 +1547,105 @@ TEST_F(ConstantFoldingTest, SwitchIdenticalInputs) { EXPECT_EQ(6, found); } +TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { + std::function addn_fun = + [](const Scope& scope, InputList inputs) { + return ops::AddN(scope, inputs); + }; + std::function accumulate_fun = + [](const Scope& scope, InputList inputs) { + return ops::AccumulateNV2(scope, inputs, TensorShape({2, 2})); + }; + for (bool use_add_n : {true, false}) { + auto fun = use_add_n ? addn_fun : accumulate_fun; + const string op_name = use_add_n ? "AddN" : "AccumulateNV2"; + Scope s = Scope::NewRootScope(); + Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output z = ops::Placeholder(s.WithOpName("z"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output c1 = ops::Const(s.WithOpName("c1"), 1.0f, {2, 2}); + Output c2 = ops::Const(s.WithOpName("c2"), 2.0f, {2, 2}); + Output c3 = ops::Const(s.WithOpName("c3"), 3.0f, {2, 2}); + Output acc0 = fun(s.WithOpName("acc0"), {c1, c2, c3}); + Output acc1 = fun(s.WithOpName("acc1"), {x, y, z}); + Output acc2 = fun(s.WithOpName("acc2"), {c1, x, y}); + Output acc3 = fun(s.WithOpName("acc3"), {c1, c2, z}); + Output acc4 = fun(s.WithOpName("acc4"), {c1, y, c2}); + Output acc5 = fun(s.WithOpName("acc5"), {x, c1, c2}); + Output acc6 = fun(s.WithOpName("acc6"), {x, c1, y, c2}); + Output concat = ops::Concat(s.WithOpName("concat"), + {acc0, acc1, acc2, acc3, acc4, acc5, acc6}, 0); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {"concat"}; + + ConstantFolding optimizer(nullptr /* cpu_device */); + GraphDef output; + Status status = 
optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(17, output.node_size()); + for (const NodeDef& node : output.node()) { + if (node.name() == "acc0") { + EXPECT_EQ("Const", node.op()); + } + if (node.name() == "acc1") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("z", node.input(2)); + } + if (node.name() == "acc2") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("c1", node.input(0)); + EXPECT_EQ("x", node.input(1)); + EXPECT_EQ("y", node.input(2)); + } + if (node.name() == "acc3") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("ConstantFolding/acc3_partial_split_2", node.input(0)); + EXPECT_EQ("z", node.input(1)); + } + if (node.name() == "acc4") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("ConstantFolding/acc4_partial_split_2", node.input(0)); + EXPECT_EQ("y", node.input(1)); + } + if (node.name() == "acc5") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("ConstantFolding/acc5_partial_split_2", node.input(1)); + } + if (node.name() == "acc6") { + EXPECT_EQ(op_name, node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("ConstantFolding/acc6_partial_split_2", node.input(1)); + EXPECT_EQ("y", node.input(2)); + } + if (StringPiece(node.name()).starts_with("ConstantFolding/")) { + EXPECT_EQ("Const", node.op()); + } + } + + std::vector fetch = {"acc0"}; + auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors = EvaluateNodes(output, fetch); + EXPECT_EQ(1, tensors_expected.size()); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); + } +} + } // namespace } // namespace grappler } // namespace tensorflow - -// LocalWords: NewRootScope -- GitLab From 
e929b16dc89f62a41bcaba57b98ddd221bf9bf68 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Tue, 27 Feb 2018 10:25:17 -0800 Subject: [PATCH 135/884] Lint fixes. PiperOrigin-RevId: 187194778 --- tensorflow/python/util/tf_inspect.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index a7cead5555..4ab8a72a83 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -46,8 +46,10 @@ def getargspec(object): # pylint: disable=redefined-builtin def getfullargspec(obj): # pylint: disable=redefined-builtin - """TFDecorator-aware replacement for inspect.getfullargspec and fallback to - inspect.getargspec in Python 2. + """TFDecorator-aware replacement for `inspect.getfullargspec`/`getargspec`. + + This wrapper uses `inspect.getfullargspec` if available and falls back to + `inspect.getargspec` in Python 2. Args: obj: A callable, possibly decorated. -- GitLab From e20be23387a6c1b72f3e34d03d4206c3211c921a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:27:28 -0800 Subject: [PATCH 136/884] Make block-based pruning more general, allowing it to operate on higher-dimensional arrays that can be squeezed to 2-dimensional. PiperOrigin-RevId: 187195105 --- tensorflow/contrib/model_pruning/README.md | 2 +- .../contrib/model_pruning/python/pruning.py | 21 ++++++++++++------- .../model_pruning/python/pruning_test.py | 17 +++++++++++++++ 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/model_pruning/README.md b/tensorflow/contrib/model_pruning/README.md index d286750c25..52b659c69f 100644 --- a/tensorflow/contrib/model_pruning/README.md +++ b/tensorflow/contrib/model_pruning/README.md @@ -134,7 +134,7 @@ $ bazel-bin/$examples_dir/cifar10/cifar10_eval --run_once ### Block Sparsity -For some hardware architectures, it may be beneficial to induce spatially correlated sparsity. 
To train models in which the weight tensors have block sparse structure, set *block_height* and *block_width* hyperparameters to the desired block configuration (2x2, 4x4, 4x1, 1x8, etc). Currently, block sparsity is supported for weight tensors with rank 2 only. The matrix is partitioned into non-overlapping blocks of size *[block_height, block_dim]* and the either the average or max absolute value in this block is taken as a proxy for the entire block (set by *block_pooling_function* hyperparameter). +For some hardware architectures, it may be beneficial to induce spatially correlated sparsity. To train models in which the weight tensors have block sparse structure, set *block_height* and *block_width* hyperparameters to the desired block configuration (2x2, 4x4, 4x1, 1x8, etc). Currently, block sparsity is only supported for weight tensors which can be squeezed to rank 2. The matrix is partitioned into non-overlapping blocks of size *[block_height, block_width]* and either the average or max absolute value in this block is taken as a proxy for the entire block (set by *block_pooling_function* hyperparameter). The convolution layer tensors are always pruned using block dimensions of [1,1]. ## References diff --git a/tensorflow/contrib/model_pruning/python/pruning.py b/tensorflow/contrib/model_pruning/python/pruning.py index d16af9da19..86963be4b8 100644 --- a/tensorflow/contrib/model_pruning/python/pruning.py +++ b/tensorflow/contrib/model_pruning/python/pruning.py @@ -523,7 +523,8 @@ class Pruning(object): """Performs block-granular masking of the weights. Block pruning occurs only if the block_height or block_width is > 1 and - if the weight tensor has ndims = 2. Otherwise, elementwise pruning occurs. + if the weight tensor, when squeezed, has ndims = 2. Otherwise, elementwise + pruning occurs. Args: weights: The weight tensor that needs to be masked. threshold: The current threshold value.
The function will compute a new @@ -540,7 +541,8 @@ class Pruning(object): Raises: ValueError: if block pooling function is not AVG or MAX """ - if weights.get_shape().ndims != 2 or self._block_dim == [1, 1]: + squeezed_weights = array_ops.squeeze(weights) + if squeezed_weights.get_shape().ndims != 2 or self._block_dim == [1, 1]: return self._update_mask(weights, threshold) if self._block_pooling_function not in ['AVG', 'MAX']: @@ -549,9 +551,11 @@ class Pruning(object): with ops.name_scope(weights.op.name + '_pruning_ops'): abs_weights = math_ops.abs( - array_ops.reshape( - weights, [1, weights.get_shape()[0], - weights.get_shape()[1], 1])) + array_ops.reshape(weights, [ + 1, + squeezed_weights.get_shape()[0], + squeezed_weights.get_shape()[1], 1 + ])) pool_window = [self._block_dim[0], self._block_dim[1]] pooled_weights = nn_ops.pool( abs_weights, @@ -572,9 +576,10 @@ class Pruning(object): array_ops.ones(self._block_dim)) sliced_mask = array_ops.slice( updated_mask, [0, 0], - [weights.get_shape()[0], - weights.get_shape()[1]]) - return smoothed_threshold, sliced_mask + [squeezed_weights.get_shape()[0], + squeezed_weights.get_shape()[1]]) + return smoothed_threshold, array_ops.reshape(sliced_mask, + array_ops.shape(weights)) def _get_mask_assign_ops(self): # Make sure the assignment ops have not already been added to the list diff --git a/tensorflow/contrib/model_pruning/python/pruning_test.py b/tensorflow/contrib/model_pruning/python/pruning_test.py index 1767b4bb94..89e6571319 100644 --- a/tensorflow/contrib/model_pruning/python/pruning_test.py +++ b/tensorflow/contrib/model_pruning/python/pruning_test.py @@ -140,6 +140,23 @@ class PruningTest(test.TestCase): [0.0, -0.3, 0.0, -0.4]]) expected_mask = [[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]] + self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max, + expected_mask) + self._blockMasking(param_list + ["block_pooling_function=AVG"], weights_avg, + expected_mask) + + def 
testBlockMaskingWithHigherDimensions(self): + param_list = ["block_height=2", "block_width=2", "threshold_decay=0"] + + # Weights as in testBlockMasking, but with one extra dimension. + weights_avg = constant_op.constant( + [[[0.1, 0.1, 0.2, 0.2], [0.1, 0.1, 0.2, 0.2], [0.3, 0.3, 0.4, 0.4], + [0.3, 0.3, 0.4, 0.4]]]) + weights_max = constant_op.constant( + [[[0.1, 0.0, 0.2, 0.0], [0.0, -0.1, 0.0, -0.2], [0.3, 0.0, 0.4, 0.0], + [0.0, -0.3, 0.0, -0.4]]]) + expected_mask = [[[0, 0, 0, 0], [0, 0, 0, 0], [1, 1, 1, 1], [1, 1, 1, 1]]] + self._blockMasking(param_list + ["block_pooling_function=MAX"], weights_max, expected_mask) self._blockMasking(param_list + ["block_pooling_function=AVG"], -- GitLab From 38bda430f4d302c762bc2a0b74721d82b9c5cca4 Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Tue, 27 Feb 2018 10:30:41 -0800 Subject: [PATCH 137/884] [TF CriticalSection] Bugfix: deref the Mutex before calling done_() This avoids an error wherein the Mutex destructor is called from the same thread as its threadpool, thus leading to a pthread 35 error. If the mutex is dereferenced before done_ is called, then the destruction is delayed until after done_() is called, and this happens in a different thread from the threadpool. PiperOrigin-RevId: 187195628 --- tensorflow/core/kernels/mutex_ops.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mutex_ops.cc b/tensorflow/core/kernels/mutex_ops.cc index b8b1fc7679..b02a584d73 100644 --- a/tensorflow/core/kernels/mutex_ops.cc +++ b/tensorflow/core/kernels/mutex_ops.cc @@ -190,7 +190,6 @@ class MutexLockOp : public AsyncOpKernel { // End of bound arguments. 
const Status& s, Mutex::SharedLockReleaser&& lock) { - core::ScopedUnref unref(mutex); VLOG(2) << "Finished locking mutex " << mutex << " with lock: " << lock.shared_lock.get() << " status: " << s.ToString(); @@ -199,6 +198,7 @@ class MutexLockOp : public AsyncOpKernel { } else { c->SetStatus(s); } + mutex->Unref(); done_(); }, std::move(done), std::placeholders::_1, std::placeholders::_2)); -- GitLab From 8ccc858d11f913e63cf3e35523bc3121684c2a82 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 10:49:41 -0800 Subject: [PATCH 138/884] Add 8bit Tanh support to tflite Allow output datatypes for custom ops to be more than the output types used in the graph. When an op has multiple outputs, some of them not used will be optimized away. This results in a failure. The change in propagate_array_data_types.cc fixes this. PiperOrigin-RevId: 187198815 --- .../contrib/lite/kernels/activations.cc | 40 ++++++++++++++++++- .../contrib/lite/kernels/activations_test.cc | 29 ++++++++++++++ .../propagate_array_data_types.cc | 7 +++- 3 files changed, 72 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/activations.cc b/tensorflow/contrib/lite/kernels/activations.cc index 6acded3091..093761c43c 100644 --- a/tensorflow/contrib/lite/kernels/activations.cc +++ b/tensorflow/contrib/lite/kernels/activations.cc @@ -63,6 +63,33 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) { TfLiteIntArrayCopy(input->dims)); } +TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = GetInput(context, node, 0); + TfLiteTensor* output = GetOutput(context, node, 0); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + if (input->type == kTfLiteUInt8) { + static constexpr int kInputIntegerBits = 4; + + const double input_real_multiplier =
+ input->params.scale * + static_cast(1 << (31 - kInputIntegerBits)); + + QuantizeMultiplierGreaterThanOne(input_real_multiplier, + &data->input_multiplier, + &data->input_left_shift); + data->input_range_radius = + CalculateInputRadius(kInputIntegerBits, data->input_left_shift); + } + + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + TfLiteStatus SigmoidPrepare(TfLiteContext* context, TfLiteNode* node) { OpData* data = reinterpret_cast(node->user_data); @@ -180,6 +207,7 @@ TfLiteStatus Relu6Eval(TfLiteContext* context, TfLiteNode* node) { } TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { + OpData* data = reinterpret_cast(node->user_data); TfLiteTensor* input = GetInput(context, node, 0); TfLiteTensor* output = GetOutput(context, node, 0); switch (input->type) { @@ -191,6 +219,14 @@ TfLiteStatus TanhEval(TfLiteContext* context, TfLiteNode* node) { for (; in < in_end; in++, out++) *out = std::tanh(*in); return kTfLiteOk; } break; + case kTfLiteUInt8: { + optimized_ops::Tanh(GetTensorData(input), GetTensorDims(input), + input->params.zero_point, data->input_range_radius, + data->input_multiplier, data->input_left_shift, + GetTensorData(output), + GetTensorDims(output)); + return kTfLiteOk; + } break; default: context->ReportError(context, "Only float32 supported currently."); return kTfLiteError; @@ -376,8 +412,8 @@ TfLiteRegistration* Register_RELU6() { } TfLiteRegistration* Register_TANH() { - static TfLiteRegistration r = {/*init=*/nullptr, /*free=*/nullptr, - activations::GenericPrepare, + static TfLiteRegistration r = {activations::Init, activations::Free, + activations::TanhPrepare, activations::TanhEval}; return &r; } diff --git a/tensorflow/contrib/lite/kernels/activations_test.cc b/tensorflow/contrib/lite/kernels/activations_test.cc index 302e52b96d..b9a96e3f79 100644 --- a/tensorflow/contrib/lite/kernels/activations_test.cc +++ b/tensorflow/contrib/lite/kernels/activations_test.cc @@ -52,6 +52,14 @@ 
class BaseActivationsOpModel : public SingleOpModel { BuildInterpreter({GetShape(input_)}); } + BaseActivationsOpModel(BuiltinOperator type, const TensorData &input, + const TensorData &output) { + input_ = AddInput(input); + output_ = AddOutput(output); + SetBuiltinOp(type, BuiltinOptions_NONE, 0); + BuildInterpreter({GetShape(input_)}); + } + protected: int input_; int output_; @@ -143,6 +151,27 @@ TEST(FloatActivationsOpTest, Tanh) { }))); } +TEST(QuantizedActivationsOpTest, Tanh) { + QuantizedActivationsOpModel m( + BuiltinOperator_TANH, + /*input=*/{TensorType_UINT8, {1, 2, 4, 1}, -8, 8}, + /*output=*/{TensorType_UINT8, {1, 2, 4, 1}, -1, 1}); + m.SetInput({ + 0, -6, 2, 4, // + -4, -2, 8, 1, // + }); + m.Invoke(); + EXPECT_THAT(m.GetDequantizedOutput(), + ElementsAreArray(ArrayFloatNear( + { + 0.0, -0.999987, 0.964027, 0.999329, // + -0.996078, -0.96402, 0.99999, 0.76159, // + }, + 4 * (1. / 256)))); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray({128, 0, 251, 255, 0, 5, 255, 226})); +} + TEST(FloatActivationsOpTest, Sigmoid) { FloatActivationsOpModel m(BuiltinOperator_LOGISTIC, /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}}); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc index f0d107232b..bde947f78d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -97,10 +97,13 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { SetDataTypeForAllOutputs(model, op, data_type); } else if (op->type == OperatorType::kTensorFlowUnsupported) { auto* unsupported_op = static_cast(op); - if (unsupported_op->output_data_types.size() != op->outputs.size()) { + // Some output tensors from the op could be eliminated by optimization. 
+ // This can make unsupported_op->output_data_types have more elements than + // op->outputs. + if (unsupported_op->output_data_types.size() < op->outputs.size()) { return false; } - for (int i = 0; i < unsupported_op->output_data_types.size(); ++i) { + for (int i = 0; i < op->outputs.size(); ++i) { auto output = op->outputs[i]; auto data_type = unsupported_op->output_data_types[i]; model->GetArray(output).data_type = data_type; -- GitLab From 6a6661bbdce2172d27bf501e26baf09e8a658657 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Feb 2018 11:01:10 -0800 Subject: [PATCH 139/884] Function optimization: added an optimizer to automatically inline functions in order to enable Grappler to optimize the body of functions. Inlining also reduces the overhead of evaluating function. PiperOrigin-RevId: 187200883 --- .../core/grappler/grappler_item_builder.cc | 108 -------- .../core/grappler/grappler_item_builder.h | 7 - .../grappler/grappler_item_builder_test.cc | 199 --------------- tensorflow/core/grappler/optimizers/BUILD | 38 +++ .../grappler/optimizers/function_optimizer.cc | 148 +++++++++++ .../grappler/optimizers/function_optimizer.h | 43 ++++ .../optimizers/function_optimizer_test.cc | 98 ++++++++ tensorflow/core/grappler/utils/BUILD | 32 +++ tensorflow/core/grappler/utils/functions.cc | 140 +++++++++++ tensorflow/core/grappler/utils/functions.h | 39 +++ .../core/grappler/utils/functions_test.cc | 232 ++++++++++++++++++ .../core/grappler/utils/grappler_test.cc | 4 +- 12 files changed, 772 insertions(+), 316 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/function_optimizer.cc create mode 100644 tensorflow/core/grappler/optimizers/function_optimizer.h create mode 100644 tensorflow/core/grappler/optimizers/function_optimizer_test.cc create mode 100644 tensorflow/core/grappler/utils/functions.cc create mode 100644 tensorflow/core/grappler/utils/functions.h create mode 100644 tensorflow/core/grappler/utils/functions_test.cc diff --git 
a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 5ac52eefe1..606807b9e9 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -518,113 +518,5 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( return new_item; } -std::unique_ptr GrapplerItemFromFunctionDef( - const FunctionDef& func, - const std::unordered_map& func_attr, - const FunctionDefLibrary& library) { - if (func.signature().name().empty()) { - LOG(ERROR) << "function name must be specified."; - return nullptr; - } - std::unique_ptr new_item(new GrapplerItem()); - new_item->id = func.signature().name(); - - std::unordered_map port_map; - - // Add the function inputs as placeholder - for (const auto& inp : func.signature().input_arg()) { - NodeDef* ph = new_item->graph.add_node(); - ph->set_name(inp.name()); - ph->set_op("Placeholder"); - if (inp.type() != DT_INVALID) { - (*ph->mutable_attr())["T"].set_type(inp.type()); - } else { - auto it = func_attr.find(inp.type_attr()); - if (it == func_attr.end()) { - LOG(ERROR) << "Unknown type attribute " << inp.type_attr() - << " for function input " << inp.name(); - return nullptr; - } else { - (*ph->mutable_attr())["T"] = it->second; - } - } - port_map[inp.name()] = inp.name(); - } - - // Add the function body to the graph. - FunctionLibraryDefinition func_def(OpRegistry::Global(), library); - - for (const NodeDef& node : func.node_def()) { - NodeDef* new_node = new_item->graph.add_node(); - *new_node = node; - // Replace the placeholder attribute values with the specified value. - for (auto& attr : *new_node->mutable_attr()) { - const string& ph_name = attr.second.placeholder(); - auto it = func_attr.find(ph_name); - if (it != func_attr.end()) { - attr.second = it->second; - } - } - - // Functions use a custom format to encode connectivity. Map these custom - // strings to regular ones. 
- const OpRegistrationData* registration; - Status status = func_def.LookUp(node.op(), &registration); - if (!status.ok()) { - LOG(ERROR) << "Op " << node.op() << " not registered: " << status; - return nullptr; - } - - tensorflow::NameRangeMap inputs; - tensorflow::NameRangeMap outputs; - status = tensorflow::NameRangesForNode(node, registration->op_def, &inputs, - &outputs); - if (!status.ok()) { - LOG(ERROR) << "Op " << node.op() << " invalid: " << status; - return nullptr; - } - for (const auto& name_range : outputs) { - string port_prefix = - strings::StrCat(node.name(), ":", name_range.first, ":"); - int index_start = name_range.second.first; - int index_end = name_range.second.second; - for (int i = index_start; i < index_end; ++i) { - string port_id = strings::StrCat(port_prefix, i - index_start); - string port_name = strings::StrCat(node.name(), ":", i); - port_map[port_id] = port_name; - } - } - } - - for (auto& node : *new_item->graph.mutable_node()) { - // Rewrite the inputs to use the normal naming convention. - for (int i = 0; i < node.input_size(); ++i) { - const string& input = node.input(i); - if (IsControlInput(input)) { - // No need to remap control dependencies. - continue; - } else { - auto it = port_map.find(input); - if (it == port_map.end()) { - LOG(ERROR) << "Unknown input: " << input; - return nullptr; - } - node.set_input(i, it->second); - } - } - } - - // Add the function outputs to the list of fetch nodes. - for (const auto& out : func.signature().output_arg()) { - new_item->fetch.emplace_back(out.name()); - } - // Add the function inputs to the list of feeds. 
- for (const auto& inp : func.signature().input_arg()) { - new_item->feed.emplace_back(inp.name(), Tensor()); - } - - return new_item; -} - } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/grappler_item_builder.h b/tensorflow/core/grappler/grappler_item_builder.h index e892a3f556..c877d91163 100644 --- a/tensorflow/core/grappler/grappler_item_builder.h +++ b/tensorflow/core/grappler/grappler_item_builder.h @@ -58,13 +58,6 @@ struct ItemConfig { std::unique_ptr GrapplerItemFromMetaGraphDef( const string& id, const MetaGraphDef& meta_graph, const ItemConfig& cfg); -// Factory method for creating a GrapplerItem from a FunctionDef. -// Returns nullptr if the given function def cannot be converted. -std::unique_ptr GrapplerItemFromFunctionDef( - const FunctionDef& func, - const std::unordered_map& func_attr, - const FunctionDefLibrary& library); - } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc index 68437b6041..ef95992af7 100644 --- a/tensorflow/core/grappler/grappler_item_builder_test.cc +++ b/tensorflow/core/grappler/grappler_item_builder_test.cc @@ -280,205 +280,6 @@ TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) { ASSERT_TRUE(item != nullptr); } -TEST_F(GrapplerItemBuilderTest, FromSimpleFunctionDef) { - const Tensor kTwo = test::AsScalar(2); - FunctionDef func = FunctionDefHelper::Define( - // Name - "XTimesTwo", - // Args - {"x: T"}, - // Return values - {"y: T"}, - // Attr def - {"T: {float, double, int32, int64}"}, - // Nodes - { - {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, - {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, - {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}}, - }); - - std::unordered_map func_attr; - func_attr["T"].set_type(DT_FLOAT); - FunctionDefLibrary library; - std::unique_ptr item = - GrapplerItemFromFunctionDef(func, 
func_attr, library); - CHECK(item); - EXPECT_EQ("XTimesTwo", item->id); - EXPECT_EQ(4, item->graph.node_size()); - EXPECT_EQ(std::vector({"y"}), item->fetch); - EXPECT_EQ(1, item->feed.size()); - EXPECT_EQ("x", item->feed[0].first); - - for (const NodeDef &node : item->graph.node()) { - if (node.name() == "x") { - EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "two") { - EXPECT_EQ("Const", node.op()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "scale") { - EXPECT_EQ("Cast", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("DstT").type()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("two:0", node.input(0)); - } else if (node.name() == "y") { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("scale:0", node.input(1)); - } - } -} - -TEST_F(GrapplerItemBuilderTest, FromFunctionDefWithMultiOutputNodes) { - // Gradient graph for the Subtract operation - std::vector nodes = { - {{"sx"}, "Shape", {"x"}}, - {{"sy"}, "Shape", {"y"}}, - {{"gx"}, "Identity", {"dz"}}, - {{"gy"}, "Neg", {"dz"}}, - {{"rx", "ry"}, "BroadcastGradientArgs", {"sx", "sy"}}, - {{"sum_gx"}, "Sum", {"gx", "rx"}}, - {{"dx"}, "Reshape", {"sum_gx", "sx"}}, - {{"sum_gy"}, "Sum", {"gy", "ry"}}, - {{"dy"}, "Reshape", {"sum_gy", "sy"}}, - }; - - for (auto &n : nodes) { - // "BroadcastGradientArgs" doesn't need any attrs. 
- if (n.attr.empty() && n.op != "BroadcastGradientArgs") { - n.attr = {{"T", "$T"}}; - } - } - FunctionDef func = FunctionDefHelper::Define( - // Name - "SubGrad", - // Arg defs - {"x: T", "y: T", "dz: T"}, - // Ret val defs - {"dx: T", "dy: T"}, - // Attr defs - {{"T: {half, float, double}"}}, - // Nodes - nodes); - - std::unordered_map func_attr; - func_attr["T"].set_type(DT_FLOAT); - FunctionDefLibrary library; - std::unique_ptr item = - GrapplerItemFromFunctionDef(func, func_attr, library); - CHECK(item); - EXPECT_EQ("SubGrad", item->id); - EXPECT_EQ(12, item->graph.node_size()); - EXPECT_EQ(std::vector({"dx", "dy"}), item->fetch); - EXPECT_EQ(3, item->feed.size()); - EXPECT_EQ("x", item->feed[0].first); - EXPECT_EQ("y", item->feed[1].first); - EXPECT_EQ("dz", item->feed[2].first); - - for (const NodeDef &node : item->graph.node()) { - if (node.name() == "x" || node.name() == "y" || node.name() == "dz") { - EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "rx") { - EXPECT_EQ("BroadcastGradientArgs", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("sx:0", node.input(0)); - EXPECT_EQ("sy:0", node.input(1)); - } else if (node.name() == "sum_gx") { - EXPECT_EQ("Sum", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("gx:0", node.input(0)); - EXPECT_EQ("rx:0", node.input(1)); - } else if (node.name() == "sum_gy") { - EXPECT_EQ("Sum", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("gy:0", node.input(0)); - EXPECT_EQ("rx:1", node.input(1)); - } - } -} - -TEST_F(GrapplerItemBuilderTest, FromFunctionDefWithNestedFuncs) { - FunctionDefLibrary library; - *library.add_function() = FunctionDefHelper::Define( - // Name - "Swap", - // Args - {"i0: T", "i1: T"}, - // Return values - {"o0: T", "o1: T"}, - // Attr def - {"T: {float, double}"}, - // Nodes - {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}}, - {{"o1"}, "Identity", {"i0"}, {{"T", 
"$T"}}}}); - - FunctionDef func = FunctionDefHelper::Create( - // Name - "ManySwapsFirst", - // Args - {"x: float", "y: float"}, - // Return values - {"o: float"}, - // attr def - {}, - // Nodes - // o = x*x + y*y. Furthermore, The 1st swap depends on x2, and - // y2 depends on the 2nd swap. The 2nd swap has data dependency - // on the 1st swap. - {{{"a0"}, "Swap", {"x", "y"}, {{"T", DT_FLOAT}}, {"x2"}}, - {{"a1"}, "Swap", {"a0:o0:0", "a0:o1:0"}, {{"T", DT_FLOAT}}}, - {{"x2"}, "Mul", {"x", "x"}, {{"T", DT_FLOAT}}}, - {{"y2"}, "Mul", {"y", "y"}, {{"T", DT_FLOAT}}, {"a1"}}, - {{"o"}, "Add", {"x2:z:0", "y2:z:0"}, {{"T", DT_FLOAT}}}}, - {{"o", "o:z:0"}}); - - std::unordered_map func_attr; - func_attr["T"].set_type(DT_FLOAT); - std::unique_ptr item = - GrapplerItemFromFunctionDef(func, func_attr, library); - - for (const NodeDef &node : item->graph.node()) { - if (node.name() == "x" || node.name() == "y") { - EXPECT_EQ("Placeholder", node.op()); - EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); - EXPECT_EQ(0, node.input_size()); - } else if (node.name() == "a0") { - EXPECT_EQ("Swap", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("y", node.input(1)); - EXPECT_EQ("^x2", node.input(2)); - } else if (node.name() == "a1") { - EXPECT_EQ("Swap", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("a0:0", node.input(0)); - EXPECT_EQ("a0:1", node.input(1)); - } else if (node.name() == "x2") { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("x", node.input(1)); - } else if (node.name() == "y2") { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("y", node.input(0)); - EXPECT_EQ("y", node.input(1)); - EXPECT_EQ("^a1", node.input(2)); - } else if (node.name() == "o") { - EXPECT_EQ("Add", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("x2:0", node.input(0)); - EXPECT_EQ("y2:0", node.input(1)); - } - } -} - } // namespace } // 
namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index a52d1c8df2..bd41854c41 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -132,6 +132,44 @@ tf_cc_test( ], ) +cc_library( + name = "function_optimizer", + srcs = ["function_optimizer.cc"], + hdrs = [ + "function_optimizer.h", + ], + visibility = ["//visibility:public"], + deps = [ + ":graph_optimizer", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler/utils:functions", + ], +) + +tf_cc_test( + name = "function_optimizer_test", + srcs = ["function_optimizer_test.cc"], + deps = [ + ":function_optimizer", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/core:all_kernels", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:direct_session", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + "//tensorflow/core/grappler/utils:grappler_test", + ], +) + cc_library( name = "graph_rewriter", srcs = ["graph_rewriter.cc"], diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc new file mode 100644 index 0000000000..efc4f2f4bd --- /dev/null +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -0,0 +1,148 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/function_optimizer.h" +#include +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op_def.pb.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils/functions.h" + +namespace tensorflow { +namespace grappler { + +Status InlineFunction(const NodeDef& node, const FunctionDef& func, + GraphDef* graph) { + const std::unordered_map attr(node.attr().begin(), + node.attr().end()); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, attr, library); + + std::unordered_map input_nodes; + for (int i = 0; i < func.signature().input_arg_size(); ++i) { + const OpDef::ArgDef& arg = func.signature().input_arg(i); + input_nodes[arg.name()] = i; + } + + // Add an IdentityN op to hook the function inputs to: this ensures that + // they're all evaluated before the evaluation of the function body starts. 
+ NodeDef* func_inputs = graph->add_node(); + func_inputs->set_name(strings::StrCat(node.name(), "/", "inlined_inputs")); + func_inputs->set_op("IdentityN"); + *func_inputs->mutable_input() = node.input(); + AttrValue::ListValue* type_list = + (*func_inputs->mutable_attr())["T"].mutable_list(); + for (const OpDef::ArgDef& arg : func.signature().input_arg()) { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid input argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); + } + + for (NodeDef& func_body_node : *item->graph.mutable_node()) { + if (input_nodes.find(func_body_node.name()) != input_nodes.end()) { + // Turn input placeholders into identity nodes + if (IsPlaceholder(func_body_node)) { + func_body_node.set_op("Identity"); + } + CHECK_EQ(0, func_body_node.input_size()); + int input_id = input_nodes[func_body_node.name()]; + func_body_node.add_input( + strings::StrCat(func_inputs->name(), ":", input_id)); + } else { + // Update the input names. + for (string& input : *func_body_node.mutable_input()) { + input = strings::StrCat(node.name(), "/", input); + } + } + + // Add the node name as a prefix to avoid collisions after inlining + func_body_node.set_name( + strings::StrCat(node.name(), "/", func_body_node.name())); + + // Move the node to the main graph + graph->add_node()->Swap(&func_body_node); + } + + // Add an IdentityN op to hook the function outputs to: this ensures that the + // function body is fully evaluated before its fanout gets scheduled. 
+ NodeDef* func_outputs = graph->add_node(); + func_outputs->set_name(node.name()); + func_outputs->set_op("IdentityN"); + type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); + for (const OpDef::ArgDef& arg : func.signature().output_arg()) { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid output argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); + func_outputs->add_input(strings::StrCat(node.name(), "/", arg.name())); + } + + return Status::OK(); +} + +Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + std::unordered_map functions; + for (const FunctionDef& func : item.graph.library().function()) { + if (func.attr().count("_noinline") == 0) { + functions[func.signature().name()] = &func; + } + } + + // Nothing to do. + if (functions.empty()) { + *optimized_graph = item.graph; + return Status::OK(); + } + + // Inline functions when possible. + for (const NodeDef& node : item.graph.node()) { + auto it = functions.find(node.op()); + if (it == functions.end()) { + *optimized_graph->add_node() = node; + } else { + TF_RETURN_IF_ERROR(InlineFunction(node, *it->second, optimized_graph)); + } + } + + // TODO(bsteiner): specialize the implementation of functions that can't be + // inlined based on the context in which they're instantiated. + + // TODO(bsteiner): trim the library to remove unused function definitions + *optimized_graph->mutable_library() = item.graph.library(); + *optimized_graph->mutable_versions() = item.graph.versions(); + + return Status::OK(); +} + +void FunctionOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, + double result) { + // Nothing to do for FunctionOptimizer. 
+} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.h b/tensorflow/core/grappler/optimizers/function_optimizer.h new file mode 100644 index 0000000000..5c80226e9d --- /dev/null +++ b/tensorflow/core/grappler/optimizers/function_optimizer.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_OPTIMIZERS_FUNCTION_OPTIMIZER_H_ +#define TENSORFLOW_GRAPPLER_OPTIMIZERS_FUNCTION_OPTIMIZER_H_ + +#include "tensorflow/core/grappler/optimizers/graph_optimizer.h" + +namespace tensorflow { +namespace grappler { + +// Remap TensorFlow subgraphs onto alternative operations or collection of +// operations to make the overall graph more efficient. 
+class FunctionOptimizer : public GraphOptimizer { + public: + FunctionOptimizer() {} + ~FunctionOptimizer() override {} + + string name() const override { return "function_optimizer"; }; + + Status Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) override; + + void Feedback(Cluster* cluster, const GrapplerItem& item, + const GraphDef& optimized_graph, double result) override; +}; + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_OPTIMIZERS_FUNCTION_OPTIMIZER_H_ diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc new file mode 100644 index 0000000000..b8e05a5296 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -0,0 +1,98 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/optimizers/function_optimizer.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils/grappler_test.h" +#include "tensorflow/core/lib/core/status_test_util.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class FunctionOptimizerTest : public GrapplerTest {}; + +TEST_F(FunctionOptimizerTest, SimpleFunction) { + // Build a graph to compute y = XTimesTwo(x) + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "XTimesTwo", {"x"}, {{"T", DT_FLOAT}}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + test::function::XTimesTwo(), + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int count = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "y/inlined_inputs") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("x", node.input(0)); + } else if (node.name() == "y/x") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/inlined_inputs:0", node.input(0)); + } else if (node.name() == "y/two") { + count++; + EXPECT_EQ("Const", node.op()); + } else if (node.name() == "y/scale") { + count++; + EXPECT_EQ("Cast", node.op()); + } else if (node.name() == "y/y") { + count++; + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("y/x", node.input(0)); + EXPECT_EQ("y/scale:0", node.input(1)); + } else if (node.name() == "y") { + count++; + 
EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/y", node.input(0)); + } else if (node.name() == "z") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y", node.input(0)); + } + } + EXPECT_EQ(7, count); + + item.fetch = {"z"}; + Tensor pi(DT_FLOAT, {}); + pi.flat()(0) = 3.14f; + item.feed.emplace_back("x", pi); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 5d32609434..fc05713494 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -146,3 +146,35 @@ cc_library( "//tensorflow/core/grappler:utils", ], ) + +cc_library( + name = "functions", + srcs = [ + "functions.cc", + ], + hdrs = ["functions.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/grappler:grappler_item", + "//tensorflow/core/grappler:utils", + ], +) + +tf_cc_test( + name = "functions_test", + srcs = ["functions_test.cc"], + deps = [ + ":functions", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:all_kernels", + "//tensorflow/core:framework", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core:testlib", + ], +) diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc new file mode 100644 index 0000000000..37b00e0a30 --- /dev/null +++ b/tensorflow/core/grappler/utils/functions.cc @@ -0,0 +1,140 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/core/grappler/utils/functions.h" + +#include + +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/framework/graph_def_util.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/types.pb.h" +#include "tensorflow/core/grappler/utils.h" + +namespace tensorflow { +namespace grappler { + +std::unique_ptr GrapplerItemFromFunctionDef( + const FunctionDef& func, + const std::unordered_map& func_attr, + const FunctionDefLibrary& library) { + if (func.signature().name().empty()) { + LOG(ERROR) << "function name must be specified."; + return nullptr; + } + std::unique_ptr new_item(new GrapplerItem()); + new_item->id = func.signature().name(); + + std::unordered_map port_map; + + // Add the function inputs as placeholder + for (const auto& inp : func.signature().input_arg()) { + NodeDef* ph = new_item->graph.add_node(); + ph->set_name(inp.name()); + ph->set_op("Placeholder"); + if (inp.type() != DT_INVALID) { + (*ph->mutable_attr())["T"].set_type(inp.type()); + } else { + auto it = func_attr.find(inp.type_attr()); + if (it == func_attr.end()) { + LOG(ERROR) << "Unknown type attribute " << inp.type_attr() + << " for function input " << 
inp.name(); + return nullptr; + } else { + (*ph->mutable_attr())["T"] = it->second; + } + } + port_map[inp.name()] = inp.name(); + } + + // Add the function body to the graph. + FunctionLibraryDefinition func_def(OpRegistry::Global(), library); + + for (const NodeDef& node : func.node_def()) { + NodeDef* new_node = new_item->graph.add_node(); + *new_node = node; + // Replace the placeholder attribute values with the specified value. + for (auto& attr : *new_node->mutable_attr()) { + const string& ph_name = attr.second.placeholder(); + auto it = func_attr.find(ph_name); + if (it != func_attr.end()) { + attr.second = it->second; + } + } + + // Functions use a custom format to encode connectivity. Map these custom + // strings to regular ones. + const OpRegistrationData* registration; + Status status = func_def.LookUp(node.op(), &registration); + if (!status.ok()) { + LOG(ERROR) << "Op " << node.op() << " not registered: " << status; + return nullptr; + } + + tensorflow::NameRangeMap inputs; + tensorflow::NameRangeMap outputs; + status = tensorflow::NameRangesForNode(node, registration->op_def, &inputs, + &outputs); + if (!status.ok()) { + LOG(ERROR) << "Op " << node.op() << " invalid: " << status; + return nullptr; + } + for (const auto& name_range : outputs) { + string port_prefix = + strings::StrCat(node.name(), ":", name_range.first, ":"); + int index_start = name_range.second.first; + int index_end = name_range.second.second; + for (int i = index_start; i < index_end; ++i) { + string port_id = strings::StrCat(port_prefix, i - index_start); + string port_name = strings::StrCat(node.name(), ":", i); + port_map[port_id] = port_name; + } + } + } + + for (auto& node : *new_item->graph.mutable_node()) { + // Rewrite the inputs to use the normal naming convention. + for (int i = 0; i < node.input_size(); ++i) { + const string& input = node.input(i); + if (IsControlInput(input)) { + // No need to remap control dependencies. 
+ continue; + } else { + auto it = port_map.find(input); + if (it == port_map.end()) { + LOG(ERROR) << "Unknown input: " << input; + return nullptr; + } + node.set_input(i, it->second); + } + } + } + + // Add the function outputs to the list of fetch nodes. + for (const auto& out : func.signature().output_arg()) { + new_item->fetch.emplace_back(out.name()); + } + // Add the function inputs to the list of feeds. + for (const auto& inp : func.signature().input_arg()) { + new_item->feed.emplace_back(inp.name(), Tensor()); + } + + return new_item; +} + +} // end namespace grappler +} // end namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions.h b/tensorflow/core/grappler/utils/functions.h new file mode 100644 index 0000000000..8f9b7d848a --- /dev/null +++ b/tensorflow/core/grappler/utils/functions.h @@ -0,0 +1,39 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_GRAPPLER_UTILS_FUNCTIONS_H_ +#define TENSORFLOW_GRAPPLER_UTILS_FUNCTIONS_H_ + +#include +#include +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/function.pb.h" +#include "tensorflow/core/grappler/grappler_item.h" + +namespace tensorflow { + +namespace grappler { + +// Factory method for creating a GrapplerItem from a FunctionDef. +// Returns nullptr if the given function def cannot be converted. 
+std::unique_ptr<GrapplerItem> GrapplerItemFromFunctionDef( + const FunctionDef& func, + const std::unordered_map<string, AttrValue>& func_attr, + const FunctionDefLibrary& library); + +} // end namespace grappler +} // end namespace tensorflow + +#endif // TENSORFLOW_GRAPPLER_UTILS_FUNCTIONS_H_ diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc new file mode 100644 index 0000000000..25ccb50084 --- /dev/null +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -0,0 +1,232 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/utils/functions.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/meta_graph.pb.h" + +namespace tensorflow { +namespace grappler { +namespace { + +class FunctionsTest : public ::testing::Test {}; + +TEST_F(FunctionsTest, FromSimpleFunctionDef) { + const Tensor kTwo = test::AsScalar(2); + FunctionDef func = FunctionDefHelper::Define( + // Name + "XTimesTwo", + // Args + {"x: T"}, + // Return values + {"y: T"}, + // Attr def + {"T: {float, double, int32, int64}"}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"scale"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}, + {{"y"}, "Mul", {"x", "scale"}, {{"T", "$T"}}}, + }); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + CHECK(item); + EXPECT_EQ("XTimesTwo", item->id); + EXPECT_EQ(4, item->graph.node_size()); + EXPECT_EQ(std::vector({"y"}), item->fetch); + EXPECT_EQ(1, item->feed.size()); + EXPECT_EQ("x", item->feed[0].first); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "x") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "two") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "scale") { + EXPECT_EQ("Cast", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("DstT").type()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("two:0", node.input(0)); + } 
else if (node.name() == "y") { + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("scale:0", node.input(1)); + } + } +} + +TEST_F(FunctionsTest, FromFunctionDefWithMultiOutputNodes) { + // Gradient graph for the Subtract operation + std::vector nodes = { + {{"sx"}, "Shape", {"x"}}, + {{"sy"}, "Shape", {"y"}}, + {{"gx"}, "Identity", {"dz"}}, + {{"gy"}, "Neg", {"dz"}}, + {{"rx", "ry"}, "BroadcastGradientArgs", {"sx", "sy"}}, + {{"sum_gx"}, "Sum", {"gx", "rx"}}, + {{"dx"}, "Reshape", {"sum_gx", "sx"}}, + {{"sum_gy"}, "Sum", {"gy", "ry"}}, + {{"dy"}, "Reshape", {"sum_gy", "sy"}}, + }; + + for (auto &n : nodes) { + // "BroadcastGradientArgs" doesn't need any attrs. + if (n.attr.empty() && n.op != "BroadcastGradientArgs") { + n.attr = {{"T", "$T"}}; + } + } + FunctionDef func = FunctionDefHelper::Define( + // Name + "SubGrad", + // Arg defs + {"x: T", "y: T", "dz: T"}, + // Ret val defs + {"dx: T", "dy: T"}, + // Attr defs + {{"T: {half, float, double}"}}, + // Nodes + nodes); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + CHECK(item); + EXPECT_EQ("SubGrad", item->id); + EXPECT_EQ(12, item->graph.node_size()); + EXPECT_EQ(std::vector({"dx", "dy"}), item->fetch); + EXPECT_EQ(3, item->feed.size()); + EXPECT_EQ("x", item->feed[0].first); + EXPECT_EQ("y", item->feed[1].first); + EXPECT_EQ("dz", item->feed[2].first); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "x" || node.name() == "y" || node.name() == "dz") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "rx") { + EXPECT_EQ("BroadcastGradientArgs", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("sx:0", node.input(0)); + 
EXPECT_EQ("sy:0", node.input(1)); + } else if (node.name() == "sum_gx") { + EXPECT_EQ("Sum", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("gx:0", node.input(0)); + EXPECT_EQ("rx:0", node.input(1)); + } else if (node.name() == "sum_gy") { + EXPECT_EQ("Sum", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("gy:0", node.input(0)); + EXPECT_EQ("rx:1", node.input(1)); + } + } +} + +TEST_F(FunctionsTest, FromFunctionDefWithNestedFuncs) { + FunctionDefLibrary library; + *library.add_function() = FunctionDefHelper::Define( + // Name + "Swap", + // Args + {"i0: T", "i1: T"}, + // Return values + {"o0: T", "o1: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"o0"}, "Identity", {"i1"}, {{"T", "$T"}}}, + {{"o1"}, "Identity", {"i0"}, {{"T", "$T"}}}}); + + FunctionDef func = FunctionDefHelper::Create( + // Name + "ManySwapsFirst", + // Args + {"x: float", "y: float"}, + // Return values + {"o: float"}, + // attr def + {}, + // Nodes + // o = x*x + y*y. Furthermore, The 1st swap depends on x2, and + // y2 depends on the 2nd swap. The 2nd swap has data dependency + // on the 1st swap. 
+ {{{"a0"}, "Swap", {"x", "y"}, {{"T", DT_FLOAT}}, {"x2"}}, + {{"a1"}, "Swap", {"a0:o0:0", "a0:o1:0"}, {{"T", DT_FLOAT}}}, + {{"x2"}, "Mul", {"x", "x"}, {{"T", DT_FLOAT}}}, + {{"y2"}, "Mul", {"y", "y"}, {{"T", DT_FLOAT}}, {"a1"}}, + {{"o"}, "Add", {"x2:z:0", "y2:z:0"}, {{"T", DT_FLOAT}}}}, + {{"o", "o:z:0"}}); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "x" || node.name() == "y") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "a0") { + EXPECT_EQ("Swap", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("^x2", node.input(2)); + } else if (node.name() == "a1") { + EXPECT_EQ("Swap", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("a0:0", node.input(0)); + EXPECT_EQ("a0:1", node.input(1)); + } else if (node.name() == "x2") { + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("x", node.input(1)); + } else if (node.name() == "y2") { + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("^a1", node.input(2)); + } else if (node.name() == "o") { + EXPECT_EQ("Add", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("x2:0", node.input(0)); + EXPECT_EQ("y2:0", node.input(1)); + } + } +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index fef8e97b6e..79b2aa2808 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -46,8 +46,8 @@ std::vector 
GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { session->Run(run_options, {}, {}, item.init_ops, &dummy, nullptr)); } std::vector output_tensors; - TF_CHECK_OK( - session->Run(run_options, {}, item.fetch, {}, &output_tensors, nullptr)); + TF_CHECK_OK(session->Run(run_options, item.feed, item.fetch, {}, + &output_tensors, nullptr)); TF_CHECK_OK(session->Close()); return output_tensors; } -- GitLab From 1f18f757042e678cc935f645e9e5c21208ddc9ac Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 11:40:05 -0800 Subject: [PATCH 140/884] Don't crash on missing inputs in dependency analyzer. This is a temporary mitigation until the underlying bug is found. PiperOrigin-RevId: 187207594 --- tensorflow/core/grappler/optimizers/dependency_optimizer.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index edb0db65e9..b47cba5ff7 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -286,7 +286,10 @@ void DependencyOptimizer::OptimizeNode(int node_idx, std::vector input_nodes; for (int i = 0; i < num_inputs; ++i) { NodeDef* input_node = node_map_->GetNode(node->input(i)); - CHECK_NE(input_node, nullptr); + if (input_node == nullptr) { + LOG(ERROR) << "Invalid input " << node->input(i); + return; + } input_nodes.push_back(input_node); } -- GitLab From 207af365eb719fa7af3b56e1723fe3f67b0c4f0f Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 27 Feb 2018 11:48:25 -0800 Subject: [PATCH 141/884] [TF:XLA] Bump open source llvm revision to r326181 PiperOrigin-RevId: 187208788 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 5b09c5e67d..fa3671b4c9 100644 --- a/tensorflow/workspace.bzl +++ 
b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", ], - sha256 = "63d4da54dc7bc9a79e2ad266d230f4f759520cccb344a2dd49c2c6383ab75285", - strip_prefix = "llvm-8f7bcdf3c65b9a47e35653d525135beb18f3ac25", + sha256 = "e6bb793bbdce37ee5643789a27d174f1cdd8e7323a69d5f331376eb34755ee0d", + strip_prefix = "llvm-832f2bf0d8908aea8160bab128708d521764fe8d", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From d429fe193f4c235cde8223804ea888c2eaa5ce68 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 27 Feb 2018 11:57:09 -0800 Subject: [PATCH 142/884] Improve our handling of bitcasts. - Do not fuse bitcasts in the CPU backend. Fused instructions lose their layout and a bitcast is meaningless without a layout. We were explicitly testing for this so I've changed the corresponding tests to use a reshape instead. - Fail the layout assignment if we see a bitcast. bitcasts are inherently layout sensitive and so a bitcast instruction present in the IR before layout assignment is a bug. 
PiperOrigin-RevId: 187210151 --- .../xla/service/cpu/cpu_instruction_fusion.cc | 1 - .../cpu/cpu_instruction_fusion_test.cc | 29 +++++++++---------- .../compiler/xla/service/layout_assignment.cc | 7 +++++ .../xla/service/layout_assignment_test.cc | 21 ++++++++++++++ 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc index 482e04052d..0fc5a746bb 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion.cc @@ -30,7 +30,6 @@ bool CanBeLoopFused(const HloInstruction& hlo) { // These are the only ones we fuse since we rely on effective elemental IR // generation. return hlo.IsElementwise() || // - hlo.opcode() == HloOpcode::kBitcast || hlo.opcode() == HloOpcode::kBroadcast || hlo.opcode() == HloOpcode::kConcatenate || hlo.opcode() == HloOpcode::kDynamicSlice || diff --git a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc index 595c3f55b3..6ed1cd31b1 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_instruction_fusion_test.cc @@ -77,7 +77,7 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Basic_1) { EXPECT_THAT(computation->root_instruction(), op::Fusion()); } -TEST_F(InstructionFusionTest, DotOperationFusion_Bitcast) { +TEST_F(InstructionFusionTest, DotOperationNoFusion_Bitcast) { HloComputation::Builder builder(TestName()); HloInstruction* arg0 = builder.AddInstruction(HloInstruction::CreateParameter( 0, ShapeUtil::MakeShape(F32, {2, 512, 2, 128}), "arg0")); @@ -94,8 +94,7 @@ TEST_F(InstructionFusionTest, DotOperationFusion_Bitcast) { auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); EXPECT_EQ(dot, computation->root_instruction()); - 
EXPECT_TRUE(CpuInstructionFusion().Run(module.get()).ValueOrDie()); - EXPECT_THAT(computation->root_instruction(), op::Fusion()); + EXPECT_FALSE(CpuInstructionFusion().Run(module.get()).ValueOrDie()); } TEST_F(InstructionFusionTest, DotOperationFusion_Reshape) { @@ -244,35 +243,33 @@ class OpcodeFusionTest : public InstructionFusionTest { } }; -TEST_F(OpcodeFusionTest, Exponential_Bitcast_Negate) { +TEST_F(OpcodeFusionTest, Exponential_Reshape_Negate) { HloComputation::Builder builder(TestName()); Shape param_shape = ShapeUtil::MakeShape(F32, {1, 4}); Shape result_shape = ShapeUtil::MakeShape(F32, {4}); HloInstruction* param0 = builder.AddInstruction( HloInstruction::CreateParameter(0, param_shape, "param")); - // InstructionFusion::ShouldFuse() precludes fusing a bitcast whose operand - // is a parameter, so create an operand between the parameter and bitcast. HloInstruction* exp1 = builder.AddInstruction( HloInstruction::CreateUnary(param_shape, HloOpcode::kExp, param0)); - HloInstruction* bitcast2 = builder.AddInstruction( - HloInstruction::CreateUnary(result_shape, HloOpcode::kBitcast, exp1)); + HloInstruction* reshape2 = + builder.AddInstruction(HloInstruction::CreateReshape(result_shape, exp1)); builder.AddInstruction( - HloInstruction::CreateUnary(result_shape, HloOpcode::kNegate, bitcast2)); + HloInstruction::CreateUnary(result_shape, HloOpcode::kNegate, reshape2)); auto module = CreateNewModule(); module->AddEntryComputation(builder.Build()); RunFusionAndCheckOpcodesWereFused( - module.get(), {HloOpcode::kNegate, HloOpcode::kBitcast, HloOpcode::kExp, + module.get(), {HloOpcode::kNegate, HloOpcode::kReshape, HloOpcode::kExp, HloOpcode::kParameter}); } -TEST_F(OpcodeFusionTest, Broadcast_Bitcast_DynamicSlice_Tanh) { +TEST_F(OpcodeFusionTest, Broadcast_Reshape_DynamicSlice_Tanh) { HloComputation::Builder builder(TestName()); Shape param_shape = ShapeUtil::MakeShape(F32, {8}); Shape starts_shape = ShapeUtil::MakeShape(F32, {2}); Shape broadcast_shape = 
ShapeUtil::MakeShape(F32, {1, 8, 8}); - Shape bitcast_shape = ShapeUtil::MakeShape(F32, {8, 8}); + Shape reshape_shape = ShapeUtil::MakeShape(F32, {8, 8}); Shape dynamic_slice_shape = ShapeUtil::MakeShape(F32, {4, 4}); HloInstruction* param0 = builder.AddInstruction( HloInstruction::CreateParameter(0, param_shape, "param")); @@ -280,11 +277,11 @@ TEST_F(OpcodeFusionTest, Broadcast_Bitcast_DynamicSlice_Tanh) { HloInstruction::CreateParameter(1, starts_shape, "starts")); HloInstruction* broadcast2 = builder.AddInstruction( HloInstruction::CreateBroadcast(broadcast_shape, param0, {1})); - HloInstruction* bitcast3 = builder.AddInstruction(HloInstruction::CreateUnary( - bitcast_shape, HloOpcode::kBitcast, broadcast2)); + HloInstruction* reshape3 = builder.AddInstruction( + HloInstruction::CreateReshape(reshape_shape, broadcast2)); HloInstruction* dynamic_slice4 = builder.AddInstruction(HloInstruction::CreateDynamicSlice( - dynamic_slice_shape, bitcast3, param1, {4, 4})); + dynamic_slice_shape, reshape3, param1, {4, 4})); builder.AddInstruction(HloInstruction::CreateUnary( dynamic_slice_shape, HloOpcode::kTanh, dynamic_slice4)); @@ -293,7 +290,7 @@ TEST_F(OpcodeFusionTest, Broadcast_Bitcast_DynamicSlice_Tanh) { RunFusionAndCheckOpcodesWereFused( module.get(), - {HloOpcode::kTanh, HloOpcode::kDynamicSlice, HloOpcode::kBitcast, + {HloOpcode::kTanh, HloOpcode::kDynamicSlice, HloOpcode::kReshape, HloOpcode::kBroadcast, HloOpcode::kParameter, HloOpcode::kParameter}); } diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 4929300f7d..39f9120e55 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -1561,6 +1561,13 @@ StatusOr LayoutAssignment::Run(HloModule* module) { // infeeds. Clearing the layouts here avoids hiding potential bugs in the // layout assignment pass that may accidently use the existing layout. 
for (HloInstruction* instruction : computation->instructions()) { + if (instruction->opcode() == HloOpcode::kBitcast) { + // bitcasts are inherently layout sensitive and so a bitcast instruction + // present in the IR before layout assignment is a bug. + return InternalError( + "Unexpected bitcast operation seen during layout assignment: %s.", + instruction->ToString().c_str()); + } if (instruction->opcode() != HloOpcode::kInfeed) { LayoutUtil::ClearLayout(instruction->mutable_shape()); } diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 62feb7c1e9..4b1c9bad41 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -796,5 +796,26 @@ TEST_F(LayoutAssignmentTest, ConditionalAsymmetricLayout) { EXPECT_THAT(false_result->opcode(), HloOpcode::kCopy); } +TEST_F(LayoutAssignmentTest, InternalErrorOnBitcast) { + auto builder = HloComputation::Builder(TestName()); + auto constant0 = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR2WithLayout( + {{1.0, 2.0}, {3.0, 4.0}}, LayoutUtil::MakeLayout({0, 1})))); + builder.AddInstruction(HloInstruction::CreateUnary( + constant0->shape(), HloOpcode::kBitcast, constant0)); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + ComputationLayout computation_layout( + module->entry_computation()->ComputeProgramShape()); + LayoutAssignment layout_assignment(&computation_layout); + Status error_status = layout_assignment.Run(module.get()).status(); + EXPECT_FALSE(error_status.ok()); + EXPECT_THAT( + error_status.error_message(), + ::testing::HasSubstr( + "Unexpected bitcast operation seen during layout assignment")); +} + } // namespace } // namespace xla -- GitLab From e504797de0b1112caea5080c3ab2060156c4e8a1 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 27 Feb 2018 12:05:41 -0800 Subject: [PATCH 143/884] Use 
a couple of type aliases for brevity; NFC PiperOrigin-RevId: 187211560 --- .../compiler/xla/service/hlo_evaluator.cc | 133 ++++++++---------- 1 file changed, 62 insertions(+), 71 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index afbfdac05e..8c7459099d 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -51,6 +51,10 @@ namespace xla { namespace { +using tensorflow::gtl::ArraySlice; +using tensorflow::gtl::FlatSet; +using tensorflow::gtl::optional; + template struct is_complex_t : public std::false_type {}; @@ -105,11 +109,10 @@ StatusOr> Compare(const Shape& shape, HloOpcode opcode, } auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { - return compare_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); + TF_RETURN_IF_ERROR(result->Populate([&](ArraySlice multi_index) { + return compare_op(lhs_literal.Get(multi_index), + rhs_literal.Get(multi_index)); + })); return std::move(result); } @@ -136,11 +139,10 @@ StatusOr> Compare( } auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { - return compare_op(lhs_literal.Get(multi_index), - rhs_literal.Get(multi_index)); - })); + TF_RETURN_IF_ERROR(result->Populate([&](ArraySlice multi_index) { + return compare_op(lhs_literal.Get(multi_index), + rhs_literal.Get(multi_index)); + })); return std::move(result); } @@ -165,8 +167,8 @@ StatusOr> ElementWiseUnaryOpImpl( auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { return unary_op(operand_literal.Get(multi_index)); })); return std::move(result); @@ -178,7 +180,7 @@ StatusOr> 
ElementWiseUnaryOpImpl( // with the base index. void IterateThroughWindow( const Shape& window_shape, const Window& window, const Shape& base_shape, - const tensorflow::gtl::ArraySlice& window_count_index, + const ArraySlice& window_count_index, const std::function&)>& f) { const int64 rank = ShapeUtil::Rank(base_shape); DimensionVector window_index(rank); @@ -332,13 +334,12 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { operand_to_broadcast.shape().dimensions(i)); } - return output->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { - for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { - broadcast_indices[i] = multi_index[broadcast->dimensions(i)]; - } - return operand_to_broadcast.Get(broadcast_indices); - }); + return output->Populate([&](ArraySlice multi_index) { + for (int64 i = 0; i < broadcast->dimensions().size(); ++i) { + broadcast_indices[i] = multi_index[broadcast->dimensions(i)]; + } + return operand_to_broadcast.Get(broadcast_indices); + }); } template < @@ -902,8 +903,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); auto result = Literal::CreateFromShape(result_shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice out_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice out_index) { std::vector from_index(out_index.begin(), out_index.end()); for (const int64 dim : reverse_dimensions) { from_index[dim] = result_shape.dimensions(dim) - 1 - out_index[dim]; @@ -978,7 +979,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { DimensionVector rhs_index(rhs_rank); DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size()); - auto func = [&](tensorflow::gtl::ArraySlice out_index) { + auto func = [&](ArraySlice out_index) { ElementwiseT result_val = static_cast(0); std::fill(lhs_index.begin(), lhs_index.end(), 0); @@ -1100,9 +1101,8 @@ class 
HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } std::vector rhs_non_batch_non_contracting_dims; - tensorflow::gtl::FlatSet batch_dims_set( - dnums.rhs_batch_dimensions().begin(), - dnums.rhs_batch_dimensions().end()); + FlatSet batch_dims_set(dnums.rhs_batch_dimensions().begin(), + dnums.rhs_batch_dimensions().end()); for (int64 i = 0; i < rhs_rank; i++) { if (i != rhs_contracting_dimension && batch_dims_set.count(i) == 0) { rhs_non_batch_non_contracting_dims.push_back(i); @@ -1114,8 +1114,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { DimensionVector lhs_index(lhs_rank); DimensionVector rhs_index(rhs_rank); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice result_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice result_index) { ElementwiseT result_val = static_cast(0); // Find the corresponding non-contracting indices for lhs and rhs. @@ -1209,9 +1209,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { parent_->GetEvaluatedLiteralFor(pad->operand(1)).Get({}); auto result = Literal::CreateFromShape(pad->shape()); TF_RETURN_IF_ERROR(result->Populate( - [&scalar](tensorflow::gtl::ArraySlice multi_index) { - return scalar; - })); + [&scalar](ArraySlice multi_index) { return scalar; })); const Literal& evaluated_operand = parent_->GetEvaluatedLiteralFor(pad->operand(0)); @@ -1375,8 +1373,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(map->shape()); HloEvaluator embedded_evaluator; - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { std::vector> arg_literals; arg_literals.reserve(operands.size()); @@ -1466,7 +1464,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { Status HandleReduce(HloInstruction* reduce) override { auto arg = reduce->operand(0); auto init_value = 
reduce->operand(1); - tensorflow::gtl::ArraySlice dimensions(reduce->dimensions()); + ArraySlice dimensions(reduce->dimensions()); HloComputation* function = reduce->to_apply(); TF_RET_CHECK(ShapeUtil::Rank(reduce->shape()) == ShapeUtil::Rank(arg->shape()) - dimensions.size()); @@ -1511,8 +1509,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { HloEvaluator embedded_evaluator; // For each resulting dimension, calculate and assign computed value. - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { ReturnT result_val = init_scalar; std::vector base(arg_dimensions.size()); @@ -1566,9 +1564,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // Initialize result array with the init value. TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice output_index) { - return init_scalar; - })); + [&](ArraySlice output_index) { return init_scalar; })); std::vector window_dimension_sizes; for (const auto& window_dimension : window.dimensions()) { @@ -1601,8 +1597,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // 2. Using the selected index, scatter value from `source` to result. We // do this by iterating through the window, and compare each index with // the selected index. - tensorflow::gtl::optional selected_val; - tensorflow::gtl::optional> selected_index; + optional selected_val; + optional> selected_index; IterateThroughWindow( window_shape, window, operand_literal.shape(), source_index, @@ -1698,8 +1694,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { HloEvaluator embedded_evaluator; // For each resulting dimension, calculate and assign computed value. 
- TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice output_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice output_index) { ReturnT result_val = init_scalar; std::fill(window_index.begin(), window_index.end(), 0); @@ -1749,7 +1745,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { const int64 rank = ShapeUtil::Rank(operand->shape()); const Literal& operand_literal = parent_->GetEvaluatedLiteralFor(operand); - auto func = [&](tensorflow::gtl::ArraySlice out_index) { + auto func = [&](ArraySlice out_index) { DimensionVector operand_index(rank); for (int64 i = 0; i < rank; ++i) { operand_index[i] = @@ -1930,8 +1926,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { std::vector operand_indices(start.size()); auto result = Literal::CreateFromShape(result_shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { for (int64 i = 0; i < operand_indices.size(); ++i) { CHECK_GE(multi_index[i] + start[i], 0); // Mod is only used here to be consistent with the existing @@ -2014,8 +2010,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { return ConvertBinaryFunction(binary_op)( lhs_literal.Get(multi_index), rhs_literal.Get(multi_index)); @@ -2052,8 +2048,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(shape); - TF_RETURN_IF_ERROR(result->Populate( - [&](tensorflow::gtl::ArraySlice multi_index) { + TF_RETURN_IF_ERROR( + result->Populate([&](ArraySlice multi_index) { return ternary_op(lhs_literal.Get(multi_index), rhs_literal.Get(multi_index), ehs_literal.Get(multi_index)); @@ -2107,8 +2103,7 @@ 
HloEvaluator::HloEvaluator() { template StatusOr> HloEvaluator::Evaluate( - const HloModule& module, - tensorflow::gtl::ArraySlice arg_literals) { + const HloModule& module, ArraySlice arg_literals) { XLA_VLOG_LINES(2, "HloEvaluator::Evaluate module:\n" + module.ToString()); evaluated_.clear(); @@ -2125,8 +2120,7 @@ StatusOr> HloEvaluator::Evaluate( template StatusOr> HloEvaluator::Evaluate( - const HloComputation& computation, - tensorflow::gtl::ArraySlice arg_literals) { + const HloComputation& computation, ArraySlice arg_literals) { XLA_VLOG_LINES( 2, "HloEvaluator::Evaluate computation:\n" + computation.ToString()); @@ -2142,8 +2136,7 @@ StatusOr> HloEvaluator::Evaluate( template StatusOr> HloEvaluator::Evaluate( - HloInstruction* instruction, - tensorflow::gtl::ArraySlice arg_literals) { + HloInstruction* instruction, ArraySlice arg_literals) { TF_RET_CHECK(hlo_query::AllOperandsAreParametersOrConstants(*instruction)); TF_RETURN_IF_ERROR(ShapeUtil::ValidateShape(instruction->shape())); @@ -2268,8 +2261,7 @@ Status HloEvaluator::HandleTranspose(HloInstruction* transpose) { } Status HloEvaluator::HandleConcatenate(HloInstruction* concatenate) { - tensorflow::gtl::ArraySlice operands( - concatenate->operands()); + ArraySlice operands(concatenate->operands()); // The result concatenate dimension is going to be the sum of all // concatenate dimensions of the operands taking part of the operation. const Shape& reference_shape = operands[0]->shape(); @@ -2532,28 +2524,27 @@ Status HloEvaluator::Postprocess(HloInstruction* hlo) { // Explicit instantiation of templatized Evaluate* methods. 
// -template StatusOr> HloEvaluator::Evaluate< - const Literal*>(const HloModule& module, - tensorflow::gtl::ArraySlice arg_literals); +template StatusOr> +HloEvaluator::Evaluate(const HloModule& module, + ArraySlice arg_literals); template StatusOr> HloEvaluator::Evaluate>( - const HloModule& module, - tensorflow::gtl::ArraySlice> arg_literals); + const HloModule& module, ArraySlice> arg_literals); -template StatusOr> HloEvaluator::Evaluate< - const Literal*>(const HloComputation& computation, - tensorflow::gtl::ArraySlice arg_literals); +template StatusOr> +HloEvaluator::Evaluate(const HloComputation& computation, + ArraySlice arg_literals); template StatusOr> HloEvaluator::Evaluate>( const HloComputation& computation, - tensorflow::gtl::ArraySlice> arg_literals); + ArraySlice> arg_literals); -template StatusOr> HloEvaluator::Evaluate< - const Literal*>(HloInstruction* instruction, - tensorflow::gtl::ArraySlice arg_literals); +template StatusOr> +HloEvaluator::Evaluate(HloInstruction* instruction, + ArraySlice arg_literals); template StatusOr> HloEvaluator::Evaluate>( HloInstruction* instruction, - tensorflow::gtl::ArraySlice> arg_literals); + ArraySlice> arg_literals); } // namespace xla -- GitLab From 691f1e6de0ce628ed11406bd6fd2f599763bb7cc Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 12:06:33 -0800 Subject: [PATCH 144/884] Add consistency check: for constant arrays (those that have a buffer), there must be a shape, and its flat-size must equal the buffer length. 
PiperOrigin-RevId: 187211685 --- .../contrib/lite/toco/import_tensorflow.cc | 37 +++++++++++++++++++ tensorflow/contrib/lite/toco/model.h | 4 ++ tensorflow/contrib/lite/toco/tflite/import.cc | 3 ++ tensorflow/contrib/lite/toco/tooling_util.cc | 10 ++++- 4 files changed, 53 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 27d2f33a8d..52a0512e23 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -272,6 +272,39 @@ void ImportInt64Array(const TensorProto& input_tensor, Array* output_array) { } } +void ImportBoolArray(const TensorProto& input_tensor, Array* output_array) { + CHECK_EQ(input_tensor.dtype(), DT_BOOL); + const auto& input_shape = input_tensor.tensor_shape(); + CHECK_LE(input_shape.dim_size(), 4); + ImportShape(input_shape.dim(), output_array->mutable_shape()); + int input_flat_size = 1; + for (int k = 0; k < input_shape.dim_size(); k++) { + input_flat_size *= input_shape.dim(k).size(); + } + auto& output_bool_data = + output_array->GetMutableBuffer().data; + output_bool_data.resize(RequiredBufferSizeForShape(output_array->shape()), + false); + if (input_tensor.bool_val_size()) { + for (int i = 0; i < input_tensor.bool_val_size(); i++) { + output_bool_data[i] = input_tensor.bool_val(i); + } + } else if (input_tensor.tensor_content().size() == input_flat_size) { + std::vector buf(input_tensor.tensor_content().size()); + toco::port::CopyToBuffer(input_tensor.tensor_content(), buf.data()); + for (int i = 0; i < input_tensor.tensor_content().size(); i++) { + output_bool_data[i] = static_cast(buf[i]); + } + } else { + // Some graphs have bool const nodes without actual value... + // assuming that 'false' is implied. + // So far only encountered that in an array with 1 entry, let's + // require that until we encounter a graph where that's not the case. 
+ CHECK_EQ(output_bool_data.size(), 1); + output_bool_data[0] = false; + } +} + void ImportStringArray(const TensorProto& input_tensor, Array* output_array) { CHECK_EQ(input_tensor.dtype(), DT_STRING); const auto& input_shape = input_tensor.tensor_shape(); @@ -347,6 +380,10 @@ void ConvertConstOperator(const NodeDef& node, array.data_type = ArrayDataType::kString; ImportStringArray(tensor, &array); break; + case DT_BOOL: + array.data_type = ArrayDataType::kBool; + ImportBoolArray(tensor, &array); + break; default: array.data_type = ArrayDataType::kNone; // do nothing, silently ignore the Const data. diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index 346859ab39..d5df0fb951 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -244,6 +244,8 @@ struct GenericBuffer { // in containers and have the containers call the right subclass destructor. virtual ~GenericBuffer() {} + virtual int Length() const = 0; + const ArrayDataType type; protected: @@ -256,6 +258,8 @@ template struct Buffer : GenericBuffer { Buffer() : GenericBuffer(A) {} + int Length() const override { return data.size(); } + std::vector> data; }; diff --git a/tensorflow/contrib/lite/toco/tflite/import.cc b/tensorflow/contrib/lite/toco/tflite/import.cc index 5b1ab514b2..d2aeb78114 100644 --- a/tensorflow/contrib/lite/toco/tflite/import.cc +++ b/tensorflow/contrib/lite/toco/tflite/import.cc @@ -64,6 +64,9 @@ void ImportTensors(const ::tflite::Model& input_model, Model* model) { auto shape = input_tensor->shape(); if (shape) { + // If the shape is 0-dimensional, make sure to record it as such, + // as oppose to leaving the array without a shape. 
+ array.mutable_shape()->mutable_dims()->clear(); for (int i = 0; i < shape->Length(); ++i) { auto d = shape->Get(i); array.mutable_shape()->mutable_dims()->push_back(d); diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 9e72582238..1ab7b34331 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -84,6 +84,8 @@ string ArrayDataTypeName(ArrayDataType data_type) { return "Uint64"; case ArrayDataType::kString: return "String"; + case ArrayDataType::kBool: + return "Bool"; case ArrayDataType::kNone: return "None"; default: @@ -809,9 +811,15 @@ void CheckEachArray(const Model& model) { // It's OK to have a buffer or an alloc, but not both. // (Since allocs are for transient arrays without a buffer). CHECK(!array->buffer || !array->alloc); - // If there is a buffer, its type should be consistent with data_type. if (array->buffer) { + // If there is a buffer, its type should be consistent with data_type. CHECK(array->buffer->type == array->data_type); + // The presence of a fixed buffer should imply the presence of a fixed + // shape. + CHECK(array->has_shape()); + // The shape flat-size should agree with the buffer length. + CHECK_EQ(array->buffer->Length(), + RequiredBufferSizeForShape(array->shape())); } // Check name. Either "name_with_suffix_8", "name_with_port:3", but not -- GitLab From f97d233e79aa7d88057c8b8b355eda6cb3bfea07 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Feb 2018 12:08:24 -0800 Subject: [PATCH 145/884] Register the function optimizer in the meta optimizer. Made sure it's turned OFF by default until more validation is done. 
PiperOrigin-RevId: 187211957 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../core/grappler/optimizers/function_optimizer.cc | 5 +++++ .../core/grappler/optimizers/function_optimizer_test.cc | 7 +++++++ tensorflow/core/grappler/optimizers/meta_optimizer.cc | 9 +++++++++ tensorflow/core/grappler/utils/BUILD | 1 - tensorflow/core/protobuf/rewriter_config.proto | 2 ++ 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index bd41854c41..7b801db2c8 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -455,6 +455,7 @@ cc_library( ":custom_graph_optimizer", ":custom_graph_optimizer_registry", ":dependency_optimizer", + ":function_optimizer", ":graph_optimizer", ":layout_optimizer", ":loop_optimizer", diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index efc4f2f4bd..3c96ff869b 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -45,6 +45,7 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, NodeDef* func_inputs = graph->add_node(); func_inputs->set_name(strings::StrCat(node.name(), "/", "inlined_inputs")); func_inputs->set_op("IdentityN"); + func_inputs->set_device(node.device()); *func_inputs->mutable_input() = node.input(); AttrValue::ListValue* type_list = (*func_inputs->mutable_attr())["T"].mutable_list(); @@ -79,6 +80,9 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, func_body_node.set_name( strings::StrCat(node.name(), "/", func_body_node.name())); + // Make sure the node is placed + func_body_node.set_device(node.device()); + // Move the node to the main graph graph->add_node()->Swap(&func_body_node); } @@ -88,6 +92,7 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, NodeDef* 
func_outputs = graph->add_node(); func_outputs->set_name(node.name()); func_outputs->set_op("IdentityN"); + func_outputs->set_device(node.device()); type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); for (const OpDef::ArgDef& arg : func.signature().output_arg()) { auto it = attr.find(arg.type_attr()); diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index b8e05a5296..76a5c08d35 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -50,33 +50,40 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { if (node.name() == "y/inlined_inputs") { count++; EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("x", node.input(0)); } else if (node.name() == "y/x") { count++; EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("y/inlined_inputs:0", node.input(0)); } else if (node.name() == "y/two") { count++; EXPECT_EQ("Const", node.op()); + EXPECT_EQ(device, node.device()); } else if (node.name() == "y/scale") { count++; EXPECT_EQ("Cast", node.op()); + EXPECT_EQ(device, node.device()); } else if (node.name() == "y/y") { count++; EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(2, node.input_size()); EXPECT_EQ("y/x", node.input(0)); EXPECT_EQ("y/scale:0", node.input(1)); } else if (node.name() == "y") { count++; EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("y/y", node.input(0)); } else if (node.name() == "z") { count++; EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); EXPECT_EQ("y", node.input(0)); } diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc 
b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 7ae77207af..93658a6475 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.h" #include "tensorflow/core/grappler/optimizers/dependency_optimizer.h" +#include "tensorflow/core/grappler/optimizers/function_optimizer.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/optimizers/layout_optimizer.h" #include "tensorflow/core/grappler/optimizers/loop_optimizer.h" @@ -56,6 +57,9 @@ std::unique_ptr MetaOptimizer::NewOptimizer( if (optimizer == "pruning") { graph_optimizer.reset(new ModelPruner()); } + if (optimizer == "function") { + graph_optimizer.reset(new FunctionOptimizer()); + } if (optimizer == "constfold") { graph_optimizer.reset(new ConstantFolding(cpu_device_)); } @@ -90,6 +94,10 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, if (!cfg_.disable_model_pruning()) { optimizers.push_back(std::unique_ptr(new ModelPruner())); } + if (cfg_.function_optimization() == RewriterConfig::ON) { + optimizers.push_back( + std::unique_ptr(new FunctionOptimizer())); + } if (cfg_.constant_folding() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( new ConstantFolding(cfg_.constant_folding(), cpu_device_))); @@ -223,6 +231,7 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, bool MetaOptimizerEnabled(const RewriterConfig& cfg) { return !cfg.disable_model_pruning() || cfg.layout_optimizer() != RewriterConfig::OFF || + cfg.function_optimization() == RewriterConfig::ON || cfg.constant_folding() != RewriterConfig::OFF || cfg.dependency_optimization() != RewriterConfig::OFF || cfg.loop_optimization() == RewriterConfig::ON || diff --git 
a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index fc05713494..3dbad40cae 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -155,7 +155,6 @@ cc_library( hdrs = ["functions.h"], visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 875e4663db..9ebf217811 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -44,6 +44,8 @@ message RewriterConfig { Toggle dependency_optimization = 8; // Loop optimizations (default is OFF). Toggle loop_optimization = 9; + // Function optimizations (default is OFF). + Toggle function_optimization = 10; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; -- GitLab From 24a1c89187e49847fbd3575d626f1e374ce9ed18 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Tue, 27 Feb 2018 12:12:32 -0800 Subject: [PATCH 146/884] Allow eager metrics to save internal variables by using global_variables. PiperOrigin-RevId: 187212528 --- .../contrib/eager/python/metrics_impl.py | 20 +++++++++++++------ .../contrib/eager/python/metrics_test.py | 13 ++++++++++++ 2 files changed, 27 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index ea8dbf2b46..5571e77c70 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -93,11 +93,12 @@ class Metric(object): `aggregate()`, it is for use by TensorFlow infrastructure. 
""" - def __init__(self, name=None): + def __init__(self, name=None, use_global_variables=False): self._built = False self._vars = [] self._initial_values = {} self._updates = [] + self._use_global_variables = use_global_variables name = name or self.__class__.__name__ # Replace things like spaces in name to create a valid scope name. scope_name = _to_replace.sub("_", name) @@ -245,9 +246,14 @@ class Metric(object): """***Only for use by descendants of Metric***.""" if self._built: raise RuntimeError("Can't call add_variable() except in build().") - collections = None if context.in_eager_mode() else [ - ops.GraphKeys.LOCAL_VARIABLES, ops.GraphKeys.METRIC_VARIABLES - ] + if context.in_eager_mode(): + collections = None + else: + if self._use_global_variables: + collections = [ops.GraphKeys.GLOBAL_VARIABLES] + else: + collections = [ops.GraphKeys.LOCAL_VARIABLES] + collections += [ops.GraphKeys.METRIC_VARIABLES] v = variable_scope.get_variable( name, shape, @@ -267,8 +273,10 @@ class Mean(Metric): # TODO(josh11b): Maybe have a dtype argument that defaults to tf.float64? # Or defaults to type of the input if it is tf.float32, else tf.float64? 
- def __init__(self, name=None, dtype=dtypes.float64): - super(Mean, self).__init__(name=name) + def __init__(self, name=None, dtype=dtypes.float64, + use_global_variables=False): + super(Mean, self).__init__(name=name, + use_global_variables=use_global_variables) self.dtype = dtype def build(self, *args, **kwargs): diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index a9ecaa3f8b..c9106294dc 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -50,6 +50,19 @@ class MetricsTest(test.TestCase): self.assertEqual( set(m.variables), set(ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES))) + self.assertEqual(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES), []) + self.assertEqual( + set(m.variables), + set(ops.get_collection(ops.GraphKeys.METRIC_VARIABLES))) + + def testUseGlobalVariablesCollections(self): + with context.graph_mode(), ops.Graph().as_default(): + m = metrics.Mean(use_global_variables=True) + m(1000) + self.assertEqual( + set(m.variables), + set(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES))) + self.assertEqual(ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES), []) self.assertEqual( set(m.variables), set(ops.get_collection(ops.GraphKeys.METRIC_VARIABLES))) -- GitLab From 78376e4077f4e9d293811bdbc453c6d1b93db453 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 27 Feb 2018 12:34:17 -0800 Subject: [PATCH 147/884] Make Layers Checkpointable (This change is mostly API goldens by volume) Layers will inherit from CheckpointableBase since they do variable management themselves. A __setattr__ override would also likely slow down functional layers significantly. I believe the plan for Model is to piggyback on its existing __setattr__ override rather than having Model inherit from CheckpointableBase through Layer and Checkpointable itself. 
PiperOrigin-RevId: 187215512 --- .../eager/python/checkpointable_utils_test.py | 32 ++++--------------- tensorflow/python/layers/base.py | 21 +++++++----- tensorflow/python/training/checkpointable.py | 16 +++++++--- .../api/golden/tensorflow.keras.-model.pbtxt | 1 + .../golden/tensorflow.keras.-sequential.pbtxt | 1 + .../tensorflow.keras.layers.-activation.pbtxt | 1 + ...eras.layers.-activity-regularization.pbtxt | 1 + .../golden/tensorflow.keras.layers.-add.pbtxt | 1 + ...nsorflow.keras.layers.-alpha-dropout.pbtxt | 1 + ...low.keras.layers.-average-pooling1-d.pbtxt | 1 + ...low.keras.layers.-average-pooling2-d.pbtxt | 1 + ...low.keras.layers.-average-pooling3-d.pbtxt | 1 + .../tensorflow.keras.layers.-average.pbtxt | 1 + ...tensorflow.keras.layers.-avg-pool1-d.pbtxt | 1 + ...tensorflow.keras.layers.-avg-pool2-d.pbtxt | 1 + ...tensorflow.keras.layers.-avg-pool3-d.pbtxt | 1 + ...ow.keras.layers.-batch-normalization.pbtxt | 1 + ...nsorflow.keras.layers.-bidirectional.pbtxt | 1 + ...tensorflow.keras.layers.-concatenate.pbtxt | 1 + ...orflow.keras.layers.-conv-l-s-t-m2-d.pbtxt | 1 + .../tensorflow.keras.layers.-conv1-d.pbtxt | 1 + ...flow.keras.layers.-conv2-d-transpose.pbtxt | 1 + .../tensorflow.keras.layers.-conv2-d.pbtxt | 1 + ...flow.keras.layers.-conv3-d-transpose.pbtxt | 1 + .../tensorflow.keras.layers.-conv3-d.pbtxt | 1 + ...sorflow.keras.layers.-convolution1-d.pbtxt | 1 + ...ras.layers.-convolution2-d-transpose.pbtxt | 1 + ...sorflow.keras.layers.-convolution2-d.pbtxt | 1 + ...ras.layers.-convolution3-d-transpose.pbtxt | 1 + ...sorflow.keras.layers.-convolution3-d.pbtxt | 1 + ...tensorflow.keras.layers.-cropping1-d.pbtxt | 1 + ...tensorflow.keras.layers.-cropping2-d.pbtxt | 1 + ...tensorflow.keras.layers.-cropping3-d.pbtxt | 1 + .../tensorflow.keras.layers.-dense.pbtxt | 1 + .../golden/tensorflow.keras.layers.-dot.pbtxt | 1 + .../tensorflow.keras.layers.-dropout.pbtxt | 1 + .../tensorflow.keras.layers.-e-l-u.pbtxt | 1 + 
.../tensorflow.keras.layers.-embedding.pbtxt | 1 + .../tensorflow.keras.layers.-flatten.pbtxt | 1 + .../tensorflow.keras.layers.-g-r-u-cell.pbtxt | 1 + .../tensorflow.keras.layers.-g-r-u.pbtxt | 1 + ...rflow.keras.layers.-gaussian-dropout.pbtxt | 1 + ...sorflow.keras.layers.-gaussian-noise.pbtxt | 1 + ...as.layers.-global-average-pooling1-d.pbtxt | 1 + ...as.layers.-global-average-pooling2-d.pbtxt | 1 + ...as.layers.-global-average-pooling3-d.pbtxt | 1 + ...low.keras.layers.-global-avg-pool1-d.pbtxt | 1 + ...low.keras.layers.-global-avg-pool2-d.pbtxt | 1 + ...low.keras.layers.-global-avg-pool3-d.pbtxt | 1 + ...low.keras.layers.-global-max-pool1-d.pbtxt | 1 + ...low.keras.layers.-global-max-pool2-d.pbtxt | 1 + ...low.keras.layers.-global-max-pool3-d.pbtxt | 1 + ....keras.layers.-global-max-pooling1-d.pbtxt | 1 + ....keras.layers.-global-max-pooling2-d.pbtxt | 1 + ....keras.layers.-global-max-pooling3-d.pbtxt | 1 + ...tensorflow.keras.layers.-input-layer.pbtxt | 1 + ...ensorflow.keras.layers.-l-s-t-m-cell.pbtxt | 1 + .../tensorflow.keras.layers.-l-s-t-m.pbtxt | 1 + .../tensorflow.keras.layers.-lambda.pbtxt | 1 + .../tensorflow.keras.layers.-layer.pbtxt | 1 + ...ensorflow.keras.layers.-leaky-re-l-u.pbtxt | 1 + ...w.keras.layers.-locally-connected1-d.pbtxt | 1 + ...w.keras.layers.-locally-connected2-d.pbtxt | 1 + .../tensorflow.keras.layers.-masking.pbtxt | 1 + ...tensorflow.keras.layers.-max-pool1-d.pbtxt | 1 + ...tensorflow.keras.layers.-max-pool2-d.pbtxt | 1 + ...tensorflow.keras.layers.-max-pool3-d.pbtxt | 1 + ...sorflow.keras.layers.-max-pooling1-d.pbtxt | 1 + ...sorflow.keras.layers.-max-pooling2-d.pbtxt | 1 + ...sorflow.keras.layers.-max-pooling3-d.pbtxt | 1 + .../tensorflow.keras.layers.-maximum.pbtxt | 1 + .../tensorflow.keras.layers.-multiply.pbtxt | 1 + .../tensorflow.keras.layers.-p-re-l-u.pbtxt | 1 + .../tensorflow.keras.layers.-permute.pbtxt | 1 + .../tensorflow.keras.layers.-r-n-n.pbtxt | 1 + ...nsorflow.keras.layers.-repeat-vector.pbtxt | 1 + 
.../tensorflow.keras.layers.-reshape.pbtxt | 1 + ...flow.keras.layers.-separable-conv1-d.pbtxt | 1 + ...flow.keras.layers.-separable-conv2-d.pbtxt | 1 + ...ras.layers.-separable-convolution1-d.pbtxt | 1 + ...ras.layers.-separable-convolution2-d.pbtxt | 1 + ...flow.keras.layers.-simple-r-n-n-cell.pbtxt | 1 + ...ensorflow.keras.layers.-simple-r-n-n.pbtxt | 1 + .../tensorflow.keras.layers.-softmax.pbtxt | 1 + ...low.keras.layers.-spatial-dropout1-d.pbtxt | 1 + ...low.keras.layers.-spatial-dropout2-d.pbtxt | 1 + ...low.keras.layers.-spatial-dropout3-d.pbtxt | 1 + ...ow.keras.layers.-stacked-r-n-n-cells.pbtxt | 1 + ...low.keras.layers.-thresholded-re-l-u.pbtxt | 1 + ...rflow.keras.layers.-time-distributed.pbtxt | 1 + ...sorflow.keras.layers.-up-sampling1-d.pbtxt | 1 + ...sorflow.keras.layers.-up-sampling2-d.pbtxt | 1 + ...sorflow.keras.layers.-up-sampling3-d.pbtxt | 1 + .../tensorflow.keras.layers.-wrapper.pbtxt | 1 + ...orflow.keras.layers.-zero-padding1-d.pbtxt | 1 + ...orflow.keras.layers.-zero-padding2-d.pbtxt | 1 + ...orflow.keras.layers.-zero-padding3-d.pbtxt | 1 + .../tensorflow.keras.models.-model.pbtxt | 1 + .../tensorflow.keras.models.-sequential.pbtxt | 1 + ...ensorflow.layers.-average-pooling1-d.pbtxt | 1 + ...ensorflow.layers.-average-pooling2-d.pbtxt | 1 + ...ensorflow.layers.-average-pooling3-d.pbtxt | 1 + ...nsorflow.layers.-batch-normalization.pbtxt | 1 + .../golden/tensorflow.layers.-conv1-d.pbtxt | 1 + ...tensorflow.layers.-conv2-d-transpose.pbtxt | 1 + .../golden/tensorflow.layers.-conv2-d.pbtxt | 1 + ...tensorflow.layers.-conv3-d-transpose.pbtxt | 1 + .../golden/tensorflow.layers.-conv3-d.pbtxt | 1 + .../api/golden/tensorflow.layers.-dense.pbtxt | 1 + .../golden/tensorflow.layers.-dropout.pbtxt | 1 + .../golden/tensorflow.layers.-flatten.pbtxt | 1 + .../api/golden/tensorflow.layers.-layer.pbtxt | 1 + .../tensorflow.layers.-max-pooling1-d.pbtxt | 1 + .../tensorflow.layers.-max-pooling2-d.pbtxt | 1 + .../tensorflow.layers.-max-pooling3-d.pbtxt | 1 + 
...tensorflow.layers.-separable-conv1-d.pbtxt | 1 + ...tensorflow.layers.-separable-conv2-d.pbtxt | 1 + ...flow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt | 1 + ...orflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt | 1 + ...nsorflow.nn.rnn_cell.-device-wrapper.pbtxt | 1 + ...sorflow.nn.rnn_cell.-dropout-wrapper.pbtxt | 1 + .../tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt | 1 + ...tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt | 1 + ...orflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt | 1 + .../tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt | 1 + ...orflow.nn.rnn_cell.-residual-wrapper.pbtxt | 1 + 126 files changed, 154 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 3d6a200276..83187b51b5 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -42,24 +42,6 @@ from tensorflow.python.training import saver as core_saver from tensorflow.python.training import training_util -class CheckpointableDenseLayer(core.Dense, checkpointable.Checkpointable): - - def __init__(self, *args, **kwargs): - checkpointable.Checkpointable.__init__(self) - core.Dense.__init__(self, *args, **kwargs) - - def add_variable(self, name, shape, **kwargs): - # Calls both Checkpointable._add_variable and Layer.add_variable. Eventually - # Layer.add_variable should inherit from Checkpointable and simply call - # super and then do post-processing. 
- return checkpointable.Checkpointable._add_variable_with_custom_getter( - self, - name=name, - shape=shape, - getter=functools.partial(core.Dense.add_variable, self), - **kwargs) - - # pylint: disable=not-callable class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): @@ -122,9 +104,9 @@ class MyNetwork(CheckpointableNetwork): def __init__(self): super(MyNetwork, self).__init__() - self._named_dense = CheckpointableDenseLayer(1, use_bias=True) + self._named_dense = core.Dense(1, use_bias=True) self._via_track_layer = self.track_layer( - CheckpointableDenseLayer(1, use_bias=False), name="via_track_layer") + core.Dense(1, use_bias=False), name="via_track_layer") # We can still track Checkpointables which aren't Layers. self._non_layer = NonLayerCheckpointable() @@ -326,10 +308,10 @@ class CheckpointingTests(test.TestCase): "global_step:0", named_variables["optimizer_step" + suffix].name) self.assertEqual( - "my_network/checkpointable_dense_layer_1/kernel:0", + "my_network/dense_1/kernel:0", named_variables["network/via_track_layer/kernel" + suffix].name) self.assertEqual( - "my_network/checkpointable_dense_layer/kernel:0", + "my_network/dense/kernel:0", named_variables["network/_named_dense/kernel" + suffix].name) self.assertEqual( "beta1_power:0", @@ -348,18 +330,18 @@ class CheckpointingTests(test.TestCase): serialized_graph.nodes[optimizer_node.children[0].node_id] .attributes[0].full_name) self.assertEqual( - "my_network/checkpointable_dense_layer/kernel", + "my_network/dense/kernel", serialized_graph.nodes[optimizer_node.slot_variables[0] .original_variable_node_id] .attributes[0].full_name) # We strip off the :0 suffix, as variable.name-based saving does. 
self.assertEqual( - "my_network/checkpointable_dense_layer/kernel/Adam", + "my_network/dense/kernel/Adam", serialized_graph.nodes[optimizer_node.slot_variables[0] .slot_variable_node_id] .attributes[0].full_name) self.assertEqual( - "my_network/checkpointable_dense_layer/kernel/Adam:0", + "my_network/dense/kernel/Adam:0", optimizer.get_slot( var=named_variables["network/_named_dense/kernel" + suffix], name="m").name) diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 8314c4aa87..2ec9971b88 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -36,12 +36,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @tf_export('layers.Layer') -class Layer(object): +class Layer(checkpointable.CheckpointableBase): """Base layer class. This is the class from which all layers inherit, implementing common @@ -532,13 +533,17 @@ class Layer(object): with vs.variable_scope( self._scope, reuse=reuse, auxiliary_name_scope=False) as scope: with ops.name_scope(self._name_scope_name(scope)): - variable = vs.get_variable(name, - shape=shape, - initializer=initializer, - dtype=dtypes.as_dtype(dtype), - constraint=constraint, - trainable=trainable and self.trainable, - partitioner=partitioner) + variable = self._add_variable_with_custom_getter( + name=name, + shape=shape, + getter=vs.get_variable, + # Manage errors in Layer rather than Checkpointable. 
+ overwrite=True, + initializer=initializer, + dtype=dtypes.as_dtype(dtype), + constraint=constraint, + trainable=trainable and self.trainable, + partitioner=partitioner) if init_graph is not None: # pylint: disable=protected-access # The variable was created and initialized in a graph. diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index 11caa761ae..c5e7f3cdac 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -322,7 +322,8 @@ class CheckpointableBase(object): def _add_variable_with_custom_getter( self, name, shape=None, dtype=dtypes.float32, - initializer=None, getter=None, **kwargs_for_getter): + initializer=None, getter=None, overwrite=False, + **kwargs_for_getter): """Restore-on-create for a variable be saved with this `Checkpointable`. If the user has requested that this object or another `Checkpointable` which @@ -334,12 +335,11 @@ class CheckpointableBase(object): name: A name for the variable. Must be unique within this object. shape: The shape of the variable. dtype: The data type of the variable. - initializer: The initializer to use. Ignored if there is a deferred restoration left over from a call to `_restore_from_checkpoint_position`. - getter: The getter to wrap which actually fetches the variable. + overwrite: If True, disables unique name and type checks. **kwargs_for_getter: Passed to the getter. Returns: @@ -349,7 +349,7 @@ class CheckpointableBase(object): ValueError: If the variable name is not unique. """ self._maybe_initialize_checkpointable() - if name in self._dependency_names: + if not overwrite and name in self._dependency_names: raise ValueError( ("A variable named '%s' already exists in this Checkpointable, but " "Checkpointable._add_variable called to create another with " @@ -385,7 +385,13 @@ class CheckpointableBase(object): # assign again. 
It will add this variable to our dependencies, and if there # is a non-trivial restoration queued, it will handle that. This also # handles slot variables. - return self._track_checkpointable(new_variable, name=name) + if not overwrite or isinstance(new_variable, CheckpointableBase): + return self._track_checkpointable(new_variable, name=name, + overwrite=overwrite) + else: + # TODO(allenl): Some variable types are not yet supported. Remove this + # fallback once all get_variable() return types are Checkpointable. + return new_variable def _preload_simple_restoration(self, name, shape): """Return a dependency's value for restore-on-create. diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt index 241db8956a..7be2f4f61f 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-model.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt index 9673a508d6..0f2428d77a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.-sequential.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt index 041acf29ff..db8f626b98 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activation.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" 
is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt index 48143b2cd6..809b3a5430 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-activity-regularization.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt index 11f78fed97..68d41bb6cc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-add.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt index 84eb825632..970b777e51 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-alpha-dropout.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt index ab377a248f..529c64ab29 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" 
is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt index c2edd79f52..7e7c330d74 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt index f3f37eed99..ada8466d74 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average-pooling3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt index 31d1d1c049..2a5c1cd530 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-average.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt index 6582e1b18e..9a2cb29815 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" 
is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt index 12f66095d2..f5e991ea42 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt index 3a45fa180e..31732214a6 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-avg-pool3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt index a0f272c178..422eddf10d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-batch-normalization.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt index 9c7d3154ad..9053a37916 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-bidirectional.pbtxt @@ -4,6 +4,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt index 949b225e54..3d536d2182 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-concatenate.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt index a736c84a10..6a7da1aef8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv-l-s-t-m2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt index 95f9afed28..801a033972 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt index 38ba15400a..13352e264a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt index bc84e2a97e..f400e4a15c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt index 0802578c22..b3a9f573b8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt index 8ad4646c74..a9be09c0ab 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-conv3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt index 110e267b75..be1ef5eb92 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" 
is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt index 24cfc83af6..30034f7eaf 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt index c56e89187f..189b38054c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt index 3674f2746c..a76d85c629 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d-transpose.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt index 5a8f9d7702..782195d4ad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-convolution3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt index caa748be81..2cb7a39ea5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt index 97bd4a265a..8080330699 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt index 20c43eeed1..678f40bbc2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-cropping3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt index 256f0e4bdf..fac826109b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dense.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt index d1e53f900c..285d544af2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dot.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt index b010ff6805..b77976974c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-dropout.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt index fffd3854bb..b07714d3f2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-e-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt index 1155fe03fc..e67d4ddfc4 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-embedding.pbtxt @@ -3,6 +3,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt index 5e4bebb15b..b2a668e5a8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-flatten.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt index cb9bb3d821..1fd3febad2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt index 9a36e80649..f5f41d879d 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-g-r-u.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activation" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt index eb32238e15..f4f1a5d51c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-dropout.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + 
is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt index 37fc8e29ae..e502df5e17 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-gaussian-noise.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt index 490816458b..9c8d5bfcd8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt index ab49f67f33..8dd65f1f24 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt index 3d7cb3ba49..5e30571cc7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-average-pooling3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt index c99ddab4f3..ba90fa4546 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt index 290d2eaebe..8823857758 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt index cf63069641..500ced852b 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-avg-pool3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt index 2dadc67c09..cf2717ed46 
100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt index 1a1a1dcf64..a86ff1a469 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt index 44898e23ad..e01cc7c1b0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pool3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt index 941d867d24..259c1fb37c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt index 9a5a6325f8..0c41bf97f7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt index 7a0c1932f6..bec8817aa3 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-global-max-pooling3-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt index f679c1d006..17be862229 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-input-layer.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt index ad1e7f2cad..6d2a8c5619 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt index 6dad4b4897..490b5b618c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-l-s-t-m.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activation" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt index fa45d8c902..21a65b838a 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-lambda.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt index 023d6c0d69..127b04738e 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-layer.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.keras.layers.Layer" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt index e429fced77..87e49f2ed5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-leaky-re-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt index 462568124f..1aa3aad324 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt index 11bf6a2b42..5e9dc7d477 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-locally-connected2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt index a932448891..0d101e5b68 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-masking.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt index 6ff2adddac..c85cd49ac8 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: 
"activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt index 2957673d4d..4f59e330c9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt index 2191c10b73..c0ea0eb050 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pool3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt index af750ac1b6..ca37ae5131 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling1-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt index 9046061510..3ede237834 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: 
"activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt index a40666807b..d87e25a7ba 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-max-pooling3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt index 65378cef42..e4df7b48ae 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-maximum.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt index b037559e02..6bf7c77743 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-multiply.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt index b3a7f47fa5..c14be132b7 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-p-re-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt index b2f22f7da3..72ffbceae0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-permute.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt index 792eacf90d..d3e780c8b2 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-r-n-n.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt index 5b79a021ca..a27980a9d1 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-repeat-vector.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt index 99c64505ee..67f991276c 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-reshape.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt index d5873ccf76..fccea5e8af 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv1-d.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt index 76b4c10a46..d20663bdb0 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-conv2-d.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt index 40cd87de5f..889fa0a1b5 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution1-d.pbtxt @@ -6,6 +6,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt index c44c0da148..c850f3fedc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-separable-convolution2-d.pbtxt @@ -6,6 +6,7 @@ tf_class { 
is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt index bd70c31c38..526d88ccba 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt index de717976cf..7fddae3447 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-simple-r-n-n.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activation" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt index a93b7b8f6e..5b9b62fc97 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-softmax.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt index 4dc24b195e..769da30999 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout1-d.pbtxt @@ -5,6 +5,7 @@ 
tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt index a3bb1cc414..fca2e42a15 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout2-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt index f9a78106fa..36e8de09a9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-spatial-dropout3-d.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt index 5aa21f4022..a96f16fae9 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-stacked-r-n-n-cells.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt index 88e8a46572..e1cbd0e150 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.keras.layers.-thresholded-re-l-u.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt index f2a7673998..f0d35728fb 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-time-distributed.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt index 4db82ddfa9..74efaea6dd 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt index 61e65ad56d..dc5bd5fd53 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt index 3d9402db4e..e01ccfb74a 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-up-sampling3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt index 0223799ed4..7e6f90f762 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt index 2e4429833a..4d0d402dad 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt index 26cf7b9e49..b353a529bc 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt index 64d35d9447..9fe1256e61 
100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.layers.-zero-padding3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt index 18be9c9701..8ccf15f9ab 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-model.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt index b934632922..102eb32203 100644 --- a/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.keras.models.-sequential.pbtxt @@ -5,6 +5,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt index de81206bc8..1c4f550d7f 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt index 72d5496464..d2db095269 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt index 595e77ff9f..34d9a9df28 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-average-pooling3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt index 0c4aa2ff26..21ad0efecf 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-batch-normalization.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.layers.BatchNormalization" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt index 5f576d0189..ed38747c76 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt index 675a7c76e5..ff453c6059 100644 --- 
a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d-transpose.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt index eaabbf6aab..5583bd22dc 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt index 838e070d79..63f0c32a7c 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d-transpose.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt index 4bd8cfc1a4..b77726252c 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-conv3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt index 57eccb03ff..92db9f6dcd 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-dense.pbtxt 
@@ -2,6 +2,7 @@ path: "tensorflow.layers.Dense" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt index a1ec00eeea..80fa846a24 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-dropout.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.layers.Dropout" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt index a06943d51a..f63213b3dd 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-flatten.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.layers.Flatten" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt index 24fda0c87e..4e45b2d513 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-layer.pbtxt @@ -1,6 +1,7 @@ path: "tensorflow.layers.Layer" tf_class { is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt index 4c3d00e0e1..19ec33fce7 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling1-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" 
member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt index f7e2017b0c..76180c333a 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling2-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt index 84780926a3..ded75c8ff0 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-max-pooling3-d.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt index 05799ecfc9..3dbfa5453f 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv1-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt index c2aeb35c46..ab171df1d1 100644 --- a/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.layers.-separable-conv2-d.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff 
--git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt index 44536787f0..9c71a24d05 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-l-s-t-m-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt index 768565d3ca..9e19f96b74 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-basic-r-n-n-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt index 0d253e5dd2..7540aa6286 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-device-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt index 97edf245f6..fc1ff38669 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-dropout-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member 
{ name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt index 6ecc134d4d..751122cfff 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-g-r-u-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt index 4b3ca1578b..4b6313f395 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-l-s-t-m-cell.pbtxt @@ -4,6 +4,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt index 9a6c73a079..00e8c71140 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-multi-r-n-n-cell.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt index 27488f8e73..3852f90dd6 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-r-n-n-cell.pbtxt @@ -2,6 +2,7 @@ path: "tensorflow.nn.rnn_cell.RNNCell" tf_class { is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member 
{ name: "activity_regularizer" diff --git a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt index 3310836ed2..8f3f0f7506 100644 --- a/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.nn.rnn_cell.-residual-wrapper.pbtxt @@ -3,6 +3,7 @@ tf_class { is_instance: "" is_instance: "" is_instance: "" + is_instance: "" is_instance: "" member { name: "activity_regularizer" -- GitLab From 7b71b0cfd9f7b4ceb17295cba5b651a04764c37b Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 27 Feb 2018 13:20:03 -0800 Subject: [PATCH 148/884] Checkpointable: Move the checkpoint-grouping utility out of the unit test file Renames Saver -> CheckpointableSaver in preparation for exposing the necessary symbols in tf.contrib.eager. There's a pending change for Optimizers, and Asim is handling Layers/Model. Once those are checked in, we should be able to save/restore everything in the eager examples (or at least the mnist one...). Still plenty more to make Checkpointable, but it should be usable at that point. PiperOrigin-RevId: 187221803 --- .../eager/python/checkpointable_utils.py | 93 ++++++++++++- .../eager/python/checkpointable_utils_test.py | 128 +++++++----------- 2 files changed, 139 insertions(+), 82 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index e57093bdbc..ed431e02ea 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -518,7 +518,7 @@ class _SessionWithFeedDictAdditions(session_lib.SessionInterface): fetches=fetches, feed_dict=feed_dict, **kwargs) -class Saver(object): +class CheckpointableSaver(object): """Saves and restores a `Checkpointable` object and its dependencies. See `Checkpointable` for details of dependency management. 
`Saver` wraps @@ -770,3 +770,94 @@ class Saver(object): load_status = CheckpointLoadStatus( checkpoint, feed_dict=file_prefix_feed_dict) return load_status + + +class Checkpoint(core_checkpointable.Checkpointable): + """A utility class which groups `Checkpointable` objects. + + Accepts arbitrary keyword arguments to its constructor and saves those values + with a checkpoint. Maintains a `save_counter` for numbering checkpoints. + + Example usage: + + ```python + import tensorflow as tf + import tensorflow.contrib.eager as tfe + import os + + checkpoint_directory = "/tmp/training_checkpoints" + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + + root = tfe.Checkpoint(optimizer=optimizer, model=model) + root.restore(tf.train.latest_checkpoint(checkpoint_directory)) + for _ in range(num_training_steps): + optimizer.minimize( ... ) + root.save(file_prefix=checkpoint_prefix) + ``` + + For more manual control over saving, use `tfe.CheckpointableSaver` directly. + + Attributes: + save_counter: Incremented when `save()` is called. Used to number + checkpoints. + """ + + def __init__(self, **kwargs): + """Group objects into a training checkpoint. + + Args: + **kwargs: Keyword arguments are set as attributes of this object, and are + saved with the checkpoint. Attribute values must derive from + `CheckpointableBase`. + Raises: + ValueError: If objects in `kwargs` are not Checkpointable. + """ + super(Checkpoint, self).__init__() + for k, v in sorted(kwargs.items(), key=lambda item: item[0]): + if not isinstance(v, core_checkpointable.CheckpointableBase): + raise ValueError( + ("`Checkpoint` was expecting an object derived from " + "`CheckpointableBase`, got %s.") % (v,)) + setattr(self, k, v) + self._save_counter = None # Created lazily for restore-on-create. 
+ self._saver = CheckpointableSaver(weakref.ref(self)) + + def _maybe_create_save_counter(self): + """Create a save counter if it does not yet exist.""" + if self._save_counter is None: + # Initialized to 0 and incremented before saving. + self._save_counter = add_variable( + self, name="save_counter", initializer=0, dtype=dtypes.int64) + + @property + def save_counter(self): + """An integer variable which starts at zero and is incremented on save. + + Used to number checkpoints. + + Returns: + The save counter variable. + """ + self._maybe_create_save_counter() + return self._save_counter + + def save(self, file_prefix, session=None): + """Save a checkpoint. Wraps `tfe.CheckpointableSaver.save`.""" + assign_op = self.save_counter.assign_add(1) + if context.in_graph_mode(): + if session is None: + session = ops.get_default_session() + session.run(assign_op) + return self._saver.save( + file_prefix=file_prefix, + checkpoint_number=self.save_counter, + session=session) + + def restore(self, save_path): + """Restore a checkpoint. Wraps `tfe.CheckpointableSaver.restore`.""" + status = self._saver.restore(save_path=save_path) + # Create the save counter now so it gets initialized with other variables + # when graph building. Creating it earlier would lead to double + # initialization when executing eagerly. 
+ self._maybe_create_save_counter() + return status diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 83187b51b5..68f0d93632 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -18,7 +18,6 @@ from __future__ import print_function import functools import os -import weakref import six @@ -114,51 +113,6 @@ class MyNetwork(CheckpointableNetwork): return self._via_track_layer(self._named_dense(values)) -class Checkpoint(checkpointable.Checkpointable): - """A utility class which groups `Checkpointable` objects.""" - - def __init__(self, **kwargs): - super(Checkpoint, self).__init__() - for k, v in sorted(kwargs.items(), key=lambda item: item[0]): - setattr(self, k, v) - self._save_counter = None # Created lazily for restore-on-create. - self._saver = checkpointable_utils.Saver(weakref.ref(self)) - - @property - def save_counter(self): - """An integer variable which starts at zero and is incremented on save. - - Used to number checkpoints. - - Returns: - The save counter variable. - """ - if self._save_counter is None: - # Initialized to 0 and incremented before saving. - self._save_counter = checkpointable_utils.add_variable( - self, name="save_counter", initializer=0, dtype=dtypes.int64) - return self._save_counter - - def save(self, file_prefix, session=None): - assign_op = self.save_counter.assign_add(1) - if context.in_graph_mode(): - if session is None: - session = ops.get_default_session() - session.run(assign_op) - return self._saver.save( - file_prefix=file_prefix, - checkpoint_number=self.save_counter, - session=session) - - def restore(self, save_path): - status = self._saver.restore(save_path=save_path) - # Create the save counter now so it gets initialized with other variables - # when graph building. 
Creating it earlier would lead to double - # initialization when executing eagerly. - self.save_counter # pylint: disable=pointless-statement - return status - - class InterfaceTests(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) @@ -256,7 +210,7 @@ class CheckpointingTests(test.TestCase): other_network = MyNetwork() optimizer = CheckpointableAdam(0.001) optimizer_step = training_util.get_or_create_global_step() - root_checkpointable = Checkpoint( + root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) if context.in_eager_mode(): optimizer.minimize( @@ -361,7 +315,8 @@ class CheckpointingTests(test.TestCase): def testSaveRestore(self): network = MyNetwork() optimizer = CheckpointableAdam(0.001) - root_checkpointable = Checkpoint(optimizer=optimizer, network=network) + root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, network=network) input_value = constant_op.constant([[3.]]) if context.in_eager_mode(): optimizer.minimize( @@ -392,7 +347,7 @@ class CheckpointingTests(test.TestCase): return # Restore-on-create is only supported when executing eagerly on_create_network = MyNetwork() on_create_optimizer = CheckpointableAdam(0.001) - on_create_root = Checkpoint( + on_create_root = checkpointable_utils.Checkpoint( optimizer=on_create_optimizer, network=on_create_network) # Deferred restoration status = on_create_root.restore(save_path=save_path) @@ -424,7 +379,7 @@ class CheckpointingTests(test.TestCase): for training_continuation in range(3): network = MyNetwork() optimizer = CheckpointableAdam(0.001) - root = Checkpoint( + root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=training_util.get_or_create_global_step()) root.restore(core_saver.latest_checkpoint(checkpoint_directory)) @@ -448,7 +403,7 @@ class CheckpointingTests(test.TestCase): with ops.Graph().as_default(): network = MyNetwork() 
optimizer = CheckpointableAdam(0.001) - root = Checkpoint( + root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) input_value = constant_op.constant([[3.]]) @@ -485,7 +440,7 @@ class CheckpointingTests(test.TestCase): graph=ops.get_default_graph()): network = MyNetwork() optimizer = CheckpointableAdam(0.001) - root = Checkpoint( + root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) @@ -567,9 +522,11 @@ class CheckpointingTests(test.TestCase): self.evaluate(state_ops.assign(original.dep.var, 123.)) checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = checkpointable_utils.Saver(original).save(checkpoint_prefix) + save_path = checkpointable_utils.CheckpointableSaver( + original).save(checkpoint_prefix) load_into = LateDependencies() - status = checkpointable_utils.Saver(load_into).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + load_into).restore(save_path) with self.assertRaises(AssertionError): status.assert_consumed() load_into.add_dep() @@ -598,11 +555,12 @@ class CheckpointingTests(test.TestCase): self.evaluate(state_ops.assign(dep_after_var.dep.var, -14.)) checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") - save_path = checkpointable_utils.Saver(dep_after_var).save( + save_path = checkpointable_utils.CheckpointableSaver(dep_after_var).save( checkpoint_prefix) loaded_dep_after_var = DepAfterVar() - status = checkpointable_utils.Saver(loaded_dep_after_var).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + loaded_dep_after_var).restore(save_path) loaded_dep_after_var.add_dep() status.assert_consumed() status.run_restore_ops() @@ -622,24 +580,26 @@ class 
CheckpointingTests(test.TestCase): # `root`. Create a one-off grouping so that slot variables for `root.var` # get initialized too. self.evaluate(checkpointable_utils.gather_initializers( - Checkpoint(root=root, optimizer=optimizer))) + checkpointable_utils.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) else: optimizer.minimize(root.var.read_value) self.evaluate(state_ops.assign(root.var, 12.)) - no_slots_path = checkpointable_utils.Saver(root).save( + no_slots_path = checkpointable_utils.CheckpointableSaver(root).save( os.path.join(checkpoint_directory, "no_slots")) root.optimizer = optimizer self.evaluate(state_ops.assign(root.var, 13.)) self.evaluate(state_ops.assign(optimizer.get_slot(name="m", var=root.var), 14.)) - slots_path = checkpointable_utils.Saver(root).save( + slots_path = checkpointable_utils.CheckpointableSaver(root).save( os.path.join(checkpoint_directory, "with_slots")) new_root = checkpointable.Checkpointable() # Load the slot-containing checkpoint (deferred), then immediately overwrite # the non-slot variable (also deferred). - slot_status = checkpointable_utils.Saver(new_root).restore(slots_path) - no_slot_status = checkpointable_utils.Saver(new_root).restore(no_slots_path) + slot_status = checkpointable_utils.CheckpointableSaver( + new_root).restore(slots_path) + no_slot_status = checkpointable_utils.CheckpointableSaver( + new_root).restore(no_slots_path) with self.assertRaises(AssertionError): no_slot_status.assert_consumed() new_root.var = checkpointable_utils.add_variable( @@ -679,15 +639,17 @@ class CheckpointingTests(test.TestCase): save_root.dep.var = checkpointable_utils.add_variable( save_root.dep, name="var", initializer=0.) 
self.evaluate(state_ops.assign(save_root.dep.var, 12.)) - saver = checkpointable_utils.Saver(save_root) + saver = checkpointable_utils.CheckpointableSaver(save_root) first_path = saver.save(os.path.join(checkpoint_directory, "first")) self.evaluate(state_ops.assign(save_root.dep.var, 13.)) second_path = saver.save(os.path.join(checkpoint_directory, "second")) first_root = checkpointable.Checkpointable() second_root = checkpointable.Checkpointable() - first_status = checkpointable_utils.Saver(first_root).restore(first_path) - second_status = checkpointable_utils.Saver(second_root).restore(second_path) + first_status = checkpointable_utils.CheckpointableSaver( + first_root).restore(first_path) + second_status = checkpointable_utils.CheckpointableSaver( + second_root).restore(second_path) load_dep = checkpointable.Checkpointable() load_dep.var = checkpointable_utils.add_variable( load_dep, name="var", shape=[]) @@ -704,8 +666,10 @@ class CheckpointingTests(test.TestCase): # determines the final value. first_root = checkpointable.Checkpointable() second_root = checkpointable.Checkpointable() - second_status = checkpointable_utils.Saver(second_root).restore(second_path) - first_status = checkpointable_utils.Saver(first_root).restore(first_path) + second_status = checkpointable_utils.CheckpointableSaver( + second_root).restore(second_path) + first_status = checkpointable_utils.CheckpointableSaver( + first_root).restore(first_path) load_dep = checkpointable.Checkpointable() load_dep.var = checkpointable_utils.add_variable( load_dep, name="var", shape=[]) @@ -730,10 +694,10 @@ class CheckpointingTests(test.TestCase): save_root.dep_two.dep_three = dep_three checkpointable_utils.add_variable(dep_three, name="var", initializer=0.) 
self.evaluate(checkpointable_utils.gather_initializers(save_root)) - save_path = checkpointable_utils.Saver(save_root).save( + save_path = checkpointable_utils.CheckpointableSaver(save_root).save( os.path.join(checkpoint_directory, "ckpt")) load_root = checkpointable.Checkpointable() - checkpointable_utils.Saver(load_root).restore(save_path) + checkpointable_utils.CheckpointableSaver(load_root).restore(save_path) load_root.dep_one = checkpointable.Checkpointable() load_root.dep_two = checkpointable.Checkpointable() load_root.dep_one.dep_three = checkpointable.Checkpointable() @@ -753,7 +717,7 @@ class CheckpointingTests(test.TestCase): checkpointable_utils.add_variable( save_root.dep_two, name="var2", initializer=64., dtype=dtypes.float64) self.evaluate(checkpointable_utils.gather_initializers(save_root)) - save_path = checkpointable_utils.Saver(save_root).save( + save_path = checkpointable_utils.CheckpointableSaver(save_root).save( os.path.join(checkpoint_directory, "ckpt")) load_root = checkpointable.Checkpointable() load_root.dep_one = checkpointable.Checkpointable() @@ -762,7 +726,7 @@ class CheckpointingTests(test.TestCase): load_root.dep_one, name="var1", shape=[], dtype=dtypes.float64) v2 = checkpointable_utils.add_variable( load_root.dep_one, name="var2", shape=[], dtype=dtypes.float64) - status = checkpointable_utils.Saver(load_root).restore( + status = checkpointable_utils.CheckpointableSaver(load_root).restore( save_path).assert_consumed() status.run_restore_ops() self.assertEqual(32., self.evaluate(v1)) @@ -782,12 +746,13 @@ class CheckpointingTests(test.TestCase): second, "v2", initializer=[1., 1., 2., 3.]) self.evaluate(checkpointable_utils.gather_initializers(first)) checkpoint_directory = self.get_temp_dir() - save_path = checkpointable_utils.Saver(first).save( + save_path = checkpointable_utils.CheckpointableSaver(first).save( os.path.join(checkpoint_directory, "ckpt")) # Test deferred loading first_load = checkpointable.Checkpointable() - status = 
checkpointable_utils.Saver(first_load).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + first_load).restore(save_path) second_load = checkpointable.Checkpointable() first_load.second = second_load second_load.first = first_load @@ -807,7 +772,7 @@ class CheckpointingTests(test.TestCase): self.assertAllEqual([2., 7., 1.], self.evaluate(first_load.v)) self.evaluate(second_load.v.assign([2., 7., 1., 8.])) self.assertAllEqual([2., 7., 1., 8.], self.evaluate(second_load.v)) - status = checkpointable_utils.Saver(first_load).restore( + status = checkpointable_utils.CheckpointableSaver(first_load).restore( save_path).assert_consumed() status.run_restore_ops() self.assertAllEqual([3., 1., 4.], self.evaluate(first_load.v)) @@ -826,14 +791,15 @@ class CheckpointingTests(test.TestCase): name="blah", initializer=0.) self.evaluate(first.var1.assign(4.)) self.evaluate(first.var2.assign(8.)) - save_path = checkpointable_utils.Saver(first).save( + save_path = checkpointable_utils.CheckpointableSaver(first).save( checkpoint_prefix) restore_graph = ops.Graph() with restore_graph.as_default(), self.test_session(restore_graph): second = checkpointable.Checkpointable() second.var2 = variable_scope.get_variable( name="blah", initializer=0.) - status = checkpointable_utils.Saver(second).restore(save_path) + status = checkpointable_utils.CheckpointableSaver( + second).restore(save_path) recreated_var1 = variable_scope.get_variable( name="outside_var", initializer=0.) 
status.run_restore_ops() @@ -856,7 +822,7 @@ class CheckpointingTests(test.TestCase): obj.opt = CheckpointableAdam(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) - saver = checkpointable_utils.Saver(obj) + saver = checkpointable_utils.CheckpointableSaver(obj) saver.save(checkpoint_prefix) before_ops = graph.get_operations() saver.save(checkpoint_prefix) @@ -874,7 +840,7 @@ class CheckpointingTests(test.TestCase): obj.opt = CheckpointableAdam(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) - saver = checkpointable_utils.Saver(obj) + saver = checkpointable_utils.CheckpointableSaver(obj) save_path = saver.save(checkpoint_prefix) saver.restore(save_path) before_ops = graph.get_operations() @@ -889,7 +855,7 @@ class CheckpointCompatibilityTests(test.TestCase): network = MyNetwork() optimizer = CheckpointableAdam(0.001) optimizer_step = training_util.get_or_create_global_step() - root_checkpointable = Checkpoint( + root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) train_op = optimizer.minimize( functools.partial(network, input_value), @@ -945,7 +911,7 @@ class CheckpointCompatibilityTests(test.TestCase): self._set_sentinels(root) with self.assertRaises(AssertionError): self._check_sentinels(root) - object_saver = checkpointable_utils.Saver(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) status = object_saver.restore(save_path) with self.assertRaises(AssertionError): status.assert_consumed() @@ -966,7 +932,7 @@ class CheckpointCompatibilityTests(test.TestCase): with save_graph.as_default(), self.test_session( graph=save_graph) as session: root = self._initialized_model() - object_saver = checkpointable_utils.Saver(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) save_path = object_saver.save( session=session, file_prefix=checkpoint_prefix) with 
context.eager_mode(): @@ -980,7 +946,7 @@ class CheckpointCompatibilityTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") with context.eager_mode(): root = self._initialized_model() - object_saver = checkpointable_utils.Saver(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) save_path = object_saver.save(file_prefix=checkpoint_prefix) with context.graph_mode(): save_graph = ops.Graph() -- GitLab From 142c1f0b9333a6e69fefad18b951944fa4617cd9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 13:22:58 -0800 Subject: [PATCH 149/884] During late import, update model->flags from the input-arrays shape information that was read from the graph (e.g. shape attribute in Placeholder nodes). PiperOrigin-RevId: 187222358 --- tensorflow/contrib/lite/toco/tooling_util.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 1ab7b34331..d23b3737fc 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1199,7 +1199,7 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { << "This model does not define output arrays, so a " "--output_arrays flag must be given on the command-line."; - for (const auto& input_array_proto : model->flags.input_arrays()) { + for (auto& input_array_proto : *model->flags.mutable_input_arrays()) { auto& input_array = model->GetOrCreateArray(input_array_proto.name()); if (input_array_proto.has_data_type()) { const ArrayDataType specified_type = @@ -1243,6 +1243,11 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { for (int i = 0; i < input_array_dims.size(); i++) { CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i)); } + } else { + for (int i = 0; i < input_array.shape().dimensions_count(); i++) { + input_array_proto.mutable_shape()->add_dims( + 
input_array.shape().dims(i)); + } } } -- GitLab From 93f5dd54dab124a9ec3b4c5dcb42d31716fe2f95 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 13:36:10 -0800 Subject: [PATCH 150/884] Optimized non-aligned case of split and split_v on the first input dimension. PiperOrigin-RevId: 187224344 --- tensorflow/core/kernels/batch_kernels.cc | 14 +- tensorflow/core/kernels/split_lib.h | 32 ++-- tensorflow/core/kernels/split_lib_cpu.cc | 32 ++-- tensorflow/core/kernels/split_lib_gpu.cu.cc | 16 +- tensorflow/core/kernels/split_op.cc | 154 +++++++++++++------- tensorflow/core/kernels/split_v_op.cc | 149 ++++++++++++------- tensorflow/core/kernels/tensor_array_ops.cc | 12 +- tensorflow/core/kernels/unpack_op.cc | 14 +- 8 files changed, 258 insertions(+), 165 deletions(-) diff --git a/tensorflow/core/kernels/batch_kernels.cc b/tensorflow/core/kernels/batch_kernels.cc index 546e51be53..8c99ded0a8 100644 --- a/tensorflow/core/kernels/batch_kernels.cc +++ b/tensorflow/core/kernels/batch_kernels.cc @@ -146,7 +146,7 @@ Status SplitCPU(OpKernelContext* context, const Tensor& input, suffix_dim_size *= input.shape().dim_size(i); } auto input_reshaped = - input.shaped({1, input.shape().dim_size(0), suffix_dim_size}); + input.shaped({input.shape().dim_size(0), suffix_dim_size}); int64 position = 0; for (const int64 size : sizes) { @@ -155,13 +155,13 @@ Status SplitCPU(OpKernelContext* context, const Tensor& input, Tensor output; TF_RETURN_IF_ERROR( context->allocate_temp(input.dtype(), output_shape, &output)); - auto output_shaped = output.shaped({1, size, suffix_dim_size}); + auto output_shaped = output.shaped({size, suffix_dim_size}); - Eigen::DSizes slice_indices{0, position, 0}; - Eigen::DSizes slice_sizes{1, size, suffix_dim_size}; - functor::Split()(context->eigen_device(), - output_shaped, input_reshaped, slice_indices, - slice_sizes); + Eigen::DSizes slice_indices{position, 0}; + Eigen::DSizes slice_sizes{size, suffix_dim_size}; + 
functor::Split()(context->eigen_device(), + output_shaped, input_reshaped, + slice_indices, slice_sizes); outputs->emplace_back(output); diff --git a/tensorflow/core/kernels/split_lib.h b/tensorflow/core/kernels/split_lib.h index a08949e626..bc1fa28f8f 100644 --- a/tensorflow/core/kernels/split_lib.h +++ b/tensorflow/core/kernels/split_lib.h @@ -31,31 +31,31 @@ struct SplitCustom { const Eigen::DSizes& slice_sizes); }; -template +template struct Split { - void operator()(const Device& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes); + void operator()(const Device& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes); }; -template -struct Split { +template +struct Split { void operator()(const Eigen::ThreadPoolDevice& d, - typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes); + typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes); }; #ifdef TENSORFLOW_USE_SYCL -template +template struct Split { void operator()(const Eigen::SyclDevice& d, - typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes); + typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes); }; #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/split_lib_cpu.cc b/tensorflow/core/kernels/split_lib_cpu.cc index 771c633b15..a3060e4e90 100644 --- a/tensorflow/core/kernels/split_lib_cpu.cc +++ b/tensorflow/core/kernels/split_lib_cpu.cc @@ -24,12 +24,12 @@ limitations under the License. 
namespace tensorflow { namespace functor { -template -void Split::operator()( - const Eigen::ThreadPoolDevice& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { +template +void Split::operator()( + const Eigen::ThreadPoolDevice& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes) { if (output.size() < 131072) { output = input.slice(slice_indices, slice_sizes); } else { @@ -37,22 +37,26 @@ void Split::operator()( } } -#define DEFINE_CPU_KERNELS(T) template struct Split; +#define DEFINE_CPU_KERNELS(T) \ + template struct Split; \ + template struct Split; TF_CALL_ALL_TYPES(DEFINE_CPU_KERNELS) DEFINE_CPU_KERNELS(quint8) #ifdef TENSORFLOW_USE_SYCL -template -void Split::operator()( - const Eigen::SyclDevice& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { +template +void Split::operator()( + const Eigen::SyclDevice& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes) { output.device(d) = input.slice(slice_indices, slice_sizes); } -#define DEFINE_SYCL_KERNELS(T) template struct Split; +#define DEFINE_SYCL_KERNELS(T) \ + template struct Split; \ + template struct Split; TF_CALL_GPU_NUMBER_TYPES_NO_HALF(DEFINE_SYCL_KERNELS); #endif // TENSORFLOW_USE_SYCL diff --git a/tensorflow/core/kernels/split_lib_gpu.cu.cc b/tensorflow/core/kernels/split_lib_gpu.cu.cc index 9f234fc093..393818730b 100644 --- a/tensorflow/core/kernels/split_lib_gpu.cu.cc +++ b/tensorflow/core/kernels/split_lib_gpu.cu.cc @@ -29,12 +29,12 @@ limitations under the License. 
namespace tensorflow { namespace functor { -template -void Split::operator()( - const Device& d, typename TTypes::Tensor output, - typename TTypes::ConstTensor input, - const Eigen::DSizes& slice_indices, - const Eigen::DSizes& slice_sizes) { +template +void Split::operator()( + const Device& d, typename TTypes::Tensor output, + typename TTypes::ConstTensor input, + const Eigen::DSizes& slice_indices, + const Eigen::DSizes& slice_sizes) { To32Bit(output).device(d) = To32Bit(input).slice(slice_indices, slice_sizes); } @@ -47,7 +47,9 @@ void SplitCustom::operator()( To32Bit(output).device(d) = To32Bit(input).slice(slice_indices, slice_sizes); } -#define DEFINE_GPU_KERNELS(T) template struct Split; +#define DEFINE_GPU_KERNELS(T) \ + template struct Split; \ + template struct Split; TF_CALL_GPU_NUMBER_TYPES(DEFINE_GPU_KERNELS); TF_CALL_complex64(DEFINE_GPU_KERNELS); diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc index 85f529326d..1bc92a4f70 100644 --- a/tensorflow/core/kernels/split_op.cc +++ b/tensorflow/core/kernels/split_op.cc @@ -121,6 +121,77 @@ class SplitOpBase : public OpKernel { } }; +template +class SplitOpCPUImpl { + public: + template + void operator()(OpKernelContext* context, + const InputReshapedType& input_reshaped, + const TensorShape& input_shape, int32 split_dim, + Eigen::DenseIndex prefix_dim_size, + Eigen::DenseIndex split_dim_size, + Eigen::DenseIndex suffix_dim_size, + const MakeSizesType& make_sizes, + const ReshapeResultType& reshape_result, int32 num_split, + int64 split_dim_output_size) const { + const auto num_threads = + context->device()->tensorflow_cpu_worker_threads()->num_threads; + // TODO(jewillco): Tune heuristic further. 
+ const auto input_element_count = input_shape.num_elements(); + const bool use_parallelism_between_outputs = + (num_split >= 4 && + input_element_count >= std::max(num_threads, num_split) * 4096 && + input_element_count < num_split * 180 * 1024); + Eigen::DSizes indices; + for (int i = 0; i < NDims; ++i) { + indices[i] = 0; + } + auto sizes = make_sizes(split_dim_output_size); + TensorShape output_shape(input_shape); + output_shape.set_dim(split_dim, split_dim_output_size); + + auto range_output_func = [&indices, context, &output_shape, prefix_dim_size, + split_dim_output_size, suffix_dim_size, &sizes, + use_parallelism_between_outputs, &input_reshaped, + &reshape_result](int64 start, int64 limit) { + for (int64 i = start; i < limit; ++i) { + Tensor* result = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(i, output_shape, &result)); + if (prefix_dim_size * split_dim_output_size * suffix_dim_size > 0) { + Eigen::DSizes slice_indices; + Eigen::DSizes slice_sizes; + for (int j = 0; j < NDims; ++j) { + slice_indices[j] = + (j == NDims - 2 ? i * split_dim_output_size : indices[j]); + slice_sizes[j] = sizes[j]; + } + + auto result_shaped = reshape_result(result, split_dim_output_size); + + if (use_parallelism_between_outputs) { + // Use sequential implementation for single output. + result_shaped = input_reshaped.slice(slice_indices, slice_sizes); + } else { + // This implementation may be parallel internally. + functor::Split()( + context->eigen_device(), result_shaped, + input_reshaped, slice_indices, slice_sizes); + } + } + } + }; + if (use_parallelism_between_outputs) { + // Run in parallel, disabling parallelism in functor. + context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor( + num_split, input_element_count / num_split, range_output_func); + } else { + // Run sequentially, but allow internal parallelism in functor. 
+ range_output_func(0, num_split); + } + } +}; + template class SplitOpCPU : public SplitOpBase { public: @@ -154,66 +225,37 @@ class SplitOpCPU : public SplitOpBase { std::tie(prefix_dim_size, split_dim_size, suffix_dim_size) = Base::template SetDims(input_shape, split_dim); - auto input_reshaped = - input.shaped({prefix_dim_size, split_dim_size, suffix_dim_size}); const int64 split_dim_output_size = split_dim_size / num_split; - TensorShape output_shape(input_shape); - output_shape.set_dim(split_dim, split_dim_output_size); - - Eigen::DSizes indices{0, 0, 0}; - const Eigen::DSizes sizes{ - prefix_dim_size, split_dim_output_size, suffix_dim_size}; - - const auto num_threads = - context->device()->tensorflow_cpu_worker_threads()->num_threads; - // TODO(jewillco): Tune heuristic further. - const auto input_element_count = input_shape.num_elements(); - const bool use_parallelism_between_outputs = - (num_split >= 4 && - input_element_count >= std::max(num_threads, num_split) * 4096 && - input_element_count < num_split * 180 * 1024); - - auto range_output_func = [&indices, context, &output_shape, prefix_dim_size, - split_dim_output_size, suffix_dim_size, &sizes, - use_parallelism_between_outputs, - &input_reshaped](int64 start, int64 limit) { - for (int64 i = start; i < limit; ++i) { - Tensor* result = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(i, output_shape, &result)); - if (prefix_dim_size * split_dim_output_size * suffix_dim_size > 0) { - Eigen::DSizes slice_indices; - Eigen::DSizes slice_sizes; - for (int j = 0; j < 3; ++j) { - slice_indices[j] = - (j == 1 ? i * split_dim_output_size : indices[j]); - slice_sizes[j] = sizes[j]; - } - - auto result_shaped = result->shaped( - {prefix_dim_size, split_dim_output_size, suffix_dim_size}); - if (use_parallelism_between_outputs) { - // Use sequential implementation for single output. 
- result_shaped = input_reshaped.slice(slice_indices, slice_sizes); - } else { - // This implementation may be parallel internally. - functor::Split()(context->eigen_device(), - result_shaped, input_reshaped, - slice_indices, slice_sizes); - } - } - } - }; - if (use_parallelism_between_outputs) { - // Run in parallel, disabling parallelism in functor. - Shard(num_split, - context->device()->tensorflow_cpu_worker_threads()->workers, - num_split, input_element_count / num_split, range_output_func); + if (prefix_dim_size == 1) { + auto input_reshaped = + input.shaped({split_dim_size, suffix_dim_size}); + auto make_sizes = [&](int64 split_size) { + return Eigen::DSizes{split_size, suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, int64 split_size) { + return result->shaped({split_size, suffix_dim_size}); + }; + SplitOpCPUImpl{}( + context, input_reshaped, input_shape, split_dim, prefix_dim_size, + split_dim_size, suffix_dim_size, make_sizes, reshape_result, + num_split, split_dim_output_size); } else { - // Run sequentially, but allow internal parallelism in functor. 
- range_output_func(0, num_split); + auto input_reshaped = input.shaped( + {prefix_dim_size, split_dim_size, suffix_dim_size}); + auto make_sizes = [&](int64 split_size) { + return Eigen::DSizes{prefix_dim_size, split_size, + suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, int64 split_size) { + return result->shaped( + {prefix_dim_size, split_size, suffix_dim_size}); + }; + SplitOpCPUImpl{}( + context, input_reshaped, input_shape, split_dim, prefix_dim_size, + split_dim_size, suffix_dim_size, make_sizes, reshape_result, + num_split, split_dim_output_size); } } }; diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 7ff5df47d7..16fa890780 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -175,6 +175,76 @@ class SplitVOpBase : public OpKernel { } }; +template +class SplitVOpCPUImpl { + public: + template + void operator()(OpKernelContext* context, + const InputReshapedType& input_reshaped, + const std::vector& split_start_points, + const TensorShape& input_shape, int32 split_dim, + Eigen::DenseIndex prefix_dim_size, + Eigen::DenseIndex split_dim_size, + Eigen::DenseIndex suffix_dim_size, + std::vector& split_sizes_vec, + const MakeSizesType& make_sizes, + const ReshapeResultType& reshape_result) const { + Eigen::DSizes indices; + for (int i = 0; i < NDims; ++i) { + indices[i] = 0; + } + const auto num_threads = + context->device()->tensorflow_cpu_worker_threads()->num_threads; + // TODO(jewillco): Tune heuristic further. 
+ const auto input_element_count = input_shape.num_elements(); + const int num_split = split_start_points.size(); + const bool use_parallelism_between_outputs = + (num_split >= 4 && + input_element_count >= std::max(num_threads, num_split) * 4096 && + input_element_count < num_split * 180 * 1024); + + auto range_output_func = [&indices, context, &input_shape, prefix_dim_size, + split_dim, &split_sizes_vec, &split_start_points, + suffix_dim_size, use_parallelism_between_outputs, + &input_reshaped, &make_sizes, + &reshape_result](int64 start, int64 limit) { + for (int64 i = start; i < limit; ++i) { + TensorShape output_shape(input_shape); + output_shape.set_dim(split_dim, split_sizes_vec[i]); + Tensor* result = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(i, output_shape, &result)); + + const auto sizes = make_sizes(split_sizes_vec[i]); + + if (sizes.TotalSize() > 0) { + auto result_shaped = reshape_result(result, split_sizes_vec[i]); + + auto current_indices = indices; + current_indices[NDims - 2] = split_start_points[i]; + if (use_parallelism_between_outputs) { + // Use sequential implementation for single output. + result_shaped = input_reshaped.slice(current_indices, sizes); + } else { + // This implementation may be parallel internally. + functor::Split()( + context->eigen_device(), result_shaped, + input_reshaped, current_indices, sizes); + } + } + } + }; + if (use_parallelism_between_outputs) { + // Run in parallel, disabling parallelism in functor. + context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor( + num_split, input_element_count / num_split, range_output_func); + } else { + // Run sequentially, but allow internal parallelism in functor. 
+ range_output_func(0, num_split); + } + } +}; + template class SplitVOpCPU : public SplitVOpBase { public: @@ -209,10 +279,6 @@ class SplitVOpCPU : public SplitVOpBase { std::tie(prefix_dim_size, split_dim_size, suffix_dim_size) = Base::template SetDims(input_shape, split_dim); - auto input_reshaped = - input.shaped({prefix_dim_size, split_dim_size, suffix_dim_size}); - - Eigen::DSizes indices{0, 0, 0}; std::vector split_start_points(num_split); for (int i = 0; i < num_split; ++i) { if (i == 0) { @@ -223,55 +289,34 @@ class SplitVOpCPU : public SplitVOpBase { } } - const auto num_threads = - context->device()->tensorflow_cpu_worker_threads()->num_threads; - // TODO(jewillco): Tune heuristic further. - const auto input_element_count = input_shape.num_elements(); - const bool use_parallelism_between_outputs = - (num_split >= 4 && - input_element_count >= std::max(num_threads, num_split) * 4096 && - input_element_count < num_split * 180 * 1024); - - auto range_output_func = [&indices, context, &input_shape, prefix_dim_size, - split_dim, &split_sizes_vec, &split_start_points, - suffix_dim_size, use_parallelism_between_outputs, - &input_reshaped](int64 start, int64 limit) { - for (int64 i = start; i < limit; ++i) { - TensorShape output_shape(input_shape); - output_shape.set_dim(split_dim, split_sizes_vec[i]); - Tensor* result = nullptr; - OP_REQUIRES_OK(context, - context->allocate_output(i, output_shape, &result)); - - Eigen::DSizes sizes{ - prefix_dim_size, split_sizes_vec[i], suffix_dim_size}; - - if (sizes.TotalSize() > 0) { - auto result_shaped = result->shaped( - {prefix_dim_size, split_sizes_vec[i], suffix_dim_size}); - - auto current_indices = indices; - current_indices[1] = split_start_points[i]; - if (use_parallelism_between_outputs) { - // Use sequential implementation for single output. - result_shaped = input_reshaped.slice(current_indices, sizes); - } else { - // This implementation may be parallel internally. 
- functor::Split()(context->eigen_device(), - result_shaped, input_reshaped, - current_indices, sizes); - } - } - } - }; - if (use_parallelism_between_outputs) { - // Run in parallel, disabling parallelism in functor. - Shard(num_split, - context->device()->tensorflow_cpu_worker_threads()->workers, - num_split, input_element_count / num_split, range_output_func); + if (prefix_dim_size == 1) { + auto input_reshaped = + input.shaped({split_dim_size, suffix_dim_size}); + auto make_sizes = [&](Tlen split_size) { + return Eigen::DSizes{split_size, suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, Tlen split_size) { + return result->shaped({split_size, suffix_dim_size}); + }; + SplitVOpCPUImpl{}( + context, input_reshaped, split_start_points, input_shape, split_dim, + prefix_dim_size, split_dim_size, suffix_dim_size, split_sizes_vec, + make_sizes, reshape_result); } else { - // Run sequentially, but allow internal parallelism in functor. - range_output_func(0, num_split); + auto input_reshaped = input.shaped( + {prefix_dim_size, split_dim_size, suffix_dim_size}); + auto make_sizes = [&](Tlen split_size) { + return Eigen::DSizes{prefix_dim_size, split_size, + suffix_dim_size}; + }; + auto reshape_result = [&](Tensor* result, Tlen split_size) { + return result->shaped( + {prefix_dim_size, split_size, suffix_dim_size}); + }; + SplitVOpCPUImpl{}( + context, input_reshaped, split_start_points, input_shape, split_dim, + prefix_dim_size, split_dim_size, suffix_dim_size, split_sizes_vec, + make_sizes, reshape_result); } } }; diff --git a/tensorflow/core/kernels/tensor_array_ops.cc b/tensorflow/core/kernels/tensor_array_ops.cc index af93d814ec..7ec26d95e6 100644 --- a/tensorflow/core/kernels/tensor_array_ops.cc +++ b/tensorflow/core/kernels/tensor_array_ops.cc @@ -1104,9 +1104,9 @@ class TensorArrayUnpackOrScatterOp : public OpKernel { indices[1] = i; if (element_shape.num_elements() > 0) { - functor::Split()(ctx->eigen_device(), - tensor_value_i_t, 
tensor_value_t, indices, - sizes); + functor::Split()(ctx->eigen_device(), + tensor_value_i_t, tensor_value_t, + indices, sizes); } write_values.push_back(persistent_tensor); @@ -1295,9 +1295,9 @@ class TensorArraySplitOp : public OpKernel { auto tensor_value_i_t = tensor_value_i->shaped( {1, tensor_lengths_t(i), elements_per_row}); - functor::Split()(ctx->eigen_device(), - tensor_value_i_t, tensor_value_t, indices, - sizes); + functor::Split()(ctx->eigen_device(), + tensor_value_i_t, tensor_value_t, + indices, sizes); } write_values.push_back(persistent_tensor); diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc index 764b6a252a..4376df34be 100644 --- a/tensorflow/core/kernels/unpack_op.cc +++ b/tensorflow/core/kernels/unpack_op.cc @@ -104,7 +104,7 @@ class UnpackOp : public OpKernel { // Except for shape, unpack is a special case of split, so we reuse the // same computational kernels. auto input_reshaped = - input.shaped({1, before_dim, axis_dim * after_dim}); + input.shaped({before_dim, axis_dim * after_dim}); for (int i = 0; i < num; ++i) { Tensor* output; @@ -112,12 +112,12 @@ class UnpackOp : public OpKernel { context->allocate_output(i, output_shape, &output)); if (output_shape.num_elements() > 0) { - auto output_shaped = output->shaped({1, before_dim, after_dim}); - Eigen::DSizes indices{0, 0, i * after_dim}; - Eigen::DSizes sizes{1, before_dim, after_dim}; - functor::Split()(context->eigen_device(), - output_shaped, input_reshaped, indices, - sizes); + auto output_shaped = output->shaped({before_dim, after_dim}); + Eigen::DSizes indices{0, i * after_dim}; + Eigen::DSizes sizes{before_dim, after_dim}; + functor::Split()(context->eigen_device(), + output_shaped, input_reshaped, indices, + sizes); } } } -- GitLab From 180c457563271b072b33c90bf2f2fbbea450c943 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 27 Feb 2018 13:38:24 -0800 Subject: [PATCH 151/884] Allow the Ftrl-proximal optimizer parameter 'initial_accumulator_value' to take zero values. PiperOrigin-RevId: 187224701 --- tensorflow/python/training/ftrl.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/training/ftrl.py b/tensorflow/python/training/ftrl.py index 9d02e694db..4fa081fab7 100644 --- a/tensorflow/python/training/ftrl.py +++ b/tensorflow/python/training/ftrl.py @@ -53,7 +53,7 @@ class FtrlOptimizer(optimizer.Optimizer): learning_rate: A float value or a constant float `Tensor`. learning_rate_power: A float value, must be less or equal to zero. initial_accumulator_value: The starting value for accumulators. - Only positive values are allowed. + Only zero or positive values are allowed. l1_regularization_strength: A float value, must be greater than or equal to zero. l2_regularization_strength: A float value, must be greater than or @@ -84,9 +84,10 @@ class FtrlOptimizer(optimizer.Optimizer): """ super(FtrlOptimizer, self).__init__(use_locking, name) - if initial_accumulator_value <= 0.0: - raise ValueError("initial_accumulator_value %f needs to be positive" % - initial_accumulator_value) + if initial_accumulator_value < 0.0: + raise ValueError( + "initial_accumulator_value %f needs to be be positive or zero" % + initial_accumulator_value) if learning_rate_power > 0.0: raise ValueError("learning_rate_power %f needs to be negative or zero" % learning_rate_power) -- GitLab From 1034bb2e69cae7ddd7f26f818e0d8527c5d4c3e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 13:49:03 -0800 Subject: [PATCH 152/884] Renames sequential_feature_column to sequence_feature_column and adds pydoc. 
PiperOrigin-RevId: 187226365 --- tensorflow/contrib/feature_column/BUILD | 12 +- tensorflow/contrib/feature_column/__init__.py | 2 +- ...e_column.py => sequence_feature_column.py} | 121 +++++++++++++++++- ...est.py => sequence_feature_column_test.py} | 2 +- 4 files changed, 123 insertions(+), 14 deletions(-) rename tensorflow/contrib/feature_column/python/feature_column/{sequential_feature_column.py => sequence_feature_column.py} (72%) rename tensorflow/contrib/feature_column/python/feature_column/{sequential_feature_column_test.py => sequence_feature_column_test.py} (99%) diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index a53e36c2d5..8ba0823a71 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -25,13 +25,13 @@ py_library( srcs = ["__init__.py"], srcs_version = "PY2AND3", deps = [ - ":sequential_feature_column", + ":sequence_feature_column", ], ) py_library( - name = "sequential_feature_column", - srcs = ["python/feature_column/sequential_feature_column.py"], + name = "sequence_feature_column", + srcs = ["python/feature_column/sequence_feature_column.py"], srcs_version = "PY2AND3", deps = [ "//tensorflow/python:array_ops", @@ -48,12 +48,12 @@ py_library( ) py_test( - name = "sequential_feature_column_test", - srcs = ["python/feature_column/sequential_feature_column_test.py"], + name = "sequence_feature_column_test", + srcs = ["python/feature_column/sequence_feature_column_test.py"], srcs_version = "PY2AND3", tags = ["no_pip"], deps = [ - ":sequential_feature_column", + ":sequence_feature_column", "//tensorflow/python:client_testlib", "//tensorflow/python:dtypes", "//tensorflow/python:errors", diff --git a/tensorflow/contrib/feature_column/__init__.py b/tensorflow/contrib/feature_column/__init__.py index 6da7b12693..650a80144f 100644 --- a/tensorflow/contrib/feature_column/__init__.py +++ b/tensorflow/contrib/feature_column/__init__.py @@ -19,7 +19,7 @@ from 
__future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long,wildcard-import -from tensorflow.contrib.feature_column.python.feature_column.sequential_feature_column import * +from tensorflow.contrib.feature_column.python.feature_column.sequence_feature_column import * from tensorflow.python.util.all_util import remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py similarity index 72% rename from tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py rename to tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index 4ed7268e7a..e99033bbec 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -34,8 +34,7 @@ from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import sparse_ops from tensorflow.python.ops import variable_scope -# TODO(b/73160931): Fix pydoc. -# pylint: disable=g-doc-args,missing-docstring,protected-access +# pylint: disable=protected-access # TODO(b/73827486): Support SequenceExample. @@ -43,8 +42,7 @@ def sequence_input_layer( features, feature_columns, weight_collections=None, - trainable=True, - scope=None): + trainable=True): """"Builds input layer for sequence input. All `feature_columns` must be sequence dense columns with the same @@ -76,6 +74,17 @@ def sequence_input_layer( rnn_cell, inputs=input_layer, sequence_length=sequence_length) ``` + Args: + features: A dict mapping keys to tensors. + feature_columns: An iterable of dense sequence columns. Valid columns are + - `embedding_column` that wraps a `sequence_categorical_column_with_*` + - `sequence_numeric_column`. 
+ weight_collections: A list of collection names to which the Variable will be + added. Note that variables will also be added to collections + `tf.GraphKeys.GLOBAL_VARIABLES` and `ops.GraphKeys.MODEL_VARIABLES`. + trainable: If `True` also add the variable to the graph collection + `GraphKeys.TRAINABLE_VARIABLES`. + Returns: An `(input_layer, sequence_length)` tuple where: - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. @@ -84,6 +93,7 @@ def sequence_input_layer( `feature_columns`. - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence length for each example. + Raises: ValueError: If any of the `feature_columns` is the wrong type. """ @@ -95,7 +105,7 @@ def sequence_input_layer( 'Given (type {}): {}'.format(type(c), c)) with variable_scope.variable_scope( - scope, default_name='sequence_input_layer', values=features.values()): + None, default_name='sequence_input_layer', values=features.values()): builder = fc._LazyBuilder(features) output_tensors = [] sequence_lengths = [] @@ -124,6 +134,35 @@ def sequence_input_layer( # TODO(b/73160931): Add remaining categorical columns. def sequence_categorical_column_with_identity( key, num_buckets, default_value=None): + """Returns a feature column that represents sequences of integers. + + Example: + + ```python + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + key: A unique string identifying the input feature. + num_buckets: Range of inputs. Namely, inputs are expected to be in the + range `[0, num_buckets)`. 
+ default_value: If `None`, this column's graph operations will fail for + out-of-range inputs. Otherwise, this value must be in the range + `[0, num_buckets)`, and will replace out-of-range inputs. + + Returns: + A `_SequenceCategoricalColumn`. + """ return _SequenceCategoricalColumn( fc.categorical_column_with_identity( key=key, @@ -135,6 +174,46 @@ def sequence_categorical_column_with_identity( def _sequence_embedding_column( categorical_column, dimension, initializer=None, ckpt_to_load_from=None, tensor_name_in_ckpt=None, max_norm=None, trainable=True): + """Returns a feature column that represents sequences of embeddings. + + Use this to convert sequence categorical data into dense representation for + input to sequence NN, such as RNN. + + Example: + + ```python + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + categorical_column: A `_SequenceCategoricalColumn` created with a + `sequence_cateogrical_column_with_*` function. + dimension: Integer dimension of the embedding. + initializer: Initializer function used to initialize the embeddings. + ckpt_to_load_from: String representing checkpoint name/pattern from which to + restore column weights. Required if `tensor_name_in_ckpt` is not `None`. + tensor_name_in_ckpt: Name of the `Tensor` in `ckpt_to_load_from` from + which to restore the column weights. Required if `ckpt_to_load_from` is + not `None`. + max_norm: If not `None`, embedding values are l2-normalized to this value. + trainable: Whether or not the embedding is trainable. Default is True. 
+ + Returns: + A `_SequenceEmbeddingColumn`. + + Raises: + ValueError: If `categorical_column` is not the right type. + """ if not isinstance(categorical_column, _SequenceCategoricalColumn): raise ValueError( 'categorical_column must be of type _SequenceCategoricalColumn. ' @@ -156,6 +235,33 @@ def sequence_numeric_column( shape=(1,), default_value=0., dtype=dtypes.float32): + """Returns a feature column that represents sequences of numeric data. + + Example: + + ```python + temperature = sequence_numeric_column('temperature') + columns = [temperature] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + key: A unique string identifying the input features. + shape: The shape of the input data per sequence id. E.g. if `shape=(2,)`, + each example must contain `2 * sequence_length` values. + default_value: A single value compatible with `dtype` that is used for + padding the sparse data into a dense `Tensor`. + dtype: The type of values. + + Returns: + A `_SequenceNumericColumn`. + """ # TODO(b/73160931): Add validations. 
return _SequenceNumericColumn( key, @@ -202,6 +308,7 @@ class _SequenceCategoricalColumn( fc._CategoricalColumn, collections.namedtuple( '_SequenceCategoricalColumn', ['categorical_column'])): + """Represents sequences of categorical data.""" @property def name(self): @@ -254,6 +361,7 @@ class _SequenceCategoricalColumn( class _SequenceEmbeddingColumn( _SequenceDenseColumn, collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): + """Represents sequences of embeddings.""" @property def name(self): @@ -287,6 +395,7 @@ class _SequenceNumericColumn( collections.namedtuple( '_SequenceNumericColumn', ['key', 'shape', 'default_value', 'dtype'])): + """Represents sequences of numeric data.""" @property def name(self): @@ -322,4 +431,4 @@ class _SequenceNumericColumn( return _SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=dense_tensor, sequence_length=sequence_length) -# pylint: enable=g-doc-args,missing-docstring,protected-access +# pylint: enable=protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py similarity index 99% rename from tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py rename to tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 59674869a2..8c37ccf11b 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -20,7 +20,7 @@ from __future__ import print_function import numpy as np -from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc +from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc from tensorflow.python.feature_column.feature_column import _LazyBuilder from 
tensorflow.python.framework import dtypes from tensorflow.python.framework import errors -- GitLab From 0a799feaea50d4e48e8daa1f3954427fdccd76f1 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 10:17:15 -0800 Subject: [PATCH 153/884] Generalize the gather_indices dimension that stores indices This is now exposed as a index_vector_dim dimension number. Also fixed an off-by-one error in ValidateGatherDimensionNumbers in the expression computing output_shape_rank. PiperOrigin-RevId: 187040748 --- .../compiler/xla/service/hlo_instruction.cc | 9 +- .../compiler/xla/service/hlo_instruction.h | 3 +- .../xla/service/hlo_instruction_test.cc | 43 +++- .../compiler/xla/service/shape_inference.cc | 42 ++-- .../xla/service/shape_inference_test.cc | 191 ++++++++++++++---- tensorflow/compiler/xla/xla_data.proto | 4 + .../performance/xla/operation_semantics.md | 61 ++++-- 7 files changed, 274 insertions(+), 79 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index b7dd055d7c..a534d8ff06 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -1172,7 +1172,8 @@ bool HloInstruction::HasSideEffect() const { /* static */ GatherDimensionNumbers HloInstruction::MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims) { + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim) { GatherDimensionNumbers gather_dim_numbers; for (int64 output_window_dim : output_window_dims) { gather_dim_numbers.add_output_window_dims(output_window_dim); @@ -1184,6 +1185,7 @@ bool HloInstruction::HasSideEffect() const { gather_dim_numbers.add_gather_dims_to_operand_dims(gather_dim_to_input_dim); } + gather_dim_numbers.set_index_vector_dim(index_vector_dim); return gather_dim_numbers; } @@ -3369,9 
+3371,12 @@ string HloInstruction::GatherDimensionNumbersToString() const { string gather_dims_to_operand_dims = StrCat( "gather_dims_to_operand_dims={", Join(gather_dimension_numbers_->gather_dims_to_operand_dims(), ","), "}"); + string index_vector_dim = StrCat( + "index_vector_dim=", gather_dimension_numbers_->index_vector_dim()); return Join>( - {output_window_dims, elided_window_dims, gather_dims_to_operand_dims}, + {output_window_dims, elided_window_dims, gather_dims_to_operand_dims, + index_vector_dim}, ", "); } diff --git a/tensorflow/compiler/xla/service/hlo_instruction.h b/tensorflow/compiler/xla/service/hlo_instruction.h index e4d22e5703..e4c86214c2 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.h +++ b/tensorflow/compiler/xla/service/hlo_instruction.h @@ -502,7 +502,8 @@ class HloInstruction { static GatherDimensionNumbers MakeGatherDimNumbers( tensorflow::gtl::ArraySlice output_window_dims, tensorflow::gtl::ArraySlice elided_window_dims, - tensorflow::gtl::ArraySlice gather_dims_to_operand_dims); + tensorflow::gtl::ArraySlice gather_dims_to_operand_dims, + int64 index_vector_dim); // Returns the opcode for this instruction. 
HloOpcode opcode() const { return opcode_; } diff --git a/tensorflow/compiler/xla/service/hlo_instruction_test.cc b/tensorflow/compiler/xla/service/hlo_instruction_test.cc index 32d3ed272b..f2980d309d 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction_test.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction_test.cc @@ -1271,7 +1271,7 @@ TEST_F(HloInstructionTest, Stringification) { "true_computation=%TransposeDot, false_computation=%TransposeDot"); } -TEST_F(HloInstructionTest, StringifyGather) { +TEST_F(HloInstructionTest, StringifyGather_0) { Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); Shape gather_indices_tensor_shape = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); @@ -1291,7 +1291,8 @@ TEST_F(HloInstructionTest, StringifyGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); HloModule module(TestName()); @@ -1303,7 +1304,43 @@ TEST_F(HloInstructionTest, StringifyGather) { "s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), " "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " "gather_dims_to_operand_dims={0,1,2,3,4}, " - "window_bounds={30,29,28,27,26}"); + "index_vector_dim=4, window_bounds={30,29,28,27,26}"); +} + +TEST_F(HloInstructionTest, StringifyGather_1) { + Shape input_tensor_shape = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); + Shape gather_indices_tensor_shape = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); + Shape gather_result_shape = + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}); + + HloComputation::Builder builder("Gather"); + HloInstruction* input = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_tensor_shape, "input_tensor")); + HloInstruction* gather_indices = + builder.AddInstruction(HloInstruction::CreateParameter( + 1, 
gather_indices_tensor_shape, "gather_indices")); + + HloInstruction* gather_instruction = + builder.AddInstruction(HloInstruction::CreateGather( + gather_result_shape, input, gather_indices, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + HloModule module(TestName()); + module.AddEntryComputation(builder.Build()); + + EXPECT_EQ(gather_instruction->ToString(), + "%gather = f32[10,9,7,6,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} " + "gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, " + "s64[10,9,5,7,6]{4,3,2,1,0} %gather_indices), " + "output_window_dims={4,5,6,7,8}, elided_window_dims={}, " + "gather_dims_to_operand_dims={0,1,2,3,4}, " + "index_vector_dim=2, window_bounds={30,29,28,27,26}"); } } // namespace diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index c9692757b2..607a672025 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -2467,27 +2467,27 @@ static Status ValidateGatherDimensionNumbers( const int64 output_window_dim_count = dim_numbers.output_window_dims_size(); const int64 output_shape_rank = - output_window_dim_count + gather_indices_shape.size(); + output_window_dim_count + gather_indices_shape.size() - 1; for (int i = 0; i < dim_numbers.output_window_dims_size(); ++i) { int64 window_index = dim_numbers.output_window_dims(i); if (window_index < 0 || window_index >= output_shape_rank) { return InvalidArgument( "Window index %d in gather op is out of bounds; got %lld, but should " - "have been in" - "[0,%lld)", + "have been in [0,%lld)", i, window_index, output_shape_rank); } } if (dim_numbers.gather_dims_to_operand_dims_size() != - gather_indices_shape.back()) { + gather_indices_shape[dim_numbers.index_vector_dim()]) { return 
InvalidArgument( - "There must be exactly as many elements in gather_dims_to_operand_dims " - "as there are elements in the last dimension of %%gather_indices; got: " - "%d, expected %lld", + "Gather op has %d elements in gather_dims_to_operand_dims and the " + "bound of dimension index_vector_dim=%lld of gather_indices is " + "%lld. These two numbers must be equal.", dim_numbers.gather_dims_to_operand_dims_size(), - gather_indices_shape.back()); + dim_numbers.index_vector_dim(), + gather_indices_shape[dim_numbers.index_vector_dim()]); } for (int i = 0; i < dim_numbers.gather_dims_to_operand_dims_size(); i++) { @@ -2550,24 +2550,33 @@ static Status ValidateGatherDimensionNumbers( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque( gather_indices_shape, "gather indices operand of gather op")); - if (gather_indices_shape.dimensions_size() < 1) { + if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { return InvalidArgument( - "Gather indices parameter must at least of rank 1; got %s", + "Gather indices parameter must be an integral tensor; got %s", ShapeUtil::HumanString(gather_indices_shape).c_str()); } - if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { + // We implicitly reshape gather indices of shape P[A,B,C] to P[A,B,C,1] if + // index_vector_dim is rank(P). The bounds of this expanded shape is + // stored in expanded_gather_indices_shape. + + if (gather_indices_shape.dimensions_size() < + gather_dim_numbers.index_vector_dim() || + gather_dim_numbers.index_vector_dim() < 0) { return InvalidArgument( - "Gather indices parameter must be an integral tensor; got %s", - ShapeUtil::HumanString(gather_indices_shape).c_str()); + "Gather index leaf dimension must be within [0, rank(gather_indices) + " + "1). 
rank(gather_indices) is %d and gather index leaf dimension is " + "%lld.", + gather_indices_shape.dimensions_size(), + gather_dim_numbers.index_vector_dim()); } std::vector expanded_gather_indices_shape; - // We implicitly reshape gather indices of shape P[N] to P[N,1]. expanded_gather_indices_shape.reserve(gather_indices_shape.dimensions_size()); c_copy(gather_indices_shape.dimensions(), std::back_inserter(expanded_gather_indices_shape)); - if (expanded_gather_indices_shape.size() == 1) { + if (expanded_gather_indices_shape.size() == + gather_dim_numbers.index_vector_dim()) { expanded_gather_indices_shape.push_back(1); } @@ -2632,6 +2641,9 @@ static Status ValidateGatherDimensionNumbers( } current_bound = window_bounds[window_dims_seen++]; } else { + if (gather_dims_seen == gather_dim_numbers.index_vector_dim()) { + gather_dims_seen++; + } current_bound = expanded_gather_indices_shape[gather_dims_seen++]; } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 7eb120843f..029d2b3b86 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -1530,11 +1530,17 @@ TEST_F(ShapeInferenceTest, BadSlice) { class GatherShapeInferenceTest : public ShapeInferenceTest { protected: + const Shape s64_scalar_ = ShapeUtil::MakeShape(S64, {}); + const Shape s64_vector_5_ = ShapeUtil::MakeShape(S64, {5}); const Shape s64_vector_32_ = ShapeUtil::MakeShape(S64, {32}); const Shape s64_4d_tensor_10_9_8_7_1_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 1}); const Shape s64_4d_tensor_10_9_8_7_5_ = ShapeUtil::MakeShape(S64, {10, 9, 8, 7, 5}); + const Shape s64_4d_tensor_5_10_9_7_6_ = + ShapeUtil::MakeShape(S64, {5, 10, 9, 7, 6}); + const Shape s64_4d_tensor_10_9_5_7_6_ = + ShapeUtil::MakeShape(S64, {10, 9, 5, 7, 6}); const Shape f32_5d_tensor_50_49_48_47_46_ = ShapeUtil::MakeShape(F32, {50, 49, 48, 47, 46}); const Shape tuple_shape_ = 
ShapeUtil::MakeTupleShape( @@ -1548,7 +1554,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGather) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {64, 32}))) @@ -1562,7 +1569,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherV2) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{1}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/1), /*window_bounds=*/{1, 48})); EXPECT_TRUE( ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {32, 48}))) @@ -1576,7 +1584,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowGatherNd) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4}, /*elided_window_dims=*/{0}, - /*gather_dims_to_operand_dims=*/{0}), + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 48})); EXPECT_TRUE(ShapeUtil::Equal(gather_shape, ShapeUtil::MakeShape(F32, {10, 9, 8, 7, 48}))) @@ -1591,7 +1600,8 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26})); EXPECT_TRUE(ShapeUtil::Equal( gather_shape, @@ -1599,12 +1609,85 @@ TEST_F(GatherShapeInferenceTest, TensorFlowBatchDynamicSlice) { << ShapeUtil::HumanString(gather_shape); } +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_A) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + 
/*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/2), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NonDefaultGatherIndicesLeafDim_B) { + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_5_10_9_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal( + gather_shape, + ShapeUtil::MakeShape(F32, {10, 9, 7, 6, 30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, NoOutputGatherDims) { + // This is equivalent to a dynamic slice. + TF_ASSERT_OK_AND_ASSIGN( + Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_vector_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3, 4}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/0), + /*window_bounds=*/{30, 29, 28, 27, 26})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27, 26}))) + << ShapeUtil::HumanString(gather_shape); +} + +TEST_F(GatherShapeInferenceTest, ScalarGatherIndices) { + // The gather indices "tensor" is a scalar S here that's used to slice out + // [S,0,0,0,0]..[S,30,29,28,27] into a [30,29,28,27] shaped result. 
+ TF_ASSERT_OK_AND_ASSIGN(Shape gather_shape, + ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_scalar_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{0, 1, 2, 3}, + /*elided_window_dims=*/{0}, + /*gather_dims_to_operand_dims=*/{0}, + /*index_vector_dim=*/0), + /*window_bounds=*/{1, 30, 29, 28, 27})); + + EXPECT_TRUE(ShapeUtil::Equal(gather_shape, + ShapeUtil::MakeShape(F32, {30, 29, 28, 27}))) + << ShapeUtil::HumanString(gather_shape); +} + TEST_F(GatherShapeInferenceTest, TupleShapedTensorInput) { StatusOr statusor = ShapeInference::InferGatherShape( tuple_shape_, s64_vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/1), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1617,7 +1700,8 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { s64_vector_32_, tuple_shape_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1625,25 +1709,13 @@ TEST_F(GatherShapeInferenceTest, TupleShapedGatherIndicesInput) { << statusor.status(); } -TEST_F(GatherShapeInferenceTest, ScalarGatherIndicesInput) { - StatusOr statusor = ShapeInference::InferGatherShape( - s64_vector_32_, s32_, - HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, - /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), - /*window_bounds=*/{64, 1}); - ASSERT_FALSE(statusor.ok()); - EXPECT_THAT(statusor.status().error_message(), - HasSubstr("Gather indices parameter must at least of rank 1")) - << statusor.status(); -} - TEST_F(GatherShapeInferenceTest, 
FloatingPointGatherIndicesInput) { StatusOr statusor = ShapeInference::InferGatherShape( s64_vector_32_, vector_32_, HloInstruction::MakeGatherDimNumbers(/*output_window_dims=*/{0}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{1}), + /*gather_dims_to_operand_dims=*/{1}, + /*index_vector_dim=*/0), /*window_bounds=*/{64, 1}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1658,7 +1730,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 8, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1674,7 +1747,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 7}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1690,7 +1764,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 99, 100, 101}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1698,6 +1773,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, + InvalidGatherDimNumbers_WindowIndexBarelyOutOfBounds) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_8_7_5_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 9}, + /*elided_window_dims=*/{}, + 
/*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), + /*window_bounds=*/{30, 29, 28, 27, 26}); + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Window index 4 in gather op is out of bounds")) + << statusor.status(); +} + TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_MismatchingElidedWindowDims) { StatusOr statusor = ShapeInference::InferGatherShape( @@ -1705,7 +1796,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{4}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1722,7 +1814,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 19}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1738,7 +1831,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{0, 1, 2, 3, 3}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1755,15 +1849,15 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( 
statusor.status().error_message(), - HasSubstr( - "There must be exactly as many elements in " - "gather_dims_to_operand_dims " - "as there are elements in the last dimension of %gather_indices")) + HasSubstr("Gather op has 4 elements in gather_dims_to_operand_dims and " + "the bound of dimension index_vector_dim=4 of " + "gather_indices is 5. These two numbers must be equal.")) << statusor.status(); } @@ -1774,7 +1868,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 7}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1791,7 +1886,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 3}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1808,7 +1904,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{2, 1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{1, 1, 28, 27, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1822,7 +1919,8 @@ TEST_F(GatherShapeInferenceTest, InvalidGatherDimNumbers_WindowBoundsTooLarge) { HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{2}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 1, 300, 26}); ASSERT_FALSE(statusor.ok()); 
EXPECT_THAT(statusor.status().error_message(), @@ -1838,7 +1936,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7, 8}, /*elided_window_dims=*/{}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT( @@ -1855,7 +1954,8 @@ TEST_F(GatherShapeInferenceTest, HloInstruction::MakeGatherDimNumbers( /*output_window_dims=*/{4, 5, 6, 7}, /*elided_window_dims=*/{1}, - /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}), + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/4), /*window_bounds=*/{30, 29, 28, 26, 20}); ASSERT_FALSE(statusor.ok()); EXPECT_THAT(statusor.status().error_message(), @@ -1864,5 +1964,22 @@ TEST_F(GatherShapeInferenceTest, << statusor.status(); } +TEST_F(GatherShapeInferenceTest, OutOfBoundsGatherIndicesLeafDim) { + StatusOr statusor = ShapeInference::InferGatherShape( + f32_5d_tensor_50_49_48_47_46_, s64_4d_tensor_10_9_5_7_6_, + HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/{4, 5, 6, 7, 8}, + /*elided_window_dims=*/{}, + /*gather_dims_to_operand_dims=*/{0, 1, 2, 3, 4}, + /*index_vector_dim=*/32), + /*window_bounds=*/{30, 29, 28, 27, 26}); + + ASSERT_FALSE(statusor.ok()); + EXPECT_THAT(statusor.status().error_message(), + HasSubstr("Gather index leaf dimension must be within [0, " + "rank(gather_indices) + 1)")) + << statusor.status(); +} + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/xla_data.proto b/tensorflow/compiler/xla/xla_data.proto index 28620c3b86..1f16e6d251 100644 --- a/tensorflow/compiler/xla/xla_data.proto +++ b/tensorflow/compiler/xla/xla_data.proto @@ -418,6 +418,10 @@ message GatherDimensionNumbers { // transforms the gather index looked up from the gather_indices tensor into // the starting index in the input space. 
repeated int64 gather_dims_to_operand_dims = 3; + + // The dimension in the gather_indices input that contains the starting + // indices. + int64 index_vector_dim = 4; } // Operation requests that are all collected as a tagged union with a oneof diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index b0abf5fdd2..b2190c5243 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -1050,6 +1050,9 @@ For a more intuitive description, see the "Informal Description" section below. : : : indices of the slices we're : : : : we're stitching together into : : : : the output tensor. : +|`index_vector_dim` | `int64` | The dimension in | +: : : `gather_indices` that contains : +: : : the starting indices. : |`output_window_dims` | `ArraySlice` | The set of dimensions in the | : : : output shape that are _window : : : : dimensions_ (defined below). : @@ -1066,22 +1069,20 @@ For a more intuitive description, see the "Informal Description" section below. : : : `output_window_dims`) and the window : : : : dimensions that are elided (via : : : : `elided_window_dims`). : -|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | +|`gather_dims_to_operand_dims` | `ArraySlice` | A dimension map (the | : : : array is interpreted as mapping `i` to : : : : `gather_dims_to_operand_dims[i]`) from : : : : the gather indices in `gather_indices` to : : : : the operand index space. It has to be : : : : one-to-one and total. : -If `gather_indices` is a vector with `N` elements then we implicitly reshape it -to a tensor of shape `[N,1]` before proceeding. 
- For every index `Out` in the output tensor, we compute two things (more precisely described later): - - An index into the first `gather_indices.rank` - `1` dimensions of - `gather_indices`, which gives us a starting index of a slice, _operand - slice_, in the operand tensor. + - An index into `gather_indices.rank` - `1` dimensions of `gather_indices`, + which gives us a starting index of a slice, _operand slice_, in the operand + tensor. These `gather_indices.rank` - `1` dimensions are all the dimensions + in `gather_indices` except `index_vector_dim`. - A _window index_ that has the same rank as the operand. This index is composed of the values in `Out` at dimensions `output_window_dims`, embedded @@ -1093,29 +1094,42 @@ should be present in the output at index `Out`. The output is a tensor of rank `output_window_dims.size` + `gather_indices.rank` - `1`. Additionally, as a shorthand, we define `output_gather_dims` of type `ArraySlice` as the set of dimensions in the output shape but not in -`output_window_dims`, in ascending order. E.g. if the output tensor has rank 5, -`output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, `3`} +`output_window_dims`, in ascending order. E.g. if the output tensor has rank +`5`, `output_window_dims` is {`2`, `4`} then `output_gather_dims` is {`0`, `1`, +`3`} + +If `index_vector_dim` is equal to `gather_indices.rank` we implicitly +consider `gather_indices` to have a trailing `1` dimension (i.e. if +`gather_indices` was of shape `[6,7]` and `index_vector_dim` is `2` then +we implicitly consider the shape of `gather_indices` to be `[6,7,1]`). The bounds for the output tensor along dimension `i` is computed as follows: 1. If `i` is present in `output_gather_dims` (i.e. is equal to - `output_gather_dims[k]` for some `k`) then we pick the corresponding - dimension bounds out of `gather_indices.shape` (i.e. pick - `gather_indices.shape.dims[k]`). 
+ `output_gather_dims[k]` for some `k`) then we pick the corresponding + dimension bounds out of `gather_indices.shape`, skipping + `index_vector_dim` (i.e. pick `gather_indices.shape.dims`[`k`] if `k` + < `index_vector_dim` and `gather_indices.shape.dims`[`k`+`1`] + otherwise). 2. If `i` is present in `output_window_dims` (i.e. equal to - `output_window_dims[k]` for some `k`) then we pick the corresponding bound - out of `window_bounds` after accounting for `elided_window_dims` (i.e. we - pick `adjusted_window_bounds[k]` where `adjusted_window_bounds` is - `window_bounds` with the bounds at indices `elided_window_dims` removed). + `output_window_dims`[`k`] for some `k`) then we pick the corresponding + bound out of `window_bounds` after accounting for `elided_window_dims` + (i.e. we pick `adjusted_window_bounds`[`k`] where `adjusted_window_bounds` + is `window_bounds` with the bounds at indices `elided_window_dims` + removed). The operand index `In` corresponding to an output index `Out` is computed as follows: 1. Let `G` = { `Out`[`k`] for `k` in `output_gather_dims` }. Use `G` to slice - out vector `S` such that `S`[`i`] = `gather_indices`[`G`, `i`]. - 2. Create an index, `S``in`, into `operand` using `S` by scattering - `S` using the `gather_dims_to_operand_dims` map (`S``in` is the - starting indices for _operand slice_ mentioned above.). More precisely: + out vector `S` such that `S`[`i`] = `gather_indices`[Combine(`G`, `i`)] + where Combine(A, b) inserts b at position `index_vector_dim` into A. + Note that this is well defined even if `G` is empty -- if `G` is empty then + `S` = `gather_indices`. + 2. Create an index, `S``in`, into `operand` using `S` by + scattering `S` using the `gather_dims_to_operand_dims` map + (`S``in` is the starting indices for _operand slice_ mentioned + above). More precisely: 1. `S``in`[`gather_dims_to_operand_dims`[`k`]] = `S`[`k`] if `k` < `gather_dims_to_operand_dims.size`. 2. `S``in`[`_`] = `0` otherwise. 
@@ -1136,7 +1150,12 @@ follows: `operand.rank` is `6` and `elided_window_dims` is {`0`, `2`} then `window_dims_to_operand_dims` is {`0`→`1`, `1`→`3`, `2`→`4`, `3`→`5`}. -### Informal Description +### Informal Description and Examples + +`index_vector_dim` is set to `gather_indices.rank` - `1` in all of the +examples that follow. More interesting values for `index_vector_dim` +does not change the operation fundamentally, but makes the visual representation +more cumbersome. To get an intuition on how all of the above fits together, let's look at an example that gathers 5 slices of shape `[8,6]` from a `[16,11]` tensor. The -- GitLab From 1fc324c6701bc179ca73908731857e8a582437b5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 10:24:08 -0800 Subject: [PATCH 154/884] Arithemtic optimization: Rewite Sub(0, y) => Neg(y) PiperOrigin-RevId: 187041872 --- .../grappler/optimizers/constant_folding.cc | 18 +++++++++++++++++- .../grappler/optimizers/constant_folding.h | 1 + .../optimizers/constant_folding_test.cc | 7 +++---- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 182e03f04e..10ca7dcce0 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1434,6 +1434,17 @@ void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node, graph_modified_ = true; } +void ConstantFolding::ReplaceSubtractionFromZeroByNegation(NodeDef* node, + GraphDef* graph) { + node->set_op("Neg"); + node->mutable_input()->SwapElements(0, 1); + const string ctrl_dep = + AddControlDependency(node->input(1), graph, node_map_.get()); + node_map_->UpdateInput(node->name(), node->input(1), ctrl_dep); + node->set_input(1, ctrl_dep); + graph_modified_ = true; +} + Status ConstantFolding::ReplaceOperationWithConstant( double value, const TensorShapeProto& shape, 
NodeDef* node, GraphDef* graph) { @@ -1636,12 +1647,17 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); if (y_matches_output_shape && ((is_mul && x_is_one) || (is_add && x_is_zero))) { - // TODO(rmlarsen): Handle subtraction 0 - y. // 1 * y = y or 0 + y = y. ReplaceOperationWithSnapshot(1, node, output); continue; } + if (y_matches_output_shape && (is_sub && x_is_zero)) { + // Replace 0 - y with Neg(y). + ReplaceSubtractionFromZeroByNegation(node, output); + continue; + } + // Replace 1 / y with Reciprocal op. if (y_matches_output_shape && is_any_div && x_is_one) { DataType type = node->attr().at("T").type(); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 232b2f9fa0..2fd59c7f9c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -82,6 +82,7 @@ class ConstantFolding : public GraphOptimizer { GraphDef* graph); void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node, GraphDef* graph); + void ReplaceSubtractionFromZeroByNegation(NodeDef* node, GraphDef* graph); Status ReplaceOperationWithConstant(double value, const TensorShapeProto& shape, NodeDef* node, GraphDef* graph); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 219f3bd5ec..c6540192d7 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -286,10 +286,9 @@ TEST_F(ConstantFoldingTest, NeutralElement) { EXPECT_EQ("x", node.input(0)); EXPECT_EQ("^zeros", node.input(1)); } else if (name == "sub2") { - // We don't handle this case yet. 
- EXPECT_EQ("Sub", node.op()); - EXPECT_EQ("zeros", node.input(0)); - EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("Neg", node.op()); + EXPECT_EQ("y", node.input(0)); + EXPECT_EQ("^zeros", node.input(1)); } const std::set square_zero_const{"mul1", "mul2", "mul5", "mul6", "matmul1", "matmul2"}; -- GitLab From 620348fb6d045dc1f644925a3828ebb12de944d7 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 10:24:56 -0800 Subject: [PATCH 155/884] Move accumulate_n_v2 to core. PiperOrigin-RevId: 187042001 --- tensorflow/contrib/framework/BUILD | 38 ------ .../framework/python/ops/accumulate_n_v2.py | 111 ------------------ tensorflow/python/kernel_tests/BUILD | 34 ++++++ .../kernel_tests/accumulate_n_eager_test.py} | 27 ++--- .../kernel_tests/accumulate_n_test.py} | 34 +++--- tensorflow/python/ops/math_ops.py | 81 ++++++------- 6 files changed, 99 insertions(+), 226 deletions(-) delete mode 100644 tensorflow/contrib/framework/python/ops/accumulate_n_v2.py rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_eager_test.py => python/kernel_tests/accumulate_n_eager_test.py} (72%) rename tensorflow/{contrib/framework/python/ops/accumulate_n_v2_test.py => python/kernel_tests/accumulate_n_test.py} (79%) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index dbdb5cfaac..1accb319d2 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -28,7 +28,6 @@ tf_custom_op_py_library( "python/framework/graph_util.py", "python/framework/tensor_util.py", "python/ops/__init__.py", - "python/ops/accumulate_n_v2.py", "python/ops/arg_scope.py", "python/ops/audio_ops.py", "python/ops/checkpoint_ops.py", @@ -161,23 +160,6 @@ py_test( ], ) -py_test( - name = "accumulate_n_v2_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", 
- "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:platform_test", - "//tensorflow/python:variables", - "//third_party/py/numpy", - ], -) - cuda_py_test( name = "critical_section_test", size = "medium", @@ -196,26 +178,6 @@ cuda_py_test( ], ) -py_test( - name = "accumulate_n_v2_eager_test", - size = "small", - srcs = ["python/ops/accumulate_n_v2_eager_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":framework_py", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python/eager:backprop", - "//tensorflow/python/eager:context", - "//tensorflow/python/eager:tape", - "//third_party/py/numpy", - ], -) - py_test( name = "ops_test", size = "small", diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py b/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py deleted file mode 100644 index 476528b0dd..0000000000 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2.py +++ /dev/null @@ -1,111 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Ops that will eventually be folded into tensorflow/python/ops/math_ops.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -from tensorflow.python.eager import context -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_math_ops -from tensorflow.python.ops import math_ops - - - -def accumulate_n_v2(inputs, shape=None, tensor_dtype=None, name=None): - """Returns the element-wise sum of a list of tensors. - - Optionally, pass `shape` and `tensor_dtype` for shape and type checking, - otherwise, these are inferred. - - `tf.accumulate_n_v2` performs the same operation as `tf.add_n`, but does not - wait for all of its inputs to be ready before beginning to sum. This can - save memory if inputs are ready at different times, since minimum temporary - storage is proportional to the output size rather than the inputs size. - - Unlike the original `accumulate_n`, `accumulate_n_v2` is differentiable. - - For example: - - ```python - a = tf.constant([[1, 2], [3, 4]]) - b = tf.constant([[5, 0], [0, 6]]) - tf.accumulate_n_v2([a, b, a]) # [[7, 4], [6, 14]] - - # Explicitly pass shape and type - tf.accumulate_n_v2([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) - # [[7, 4], - # [6, 14]] - ``` - - Args: - inputs: A list of `Tensor` objects, each with same shape and type. - shape: Shape of elements of `inputs`. - tensor_dtype: The type of `inputs`. - name: A name for the operation (optional). - - Returns: - A `Tensor` of same shape and type as the elements of `inputs`. - - Raises: - ValueError: If `inputs` don't all have same shape and dtype or the shape - cannot be inferred. 
- """ - _INPUTS_ERR_MSG = ValueError("inputs must be a list of at least one Tensor" - "with the same dtype and shape") - if not inputs or not isinstance(inputs, (list, tuple)): - raise _INPUTS_ERR_MSG - inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) - if not all(isinstance(x, ops.Tensor) for x in inputs): - raise _INPUTS_ERR_MSG - if not all(x.dtype == inputs[0].dtype for x in inputs): - raise _INPUTS_ERR_MSG - if shape is not None: - shape = tensor_shape.as_shape(shape) - else: - shape = tensor_shape.unknown_shape() - for input_tensor in inputs: - if isinstance(input_tensor, ops.Tensor): - shape = shape.merge_with(input_tensor.get_shape()) - - # tensor_dtype is for safety only; operator's output type computed in C++ - if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}" - .format(tensor_dtype, inputs[0].dtype)) - - if len(inputs) == 1 and name is None: - return inputs[0] - elif len(inputs) == 1 and name is not None: - return array_ops.identity(inputs[0], name=name) - elif context.in_eager_mode(): - # TemporaryVariable not currently supported in eager mode; fall back - # onto AddN for now. - # TODO(frreiss) remove this once the lifetime of eager variables gets - # addressed - return math_ops.add_n(inputs, name=name) - else: - return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) - -# The following code should eventually be merged into -# tensorflow/python/ops/math_grad.py -@ops.RegisterGradient("AccumulateNV2") -def _AddNGrad(op, grad): - """Same as gradient for AddN. Copies the gradient to all inputs.""" - # Not broadcasting. 
- return [grad] * len(op.inputs) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index d4ceb2e489..c9aa4a252d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -2892,6 +2892,40 @@ tf_py_test( ], ) +tf_py_test( + name = "accumulate_n_test", + size = "small", + srcs = ["accumulate_n_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + "//tensorflow/python:variables", + ], +) + +tf_py_test( + name = "accumulate_n_eager_test", + size = "small", + srcs = ["accumulate_n_eager_test.py"], + additional_deps = [ + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", + "//tensorflow/python:math_ops", + "//tensorflow/python:resource_variable_ops", + "//tensorflow/python/eager:backprop", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:tape", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py similarity index 72% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py rename to tensorflow/python/kernel_tests/accumulate_n_eager_test.py index 35974b9e21..dc11b7dece 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_eager_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_eager_test.py @@ -12,48 +12,41 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`. - -These test cases spefically exercise the `eager` APIs. They need to be in a -separate file from the remaining tests because eager mode is currently something -you can turn on but can't turn off for the lifetime of the current process.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.eager import backprop from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.platform import test - class AccumulateNV2EagerTest(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testMinimalEagerMode(self): forty = constant_op.constant(40) two = constant_op.constant(2) - answer = av2.accumulate_n_v2([forty, two]) + answer = math_ops.accumulate_n([forty, two]) self.assertEqual(42, answer.numpy()) - def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).numpy()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).numpy()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).numpy()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).numpy()) def testGrad(self): np.random.seed(42) @@ -65,16 +58,14 @@ class 
AccumulateNV2EagerTest(test_util.TensorFlowTestCase): ] def fn(first, second, third): - return av2.accumulate_n_v2([first, second, third]) + return math_ops.accumulate_n([first, second, third]) grad_fn = backprop.gradients_function(fn) grad = grad_fn(input_vars[0], input_vars[1], input_vars[2]) - self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 + self.assertAllEqual(np.repeat(1.0, num_inputs), # d/dx (x + y + ...) = 1 [elem.numpy() for elem in grad]) - if __name__ == "__main__": ops.enable_eager_execution() test.main() - diff --git a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py similarity index 79% rename from tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py rename to tensorflow/python/kernel_tests/accumulate_n_test.py index 45962098e9..0a6d4aea37 100644 --- a/tensorflow/contrib/framework/python/ops/accumulate_n_v2_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_test.py @@ -12,42 +12,42 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Tests for new version of accumulate_n op that will eventually go into -`ops.math_ops`.""" +"""Tests for new version of accumulate_n op.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function import numpy as np -from tensorflow.contrib.framework.python.ops import accumulate_n_v2 as av2 - from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import gradients +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables from tensorflow.python.platform import googletest class AccumulateNV2Test(test_util.TensorFlowTestCase): - """Tests of the new, differentiable version of accumulate_n""" + """Tests of the new, differentiable version of accumulate_n.""" def testFloat(self): np.random.seed(12345) x = [np.random.random((1, 2, 3, 4, 5)) - 0.5 for _ in range(5)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllClose(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllClose(x[0] * 5, av2.accumulate_n_v2([tf_x[0]] * 5).eval()) + self.assertAllClose(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllClose(x[0] * 5, + math_ops.accumulate_n([tf_x[0]] * 5).eval()) def testInt(self): np.random.seed(54321) x = [np.random.randint(-128, 128, (5, 4, 3, 2, 1)) for _ in range(6)] tf_x = ops.convert_n_to_tensor(x) with self.test_session(use_gpu=True): - self.assertAllEqual(sum(x), av2.accumulate_n_v2(tf_x).eval()) - self.assertAllEqual(x[0] * 6, av2.accumulate_n_v2([tf_x[0]] * 6).eval()) + self.assertAllEqual(sum(x), math_ops.accumulate_n(tf_x).eval()) + self.assertAllEqual(x[0] * 6, + math_ops.accumulate_n([tf_x[0]] * 6).eval()) def testGrad(self): np.random.seed(42) @@ -55,9 +55,9 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with 
self.test_session(use_gpu=True) as sess: input_vars = [ variables.Variable(10.0 * np.random.random()) - for i in range(0, num_inputs) + for _ in range(0, num_inputs) ] - accum_n = av2.accumulate_n_v2(input_vars) + accum_n = math_ops.accumulate_n(input_vars) sess.run(variables.global_variables_initializer()) accum_n_grad = gradients.gradients(accum_n, input_vars) self.assertAllEqual( @@ -77,7 +77,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): ops.convert_to_tensor(x, dtype=dtypes_lib.float32) for x in random_arrays ] - tf_val = av2.accumulate_n_v2(random_tensors) + tf_val = math_ops.accumulate_n(random_tensors) np_val = random_arrays[0] for random_array in random_arrays[1:]: np_val += random_array @@ -86,7 +86,7 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): def testZeroArgs(self): with self.test_session(): with self.assertRaises(ValueError): - tf_val = av2.accumulate_n_v2([]) + tf_val = math_ops.accumulate_n([]) tf_val.eval() def testWrongShape(self): @@ -94,28 +94,28 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): with self.assertRaises(ValueError): a = variables.Variable(0.2) b = variables.Variable(0.1) - tf_val = av2.accumulate_n_v2([a, b], shape=[2, 2]) # Should be shape=[] + math_ops.accumulate_n([a, b], shape=[2, 2]) # Should be shape=[] def testIncompatibleShapes(self): with self.test_session(): with self.assertRaises(ValueError): a = variables.Variable(np.array([0.1, 0.2])) b = variables.Variable(np.array([[0.3], [0.4]])) - tf_val = av2.accumulate_n_v2([a, b]) + math_ops.accumulate_n([a, b]) def testWrongType(self): with self.test_session(): with self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) b = variables.Variable(0.1, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a, b], tensor_dtype=np.int32) + math_ops.accumulate_n([a, b], tensor_dtype=np.int32) def testWrongTypeOneInput(self): # Scenario that used to trigger a bug, even when testWrongType() worked with self.test_session(): with 
self.assertRaises(TypeError): a = variables.Variable(0.2, dtype=np.float32) - tf_val = av2.accumulate_n_v2([a], tensor_dtype=np.int32) + math_ops.accumulate_n([a], tensor_dtype=np.int32) if __name__ == "__main__": diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 2ae8b610da..ed11fe5348 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -161,14 +161,11 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_control_flow_ops from tensorflow.python.ops import gen_data_flow_ops from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gen_sparse_ops from tensorflow.python.ops import gen_spectral_ops -from tensorflow.python.ops import gen_state_ops -from tensorflow.python.ops import state_ops # go/tf-wildcard-import # pylint: disable=wildcard-import from tensorflow.python.ops.gen_math_ops import * @@ -2218,14 +2215,12 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): Optionally, pass `shape` and `tensor_dtype` for shape and type checking, otherwise, these are inferred. - NOTE: This operation is not differentiable and cannot be used if inputs depend - on trainable variables. Please use `tf.add_n` for such cases. + `tf.accumulate_n` performs the same operation as `tf.add_n`, but does not + wait for all of its inputs to be ready before beginning to sum. This can + save memory if inputs are ready at different times, since minimum temporary + storage is proportional to the output size rather than the inputs size. - Aside from differentiability, `tf.accumulate_n` performs the same operation as - `tf.add_n`, but does not wait for all of its inputs to be ready before - beginning to sum. 
This can save memory if inputs are ready at different times, - since minimum temporary storage is proportional to the output size rather than - the inputs size. + `accumulate_n` is differentiable (but wasn't previous to TensorFlow 1.7). For example: @@ -2235,8 +2230,9 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): tf.accumulate_n([a, b, a]) # [[7, 4], [6, 14]] # Explicitly pass shape and type - tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) # [[7, 4], - # [6, 14]] + tf.accumulate_n([a, b, a], shape=[2, 2], tensor_dtype=tf.int32) + # [[7, 4], + # [6, 14]] ``` Args: @@ -2252,20 +2248,17 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): ValueError: If `inputs` don't all have same shape and dtype or the shape cannot be inferred. """ - if context.in_eager_mode(): - # TODO(apassos) remove this once the lifetime of eager variables gets - # addressed. - raise ValueError("accumulate_n not supported in eager mode") + def _input_error(): + return ValueError( + "inputs must be a list of at least one Tensor with the " + "same dtype and shape") if not inputs or not isinstance(inputs, (list, tuple)): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs) if not all(isinstance(x, ops.Tensor) for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if not all(x.dtype == inputs[0].dtype for x in inputs): - raise ValueError("inputs must be a list of at least one Tensor with the " - "same dtype and shape") + raise _input_error() if shape is not None: shape = tensor_shape.as_shape(shape) else: @@ -2273,27 +2266,31 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): for input_tensor in inputs: if isinstance(input_tensor, ops.Tensor): shape = shape.merge_with(input_tensor.get_shape()) - if 
tensor_dtype is None: - tensor_dtype = inputs[0].dtype - if tensor_dtype != inputs[0].dtype: - raise TypeError("tensor_dtype is {}, but input is of type {}".format( - tensor_dtype, inputs[0].dtype)) - if len(inputs) == 1: + + # tensor_dtype is for safety only; operator's output type computed in C++ + if tensor_dtype is not None and tensor_dtype != inputs[0].dtype: + raise TypeError("tensor_dtype is {}, but input is of type {}" + .format(tensor_dtype, inputs[0].dtype)) + + if len(inputs) == 1 and name is None: return inputs[0] - with ops.name_scope(name, "AccumulateN", inputs) as name: - var = gen_state_ops._temporary_variable( - shape=tensor_shape.vector(0), dtype=tensor_dtype) - with ops.colocate_with(var): - zeros = array_ops.zeros_like(gen_control_flow_ops._merge(inputs)[0]) - zeros.set_shape(shape) - ref = state_ops.assign(var, zeros, validate_shape=False) - update_ops = [ - state_ops.assign_add(ref, input_tensor, use_locking=True) - for input_tensor in inputs - ] - with ops.control_dependencies(update_ops): - return gen_state_ops._destroy_temporary_variable( - ref, var_name=var.op.name, name=name) + elif len(inputs) == 1 and name is not None: + return array_ops.identity(inputs[0], name=name) + elif context.in_eager_mode(): + # TemporaryVariable not currently supported in eager mode; fall back + # onto AddN for now. + # TODO(frreiss) remove this once the lifetime of eager variables gets + # addressed + return add_n(inputs, name=name) + else: + return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access + + +@ops.RegisterGradient("AccumulateNV2") +def _accumulate_n_grad(op, grad): + """Same as gradient for AddN. Copies the gradient to all inputs.""" + # Not broadcasting. 
+ return [grad] * len(op.inputs) @tf_export("nn.sigmoid", "sigmoid") -- GitLab From feeb6c095ffa15b555298122840f0542ee986eac Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 26 Feb 2018 10:41:44 -0800 Subject: [PATCH 156/884] Deleting references to outdated `translate/seq2seq` tutorial. PiperOrigin-RevId: 187044697 --- tensorflow/tools/ci_build/builds/test_tutorials.sh | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tensorflow/tools/ci_build/builds/test_tutorials.sh b/tensorflow/tools/ci_build/builds/test_tutorials.sh index 67e5af5564..db335f14ca 100755 --- a/tensorflow/tools/ci_build/builds/test_tutorials.sh +++ b/tensorflow/tools/ci_build/builds/test_tutorials.sh @@ -277,17 +277,6 @@ test_ptb_word_lm() { fi } - -# ----------------------------------------------------------- -# translate_test -test_translate_test() { - LOG_FILE=$1 - - run_in_directory "${TEST_DIR}" "${LOG_FILE}" \ - "${TF_MODELS_DIR}/tutorials/rnn/translate/translate.py" --self_test=True -} - - # Run the tutorial tests test_runner "tutorial test-on-install" \ "${TUT_TESTS}" "${TF_BUILD_TUT_TEST_BLACKLIST}" "${LOGS_DIR}" -- GitLab From f487340e7628802b1b8c3b12747f3b9ce9254af3 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 26 Feb 2018 10:42:59 -0800 Subject: [PATCH 157/884] [XLA] Add kConvert to EffectiveOperandPrecisionIsOutputPrecision list. 
PiperOrigin-RevId: 187044921 --- tensorflow/compiler/xla/service/bfloat16_support.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/service/bfloat16_support.cc b/tensorflow/compiler/xla/service/bfloat16_support.cc index 3fd9e24601..07b4b14b5e 100644 --- a/tensorflow/compiler/xla/service/bfloat16_support.cc +++ b/tensorflow/compiler/xla/service/bfloat16_support.cc @@ -79,6 +79,7 @@ bool BFloat16Support::EffectiveOperandPrecisionIsOutputPrecision( case HloOpcode::kBroadcast: case HloOpcode::kClamp: case HloOpcode::kConcatenate: + case HloOpcode::kConvert: case HloOpcode::kCopy: case HloOpcode::kGetTupleElement: case HloOpcode::kMaximum: -- GitLab From c6312773dd5473fb47f73c88c2f5c8f41e20c0fa Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 10:52:05 -0800 Subject: [PATCH 158/884] [XLA] Do not recompute flattened sets inside layout assignment. Cache the flattened sets instead of recomputing them. This matters for large graphs, since we may request the flattened set thousands of times on the same instruction, and it may be fairly expensive to construct for large tuples. 
PiperOrigin-RevId: 187046642 --- .../compiler/xla/service/layout_assignment.cc | 31 ++++++++++++++----- .../compiler/xla/service/layout_assignment.h | 10 ++++++ 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment.cc b/tensorflow/compiler/xla/service/layout_assignment.cc index 0668f66051..4929300f7d 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.cc +++ b/tensorflow/compiler/xla/service/layout_assignment.cc @@ -192,17 +192,34 @@ LayoutConstraints::LayoutConstraints( } } +PointsToSet::BufferSet* LayoutConstraints::GetBufferSet( + const HloInstruction* instruction) const { + auto it = buffer_sets_cache_.find(instruction); + if (it != buffer_sets_cache_.end()) { + return it->second.get(); + } + auto& buffer_set = + buffer_sets_cache_ + .emplace(instruction, MakeUnique()) + .first->second; + const auto& points_to_set = points_to_analysis_.GetPointsToSet(instruction); + points_to_set.ForEachElement( + [&buffer_set](const ShapeIndex& /*index*/, + const PointsToSet::BufferList& buffers) { + buffer_set->insert(buffers.begin(), buffers.end()); + }); + return buffer_set.get(); +} + bool LayoutConstraints::OperandBufferForwarded( const HloInstruction* instruction, int64 operand_no) const { // The operand is potentially forwarded if the intersection of points-to sets // of the operand and the instruction is non-empty. 
- auto output_buffers = - points_to_analysis_.GetPointsToSet(instruction).CreateFlattenedSet(); - auto operand_buffers = - points_to_analysis_.GetPointsToSet(instruction->operand(operand_no)) - .CreateFlattenedSet(); - for (const LogicalBuffer* output_buffer : output_buffers) { - if (operand_buffers.count(output_buffer) > 0) { + PointsToSet::BufferSet* output_buffers = GetBufferSet(instruction); + PointsToSet::BufferSet* operand_buffers = + GetBufferSet(instruction->operand(operand_no)); + for (const LogicalBuffer* output_buffer : *output_buffers) { + if (operand_buffers->count(output_buffer) > 0) { return true; } } diff --git a/tensorflow/compiler/xla/service/layout_assignment.h b/tensorflow/compiler/xla/service/layout_assignment.h index 2901858448..7126cb50cf 100644 --- a/tensorflow/compiler/xla/service/layout_assignment.h +++ b/tensorflow/compiler/xla/service/layout_assignment.h @@ -38,6 +38,7 @@ limitations under the License. #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/platform/types.h" namespace xla { @@ -199,6 +200,11 @@ class LayoutConstraints { string ToString() const; private: + // Find a bufferset in the bufferset cache. This is useful since we can + // currently create the flattened buffer set for the same instruction many + // times, which is often slow. + PointsToSet::BufferSet* GetBufferSet(const HloInstruction* instruction) const; + // The set of BufferLayoutConstraints applied to the computation. std::unordered_map buffer_constraints_; @@ -221,6 +227,10 @@ class LayoutConstraints { // Array-shaped buffers which have not yet been constrained. 
std::set unconstrained_buffer_ids_; + mutable tensorflow::gtl::FlatMap> + buffer_sets_cache_; + HloComputation* computation_; }; -- GitLab From 616de9709cbd1ec2b06a036db628bed04b143560 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 10:54:31 -0800 Subject: [PATCH 159/884] Integrate ClusterResolvers with TPUEstimator. PiperOrigin-RevId: 187047094 --- tensorflow/contrib/cluster_resolver/BUILD | 1 + .../python/training/cluster_resolver.py | 23 +- .../python/training/cluster_resolver_test.py | 2 + .../python/training/gce_cluster_resolver.py | 3 + .../python/training/tpu_cluster_resolver.py | 150 +++++++++--- .../training/tpu_cluster_resolver_test.py | 226 +++++++++++++----- .../contrib/tpu/python/tpu/tpu_config.py | 31 +++ 7 files changed, 345 insertions(+), 91 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/BUILD b/tensorflow/contrib/cluster_resolver/BUILD index 6b03df2b8e..1a124eca36 100644 --- a/tensorflow/contrib/cluster_resolver/BUILD +++ b/tensorflow/contrib/cluster_resolver/BUILD @@ -110,5 +110,6 @@ tf_py_test( "//tensorflow/python:platform_test", "//tensorflow/python:training", ], + grpc_enabled = True, main = "python/training/tpu_cluster_resolver_test.py", ) diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py index b04822fa9d..1c480b2513 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver.py @@ -53,11 +53,16 @@ class ClusterResolver(object): raise NotImplementedError( 'cluster_spec is not implemented for {}.'.format(self)) + @abc.abstractmethod + def master(self): + """...""" + raise NotImplementedError('master is not implemented for {}.'.format(self)) + class SimpleClusterResolver(ClusterResolver): """Simple implementation of ClusterResolver that accepts a ClusterSpec.""" - def __init__(self, cluster_spec): + def 
__init__(self, cluster_spec, master=''): """Creates a SimpleClusterResolver from a ClusterSpec.""" super(SimpleClusterResolver, self).__init__() @@ -65,10 +70,18 @@ class SimpleClusterResolver(ClusterResolver): raise TypeError('cluster_spec must be a ClusterSpec.') self._cluster_spec = cluster_spec + if not isinstance(master, str): + raise TypeError('master must be a string.') + self._master = master + def cluster_spec(self): """Returns the ClusterSpec passed into the constructor.""" return self._cluster_spec + def master(self): + """Returns the master address to use when creating a session.""" + return self._master + class UnionClusterResolver(ClusterResolver): """Performs a union on underlying ClusterResolvers. @@ -87,9 +100,13 @@ class UnionClusterResolver(ClusterResolver): Raises: TypeError: If any argument is not a subclass of `ClusterResolvers`. + ValueError: If there are no arguments passed. """ super(UnionClusterResolver, self).__init__() + if not args: + raise ValueError('At least one ClusterResolver is required.') + for cluster_resolver in args: if not isinstance(cluster_resolver, ClusterResolver): raise TypeError('All arguments must be a sub-class of ' @@ -169,3 +186,7 @@ class UnionClusterResolver(ClusterResolver): merged_cluster[job_name].update(task_dict) return ClusterSpec(merged_cluster) + + def master(self): + """master returns the master address from the first cluster resolver.""" + return self._cluster_resolvers[0].master() diff --git a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py index dbfb77723c..d9c97d53eb 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/cluster_resolver_test.py @@ -234,5 +234,7 @@ class UnionClusterResolverTest(test.TestCase): self._verifyClusterSpecEquality(cluster_spec, expected_proto) +# TODO(saeta): Include tests 
for master resolution + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py index d6f2eced93..3f58241289 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/gce_cluster_resolver.py @@ -134,3 +134,6 @@ class GceClusterResolver(ClusterResolver): worker_list.sort() return ClusterSpec({self._job_name: worker_list}) + + def master(self): + return '' diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index a6a6e642e4..aeccf4c06b 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -23,7 +23,8 @@ from six.moves.urllib.request import Request from six.moves.urllib.request import urlopen from tensorflow.contrib.cluster_resolver.python.training.cluster_resolver import ClusterResolver -from tensorflow.python.training.server_lib import ClusterSpec +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat _GOOGLE_API_CLIENT_INSTALLED = True try: @@ -46,13 +47,23 @@ class TPUClusterResolver(ClusterResolver): req = Request('http://metadata/computeMetadata/v1/%s' % path, headers={'Metadata-Flavor': 'Google'}) resp = urlopen(req) - return resp.read() + return compat.as_bytes(resp.read()) + + def _shouldResolve(self): + if (self._tpu == compat.as_bytes('') or + self._tpu == compat.as_bytes('local') or + self._tpu.startswith(compat.as_bytes('/bns')) or + self._tpu.startswith(compat.as_bytes('grpc://'))): + return False + return True def __init__(self, - tpu_names, + tpu, zone=None, project=None, - job_name='tpu_worker', + job_name='worker', + 
coordinator_name='coordinator', + coordinator_address=None, credentials='default', service=None): """Creates a new TPUClusterResolver object. @@ -61,7 +72,11 @@ class TPUClusterResolver(ClusterResolver): for the IP addresses and ports of each Cloud TPU listed. Args: - tpu_names: A list of names of the target Cloud TPUs. + tpu: Either a string, or a list of strings corresponding to the TPUs to + use. If the single string is the empty string, the string 'local', or a + string that begins with 'grpc://' or '/bns', then it is assumed to not + correspond with a Cloud TPU and will instead be passed as the session + master and no ClusterSpec propagation will be done. zone: Zone where the TPUs are located. If omitted or empty, we will assume that the zone of the TPU is the same as the zone of the GCE VM, which we will try to discover from the GCE metadata service. @@ -69,6 +84,12 @@ class TPUClusterResolver(ClusterResolver): empty, we will try to discover the project name of the GCE VM from the GCE metadata service. job_name: Name of the TensorFlow job the TPUs belong to. + coordinator_name: The name to use for the coordinator. Set to None if the + coordinator should not be included in the computed ClusterSpec. + coordinator_address: The address of the coordinator (typically an ip:port + pair). If set to None, a TF server will be started. If coordinator_name + is None, a TF server will not be started even if coordinator_address is + None. credentials: GCE Credentials. If None, then we use default credentials from the oauth2client service: The GCE API object returned by the googleapiclient.discovery @@ -77,26 +98,36 @@ class TPUClusterResolver(ClusterResolver): Raises: ImportError: If the googleapiclient is not installed. + ValueError: If no TPUs are specified. 
""" + if isinstance(tpu, list): + if not tpu: + raise ValueError('At least one TPU must be specified.') + if len(tpu) != 1: + raise NotImplementedError( + 'Using multiple TPUs in a single session is not yet implemented') + tpu = tpu[0] + self._tpu = compat.as_bytes(tpu) # self._tpu is always bytes + self._job_name = job_name + self._credentials = credentials - if not project: - project = self._requestComputeMetadata('/project/project-id') + should_resolve = self._shouldResolve() - if not zone: - zone_path = self._requestComputeMetadata('/instance/zone') + if not project and should_resolve: + project = self._requestComputeMetadata('project/project-id') + + if not zone and should_resolve: + zone_path = self._requestComputeMetadata('instance/zone') zone = zone_path.split('/')[-1] self._project = project self._zone = zone - self._tpu_names = tpu_names - self._job_name = job_name - self._credentials = credentials - if credentials == 'default': + if credentials == 'default' and should_resolve: if _GOOGLE_API_CLIENT_INSTALLED: self._credentials = GoogleCredentials.get_application_default() - if service is None: + if service is None and should_resolve: if not _GOOGLE_API_CLIENT_INSTALLED: raise ImportError('googleapiclient must be installed before using the ' 'TPU cluster resolver') @@ -107,25 +138,41 @@ class TPUClusterResolver(ClusterResolver): else: self._service = service - def get_master(self): - """Get the ClusterSpec grpc master path. + self._coordinator_name = coordinator_name + if coordinator_name and not coordinator_address and should_resolve: + self._start_local_server() + else: + self._coordinator_address = coordinator_address + + def master(self): + """Get the Master string to be used for the session. + + In the normal case, this returns the grpc path (grpc://1.2.3.4:8470) of + first instance in the ClusterSpec returned by the cluster_spec function. 
- This returns the grpc path (grpc://1.2.3.4:8470) of first instance in the - ClusterSpec returned by the cluster_spec function. This is suitable for use - for the `master` argument in tf.Session() when you are using one TPU. + If a non-TPU name is used when constructing a TPUClusterResolver, that will + be returned instead (e.g. If the tpus argument's value when constructing + this TPUClusterResolver was 'grpc://10.240.1.2:8470', + 'grpc://10.240.1.2:8470' will be returned). Returns: - string, the grpc path of the first instance in the ClusterSpec. + string, the connection string to use when creating a session. Raises: ValueError: If none of the TPUs specified exists. """ + if not self._shouldResolve(): + return self._tpu + job_tasks = self.cluster_spec().job_tasks(self._job_name) if not job_tasks: raise ValueError('No TPUs exists with the specified names exist.') return 'grpc://' + job_tasks[0] + def get_master(self): + return self.master() + def cluster_spec(self): """Returns a ClusterSpec object based on the latest TPU information. @@ -134,17 +181,54 @@ class TPUClusterResolver(ClusterResolver): Returns: A ClusterSpec containing host information returned from Cloud TPUs. - """ - worker_list = [] - - for tpu_name in self._tpu_names: - full_name = 'projects/%s/locations/%s/nodes/%s' % ( - self._project, self._zone, tpu_name) - request = self._service.projects().locations().nodes().get(name=full_name) - response = request.execute() - if 'health' in response and response['health'] == 'HEALTHY': - instance_url = '%s:%s' % (response['ipAddress'], response['port']) - worker_list.append(instance_url) - - return ClusterSpec({self._job_name: worker_list}) + Raises: + RuntimeError: If the provided TPU is not healthy. 
+ """ + if not self._shouldResolve(): + return server_lib.ClusterSpec({}) + + full_name = 'projects/%s/locations/%s/nodes/%s' % ( + self._project, self._zone, compat.as_text(self._tpu)) + request = self._service.projects().locations().nodes().get(name=full_name) + response = request.execute() + + if 'health' in response and response['health'] != 'HEALTHY': + raise RuntimeError('TPU "%s" is unhealthy: "%s"' % (self._tpu, + response['health'])) + + if 'networkEndpoints' in response: + worker_list = [ + '%s:%s' % (endpoint['ipAddress'], endpoint['port']) + for endpoint in response['networkEndpoints'] + ] + else: + # Fall back to the deprecated response format + instance_url = '%s:%s' % (response['ipAddress'], response['port']) + worker_list = [instance_url] + + cluster_spec = {self._job_name: worker_list} + + if self._coordinator_address: + cluster_spec[self._coordinator_name] = [self._coordinator_address] + + return server_lib.ClusterSpec(cluster_spec) + + def _start_local_server(self): + address = self._requestComputeMetadata('instance/network-interfaces/0/ip') + self._server = server_lib.Server( + { + 'local': ['0.0.0.0:0'] + }, protocol='grpc', config=None, start=True) + # self._server.target is of the form: grpc://ipaddress:port + target = compat.as_bytes(self._server.target) + splits = target.split(compat.as_bytes(':')) + assert len(splits) == 3, self._server.target + assert splits[0] == compat.as_bytes('grpc'), self._server.target + self._coordinator_port = compat.as_text(splits[2]) + self._coordinator_address = '%s:%s' % ( + address, compat.as_text(self._coordinator_port)) + + def __deepcopy__(self, memo): + # TODO(b/73668574): Remove this once RunConfig avoids performing deepcopy. 
+ return self diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 4fd34629cf..6b4a155152 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -21,7 +21,7 @@ from __future__ import print_function from tensorflow.contrib.cluster_resolver.python.training.tpu_cluster_resolver import TPUClusterResolver from tensorflow.python.platform import test from tensorflow.python.training import server_lib - +from tensorflow.python.util import compat mock = test.mock @@ -50,10 +50,12 @@ class MockNodeClass(object): def mock_request_compute_metadata(cls, *args, **kwargs): del cls, kwargs # Unused. - if args[0] == '/project/project-id': + if args[0] == 'project/project-id': return 'test-project' - elif args[0] == '/instance/zone': + elif args[0] == 'instance/zone': return 'projects/test-project/locations/us-central1-c' + elif args[0] == 'instance/network-interfaces/0/ip': + return '10.128.1.2' return '' @@ -113,17 +115,26 @@ class TPUClusterResolverTest(test.TestCase): tpu_cluster_resolver = TPUClusterResolver( project=None, zone=None, - tpu_names=['test-tpu-1'], + tpu=['test-tpu-1'], credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } - """ - self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + job { + name: 'coordinator' + tasks { key: 0 value: '10.128.1.2:%s' } + } + job { + name: 'worker' + tasks { key: 0 value: '10.1.2.3:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) - def testSimpleSuccessfulRetrieval(self): + @mock.patch.object(TPUClusterResolver, 
'_requestComputeMetadata', + mock_request_compute_metadata) + def testRetrieveProjectAndZoneFromMetadataNoCoordinator(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', @@ -133,116 +144,217 @@ class TPUClusterResolverTest(test.TestCase): } tpu_cluster_resolver = TPUClusterResolver( - project='test-project', - zone='us-central1-c', - tpu_names=['test-tpu-1'], + project=None, + zone=None, + tpu=['test-tpu-1'], + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.1.2.3:8470' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testMultipleSuccessfulRetrieval(self): + def testSimpleSuccessfulRetrieval(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { 'ipAddress': '10.1.2.3', 'port': '8470', 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu=['test-tpu-1'], + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { name: 'tpu_worker' tasks { key: 0 value: '10.4.5.6:8470' } - tasks { key: 1 value: '10.1.2.3:8470' } } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.1.2.3:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) - def testHealthyTpuNodeRetrieval(self): + def testNewNetworkEndpointFormat(self): tpu_map = { 
'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-3': { - 'ipAddress': '10.7.8.9', - 'port': '8470', - 'health': 'UNHEALTHY' + 'health': 'HEALTHY', + 'networkEndpoints': [{ + 'ipAddress': '10.2.3.4', + 'port': 8470, + }] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1', 'test-tpu-3'], + tpu='test-tpu-1', + coordinator_address='10.128.1.5:10203', credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) actual_cluster_spec = tpu_cluster_resolver.cluster_spec() expected_proto = """ - job { - name: 'tpu_worker' - tasks { - key: 0 - value: '10.1.2.3:8470' - } - } + job { name: 'coordinator' tasks { key: 0 value: '10.128.1.5:10203' } } + job { name: 'worker' tasks { key: 0 value: '10.2.3.4:8470' } } """ self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) + self.assertEqual('grpc://10.2.3.4:8470', tpu_cluster_resolver.master()) - def testGetMasterMultipleEntries(self): + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', + mock_request_compute_metadata) + def testPodResolution(self): tpu_map = { 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { - 'ipAddress': '10.1.2.3', - 'port': '8470', - 'health': 'HEALTHY' - }, - 'projects/test-project/locations/us-central1-c/nodes/test-tpu-2': { - 'ipAddress': '10.4.5.6', - 'port': '8470', - 'health': 'HEALTHY' + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] + } + } + + tpu_cluster_resolver = TPUClusterResolver( 
+ tpu='test-tpu-1', + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'coordinator', + tasks { key: 0 value: '10.128.1.2:%s'} + } + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ % tpu_cluster_resolver._coordinator_port + self._verifyClusterSpecEquality(actual_cluster_spec, str(expected_proto)) + + def testPodResolutionNoCoordinator(self): + tpu_map = { + 'projects/test-project/locations/us-central1-c/nodes/test-tpu-1': { + 'health': + 'HEALTHY', + 'networkEndpoints': [ + { + 'ipAddress': '10.2.3.4', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.5', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.6', + 'port': 8470, + }, + { + 'ipAddress': '10.2.3.7', + 'port': 8470, + }, + ] } } tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=['test-tpu-2', 'test-tpu-1'], + tpu='test-tpu-1', + coordinator_name=None, credentials=None, service=self.mock_service_client(tpu_map=tpu_map)) - self.assertEqual('grpc://10.4.5.6:8470', tpu_cluster_resolver.get_master()) + + actual_cluster_spec = tpu_cluster_resolver.cluster_spec() + expected_proto = """ + job { + name: 'worker' + tasks { key: 0 value: '10.2.3.4:8470' } + tasks { key: 1 value: '10.2.3.5:8470' } + tasks { key: 2 value: '10.2.3.6:8470' } + tasks { key: 3 value: '10.2.3.7:8470' } + } + """ + self._verifyClusterSpecEquality(actual_cluster_spec, expected_proto) def testGetMasterNoEntries(self): tpu_map = {} + with self.assertRaises(ValueError): + TPUClusterResolver( + project='test-project', + zone='us-central1-c', + tpu=[], + coordinator_name=None, + credentials=None, + service=self.mock_service_client(tpu_map=tpu_map)) + + # TODO(saeta): Convert to parameterized test when included in OSS TF. 
+ def verifyShouldResolve(self, tpu, should_resolve): tpu_cluster_resolver = TPUClusterResolver( project='test-project', zone='us-central1-c', - tpu_names=[], + tpu=tpu, + coordinator_name=None, credentials=None, - service=self.mock_service_client(tpu_map=tpu_map)) - with self.assertRaises(ValueError): - tpu_cluster_resolver.get_master() + service=self.mock_service_client(tpu_map={})) + self.assertEqual(should_resolve, tpu_cluster_resolver._shouldResolve(), + "TPU: '%s'" % tpu) + + def testShouldResolveNoName(self): + self.verifyShouldResolve('', False) + + def testShouldResolveLocal(self): + self.verifyShouldResolve('local', False) + + def testShouldResolveGrpc(self): + self.verifyShouldResolve('grpc://10.1.2.3:8470', False) + + def testShouldResolveBns(self): + self.verifyShouldResolve('/bns/foo/bar', False) + + def testShouldResolveName(self): + self.verifyShouldResolve('mytpu', True) + + def testShouldResolveList(self): + self.verifyShouldResolve(['myothertpu'], True) + + def testShouldResolveGrpcPrefix(self): + self.verifyShouldResolve('grpctpu', True) + + def testNoCallComputeMetadata(self): + tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar') + self.assertEqual(compat.as_bytes('/bns/foo/bar'), + tpu_cluster_resolver.master()) + self.assertEqual( + server_lib.ClusterSpec({}), tpu_cluster_resolver.cluster_spec()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 6440702182..7ceb4069cf 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -26,6 +26,7 @@ import os import numpy as np from tensorflow.contrib.tpu.python.tpu import util as util_lib +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.platform import tf_logging as logging @@ -140,6 +141,7 @@ class RunConfig(run_config_lib.RunConfig): 
tpu_config=None, evaluation_master=None, master=None, + cluster=None, **kwargs): """Constructs a RunConfig. @@ -148,15 +150,26 @@ class RunConfig(run_config_lib.RunConfig): evaluation_master: a string. The address of the master to use for eval. Defaults to master if not set. master: a string. The address of the master to use for training. + cluster: a ClusterResolver **kwargs: keyword config parameters. + + Raises: + ValueError: if cluster is not None and the provided session_config has a + cluster_def already. """ super(RunConfig, self).__init__(**kwargs) self._tpu_config = tpu_config or TPUConfig() + self._cluster = cluster # If user sets master and/or evaluation_master explicilty, including empty # string '', take it. Otherwise, take the values set by parent class. if master is not None: + if cluster is not None: + raise ValueError('Both master and cluster are set.') self._master = master + else: + if cluster: + self._master = cluster.master() if evaluation_master is not None: self._evaluation_master = evaluation_master @@ -170,6 +183,20 @@ class RunConfig(run_config_lib.RunConfig): # evaluation_master to master, unless user overwrites it. self._evaluation_master = self._master + # Set the ClusterSpec to use + if cluster: + self._cluster_spec = cluster.cluster_spec() + + # Merge the cluster_def into the ConfigProto. 
+ if self._session_config is None: # pylint: disable=access-member-before-definition + self._session_config = config_pb2.ConfigProto(allow_soft_placement=True) + if self._session_config.HasField('cluster_def'): + raise ValueError( + 'You cannot provide a ClusterResolver and ' + 'session_config.cluster_def.') + self._session_config.cluster_def.CopyFrom( + self._cluster_spec.as_cluster_def()) + @property def evaluation_master(self): return self._evaluation_master @@ -182,6 +209,10 @@ class RunConfig(run_config_lib.RunConfig): def tpu_config(self): return self._tpu_config + @property + def cluster(self): + return self._cluster + def replace(self, **kwargs): if 'tpu_config' not in kwargs: return super(RunConfig, self).replace(**kwargs) -- GitLab From 3af99b657f23e52d9c291d488fa3bb2a68e90022 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Mon, 26 Feb 2018 10:59:54 -0800 Subject: [PATCH 160/884] Automated g4 rollback of changelist 185324160 PiperOrigin-RevId: 187048135 --- tensorflow/contrib/cmake/tf_core_cpu.cmake | 7 ++ tensorflow/contrib/makefile/Makefile | 1 + .../core/common_runtime/gpu/gpu_id_manager.cc | 50 +++++++-- .../core/common_runtime/gpu/gpu_id_manager.h | 14 ++- tensorflow/core/grappler/clusters/BUILD | 26 ++++- .../core/grappler/clusters/single_machine.cc | 17 ++- tensorflow/core/grappler/clusters/utils.cc | 71 ++++++++----- tensorflow/core/grappler/clusters/utils.h | 3 +- .../core/grappler/clusters/utils_test.cc | 100 ++++++++++++++++++ tensorflow/core/grappler/costs/BUILD | 1 + tensorflow/core/grappler/costs/utils.cc | 18 +++- 11 files changed, 262 insertions(+), 46 deletions(-) create mode 100644 tensorflow/core/grappler/clusters/utils_test.cc diff --git a/tensorflow/contrib/cmake/tf_core_cpu.cmake b/tensorflow/contrib/cmake/tf_core_cpu.cmake index 96ac60d095..a54cbff33b 100644 --- a/tensorflow/contrib/cmake/tf_core_cpu.cmake +++ b/tensorflow/contrib/cmake/tf_core_cpu.cmake @@ -63,6 +63,12 @@ file(GLOB_RECURSE tf_core_cpu_exclude_srcs 
"${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.h" "${tensorflow_source_dir}/tensorflow/core/grappler/inputs/trivial_test_graph_input_yielder.cc" ) +file(GLOB_RECURSE tf_core_cpu_whitelisted_srcs + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.h" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id.cc" + "${tensorflow_source_dir}/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc" +) +list(REMOVE_ITEM tf_core_cpu_exclude_srcs ${tf_core_cpu_whitelisted_srcs}) list(REMOVE_ITEM tf_core_cpu_srcs ${tf_core_cpu_exclude_srcs}) if (tensorflow_ENABLE_GPU) @@ -79,6 +85,7 @@ if (tensorflow_ENABLE_GPU) "${tensorflow_source_dir}/tensorflow/core/*test*.cc" ) list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_gpu_exclude_srcs}) + list(REMOVE_ITEM tf_core_gpu_srcs ${tf_core_cpu_whitelisted_srcs}) list(APPEND tf_core_cpu_srcs ${tf_core_gpu_srcs}) endif() diff --git a/tensorflow/contrib/makefile/Makefile b/tensorflow/contrib/makefile/Makefile index 81327407d4..05e8d9064b 100644 --- a/tensorflow/contrib/makefile/Makefile +++ b/tensorflow/contrib/makefile/Makefile @@ -677,6 +677,7 @@ endif # TEGRA TF_CC_SRCS := $(filter-out $(CORE_CC_EXCLUDE_SRCS), $(CORE_CC_ALL_SRCS)) # Add in any extra files that don't fit the patterns easily TF_CC_SRCS += tensorflow/contrib/makefile/downloads/fft2d/fftsg.c +TF_CC_SRCS += tensorflow/core/common_runtime/gpu/gpu_id_manager.cc # Also include the op and kernel definitions. TF_CC_SRCS += $(shell cat $(MAKEFILE_DIR)/tf_op_files.txt) PBT_CC_SRCS := $(shell cat $(MAKEFILE_DIR)/tf_pb_text_files.txt) diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc index 207afdca75..7dfff3269c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.cc @@ -18,7 +18,10 @@ limitations under the License. 
#include #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/mutex.h" namespace tensorflow { @@ -27,8 +30,8 @@ namespace { class TfToCudaGpuIdMap { public: static TfToCudaGpuIdMap* singleton() { - static auto* manager = new TfToCudaGpuIdMap; - return manager; + static auto* id_map = new TfToCudaGpuIdMap; + return id_map; } void InsertOrDie(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id) @@ -47,18 +50,41 @@ class TfToCudaGpuIdMap { } } - int32 FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { + CudaGpuId FindOrDie(TfGpuId tf_gpu_id) const LOCKS_EXCLUDED(mu_) { mutex_lock lock(mu_); + return FindOrDieLocked(tf_gpu_id); + } + + bool Find(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) const + LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + if (id_map_.count(tf_gpu_id.value()) == 0) return false; + *cuda_gpu_id = FindOrDieLocked(tf_gpu_id); + return true; + } + + private: + TfToCudaGpuIdMap() = default; + + CudaGpuId FindOrDieLocked(TfGpuId tf_gpu_id) const + EXCLUSIVE_LOCKS_REQUIRED(mu_) { auto result = id_map_.find(tf_gpu_id.value()); CHECK(result != id_map_.end()) << "Could not find the mapping for TfGpuId: " << tf_gpu_id; - return result->second; + return CudaGpuId(result->second); + } + + void TestOnlyReset() LOCKS_EXCLUDED(mu_) { + mutex_lock lock(mu_); + id_map_.clear(); } - private: using IdMapType = std::unordered_map; mutable mutex mu_; IdMapType id_map_ GUARDED_BY(mu_); + + friend class ::tensorflow::GpuIdManager; + TF_DISALLOW_COPY_AND_ASSIGN(TfToCudaGpuIdMap); }; } // namespace @@ -67,8 +93,20 @@ void GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, TfToCudaGpuIdMap::singleton()->InsertOrDie(tf_gpu_id, cuda_gpu_id); } +Status GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id) { + if 
(TfToCudaGpuIdMap::singleton()->Find(tf_gpu_id, cuda_gpu_id)) { + return Status::OK(); + } + return errors::NotFound("TF GPU device with id ", tf_gpu_id.value(), + " was not registered"); +} + CudaGpuId GpuIdManager::TfToCudaGpuId(TfGpuId tf_gpu_id) { - return CudaGpuId(TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id)); + return TfToCudaGpuIdMap::singleton()->FindOrDie(tf_gpu_id); +} + +void GpuIdManager::TestOnlyReset() { + TfToCudaGpuIdMap::singleton()->TestOnlyReset(); } } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h index 33925d8c36..2b54cc184c 100644 --- a/tensorflow/core/common_runtime/gpu/gpu_id_manager.h +++ b/tensorflow/core/common_runtime/gpu/gpu_id_manager.h @@ -17,15 +17,25 @@ limitations under the License. #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ID_MANAGER_H_ #include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/lib/core/status.h" namespace tensorflow { -// Class that manages the translation between Tensorflow GPU ids and CUDA GPU -// ids. +// Class that maintains a map from TfGpuId to CudaGpuId, and manages the +// translation between them. class GpuIdManager { public: + // Adds a mapping from tf_gpu_id to cuda_gpu_id. static void InsertTfCudaGpuIdPair(TfGpuId tf_gpu_id, CudaGpuId cuda_gpu_id); + + // Gets the cuda_gpu_id associated with tf_gpu_id. Returns OK if found. + static Status TfToCudaGpuId(TfGpuId tf_gpu_id, CudaGpuId* cuda_gpu_id); + // Similar to the above version, but returns the result, and checks fail if + // no result is found. static CudaGpuId TfToCudaGpuId(TfGpuId tf_gpu_id); + + // Clears the map. Used in unit tests only. 
+ static void TestOnlyReset(); }; } // namespace tensorflow diff --git a/tensorflow/core/grappler/clusters/BUILD b/tensorflow/core/grappler/clusters/BUILD index b8f8e13c9a..b653f902e8 100644 --- a/tensorflow/core/grappler/clusters/BUILD +++ b/tensorflow/core/grappler/clusters/BUILD @@ -1,7 +1,12 @@ licenses(["notice"]) # Apache 2.0 +load("//tensorflow:tensorflow.bzl", "if_cuda") load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cuda_library") +load( + "//tensorflow/core:platform/default/build_config_root.bzl", + "tf_cuda_tests_tags", +) filegroup( name = "all_files", @@ -26,13 +31,12 @@ config_setting( tf_cuda_library( name = "utils", srcs = ["utils.cc"], - hdrs = [ - "utils.h", - ], + hdrs = ["utils.h"], visibility = ["//visibility:public"], deps = [ "//third_party/eigen3", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", ] + select({ @@ -41,6 +45,21 @@ tf_cuda_library( }), ) +tf_cc_test( + name = "utils_test", + srcs = ["utils_test.cc"], + linkstatic = if_cuda(1, 0), + tags = tf_cuda_tests_tags(), + deps = [ + ":utils", + "//tensorflow/core:gpu_id", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], +) + cc_library( name = "cluster", srcs = ["cluster.cc"], @@ -104,6 +123,7 @@ cc_library( "//tensorflow/core:core_cpu_lib", "//tensorflow/core:direct_session", "//tensorflow/core:framework", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core/grappler:utils", "//tensorflow/core/kernels:ops_util", diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index cc7f418d49..8e236c9ee8 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -21,6 +21,8 @@ limitations under the License. 
#include "tensorflow/cc/training/queue_runner.h" #include "tensorflow/core/common_runtime/device.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/grappler/clusters/utils.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/kernels/ops_util.h" @@ -80,13 +82,24 @@ Status SingleMachine::Provision() { std::vector devices; TF_RETURN_IF_ERROR(session_->ListDevices(&devices)); - int gpu_id = 0; for (const auto& dev : devices) { DeviceProperties attr; if (dev.device_type() == "CPU") { attr = GetLocalCPUInfo(); } else if (dev.device_type() == "GPU") { - attr = GetLocalGPUInfo(gpu_id++); + DeviceNameUtils::ParsedName parsed; + if (!DeviceNameUtils::ParseFullName(dev.name(), &parsed)) { + return errors::InvalidArgument( + strings::StrCat("Not able to parse GPU device name: ", dev.name())); + } + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + return errors::Unavailable("Unknown TF GPU device with id ", + tf_gpu_id.value(), ": ", s.ToString()); + } + attr = GetLocalGPUInfo(cuda_gpu_id); } else if (dev.device_type().find("XLA") == string::npos) { // Filter out the fake XLA devices to avoid double counting the actual // hardware resources that are available. diff --git a/tensorflow/core/grappler/clusters/utils.cc b/tensorflow/core/grappler/clusters/utils.cc index 607e10e1ab..b54b34959a 100644 --- a/tensorflow/core/grappler/clusters/utils.cc +++ b/tensorflow/core/grappler/clusters/utils.cc @@ -27,6 +27,9 @@ limitations under the License. 
#include "include/libxsmm.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" @@ -66,36 +69,40 @@ DeviceProperties GetLocalCPUInfo() { return device; } -DeviceProperties GetLocalGPUInfo(int gpu_id) { +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id) { DeviceProperties device; device.set_type("GPU"); #if GOOGLE_CUDA cudaDeviceProp properties; - cudaError_t error = cudaGetDeviceProperties(&properties, gpu_id); - if (error == cudaSuccess) { - device.set_vendor("NVidia"); - device.set_model(properties.name); - device.set_frequency(properties.clockRate * 1e-3); - device.set_num_cores(properties.multiProcessorCount); - device.set_num_registers(properties.regsPerMultiprocessor); - // For compute capability less than 5, l1 cache size is configurable to - // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For - // compute capability larger or equal to 5, l1 cache (unified with texture - // cache) size is 24 KB. This number may need to be updated for future - // compute capabilities. - device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); - device.set_l2_cache_size(properties.l2CacheSize); - device.set_l3_cache_size(0); - device.set_shared_memory_size_per_multiprocessor( - properties.sharedMemPerMultiprocessor); - device.set_memory_size(properties.totalGlobalMem); - // 8 is the number of bits per byte. 2 is accounted for - // double data rate (DDR). 
- device.set_bandwidth(properties.memoryBusWidth / 8 * - properties.memoryClockRate * 2); + cudaError_t error = cudaGetDeviceProperties(&properties, cuda_gpu_id.value()); + if (error != cudaSuccess) { + device.set_type("UNKNOWN"); + LOG(ERROR) << "Failed to get device properties, error code: " << error; + return device; } + device.set_vendor("NVIDIA"); + device.set_model(properties.name); + device.set_frequency(properties.clockRate * 1e-3); + device.set_num_cores(properties.multiProcessorCount); + device.set_num_registers(properties.regsPerMultiprocessor); + // For compute capability less than 5, l1 cache size is configurable to + // either 16 KB or 48 KB. We use the initial configuration 16 KB here. For + // compute capability larger or equal to 5, l1 cache (unified with texture + // cache) size is 24 KB. This number may need to be updated for future + // compute capabilities. + device.set_l1_cache_size((properties.major < 5) ? 16 * 1024 : 24 * 1024); + device.set_l2_cache_size(properties.l2CacheSize); + device.set_l3_cache_size(0); + device.set_shared_memory_size_per_multiprocessor( + properties.sharedMemPerMultiprocessor); + device.set_memory_size(properties.totalGlobalMem); + // 8 is the number of bits per byte. 2 is accounted for + // double data rate (DDR). 
+ device.set_bandwidth(properties.memoryBusWidth / 8 * + properties.memoryClockRate * 2); + (*device.mutable_environment())["architecture"] = strings::StrCat(properties.major, ".", properties.minor); (*device.mutable_environment())["cuda"] = strings::StrCat(CUDA_VERSION); @@ -106,18 +113,26 @@ DeviceProperties GetLocalGPUInfo(int gpu_id) { } DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + if (device.type == "CPU") { return GetLocalCPUInfo(); } else if (device.type == "GPU") { if (device.has_id) { - return GetLocalGPUInfo(device.id); + TfGpuId tf_gpu_id(device.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else { - return GetLocalGPUInfo(0); + return GetLocalGPUInfo(CudaGpuId(0)); } } - DeviceProperties result; - result.set_type("UNKNOWN"); - return result; + return unknown; } } // end namespace grappler diff --git a/tensorflow/core/grappler/clusters/utils.h b/tensorflow/core/grappler/clusters/utils.h index 191942040a..df8e7dca44 100644 --- a/tensorflow/core/grappler/clusters/utils.h +++ b/tensorflow/core/grappler/clusters/utils.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ #define TENSORFLOW_GRAPPLER_CLUSTERS_UTILS_H_ +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" #include "tensorflow/core/protobuf/device_properties.pb.h" #include "tensorflow/core/util/device_name_utils.h" @@ -27,7 +28,7 @@ DeviceProperties GetLocalCPUInfo(); // Returns the DeviceProperties for the specified GPU attached to the server on // which grappler is running. 
-DeviceProperties GetLocalGPUInfo(int gpu_id); +DeviceProperties GetLocalGPUInfo(CudaGpuId cuda_gpu_id); // Returns the DeviceProperties of the specified device DeviceProperties GetDeviceInfo(const DeviceNameUtils::ParsedName& device); diff --git a/tensorflow/core/grappler/clusters/utils_test.cc b/tensorflow/core/grappler/clusters/utils_test.cc new file mode 100644 index 0000000000..74218adbac --- /dev/null +++ b/tensorflow/core/grappler/clusters/utils_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/grappler/clusters/utils.h" + +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/protobuf/device_properties.pb.h" + +namespace tensorflow { +namespace grappler { +namespace { + +TEST(UtilsTest, GetLocalGPUInfo) { + GpuIdManager::TestOnlyReset(); +#if GOOGLE_CUDA + LOG(INFO) << "CUDA is enabled."; + DeviceProperties properties; + + // Invalid CUDA GPU ID. + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Succeed when a valid CUDA GPU id was inserted. 
+ properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#else + LOG(INFO) << "CUDA is not enabled."; + DeviceProperties properties; + + properties = GetLocalGPUInfo(CudaGpuId(0)); + EXPECT_EQ("GPU", properties.type()); + + properties = GetLocalGPUInfo(CudaGpuId(100)); + EXPECT_EQ("GPU", properties.type()); +#endif +} + +TEST(UtilsTest, GetDeviceInfo) { + GpuIdManager::TestOnlyReset(); + DeviceNameUtils::ParsedName device; + DeviceProperties properties; + + // Invalid type. + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Cpu info. + device.type = "CPU"; + properties = GetDeviceInfo(device); + EXPECT_EQ("CPU", properties.type()); + + // No TF GPU id provided. + device.type = "GPU"; + device.has_id = false; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); +#if GOOGLE_CUDA + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif + + // TF to CUDA GPU id mapping entry doesn't exist. + device.has_id = true; + device.id = 0; + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + +#if GOOGLE_CUDA + // Invalid CUDA GPU id. + GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(0), CudaGpuId(100)); + properties = GetDeviceInfo(device); + EXPECT_EQ("UNKNOWN", properties.type()); + + // Valid CUDA GPU id. 
+ GpuIdManager::InsertTfCudaGpuIdPair(TfGpuId(1), CudaGpuId(0)); + device.id = 1; + properties = GetDeviceInfo(device); + EXPECT_EQ("GPU", properties.type()); + EXPECT_EQ("NVIDIA", properties.vendor()); +#endif +} + +} // namespace +} // namespace grappler +} // namespace tensorflow diff --git a/tensorflow/core/grappler/costs/BUILD b/tensorflow/core/grappler/costs/BUILD index 0fe01e9c9e..5336df1f51 100644 --- a/tensorflow/core/grappler/costs/BUILD +++ b/tensorflow/core/grappler/costs/BUILD @@ -142,6 +142,7 @@ tf_cuda_library( "//third_party/eigen3", "//tensorflow/core:framework", "//tensorflow/core:graph", + "//tensorflow/core:gpu_id", "//tensorflow/core:lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index 602f69f12e..076945d5c6 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -26,6 +26,8 @@ limitations under the License. 
#include "cuda/include/cudnn.h" #endif +#include "tensorflow/core/common_runtime/gpu/gpu_id.h" +#include "tensorflow/core/common_runtime/gpu/gpu_id_manager.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" @@ -200,17 +202,25 @@ std::vector FindInputFeatures( } DeviceProperties GetDeviceInfo(const string& device_str) { + DeviceProperties unknown; + unknown.set_type("UNKNOWN"); + DeviceNameUtils::ParsedName parsed; if (DeviceNameUtils::ParseFullName(device_str, &parsed)) { if (parsed.type == "GPU") { - return GetLocalGPUInfo(parsed.id); + TfGpuId tf_gpu_id(parsed.id); + CudaGpuId cuda_gpu_id; + Status s = GpuIdManager::TfToCudaGpuId(tf_gpu_id, &cuda_gpu_id); + if (!s.ok()) { + LOG(ERROR) << s; + return unknown; + } + return GetLocalGPUInfo(cuda_gpu_id); } else if (parsed.type == "CPU") { return GetLocalCPUInfo(); } } - DeviceProperties device; - device.set_type("UNKNOWN"); - return device; + return unknown; } DeviceProperties GetDeviceInfo(const CostGraphDef::Node& node) { -- GitLab From 387e0e51a3a8b6c7752bb198bf1fdfa1ebf12b60 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:08:54 -0800 Subject: [PATCH 161/884] Bring in `isbuiltin`. 
PiperOrigin-RevId: 187049824 --- tensorflow/python/util/tf_inspect.py | 5 +++++ tensorflow/python/util/tf_inspect_test.py | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/tensorflow/python/util/tf_inspect.py b/tensorflow/python/util/tf_inspect.py index c2fe6fc449..a7cead5555 100644 --- a/tensorflow/python/util/tf_inspect.py +++ b/tensorflow/python/util/tf_inspect.py @@ -149,6 +149,11 @@ def getsource(object): # pylint: disable=redefined-builtin return _inspect.getsource(tf_decorator.unwrap(object)[1]) +def isbuiltin(object): # pylint: disable=redefined-builtin + """TFDecorator-aware replacement for inspect.isbuiltin.""" + return _inspect.isbuiltin(tf_decorator.unwrap(object)[1]) + + def isclass(object): # pylint: disable=redefined-builtin """TFDecorator-aware replacement for inspect.isclass.""" return _inspect.isclass(tf_decorator.unwrap(object)[1]) diff --git a/tensorflow/python/util/tf_inspect_test.py b/tensorflow/python/util/tf_inspect_test.py index 8903e1156b..129408449e 100644 --- a/tensorflow/python/util/tf_inspect_test.py +++ b/tensorflow/python/util/tf_inspect_test.py @@ -144,6 +144,19 @@ def test_decorated_function_with_defaults(a, b=2, c='Hello'): self.assertEqual( expected, tf_inspect.getsource(test_decorated_function_with_defaults)) + def testIsBuiltin(self): + self.assertEqual( + tf_inspect.isbuiltin(TestDecoratedClass), + inspect.isbuiltin(TestDecoratedClass)) + self.assertEqual( + tf_inspect.isbuiltin(test_decorated_function), + inspect.isbuiltin(test_decorated_function)) + self.assertEqual( + tf_inspect.isbuiltin(test_undecorated_function), + inspect.isbuiltin(test_undecorated_function)) + self.assertEqual(tf_inspect.isbuiltin(range), inspect.isbuiltin(range)) + self.assertEqual(tf_inspect.isbuiltin(max), inspect.isbuiltin(max)) + def testIsClass(self): self.assertTrue(tf_inspect.isclass(TestDecoratedClass)) self.assertFalse(tf_inspect.isclass(test_decorated_function)) -- GitLab From 2513479d7b39235f9504ede2bf6f61cb78aae923 Mon Sep 
17 00:00:00 2001 From: Asim Shankar Date: Mon, 26 Feb 2018 11:10:20 -0800 Subject: [PATCH 162/884] eager/examples/resnet50: Fix breakage. PiperOrigin-RevId: 187050075 --- .../contrib/eager/python/examples/resnet50/resnet50_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py index c106ab0a06..65dcc53aab 100644 --- a/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py +++ b/tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py @@ -194,11 +194,11 @@ class ResNet50Benchmarks(tf.test.Benchmark): with tf.device(device): images, _ = random_batch(batch_size) for _ in xrange(num_burn): - model(images).cpu() + model(images, training=False).cpu() gc.collect() start = time.time() for _ in xrange(num_iters): - model(images).cpu() + model(images, training=False).cpu() self._report(label, start, num_iters, device, batch_size, data_format) def benchmark_eager_apply(self): -- GitLab From 5a9343b2ac7011593fb2ad2e7c82119181e608ec Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 11:12:04 -0800 Subject: [PATCH 163/884] Add support for parsing the "gather" HLO PiperOrigin-RevId: 187050345 --- .../compiler/xla/tools/parser/hlo_parser.cc | 37 +++++++++++++++++-- .../xla/tools/parser/hlo_parser_test.cc | 24 ++++++++++++ 2 files changed, 58 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc index cd2b843ad3..e60a5a4919 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser.cc @@ -1049,9 +1049,40 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder, HloInstruction::CreateDot(shape, operands[0], operands[1], dnum)); break; } - case HloOpcode::kGather: - // TODO(b/72710576): HLO parsing is not implemented for Gather. 
- return TokenError("HLO parsing is not implemented for Gather"); + case HloOpcode::kGather: { + optional> output_window_dims; + attrs["output_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &output_window_dims}; + optional> elided_window_dims; + attrs["elided_window_dims"] = { + /*required=*/true, AttrTy::kBracedInt64List, &elided_window_dims}; + optional> gather_dims_to_operand_dims; + attrs["gather_dims_to_operand_dims"] = {/*required=*/true, + AttrTy::kBracedInt64List, + &gather_dims_to_operand_dims}; + optional index_vector_dim; + attrs["index_vector_dim"] = {/*required=*/true, AttrTy::kInt64, + &index_vector_dim}; + optional> window_bounds; + attrs["window_bounds"] = {/*required=*/true, AttrTy::kBracedInt64List, + &window_bounds}; + + if (!ParseOperands(&operands, /*expected_size=*/2) || + !ParseAttributes(attrs)) { + return false; + } + + GatherDimensionNumbers dim_numbers = HloInstruction::MakeGatherDimNumbers( + /*output_window_dims=*/*output_window_dims, + /*elided_window_dims=*/*elided_window_dims, + /*gather_dims_to_operand_dims=*/*gather_dims_to_operand_dims, + /*index_vector_dim=*/*index_vector_dim); + + instruction = builder->AddInstruction(HloInstruction::CreateGather( + shape, /*operand=*/operands[0], /*gather_indices=*/operands[1], + dim_numbers, *window_bounds)); + break; + } case HloOpcode::kTrace: return TokenError(StrCat("parsing not yet implemented for op: ", HloOpcodeString(opcode))); diff --git a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc index b8c6b59204..863081d654 100644 --- a/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc +++ b/tensorflow/compiler/xla/tools/parser/hlo_parser_test.cc @@ -716,6 +716,18 @@ ENTRY %sparse_f32_r1 () -> f32[9] { ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6}) } +)" +}, +{ +"gather", +R"(HloModule StringifyGather + +ENTRY %Gather (input_tensor: f32[50,49,48,47,46], gather_indices: s64[10,9,8,7,5]) -> 
f32[10,9,8,7,30,29,28,27,26] { + %input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + %gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT %gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); @@ -860,6 +872,18 @@ ENTRY dot { ROOT dot = f32[2,3]{1,0} dot(a, b), lhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={0} } +)" +}, +{ +"gather", +R"(HloModule gather + +ENTRY Gather { + input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0) + gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1) + ROOT gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(input_tensor, gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26} +} + )" }, }); -- GitLab From 4fac98fbc731f742e0121fde561fcf6ed1203423 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Mon, 26 Feb 2018 11:13:09 -0800 Subject: [PATCH 164/884] Added const to Node* in various parts of the code base. 
PiperOrigin-RevId: 187050526 --- tensorflow/compiler/tf2xla/const_analysis.cc | 4 ++-- tensorflow/compiler/tf2xla/graph_compiler.cc | 2 +- .../core/common_runtime/shape_refiner.cc | 4 ++-- .../core/distributed_runtime/scheduler.cc | 18 +++++++++--------- .../core/distributed_runtime/scheduler.h | 6 +++--- tensorflow/core/graph/costmodel.cc | 2 +- tensorflow/core/graph/graph.cc | 2 +- tensorflow/core/graph/graph.h | 2 +- tensorflow/core/graph/graph_constructor.cc | 2 +- tensorflow/core/graph/graph_partition.cc | 6 +++--- tensorflow/core/graph/node_builder.cc | 6 +++--- tensorflow/core/graph/node_builder.h | 6 +++--- tensorflow/core/graph/optimizer_cse.cc | 16 ++++++++-------- 13 files changed, 38 insertions(+), 38 deletions(-) diff --git a/tensorflow/compiler/tf2xla/const_analysis.cc b/tensorflow/compiler/tf2xla/const_analysis.cc index 82923722c5..6f46532419 100644 --- a/tensorflow/compiler/tf2xla/const_analysis.cc +++ b/tensorflow/compiler/tf2xla/const_analysis.cc @@ -37,7 +37,7 @@ Status BackwardsConstAnalysis(const Graph& g, }; Status status; - std::unordered_set must_be_const; + std::unordered_set must_be_const; auto visit = [&status, &metadata_ops, &must_be_const, compile_time_const_args](Node* node) { if (!status.ok()) return; @@ -55,7 +55,7 @@ Status BackwardsConstAnalysis(const Graph& g, compile_time_const_args->at(index) = true; return; } - for (Node* pred : node->in_nodes()) { + for (const Node* pred : node->in_nodes()) { must_be_const.insert(pred); } return; diff --git a/tensorflow/compiler/tf2xla/graph_compiler.cc b/tensorflow/compiler/tf2xla/graph_compiler.cc index 058a1f2621..b20c1ffc7d 100644 --- a/tensorflow/compiler/tf2xla/graph_compiler.cc +++ b/tensorflow/compiler/tf2xla/graph_compiler.cc @@ -130,7 +130,7 @@ Status GraphCompiler::Compile() { // Set up inputs from outputs of previous nodes. 
for (auto* e : n->in_edges()) { if (e->IsControlEdge()) continue; - Node* src = e->src(); + const Node* src = e->src(); TF_RET_CHECK(src->id() < output_registry.size()); const NodeOutputs& src_outputs = output_registry[src->id()]; diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 45cdab98e0..2acaa31d32 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -211,14 +211,14 @@ Status ShapeRefiner::AddNode(const Node* node) { // For each 'input' of this node, fetch the corresponding shape // from 'input's InferenceContext, and store into a vector // indexed by 'node's input. - std::vector input_nodes(node->num_inputs()); + std::vector input_nodes(node->num_inputs()); std::vector input_shapes(node->num_inputs()); std::vector>> input_handle_shapes_and_types(node->num_inputs()); for (const Edge* e : node->in_edges()) { if (e->IsControlEdge()) continue; - Node* input = e->src(); + const Node* input = e->src(); auto it = node_to_context_.find(input); if (it == node_to_context_.end()) { return errors::FailedPrecondition( diff --git a/tensorflow/core/distributed_runtime/scheduler.cc b/tensorflow/core/distributed_runtime/scheduler.cc index 9dae5b3b92..8403636197 100644 --- a/tensorflow/core/distributed_runtime/scheduler.cc +++ b/tensorflow/core/distributed_runtime/scheduler.cc @@ -80,7 +80,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { std::vector pending_count(graph_->num_node_ids()); InitializePending(graph_, &pending_count); - std::deque queue; + std::deque queue; Node* srcNode = graph_->source_node(); queue.push_back(srcNode); (*asap_times)[srcNode->id()] = 0; @@ -92,7 +92,7 @@ Microseconds SlackAnalysis::ComputeAsap(std::vector* asap_times) { for (const Edge* out_edge : curr->out_edges()) { // The time needed for 'out' to get its input from 'curr'. 
Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && curr->assigned_device_name() != out->assigned_device_name()) { // Add an arbitrary 10microsecs for each copy. @@ -137,7 +137,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { } } - std::deque queue; + std::deque queue; Node* sinkNode = graph_->sink_node(); queue.push_back(sinkNode); (*alap_times)[sinkNode->id()] = 0; @@ -148,7 +148,7 @@ Microseconds SlackAnalysis::ComputeAlap(std::vector* alap_times) { for (const Edge* in_edge : curr->in_edges()) { // The time needed for 'curr' to get its input from 'src'. Microseconds copy_time(0); - Node* src = in_edge->src(); + const Node* src = in_edge->src(); if (!in_edge->IsControlEdge() && src->assigned_device_name() != curr->assigned_device_name()) { // TODO(yuanbyu): Use the real cost model @@ -236,7 +236,7 @@ Microseconds GreedyScheduler::ComputeSchedule( for (const Edge* out_edge : event.node->out_edges()) { Microseconds copy_time(0); - Node* out = out_edge->dst(); + const Node* out = out_edge->dst(); if (!out_edge->IsControlEdge() && event.node->assigned_device_name() != out->assigned_device_name()) { // TODO(yuanbyu): Use below with the real cost model. 
@@ -277,11 +277,11 @@ Microseconds GreedyScheduler::ComputeSchedule( return max_completion; } -Node* GreedyScheduler::GetNodeWithHighestPriority( - const std::vector& nodes) { - Node* curr_node = nullptr; +const Node* GreedyScheduler::GetNodeWithHighestPriority( + const std::vector& nodes) { + const Node* curr_node = nullptr; int64 curr_priority = kint64max; - for (Node* n : nodes) { + for (const Node* n : nodes) { if ((*priority_)[n->id()] < curr_priority) { curr_node = n; curr_priority = (*priority_)[n->id()]; diff --git a/tensorflow/core/distributed_runtime/scheduler.h b/tensorflow/core/distributed_runtime/scheduler.h index ef87b9834d..bf9d0d1bec 100644 --- a/tensorflow/core/distributed_runtime/scheduler.h +++ b/tensorflow/core/distributed_runtime/scheduler.h @@ -57,11 +57,11 @@ class GreedyScheduler { struct Sim { int degree_parallelism; int num_running; - std::vector ready_nodes; + std::vector ready_nodes; }; struct Event { - Node* node; + const Node* node; Microseconds time; bool is_completion; @@ -79,7 +79,7 @@ class GreedyScheduler { private: // Returns the ready node with the highest priority for a sim. 
- Node* GetNodeWithHighestPriority(const std::vector& nodes); + const Node* GetNodeWithHighestPriority(const std::vector& nodes); const DeviceSet* devices_; const CostModel* cost_model_; diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc index 4f3a6ec38c..1df45d9b89 100644 --- a/tensorflow/core/graph/costmodel.cc +++ b/tensorflow/core/graph/costmodel.cc @@ -427,7 +427,7 @@ static void AssignSizes(const Graph& g, CostModel* cost_model) { if (e->IsControlEdge()) { continue; } - Node* src = e->src(); + const Node* src = e->src(); // TODO(josh11b): Get an estimate from the Op Bytes size(1); diff --git a/tensorflow/core/graph/graph.cc b/tensorflow/core/graph/graph.cc index 9b56216f1f..a7af5e2312 100644 --- a/tensorflow/core/graph/graph.cc +++ b/tensorflow/core/graph/graph.cc @@ -339,7 +339,7 @@ Node* Graph::AddNode(const NodeDef& node_def, Status* status) { return node; } -Node* Graph::CopyNode(Node* node) { +Node* Graph::CopyNode(const Node* node) { DCHECK(!node->IsSource()); DCHECK(!node->IsSink()); Node* copy = AllocateNode(node->props_, node); diff --git a/tensorflow/core/graph/graph.h b/tensorflow/core/graph/graph.h index 9d96cd4654..cbd58b051a 100644 --- a/tensorflow/core/graph/graph.h +++ b/tensorflow/core/graph/graph.h @@ -422,7 +422,7 @@ class Graph { // Copies *node, which may belong to another graph, to a new node, // which is returned. Does not copy any edges. *this owns the // returned instance. - Node* CopyNode(Node* node); + Node* CopyNode(const Node* node); // Removes a node from this graph, including all edges from or to it. // *node should not be accessed after calling this function. 
diff --git a/tensorflow/core/graph/graph_constructor.cc b/tensorflow/core/graph/graph_constructor.cc index 0629ff32d0..627309078a 100644 --- a/tensorflow/core/graph/graph_constructor.cc +++ b/tensorflow/core/graph/graph_constructor.cc @@ -1271,7 +1271,7 @@ void CopyGraph(const Graph& src, Graph* dest) { dest->set_versions(src.versions()); // Copy the nodes - std::unordered_map + std::unordered_map node_map; // "Node in src" -> "Node in *dest" node_map[src.source_node()] = dest->source_node(); node_map[src.sink_node()] = dest->sink_node(); diff --git a/tensorflow/core/graph/graph_partition.cc b/tensorflow/core/graph/graph_partition.cc index add80eda23..17a174101b 100644 --- a/tensorflow/core/graph/graph_partition.cc +++ b/tensorflow/core/graph/graph_partition.cc @@ -123,8 +123,8 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { return false; } - Node* src = edge->src(); - Node* dst = edge->dst(); + const Node* src = edge->src(); + const Node* dst = edge->dst(); if (src->assigned_device_name() == dst->assigned_device_name()) { int src_port = edge->src_output(); int dst_port = edge->dst_input(); @@ -141,7 +141,7 @@ bool NeedSameDeviceSendRecv(const Edge* edge, const GraphInfo& info) { // Return true iff (dst, dst_input) is specified on host memory. 
bool IsDstInputOnHost(const Edge* edge, const GraphInfo& info) { - Node* dst = edge->dst(); + const Node* dst = edge->dst(); int dst_port = edge->dst_input(); if (info.device_types[dst->id()] != DEVICE_CPU) { if (edge->IsControlEdge()) return false; diff --git a/tensorflow/core/graph/node_builder.cc b/tensorflow/core/graph/node_builder.cc index 138952dcb3..114962c0e4 100644 --- a/tensorflow/core/graph/node_builder.cc +++ b/tensorflow/core/graph/node_builder.cc @@ -88,7 +88,7 @@ NodeBuilder& NodeBuilder::ControlInput(Node* src_node) { NodeBuilder& NodeBuilder::ControlInputs(gtl::ArraySlice src_nodes) { control_inputs_.insert(control_inputs_.end(), src_nodes.begin(), src_nodes.end()); - for (Node* src_node : src_nodes) { + for (const Node* src_node : src_nodes) { def_builder_.ControlInput(src_node->name()); } return *this; @@ -127,7 +127,7 @@ Status NodeBuilder::Finalize(Graph* graph, Node** created_node) const { return Status::OK(); } -void NodeBuilder::AddIndexError(Node* node, int i) { +void NodeBuilder::AddIndexError(const Node* node, int i) { if (node == nullptr) { errors_.emplace_back( strings::StrCat("Attempt to add nullptr Node to node with type ", @@ -140,7 +140,7 @@ void NodeBuilder::AddIndexError(Node* node, int i) { } } -bool NodeBuilder::GetOutputType(Node* node, int i, DataType* dt) { +bool NodeBuilder::GetOutputType(const Node* node, int i, DataType* dt) { bool error; *dt = SafeGetOutput(node, i, &error); if (error) AddIndexError(node, i); diff --git a/tensorflow/core/graph/node_builder.h b/tensorflow/core/graph/node_builder.h index 86647a49c1..f6b7b5674b 100644 --- a/tensorflow/core/graph/node_builder.h +++ b/tensorflow/core/graph/node_builder.h @@ -120,7 +120,7 @@ class NodeBuilder { const OpDef& op_def() const { return def_builder_.op_def(); } private: - static DataType SafeGetOutput(Node* node, int i, bool* error) { + static DataType SafeGetOutput(const Node* node, int i, bool* error) { if (node != nullptr && i >= 0 && i < node->num_outputs()) { 
*error = false; return node->output_type(i); @@ -131,11 +131,11 @@ class NodeBuilder { } // If SafeGetOutput indicates a range error, add it to errors_. - void AddIndexError(Node* node, int i); + void AddIndexError(const Node* node, int i); // Set *dt and returns true if i is in range. Combines // SafeGetOutput() and AddIndexError(). - bool GetOutputType(Node* node, int i, DataType* dt); + bool GetOutputType(const Node* node, int i, DataType* dt); NodeDefBuilder def_builder_; std::vector inputs_; diff --git a/tensorflow/core/graph/optimizer_cse.cc b/tensorflow/core/graph/optimizer_cse.cc index 6b452a1d5d..4073255db3 100644 --- a/tensorflow/core/graph/optimizer_cse.cc +++ b/tensorflow/core/graph/optimizer_cse.cc @@ -65,8 +65,8 @@ class OptimizerCSE { }; static void FillInputs(const Node* n, - gtl::InlinedVector* control_edges, - gtl::InlinedVector, 4>* in) { + gtl::InlinedVector* control_edges, + gtl::InlinedVector, 4>* in) { DCHECK_EQ(in->size(), n->num_inputs()); control_edges->clear(); for (const Edge* e : n->in_edges()) { @@ -96,8 +96,8 @@ size_t OptimizerCSE::NodeHash(const Node* n) { const int N_in = n->num_inputs(); strings::StrAppend(&str_to_hash, N_in); - gtl::InlinedVector control_edges; - gtl::InlinedVector, 4> in(N_in); + gtl::InlinedVector control_edges; + gtl::InlinedVector, 4> in(N_in); FillInputs(n, &control_edges, &in); for (const auto& edge : in) { strings::StrAppend(&str_to_hash, edge.first->id(), edge.second); @@ -147,10 +147,10 @@ bool OptimizerCSE::Equivalent(const Node* a, const Node* b, // Compare input sources if (a->num_inputs() != b->num_inputs()) return false; const int N_in = a->num_inputs(); - gtl::InlinedVector a_control_edges; - gtl::InlinedVector b_control_edges; - gtl::InlinedVector, 4> a_in(N_in); - gtl::InlinedVector, 4> b_in(N_in); + gtl::InlinedVector a_control_edges; + gtl::InlinedVector b_control_edges; + gtl::InlinedVector, 4> a_in(N_in); + gtl::InlinedVector, 4> b_in(N_in); FillInputs(a, &a_control_edges, &a_in); 
FillInputs(b, &b_control_edges, &b_in); if (a_in != b_in) return false; -- GitLab From 2d5db0213258da2e97276af7e6e9d85e9a1e2100 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 26 Feb 2018 11:22:43 -0800 Subject: [PATCH 165/884] TFLite: Ensures pointers to tensors won't be invalidated unless 16+ tensors are added. PiperOrigin-RevId: 187052100 --- tensorflow/contrib/lite/interpreter.cc | 13 +++---- tensorflow/contrib/lite/interpreter.h | 20 +++++++++++ tensorflow/contrib/lite/interpreter_test.cc | 40 +++++++++++++++++++++ 3 files changed, 64 insertions(+), 9 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 370e495527..0f5e17f0de 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -27,13 +27,6 @@ limitations under the License. #include "tensorflow/contrib/lite/nnapi_delegate.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" -namespace { - -// std::vector preallocation tuning. -constexpr const int kSlotsToReserve = 128; - -} // namespace - namespace tflite { // A trivial implementation of GraphInfo around the Interpreter. @@ -85,8 +78,8 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) context_.GetExecutionPlan = nullptr; // Reserve some space for the tensors to avoid excessive resizing. 
- tensors_.reserve(kSlotsToReserve); - nodes_and_registration_.reserve(kSlotsToReserve); + tensors_.reserve(kTensorsReservedCapacity); + nodes_and_registration_.reserve(kTensorsReservedCapacity); next_execution_plan_index_to_prepare_ = 0; UseNNAPI(false); } @@ -353,6 +346,7 @@ TfLiteStatus Interpreter::PrepareOpsStartingAt( TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpPrepare(registration, &node) == kTfLiteError) { return kTfLiteError; } @@ -430,6 +424,7 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + EnsureTensorsVectorCapacity(); if (OpInvoke(registration, &node) == kTfLiteError) { status = kTfLiteError; } diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index a9df2627e0..04c19644a0 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -265,6 +265,14 @@ class Interpreter { void set_model(const Model* model) { model_ = const_cast(model); } Model* model() const { return model_; } + // The default capacity of `tensors_` vector. + static constexpr int kTensorsReservedCapacity = 128; + // The capacity headroom of `tensors_` vector before calling ops' + // `prepare` and `invoke` function. In these functions, it's guaranteed + // allocating up to `kTensorsCapacityHeadroom` more tensors won't invalidate + // pointers to existing tensors. + static constexpr int kTensorsCapacityHeadroom = 16; + private: // Give 'op_reg' a chance to initialize itself using the contents of // 'buffer'. @@ -377,6 +385,18 @@ class Interpreter { static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context, TfLiteIntArray** execution_plan); + // Ensures that `tensors_` has at least `kTensorsCapacityHeadroom` extra + // capacity. 
Calling this function may invalidate existing pointers to + // tensors. After calling this function, adding `kTensorsCapacityHeadroom` + // more tensors won't invalidate the pointer to existing tensors. + void EnsureTensorsVectorCapacity() { + const int required_capacity = tensors_size() + kTensorsCapacityHeadroom; + if (required_capacity > tensors_.capacity()) { + tensors_.reserve(required_capacity); + context_.tensors = tensors_.data(); + } + } + // A pure C data structure used to communicate with the pure C plugin // interface. To avoid copying tensor metadata, this is also the definitive // structure to store tensors. diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 28c96e5dde..2e6727b323 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -561,6 +561,46 @@ TEST(BasicInterpreter, TestCustomErrorReporter) { ASSERT_EQ(reporter.calls, 1); } +TEST(InterpreterTensorsCapacityTest, TestWithinHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom, + &new_tensor_index); + EXPECT_EQ(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + ®istration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + +TEST(InterpreterTensorsCapacityTest, TestExceedHeadroom) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), + kTfLiteOk); + TfLiteRegistration registration = {nullptr, nullptr, nullptr, nullptr}; + registration.prepare = [](TfLiteContext* context, 
TfLiteNode* node) { + TfLiteTensor* first_tensor = context->tensors; + + int new_tensor_index; + context->AddTensors(context, Interpreter::kTensorsCapacityHeadroom + 1, + &new_tensor_index); + EXPECT_NE(first_tensor, context->tensors); + return kTfLiteOk; + }; + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + ®istration), + kTfLiteOk); + ASSERT_EQ(interpreter.AllocateTensors(), kTfLiteOk); +} + // Test fixture that allows playing with execution plans. It creates a two // node graph that can be executed in either [0,1] order or [1,0] order. // The CopyOp records when it is invoked in the class member run_order_ -- GitLab From 215af206b0cba3ac3d64fe01ec372c924662f97f Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 11:43:14 -0800 Subject: [PATCH 166/884] Actually expose smart_cond and smart_constant_value in tf.contrib.framework Also moves these methods into their own file in python/framework. This avoids further bloating control_flow_ops.py and makes the BUILD deps easier for a future change I'm working on. 
PiperOrigin-RevId: 187055501 --- tensorflow/contrib/framework/BUILD | 1 + tensorflow/contrib/framework/__init__.py | 7 +- tensorflow/python/BUILD | 26 ++++++ tensorflow/python/framework/smart_cond.py | 79 +++++++++++++++++++ .../python/framework/smart_cond_test.py | 66 ++++++++++++++++ tensorflow/python/layers/utils.py | 5 +- tensorflow/python/ops/control_flow_ops.py | 56 ------------- .../python/ops/control_flow_ops_test.py | 36 --------- 8 files changed, 180 insertions(+), 96 deletions(-) create mode 100644 tensorflow/python/framework/smart_cond.py create mode 100644 tensorflow/python/framework/smart_cond_test.py diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index 1accb319d2..50868c6d6c 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -63,6 +63,7 @@ tf_custom_op_py_library( "//tensorflow/python:platform", "//tensorflow/python:pywrap_tensorflow", "//tensorflow/python:script_ops", + "//tensorflow/python:smart_cond", "//tensorflow/python:sparse_tensor", "//tensorflow/python:state_ops", "//tensorflow/python:state_ops_gen", diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index deeb5bec79..8063250091 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -87,6 +87,9 @@ See the @{$python/contrib.framework} guide. 
@@get_placeholders +@@smart_cond +@@smart_constant_value + @@CriticalSection @@BoundedTensorSpec @@ -104,10 +107,10 @@ from tensorflow.contrib.framework.python.ops import * from tensorflow.python.framework.ops import prepend_name_scope from tensorflow.python.framework.ops import strip_name_scope +from tensorflow.python.framework.smart_cond import smart_cond +from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec -from tensorflow.python.ops.control_flow_ops import smart_cond -from tensorflow.python.ops.control_flow_ops import smart_constant_value from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['nest'] diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4c8c73548c..b0cb48c80c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -765,6 +765,31 @@ py_library( ], ) +py_library( + name = "smart_cond", + srcs = ["framework/smart_cond.py"], + srcs_version = "PY2AND3", + deps = [ + ":control_flow_ops", + ":tensor_util", + ], +) + +py_test( + name = "smart_cond_test", + size = "small", + srcs = ["framework/smart_cond_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":client_testlib", + ":constant_op", + ":framework_ops", + ":math_ops", + ":session", + ":smart_cond", + ], +) + py_library( name = "sparse_tensor", srcs = ["framework/sparse_tensor.py"], @@ -4091,6 +4116,7 @@ py_library( ":control_flow_ops", ":framework_for_generated_wrappers", ":platform", + ":smart_cond", ":tensor_util", ":util", ":variable_scope", diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py new file mode 100644 index 0000000000..f97bb01f54 --- /dev/null +++ b/tensorflow/python/framework/smart_cond.py @@ -0,0 +1,79 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""smart_cond and related utilties.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import control_flow_ops + + +def smart_cond(pred, true_fn=None, false_fn=None, name=None): + """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. + + If `pred` is a bool or has a constant value, we return either `true_fn()` + or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. + + Arguments: + pred: A scalar determining whether to return the result of `true_fn` or + `false_fn`. + true_fn: The callable to be performed if pred is true. + false_fn: The callable to be performed if pred is false. + name: Optional name prefix when using `tf.cond`. + + Returns: + Tensors returned by the call to either `true_fn` or `false_fn`. + + Raises: + TypeError: If `true_fn` or `false_fn` is not callable. 
+ """ + if not callable(true_fn): + raise TypeError("`true_fn` must be callable.") + if not callable(false_fn): + raise TypeError("`false_fn` must be callable.") + + pred_value = smart_constant_value(pred) + if pred_value is not None: + if pred_value: + return true_fn() + else: + return false_fn() + else: + return control_flow_ops.cond(pred, true_fn=true_fn, false_fn=false_fn, + name=name) + + +def smart_constant_value(pred): + """Return the bool value for `pred`, or None if `pred` had a dynamic value. + + Arguments: + pred: A scalar, either a Python bool or tensor. + + Returns: + True or False if `pred` has a constant boolean value, None otherwise. + + Raises: + TypeError: If `pred` is not a Tensor or bool. + """ + if isinstance(pred, bool): + pred_value = pred + elif isinstance(pred, ops.Tensor): + pred_value = tensor_util.constant_value(pred) + else: + raise TypeError("`pred` must be a Tensor or a Python bool.") + return pred_value diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py new file mode 100644 index 0000000000..b682506da0 --- /dev/null +++ b/tensorflow/python/framework/smart_cond_test.py @@ -0,0 +1,66 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.client import session +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond +from tensorflow.python.framework import test_util +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import googletest + + +@test_util.with_c_api +class SmartCondTest(test_util.TensorFlowTestCase): + + def testSmartCondTrue(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(2) + y = constant_op.constant(5) + z = smart_cond.smart_cond(True, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 5)) + self.assertEqual(z.eval(), 32) + + def testSmartCondFalse(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(4) + y = constant_op.constant(3) + z = smart_cond.smart_cond(False, lambda: math_ops.multiply(x, 16), + lambda: math_ops.multiply(y, 3)) + self.assertEqual(z.eval(), 9) + + def testSmartCondMissingArg1(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, false_fn=lambda: x) + + def testSmartCondMissingArg2(self): + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + with self.assertRaises(TypeError): + smart_cond.smart_cond(True, lambda: x) + + +if __name__ == "__main__": + googletest.main() diff --git a/tensorflow/python/layers/utils.py b/tensorflow/python/layers/utils.py index 484c6fc466..3b156c36a2 100644 --- a/tensorflow/python/layers/utils.py +++ b/tensorflow/python/layers/utils.py @@ -24,6 +24,7 @@ from tensorflow.python.eager import context from tensorflow.python.ops import variables from 
tensorflow.python.ops import control_flow_ops from tensorflow.python.framework import ops +from tensorflow.python.framework import smart_cond as smart_module from tensorflow.python.framework import tensor_util from tensorflow.python.util import nest @@ -201,7 +202,7 @@ def smart_cond(pred, true_fn=None, false_fn=None, name=None): if isinstance(pred, variables.Variable): return control_flow_ops.cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) - return control_flow_ops.smart_cond( + return smart_module.smart_cond( pred, true_fn=true_fn, false_fn=false_fn, name=name) @@ -228,7 +229,7 @@ def constant_value(pred): if isinstance(pred, variables.Variable): return None - return control_flow_ops.smart_constant_value(pred) + return smart_module.smart_constant_value(pred) def object_list_uid(object_list): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 8218e60b53..152578c0c6 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -23,7 +23,6 @@ See the @{$python/control_flow_ops} guide. @@no_op @@count_up_to @@cond -@@smart_cond @@case @@while_loop @@logical_and @@ -2128,61 +2127,6 @@ def cond(pred, # pylint: enable=redefined-outer-name -def smart_cond(pred, true_fn=None, false_fn=None, name=None): - """Return either `true_fn()` if predicate `pred` is true else `false_fn()`. - - If `pred` is a bool or has a constant value, we return either `true_fn()` - or `false_fn()`, otherwise we use `tf.cond` to dynamically route to both. - - Arguments: - pred: A scalar determining whether to return the result of `true_fn` or - `false_fn`. - true_fn: The callable to be performed if pred is true. - false_fn: The callable to be performed if pred is false. - name: Optional name prefix when using `tf.cond`. - - Returns: - Tensors returned by the call to either `true_fn` or `false_fn`. - - Raises: - TypeError: If `true_fn` or `false_fn` is not callable. 
- """ - if not callable(true_fn): - raise TypeError("`true_fn` must be callable.") - if not callable(false_fn): - raise TypeError("`false_fn` must be callable.") - - pred_value = smart_constant_value(pred) - if pred_value is not None: - if pred_value: - return true_fn() - else: - return false_fn() - else: - return cond(pred, true_fn=true_fn, false_fn=false_fn, name=name) - - -def smart_constant_value(pred): - """Return the bool value for `pred`, or None if `pred` had a dynamic value. - - Arguments: - pred: A scalar, either a Python bool or tensor. - - Returns: - True or False if `pred` has a constant boolean value, None otherwise. - - Raises: - TypeError: If `pred` is not a Tensor or bool. - """ - if isinstance(pred, bool): - pred_value = pred - elif isinstance(pred, ops.Tensor): - pred_value = tensor_util.constant_value(pred) - else: - raise TypeError("`pred` must be a Tensor or a Python bool.") - return pred_value - - def _resource_safe_shape(t): """Returns the shape of t or the variable it points to.""" if t.dtype == dtypes.resource: diff --git a/tensorflow/python/ops/control_flow_ops_test.py b/tensorflow/python/ops/control_flow_ops_test.py index adc8c51e11..f22f3059d1 100644 --- a/tensorflow/python/ops/control_flow_ops_test.py +++ b/tensorflow/python/ops/control_flow_ops_test.py @@ -349,42 +349,6 @@ class SwitchTestCase(test_util.TensorFlowTestCase): self.assertEquals(grad_x_false.eval(), 0.) 
-@test_util.with_c_api -class SmartCondTest(test_util.TensorFlowTestCase): - - def testSmartCondTrue(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(2) - y = constant_op.constant(5) - z = control_flow_ops.smart_cond(True, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 5)) - self.assertEqual(z.eval(), 32) - - def testSmartCondFalse(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(4) - y = constant_op.constant(3) - z = control_flow_ops.smart_cond(False, lambda: math_ops.multiply(x, 16), - lambda: math_ops.multiply(y, 3)) - self.assertEqual(z.eval(), 9) - - def testSmartCondMissingArg1(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, false_fn=lambda: x) - - def testSmartCondMissingArg2(self): - with ops.Graph().as_default(): - with session.Session(): - x = constant_op.constant(1) - with self.assertRaises(TypeError): - control_flow_ops.smart_cond(True, lambda: x) - - @test_util.with_c_api class CondTest(test_util.TensorFlowTestCase): -- GitLab From 8525e1dbdcab467e545f09ecf60f0be11b48cd28 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 11:50:49 -0800 Subject: [PATCH 167/884] Add the internal module name prefix to the white list. PiperOrigin-RevId: 187056701 --- tensorflow/contrib/py2tf/impl/config.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/impl/config.py b/tensorflow/contrib/py2tf/impl/config.py index c90e85c96b..bdbc6663dd 100644 --- a/tensorflow/contrib/py2tf/impl/config.py +++ b/tensorflow/contrib/py2tf/impl/config.py @@ -31,12 +31,16 @@ PYTHON_LITERALS = { DEFAULT_UNCOMPILED_MODULES = set(( ('tensorflow',), (utils.__name__,), + + # All of tensorflow's subpackages. Unlike the root tf module, they don't + # have well-known names. 
Not refering to the module directly to avoid + # circular imports. + (utils.__name__[:-len('.contrib.py2tf.utils')],), )) NO_SIDE_EFFECT_CONSTRUCTORS = set(('tensorflow',)) # TODO(mdan): Also allow controlling the generated names (for testability). -# TODO(mdan): Make sure copybara renames the reference below. COMPILED_IMPORT_STATEMENTS = ( 'from __future__ import print_function', 'import tensorflow as tf', -- GitLab From 5caeb37e5d4314b702cf660db35b93a3bfc29819 Mon Sep 17 00:00:00 2001 From: Anna R Date: Mon, 26 Feb 2018 11:52:26 -0800 Subject: [PATCH 168/884] Internal change. PiperOrigin-RevId: 187056963 --- tensorflow/tools/api/tests/api_compatibility_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/api/tests/api_compatibility_test.py b/tensorflow/tools/api/tests/api_compatibility_test.py index c1e09cc531..2a784973e1 100644 --- a/tensorflow/tools/api/tests/api_compatibility_test.py +++ b/tensorflow/tools/api/tests/api_compatibility_test.py @@ -165,7 +165,7 @@ class ApiCompatibilityTest(test.TestCase): logging.error('%d differences found between API and golden.', diff_count) messages = verbose_diffs if verbose else diffs for i in range(diff_count): - logging.error('Issue %d\t: %s', i + 1, messages[i]) + print('Issue %d\t: %s' % (i + 1, messages[i]), file=sys.stderr) if update_goldens: # Write files if requested. -- GitLab From 0898ee302cb20d9fce50dae4f484816a2dc2d0e2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 11:57:30 -0800 Subject: [PATCH 169/884] Use optimized ops to handle GPU memory swapping: this avoids the need for 2 pairs of extra _send/_recv nodes which speeds things up a bit. This also ensures that performance doesn't depend on the recv scheduling built in TF, which isn't always optimal. 
PiperOrigin-RevId: 187057831 --- tensorflow/core/grappler/optimizers/BUILD | 36 +++++++- .../optimizers/gpu_swapping_kernels.cc | 88 +++++++++++++++++++ .../grappler/optimizers/gpu_swapping_ops.cc | 58 ++++++++++++ .../grappler/optimizers/memory_optimizer.cc | 9 +- .../optimizers/memory_optimizer_test.cc | 65 +++++++++++--- tensorflow/core/grappler/utils/BUILD | 1 + .../core/grappler/utils/grappler_test.cc | 17 ++++ .../core/grappler/utils/grappler_test.h | 3 + 8 files changed, 258 insertions(+), 19 deletions(-) create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc create mode 100644 tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 50ba48ea7a..908e58bcc7 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -1,6 +1,8 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") +load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") +load("//tensorflow:tensorflow.bzl", "tf_kernel_library") filegroup( name = "all_files", @@ -282,18 +284,48 @@ tf_cc_test( ], ) +tf_kernel_library( + name = "gpu_swapping_kernels", + srcs = [ + "gpu_swapping_kernels.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "gpu_swapping_ops", + srcs = [ + "gpu_swapping_ops.cc", + ], + deps = [ + "//tensorflow/core:core_cpu_base", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + ], + alwayslink = 1, +) + cc_library( name = "memory_optimizer", - srcs = ["memory_optimizer.cc"], + srcs = [ + "memory_optimizer.cc", + ], hdrs = [ "memory_optimizer.h", ], visibility = ["//visibility:public"], deps = [ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", + "//tensorflow/core:lib", 
"//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", "//tensorflow/core/grappler:grappler_item", @@ -307,7 +339,7 @@ cc_library( ], ) -tf_cc_test( +tf_cc_test_gpu( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], deps = [ diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc new file mode 100644 index 0000000000..1820af6844 --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_kernels.cc @@ -0,0 +1,88 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Op kernels used to swap data in and out of GPU memory. 
+ +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +class CopyFromGpuToHostKernel : public AsyncOpKernel { + public: + explicit CopyFromGpuToHostKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, !ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromGpuToHost kernel " + "must reside on the device."), + done); + + AllocatorAttributes alloc_attrs; + alloc_attrs.set_gpu_compatible(true); + alloc_attrs.set_on_host(true); + Tensor* output; + OP_REQUIRES_OK_ASYNC( + ctx, ctx->allocate_output(0, input.shape(), &output, alloc_attrs), + done); + + ctx->op_device_context()->CopyDeviceTensorToCPU( + &input, "CopyFromGpuToHost", static_cast(ctx->device()), + output, [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( + Name("_CopyFromGpuToHost").Device(DEVICE_GPU).HostMemory("output"), + CopyFromGpuToHostKernel); + +class CopyFromHostToGpuKernel : public AsyncOpKernel { + public: + explicit CopyFromHostToGpuKernel(OpKernelConstruction* context) + : AsyncOpKernel(context) {} + void ComputeAsync(OpKernelContext* ctx, DoneCallback done) override { + const Tensor& input = ctx->input(0); + OP_REQUIRES_ASYNC( + ctx, ctx->input_alloc_attr(0).on_host(), + errors::Internal("The input tensor to the _CopyFromHostToGpu kernel " + "must reside on the host."), + done); + + Tensor* output; + OP_REQUIRES_OK_ASYNC(ctx, ctx->allocate_output(0, input.shape(), &output), + done); + + ctx->op_device_context()->CopyCPUTensorToDevice( + &input, static_cast(ctx->device()), output, + [ctx, done](const Status& s) { + ctx->SetStatus(s); + done(); + }); + } +}; + +REGISTER_KERNEL_BUILDER( + 
Name("_CopyFromHostToGpu").Device(DEVICE_GPU).HostMemory("input"), + CopyFromHostToGpuKernel); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc new file mode 100644 index 0000000000..46828346da --- /dev/null +++ b/tensorflow/core/grappler/optimizers/gpu_swapping_ops.cc @@ -0,0 +1,58 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Definition for the ops used to swap data in and out of GPU memory. + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +namespace { + +// The _CopyFromGpuToHost op copies its input tensor to the host. The input must +// reside on GPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromGpuToHost") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from gpu to the host."); + +// The _CopyFromHostToGpu op copies its input tensor from the host to the GPU. 
+// The input must reside on CPU. The op itself must be placed on GPU. +REGISTER_OP("_CopyFromHostToGpu") + .Input("input: T") + .Output("output: T") + .Attr("T: type") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, c->input(0)); + auto* handle_data = c->input_handle_shapes_and_types(0); + if (handle_data != nullptr) { + c->set_output_handle_shapes_and_types(0, *handle_data); + } + return Status::OK(); + }) + .Doc("Copies the input tensor from the host to the GPU."); + +} // namespace +} // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index dec4f04a1c..694139fa50 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -720,18 +720,19 @@ Status BuildSwapPair(NodeDef* node, int input_to_swap, // Force the tensor to be copied to cpu. NodeDef* swap_out_node = graph->add_node(); swap_out_node->set_name(swap_out_name); - swap_out_node->set_op("Identity"); - swap_out_node->set_device("/device:CPU:0"); + swap_out_node->set_op("_CopyFromGpuToHost"); // Force the tensor to be restored to the device. NodeDef* swap_in_node = graph->add_node(); swap_in_node->set_name(swap_in_name); - swap_in_node->set_op("Identity"); + swap_in_node->set_op("_CopyFromHostToGpu"); *swap_in_node->add_input() = swap_out_node->name(); - // Colocate the swap_in_ node with the node itself. + // Colocate the swap_out_ and swap_in_ nodes with the node itself. 
+ swap_out_node->set_device(node->device()); swap_in_node->set_device(node->device()); string coloc_group = strings::StrCat("loc@", tensor_to_swap); + (*swap_out_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*swap_in_node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); (*node->mutable_attr())["_class"].mutable_list()->add_s(coloc_group); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc index 5d7913e0c0..9595936e9e 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer_test.cc @@ -221,16 +221,20 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { // Build a simple graph with an op that's marked for swapping. tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output a = ops::Variable(s.WithOpName("a"), {10, 10}, DT_FLOAT); - Output b = ops::AddN(s.WithOpName("b"), {a}); - Output c = ops::AddN(s.WithOpName("c"), {b}); - Output d = ops::AddN(s.WithOpName("d"), {c}); - Output e = ops::AddN(s.WithOpName("e"), {b, d}); + Output a = + ops::Variable(s.WithOpName("a").WithDevice("/gpu:0"), {10, 10}, DT_FLOAT); + Output b = ops::AddN(s.WithOpName("b").WithDevice("/gpu:0"), {a}); + Output c = ops::AddN(s.WithOpName("c").WithDevice("/gpu:0"), {b}); + Output d = ops::AddN(s.WithOpName("d").WithDevice("/gpu:0"), {c}); + Output e = ops::AddN(s.WithOpName("e").WithDevice("/gpu:0"), {b, d}); + + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {10, 10}); + Output init = ops::Assign(s.WithOpName("init"), a, constant); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - EXPECT_EQ(5, item.graph.node_size()); + EXPECT_EQ(7, item.graph.node_size()); EXPECT_EQ(NodeName(e.name()), item.graph.node(4).name()); AttrValue& val = (*item.graph.mutable_node(4)->mutable_attr())["_swap_to_host"]; @@ -243,32 +247,43 @@ TEST_F(MemoryOptimizerTest, SimpleSwapping) { Status 
status = optimizer.Optimize(cluster.get(), item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(7, output.node_size()); - const NodeDef& new_e = output.node(4); + EXPECT_EQ(9, output.node_size()); + const NodeDef& new_e = output.node(6); EXPECT_EQ(NodeName(e.name()), new_e.name()); EXPECT_EQ(2, new_e.input_size()); EXPECT_EQ(NodeName(d.name()), new_e.input(1)); EXPECT_EQ("swap_in_e_0", new_e.input(0)); - const NodeDef& swap_out = output.node(5); + const NodeDef& swap_out = output.node(7); EXPECT_EQ("swap_out_e_0", swap_out.name()); + EXPECT_EQ("_CopyFromGpuToHost", swap_out.op()); - const NodeDef& swap_in = output.node(6); + const NodeDef& swap_in = output.node(8); EXPECT_EQ("swap_in_e_0", swap_in.name()); + EXPECT_EQ("_CopyFromHostToGpu", swap_in.op()); EXPECT_EQ(NodeName(b.name()), swap_out.input(0)); EXPECT_EQ(NodeName(swap_out.name()), swap_in.input(0)); EXPECT_EQ("^c", swap_in.input(1)); - const NodeDef& new_c = output.node(2); + const NodeDef& new_c = output.node(4); EXPECT_EQ(NodeName(c.name()), new_c.name()); EXPECT_EQ("^swap_out_e_0", new_c.input(1)); // Run the optimizer a second time to ensure it's idempotent. 
- item.graph.Swap(&output); - status = optimizer.Optimize(cluster.get(), item, &output); + GrapplerItem item_copy(item, std::move(output)); + status = optimizer.Optimize(cluster.get(), item_copy, &output); TF_EXPECT_OK(status); + +#if GOOGLE_CUDA + item.fetch = {"e"}; + item.init_ops = {init.name()}; + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, SwappingHeuristics) { @@ -287,9 +302,13 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { Output h = ops::Exp(s.WithOpName("h").WithDevice("/gpu:0"), c); Output i = ops::Log(s.WithOpName("i").WithDevice("/gpu:0"), d); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e", "f", "g", "h", "i"}; + item.init_ops = {init.name()}; std::unique_ptr cluster(CreateVirtualCluster()); @@ -308,6 +327,15 @@ TEST_F(MemoryOptimizerTest, SwappingHeuristics) { EXPECT_EQ("axis", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + for (int i = 0; i < item.fetch.size(); ++i) { + test::ExpectTensorEqual(tensors_expected[i], tensors[i]); + } +#endif } TEST_F(MemoryOptimizerTest, UnswappableInputs) { @@ -325,9 +353,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { Output e = ops::Concat(s.WithOpName("e").WithDevice("/gpu:0"), {b, c, d}, axis); + Output constant = ops::Const(s.WithOpName("constant"), 0.0f, {128, 128, 8}); + Output init = ops::Assign(s.WithOpName("init"), v, constant); + GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); item.fetch = {"e"}; + item.init_ops = {init.name()}; std::unique_ptr 
cluster(CreateVirtualCluster()); @@ -344,6 +376,13 @@ TEST_F(MemoryOptimizerTest, UnswappableInputs) { EXPECT_EQ("^swap_out_d_2", node.input(4)); } } + +#if GOOGLE_CUDA + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +#endif } TEST_F(MemoryOptimizerTest, AccumulationRewrites) { diff --git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 0a9dbe22cf..5d32609434 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -142,6 +142,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core:test", + "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", ], ) diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index fed46c05fb..fef8e97b6e 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -35,6 +35,23 @@ std::vector GrapplerTest::EvaluateNodes( return output_tensors; } +std::vector GrapplerTest::EvaluateFetchNodes(const GrapplerItem& item) { + SessionOptions options; + std::unique_ptr session(NewSession(options)); + TF_CHECK_OK(session->Create(item.graph)); + RunOptions run_options; + if (!item.init_ops.empty()) { + std::vector dummy; + TF_CHECK_OK( + session->Run(run_options, {}, {}, item.init_ops, &dummy, nullptr)); + } + std::vector output_tensors; + TF_CHECK_OK( + session->Run(run_options, {}, item.fetch, {}, &output_tensors, nullptr)); + TF_CHECK_OK(session->Close()); + return output_tensors; +} + void GrapplerTest::AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph) { auto* node = graph->add_node(); diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index 
042b616aa4..fd6809b6e2 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -30,6 +31,8 @@ class GrapplerTest : public ::testing::Test { std::vector EvaluateNodes(const GraphDef& graph, const std::vector& node_names); + std::vector EvaluateFetchNodes(const GrapplerItem& item); + void AddNode(const string& name, const string& op, const std::vector& inputs, GraphDef* graph); -- GitLab From 33a447a3df13559d746b86e2446ee9174099cd3b Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 12:10:01 -0800 Subject: [PATCH 170/884] Fix bug calling gradients_function inside custom_gradient PiperOrigin-RevId: 187059871 --- tensorflow/python/eager/backprop_test.py | 13 +++++++++++++ tensorflow/python/eager/custom_gradient.py | 9 ++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 734558dee2..48fd170764 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -115,6 +115,19 @@ class BackpropTest(test.TestCase): with self.assertRaises(RuntimeError): backprop.gradients_function(f)(constant_op.constant(1.0)) + def testGradientsFunctionInCustomGradient(self): + + @custom_gradient.custom_gradient + def f(x): + (y,) = backprop.gradients_function(lambda x: x * x)(x) + + def grad(dy): + return [2 * dy] + + return y, grad + + self.assertAllEqual(f(1.0), 2.0) + def testImplicitGradOverEmbeddingLookup(self): batch_size = 8 embedding_size = 512 diff --git a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py index 05460ff996..fb932a9372 100644 --- 
a/tensorflow/python/eager/custom_gradient.py +++ b/tensorflow/python/eager/custom_gradient.py @@ -71,11 +71,10 @@ def custom_gradient(f): input_tensors = [tf_ops.convert_to_tensor(x) for x in args] - with tape.stop_recording(): - result, grad_fn = f(*args, **kwargs) - flat_result = nest.flatten(result) - # TODO(apassos) consider removing the identity below. - flat_result = [gen_array_ops.identity(x) for x in flat_result] + result, grad_fn = f(*args, **kwargs) + flat_result = nest.flatten(result) + # TODO(apassos) consider removing the identity below. + flat_result = [gen_array_ops.identity(x) for x in flat_result] def actual_grad_fn(*outputs): return nest.flatten(grad_fn(*outputs)) -- GitLab From cfb6e1628cf752f6cb1d844b8bba3a2cfc98b1e3 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Mon, 26 Feb 2018 12:23:36 -0800 Subject: [PATCH 171/884] Internal change. PiperOrigin-RevId: 187061863 --- tensorflow/contrib/bayesflow/BUILD | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 08b29fb6bc..270c309ec3 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -210,7 +210,7 @@ cuda_py_test( cuda_py_test( name = "hmc_test", - size = "medium", + size = "large", srcs = ["python/kernel_tests/hmc_test.py"], additional_deps = [ ":bayesflow_py", -- GitLab From 509e51bc809032bd3d9443bd4afc152fb5eaaf93 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 12:33:17 -0800 Subject: [PATCH 172/884] Maintain a cache of output dtypes of ops in TFE_Context. 
PiperOrigin-RevId: 187062992 --- tensorflow/c/eager/c_api.cc | 20 ++++++++++++++++++++ tensorflow/c/eager/runtime.cc | 15 ++++++++++++--- tensorflow/c/eager/runtime.h | 6 ++++++ 3 files changed, 38 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index c27a7129fa..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_mgr.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/node_def_util.h" #include "tensorflow/core/framework/rendezvous.h" #include "tensorflow/core/framework/tensor_shape.pb.h" #include "tensorflow/core/framework/types.h" @@ -823,6 +824,25 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, delete kernel; return; } + // Update output_dtypes inside `kernel`. + const tensorflow::OpDef* op_def = nullptr; + const tensorflow::FunctionDef* function_def = + ctx->func_lib_def.Find(ndef.op()); + if (function_def != nullptr) { + op_def = &(function_def->signature()); + } + if (op_def == nullptr) { + status->status = OpDefForOp(ndef.op().c_str(), &op_def); + if (!status->status.ok()) { + return; + } + } + tensorflow::DataTypeVector input_dtypes; + status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes, + kernel->output_dtypes()); + if (!status->status.ok()) { + return; + } tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index f77a937f1f..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -41,17 +41,26 @@ const uint32 kIsList = 1U << 31; } // namespace +Status OpDefForOp(const char* op_name, const OpDef** op_def) { + const OpRegistrationData* op_reg_data = nullptr; + 
Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + if (s.ok()) { + *op_def = &op_reg_data->op_def; + } + return s; +} + Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out) { mutex_lock l(g_op_name_to_attr_type_map_lock); *out = gtl::FindPtrOrNull(*OpNameToAttrTypeMap(), op_name); if (*out != nullptr) return Status::OK(); - const OpRegistrationData* op_reg_data = nullptr; - Status s = OpRegistry::Global()->LookUp(op_name, &op_reg_data); + const OpDef* op_def = nullptr; + Status s = OpDefForOp(op_name, &op_def); if (!s.ok()) return s; std::unique_ptr m(new AttrTypeMap); // TODO(agarwal): Avoid having to create this "registry" at runtime, // perhaps can be done at op registration time? - for (const auto& attr : op_reg_data->op_def.attr()) { + for (const auto& attr : op_def->attr()) { string type = attr.type(); const bool is_list = (type.length() > 6 && type.compare(0, 4, "list") == 0); if (is_list) { diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 4d20b5244a..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -39,6 +39,9 @@ namespace tensorflow { // represent the TF_AttrType type of the values in the list. typedef std::unordered_map AttrTypeMap; +// Look up OpDef for `op_name`. +Status OpDefForOp(const char* op_name, const OpDef** op_def); + // Returns the AttrTypeMap for the TensorFlow operation named op_name. 
Status AttrTypeMapForOp(const char* op_name, const AttrTypeMap** out); @@ -180,12 +183,15 @@ class KernelAndDevice { const OpKernel* kernel() const { return kernel_.get(); } + DataTypeVector* output_dtypes() { return &output_dtypes_; } + private: std::unique_ptr kernel_; Device* device_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; + DataTypeVector output_dtypes_; }; } // namespace tensorflow -- GitLab From 19c601b53a8444a26fc6694a2766897df37fc336 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Mon, 26 Feb 2018 13:06:59 -0800 Subject: [PATCH 173/884] Include c_api_experimental in libtensorflow.so's dependencies. PiperOrigin-RevId: 187068103 --- tensorflow/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index dc995d231d..3828ee0ddb 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -787,6 +787,7 @@ tf_cc_shared_object( }), deps = [ "//tensorflow/c:c_api", + "//tensorflow/c:c_api_experimental", "//tensorflow/c:exported_symbols.lds", "//tensorflow/c:version_script.lds", "//tensorflow/c/eager:c_api", -- GitLab From 6c99456856973d7cfee31aeeabef8d79014a097f Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Mon, 26 Feb 2018 13:54:02 -0800 Subject: [PATCH 174/884] Update eager uniform replay buffer microbenchmarks to compare against graph functions when possible. 
PiperOrigin-RevId: 187075418 --- .../contrib/framework/python/ops/critical_section_ops.py | 6 ++++-- tensorflow/python/framework/ops.py | 9 ++++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index 3c5c55ed65..ab603cc18e 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -143,7 +143,7 @@ class CriticalSection(object): def _init_from_args(self, name, shared_name): # pylint: disable=invalid-name """Initialize the CriticalSection from constructor arguments.""" with ops.name_scope(name, "CriticalSection", []) as name: - with ops.control_dependencies(None): + with ops.init_scope(): # pylint: disable=protected-access container = ops.get_default_graph()._container # pylint: enable=protected-access @@ -226,7 +226,9 @@ class CriticalSection(object): # mode. This is generally ok; since eager mode (as of # writing) executes sequentially anyway. for sg in ops.get_collection(CRITICAL_SECTION_EXECUTIONS): - if sg.handle.name == self._handle.name: + sg_handle_name = ops.convert_to_tensor(sg.handle).name + self_handle_name = ops.convert_to_tensor(self._handle).name + if sg_handle_name == self_handle_name: # Other executions in the same critical section are allowed. 
continue if not (exclusive_resource_access or sg.exclusive_resource_access): diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 5a14ea4176..b0d2704c07 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -4805,7 +4805,14 @@ def container(container_name): @tf_export("colocate_with") def colocate_with(op, ignore_existing=False): if context.in_graph_mode(): - return get_default_graph().colocate_with(op, ignore_existing) + default_graph = get_default_graph() + if isinstance(op, EagerTensor): + if default_graph.building_function: + op = internal_convert_to_tensor(op) + else: + raise ValueError("Encountered an Eager-defined Tensor during graph " + "construction, but a function was not being built.") + return default_graph.colocate_with(op, ignore_existing) else: if op is not None: return device(op.device) -- GitLab From 01b96c59f410b44a6279627529a643b1e4da4aa5 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 26 Feb 2018 14:00:07 -0800 Subject: [PATCH 175/884] TFTS: Switch to using core feature columns This fixes some shape issues that came up when using the tf.contrib.layers parsing functions. Adds a string -> embedding column API example to the LSTM example. 
PiperOrigin-RevId: 187076400 --- .../examples/data/multivariate_periods.csv | 200 +++++++++--------- .../timeseries/examples/known_anomaly.py | 8 +- .../contrib/timeseries/examples/lstm.py | 26 ++- .../python/timeseries/estimators.py | 53 +++-- .../timeseries/python/timeseries/model.py | 38 ++-- .../state_space_models/state_space_model.py | 10 +- 6 files changed, 177 insertions(+), 158 deletions(-) diff --git a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv index b49a0662c2..9b15b4f0b2 100644 --- a/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv +++ b/tensorflow/contrib/timeseries/examples/data/multivariate_periods.csv @@ -1,100 +1,100 @@ -0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0. -1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0. -2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0. -3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0. -4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0. -5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0. -6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0. -7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0. -8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0. -9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0. -10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0. -11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0. -12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0. -13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0. -14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0. 
-15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0. -16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0. -17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0. -18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0. -19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0. -20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0. -21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0. -22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0. -23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0. -24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0. -25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0. -26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0. -27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0. -28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0. -29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0. -30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0. -31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0. -32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0. -33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0. -34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0. -35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0. -36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0. -37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0. -38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0. 
-39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0. -40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0. -41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0. -42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0. -43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0. -44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0. -45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0. -46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0. -47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0. -48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0. -49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0. -50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0. -51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0. -52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0. -53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0. -54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0. -55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0. -56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0. -57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0. -58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0. -59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0. -60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0. -61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0. -62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0. 
-63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0. -64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0. -65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0. -66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0. -67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0. -68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0. -69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0. -70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0. -71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0. -72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0. -73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0. -74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0. -75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0. -76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0. -77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0. -78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0. -79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0. -80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0. -81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0. -82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0. -83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0. -84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0. -85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0. -86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0. 
-87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0. -88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0. -89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0. -90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0. -91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0. -92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0. -93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0. -94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0. -95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0. -96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0. -97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0. -98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0. -99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0. 
+0,0.926906299771,1.99107237682,2.56546245685,3.07914768197,4.04839057867,1.,0.,strkeya +1,0.108010001864,1.41645361423,2.1686839775,2.94963962176,4.1263503303,1.,0.,strkeyb +2,-0.800567600028,1.0172132907,1.96434754116,2.99885333086,4.04300485864,1.,0.,strkey +3,0.0607042871898,0.719540073421,1.9765012584,2.89265588817,4.0951014426,1.,0.,strkey +4,0.933712200629,0.28052120776,1.41018552514,2.69232603996,4.06481164223,1.,0.,strkey +5,-0.171730652974,0.260054421028,1.48770816369,2.62199129293,4.44572807842,1.,0.,strkey +6,-1.00180162933,0.333045158863,1.50006392277,2.88888309683,4.24755865606,1.,0.,strkey +7,0.0580061875336,0.688929398826,1.56543458772,2.99840358953,4.52726873347,1.,0.,strkey +8,0.764139447412,1.24704875327,1.77649279698,3.13578593851,4.63238922951,1.,0.,strkey +9,-0.230331874785,1.47903998963,2.03547545751,3.20624030377,4.77980005228,1.,0.,strkey +10,-1.03846045211,2.01133000781,2.31977503972,3.67951536251,5.09716775897,1.,0.,strkeyc +11,0.188643592253,2.23285349038,2.68338482249,3.49817168611,5.24928239634,1.,0.,strkey +12,0.91207302309,2.24244446841,2.71362604985,3.96332587625,5.37802271594,1.,0.,strkey +13,-0.296588665881,2.02594634141,3.07733910479,3.99698324956,5.56365901394,1.,0.,strkey +14,-0.959961476551,1.45078629833,3.18996420137,4.3763059609,5.65356015609,1.,0.,strkey +15,0.46313530679,1.01141441548,3.4980215948,4.20224896882,5.88842247449,1.,0.,strkey +16,0.929354125798,0.626635305936,3.70508262244,4.51791573544,5.73945973251,1.,0.,strkey +17,-0.519110731957,0.269249223148,3.39866823332,4.46802003061,5.82768174382,1.,0.,strkey +18,-0.924330981367,0.349602834684,3.21762413294,4.72803587499,5.94918925767,1.,0.,strkey +19,0.253239387885,0.345158023497,3.11071425333,4.79311566935,5.9489259713,1.,0.,strkey +20,0.637408390225,0.698996675371,3.25232492145,4.73814732384,5.9612010251,1.,0.,strkey +21,-0.407396859412,1.17456342803,2.49526823723,4.59323415742,5.82501686811,1.,0.,strkey 
+22,-0.967485452118,1.66655933642,2.47284606244,4.58316034754,5.88721406681,1.,0.,strkey +23,0.474480867904,1.95018556323,2.0228950072,4.48651142819,5.8255943735,1.,0.,strkey +24,1.04309652155,2.23519892356,1.91924131572,4.19094661783,5.87457348436,1.,0.,strkey +25,-0.517861513772,2.12501967336,1.70266619979,4.05280882887,5.72160912899,1.,0.,strkey +26,-0.945301585146,1.65464653549,1.81567174251,3.92309850635,5.58270493814,1.,0.,strkey +27,0.501153868974,1.40600764889,1.53991387719,3.72853247942,5.60169001727,1.,0.,strkey +28,0.972859524418,1.00344321868,1.5175642828,3.64092376655,5.10567722582,1.,0.,strkey +29,-0.70553406135,0.465306263885,1.7038540803,3.33236870312,5.09182481555,1.,0.,strkey +30,-0.946093634916,0.294539309453,1.88052827037,2.93011492669,4.97354922696,1.,0.,strkey +31,0.47922123231,0.308465865031,2.03445883031,2.90772899045,4.86241793548,1.,0.,strkey +32,0.754030014252,0.549752241167,2.46115815089,2.95063349534,4.71834614627,1.,0.,strkey +33,-0.64875949826,0.894615488148,2.5922463381,2.81269864022,4.43480095104,1.,0.,strkey +34,-0.757829951086,1.39123914261,2.69258079904,2.61834837315,4.36580046156,1.,0.,strkey +35,0.565653301088,1.72360022693,2.97794913834,2.80403840334,4.27327248459,1.,0.,strkey +36,0.867440092372,2.21100730052,3.38648090792,2.84057515729,4.12210169576,1.,0.,strkey +37,-0.894567758095,2.17549105818,3.45532493329,2.90446025717,4.00251740584,1.,0.,strkeyd +38,-0.715442356893,2.15105389965,3.52041791902,3.03650393392,4.12809249577,1.,0.,strkey +39,0.80671703672,1.81504564517,3.60463324866,3.00747789871,3.98440762467,1.,0.,strkey +40,0.527014790142,1.31803513865,3.43842186337,3.3332594663,4.03232406566,1.,0.,strkey +41,-0.795936862129,0.847809114454,3.09875133548,3.52863155938,3.94883924909,1.,0.,strkey +42,-0.610245806946,0.425530441018,2.92581949152,3.77238736123,4.27287245021,1.,0.,strkey +43,0.611662279431,0.178432049837,2.48128214822,3.73212087883,4.17319013831,1.,0.,strkey 
+44,0.650866553108,0.220341648392,2.41694642022,4.2609098519,4.27271645905,1.,0.,strkey +45,-0.774156982023,0.632667602331,2.05474356052,4.32889204886,4.18029723271,1.,0.,strkey +46,-0.714058448409,0.924562377599,1.75706135146,4.52492718422,4.3972678094,1.,0.,strkey +47,0.889627293379,1.46207968841,1.78299357672,4.64466731095,4.56317887554,1.,0.,strkey +48,0.520140662861,1.8996333843,1.41377633823,4.48899091177,4.78805049769,1.,0.,strkey +49,-1.03816935616,2.08997002059,1.51218375351,4.84167764204,4.93026048606,1.,0.,strkey +50,-0.40772951362,2.30878972136,1.44144415128,4.76854460997,5.01538444629,1.,0.,strkey +51,0.792730684781,1.91367048509,1.58887384677,4.71739397335,5.25690012199,1.,0.,strkey +52,0.371311881576,1.67565079528,1.81688563053,4.60353107555,5.44265822961,1.,0.,strkey +53,-0.814398070371,1.13374634126,1.80328814859,4.72264252878,5.52674761122,1.,0.,strkey +54,-0.469017949323,0.601244136627,2.29690896736,4.49859178859,5.54126153454,1.,0.,strkey +55,0.871044371426,0.407597593794,2.7499112487,4.19060637761,5.57693767301,1.,0.,strkey +56,0.523764933017,0.247705192709,3.09002071379,4.02095509006,5.80510362182,1.,0.,strkey +57,-0.881326403531,0.31513103164,3.11358205718,3.96079100808,5.81000652365,1.,0.,strkey +58,-0.357928025339,0.486163915865,3.17884556771,3.72634990659,5.85693642011,1.,0.,strkey +59,0.853038779822,1.04218094475,3.45835384454,3.36703969978,5.9585988449,1.,0.,strkey +60,0.435311516013,1.59715085283,3.63313338588,3.11276729421,5.93643818229,1.,0.,strkey +61,-1.02703719138,1.92205832542,3.47606111735,3.06247155999,6.02106646259,1.,0.,strkey +62,-0.246661325557,2.14653802542,3.29446326567,2.89936259181,5.67531541272,1.,0.,strkey +63,1.02554736569,2.25943737733,3.07031591528,2.78176218013,5.78206328989,1.,0.,strkey +64,0.337814475969,2.07589147224,2.80356226089,2.55888206331,5.7094075496,1.,0.,strkey +65,-1.12023369929,1.25333011618,2.56497288445,2.77361359194,5.50799418376,1.,0.,strkey 
+66,-0.178980246554,1.11937139901,2.51598681313,2.91438309151,5.47469577206,1.,0.,strkey +67,0.97550951531,0.60553823137,2.11657741073,2.88081098981,5.37034999502,1.,0.,strkey +68,0.136653357206,0.365828836075,1.97386033165,3.13217903204,5.07254490219,1.,0.,strkey +69,-1.05607596951,0.153152115069,1.52110743825,3.01308794192,5.08902539125,1.,0.,strkey +70,-0.13095280331,0.337113974483,1.52703079853,3.16687131599,4.86649398514,1.,0.,strkey +71,1.07081057754,0.714247566736,1.53761382634,3.45151989484,4.75892309166,1.,0.,strkey +72,0.0153410376082,1.24631231847,1.61690939161,3.85481994498,4.35683752832,1.,0.,strkey +73,-0.912801257303,1.60791309476,1.8729264524,4.03037260012,4.36072588913,1.,0.,strkey +74,-0.0894895640338,2.02535207407,1.93484909619,4.09557485132,4.35327025188,1.,0.,strkey +75,0.978646999652,2.20085086625,2.09003440427,4.27542353033,4.1805058388,1.,0.,strkey +76,-0.113312642876,2.2444100761,2.50789248839,4.4151861502,4.03267168136,1.,0.,strkey +77,-1.00215099149,1.84305628445,2.61691237246,4.45425147595,3.81203553766,1.,0.,strkey +78,-0.0183234614205,1.49573923116,2.99308471214,4.71134960112,4.0273804959,1.,0.,strkey +79,1.0823738177,1.12211589848,3.27079386925,4.94288270502,4.01851068083,1.,0.,strkey +80,0.124370187893,0.616474412808,3.4284236674,4.76942168327,3.9749536483,1.,0.,strkey +81,-0.929423379352,0.290977090976,3.34131726136,4.78590392707,4.10190661656,1.,0.,strkey +82,0.23766302648,0.155302052254,3.49779513794,4.64605656795,4.15571321107,1.,0.,strkey +83,1.03531486192,0.359702776204,3.4880725919,4.48167586667,4.21134561991,1.,0.,strkey +84,-0.261234571382,0.713877760378,3.42756426614,4.426443869,4.25208300527,1.,0.,strkey +85,-1.03572442277,1.25001113691,2.96908341113,4.25500915322,4.25723010649,1.,0.,strkey +86,0.380034261243,1.70543355622,2.73605932518,4.16703432307,4.63700400788,1.,0.,strkey +87,1.03734873488,1.97544410562,2.55586572141,3.84976673263,4.55282864289,1.,0.,strkey 
+88,-0.177344253372,2.22614526325,2.09565864891,3.77378097953,4.82577400298,1.,0.,strkey +89,-0.976821526892,2.18385079177,1.78522284118,3.67768223554,5.06302440873,1.,0.,strkey +90,0.264820472091,1.86981946157,1.50048403865,3.43619796921,5.05651761669,1.,0.,strkey +91,1.05642344868,1.47568646076,1.51347671977,3.20898518885,5.50149047462,1.,0.,strkey +92,-0.311607433358,1.04226467636,1.52089650905,3.02291865417,5.4889046232,1.,0.,strkey +93,-0.724285777937,0.553052311957,1.48573560173,2.7365973598,5.72549174225,1.,0.,strkey +94,0.519859192905,0.226520626591,1.61543723167,2.84102086852,5.69330622288,1.,0.,strkey +95,1.0323195039,0.260873217055,1.81913034804,2.83951143848,5.90325028086,1.,0.,strkey +96,-0.53285682538,0.387695521405,1.70935609313,2.57977050631,5.79579213161,1.,0.,strkey +97,-0.975127997215,0.920948771589,2.51292643636,2.71004616612,5.87016469227,1.,0.,strkey +98,0.540246804099,1.36445470181,2.61949412896,2.98482553485,6.02447664937,1.,0.,strkey +99,0.987764008058,1.85581989607,2.84685706149,2.94760204892,6.0212151724,1.,0.,strkey diff --git a/tensorflow/contrib/timeseries/examples/known_anomaly.py b/tensorflow/contrib/timeseries/examples/known_anomaly.py index 7659dd308a..c08c0b0acb 100644 --- a/tensorflow/contrib/timeseries/examples/known_anomaly.py +++ b/tensorflow/contrib/timeseries/examples/known_anomaly.py @@ -46,12 +46,12 @@ def train_and_evaluate_exogenous(csv_file_name=_DATA_FILE, train_steps=300): # Indicate the format of our exogenous feature, in this case a string # representing a boolean value. - string_feature = tf.contrib.layers.sparse_column_with_keys( - column_name="is_changepoint", keys=["no", "yes"]) + string_feature = tf.feature_column.categorical_column_with_vocabulary_list( + key="is_changepoint", vocabulary_list=["no", "yes"]) # Specify the way this feature is presented to the model, here using a one-hot # encoding. 
- one_hot_feature = tf.contrib.layers.one_hot_column( - sparse_id_column=string_feature) + one_hot_feature = tf.feature_column.indicator_column( + categorical_column=string_feature) estimator = tf.contrib.timeseries.StructuralEnsembleRegressor( periodicities=12, diff --git a/tensorflow/contrib/timeseries/examples/lstm.py b/tensorflow/contrib/timeseries/examples/lstm.py index f37cafcc50..2eee878196 100644 --- a/tensorflow/contrib/timeseries/examples/lstm.py +++ b/tensorflow/contrib/timeseries/examples/lstm.py @@ -59,10 +59,10 @@ class _LSTMModel(ts_model.SequentialTimeSeriesModel): num_units: The number of units in the model's LSTMCell. num_features: The dimensionality of the time series (features per timestep). - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects representing features which are inputs to the model but are - not predicted by it. These must then be present for training, - evaluation, and prediction. + exogenous_feature_columns: A list of `tf.feature_column`s representing + features which are inputs to the model but are not predicted by + it. These must then be present for training, evaluation, and + prediction. dtype: The floating point data type to use. """ super(_LSTMModel, self).__init__( @@ -189,12 +189,16 @@ def train_and_predict( export_directory=None): """Train and predict using a custom time series model.""" # Construct an Estimator from our LSTM model. + categorical_column = tf.feature_column.categorical_column_with_hash_bucket( + key="categorical_exogenous_feature", hash_bucket_size=16) exogenous_feature_columns = [ # Exogenous features are not part of the loss, but can inform # predictions. In this example the features have no extra information, but # are included as an API example. 
- tf.contrib.layers.real_valued_column( - "2d_exogenous_feature", dimension=2)] + tf.feature_column.numeric_column( + "2d_exogenous_feature", shape=(2,)), + tf.feature_column.embedding_column( + categorical_column=categorical_column, dimension=10)] estimator = ts_estimators.TimeSeriesRegressor( model=_LSTMModel(num_features=5, num_units=128, exogenous_feature_columns=exogenous_feature_columns), @@ -205,7 +209,11 @@ def train_and_predict( csv_file_name, column_names=((tf.contrib.timeseries.TrainEvalFeatures.TIMES,) + (tf.contrib.timeseries.TrainEvalFeatures.VALUES,) * 5 - + ("2d_exogenous_feature",) * 2)) + + ("2d_exogenous_feature",) * 2 + + ("categorical_exogenous_feature",)), + # Data types other than for `times` need to be specified if they aren't + # float32. In this case one of our exogenous features has string dtype. + column_dtypes=((tf.int64,) + (tf.float32,) * 7 + (tf.string,))) train_input_fn = tf.contrib.timeseries.RandomWindowInputFn( reader, batch_size=4, window_size=32) estimator.train(input_fn=train_input_fn, steps=training_steps) @@ -215,7 +223,9 @@ def train_and_predict( predict_exogenous_features = { "2d_exogenous_feature": numpy.concatenate( [numpy.ones([1, 100, 1]), numpy.zeros([1, 100, 1])], - axis=-1)} + axis=-1), + "categorical_exogenous_feature": numpy.array( + ["strkey"] * 100)[None, :, None]} (predictions,) = tuple(estimator.predict( input_fn=tf.contrib.timeseries.predict_continuation_input_fn( evaluation, steps=100, diff --git a/tensorflow/contrib/timeseries/python/timeseries/estimators.py b/tensorflow/contrib/timeseries/python/timeseries/estimators.py index f8355f366f..8d13343e82 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/estimators.py +++ b/tensorflow/contrib/timeseries/python/timeseries/estimators.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.layers.python.layers import feature_column - from 
tensorflow.contrib.timeseries.python.timeseries import ar_model from tensorflow.contrib.timeseries.python.timeseries import feature_keys from tensorflow.contrib.timeseries.python.timeseries import head as ts_head_lib @@ -31,10 +29,12 @@ from tensorflow.contrib.timeseries.python.timeseries.state_space_models.filterin from tensorflow.python.estimator import estimator_lib from tensorflow.python.estimator.export import export_lib +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.training import training as train @@ -117,22 +117,29 @@ class TimeSeriesRegressor(estimator_lib.Estimator): dtype=self._model.dtype), shape=(default_batch_size, default_series_length, self._model.num_features))) - with ops.Graph().as_default(): - # Default placeholders have only an unknown batch dimension. Make them - # in a separate graph, then splice in the series length to the shapes - # and re-create them in the outer graph. - exogenous_feature_shapes = { - key: (value.get_shape(), value.dtype) for key, value - in feature_column.make_place_holder_tensors_for_base_features( - self._model.exogenous_feature_columns).items()} - for feature_key, (batch_only_feature_shape, value_dtype) in ( - exogenous_feature_shapes.items()): - batch_only_feature_shape = batch_only_feature_shape.with_rank_at_least( - 1).as_list() - feature_shape = ([default_batch_size, default_series_length] - + batch_only_feature_shape[1:]) - placeholders[feature_key] = array_ops.placeholder( - dtype=value_dtype, name=feature_key, shape=feature_shape) + if self._model.exogenous_feature_columns: + with ops.Graph().as_default(): + # Default placeholders have only an unknown batch dimension. 
Make them + # in a separate graph, then splice in the series length to the shapes + # and re-create them in the outer graph. + parsed_features = ( + feature_column.make_parse_example_spec( + self._model.exogenous_feature_columns)) + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder( + shape=[None], dtype=dtypes.string), + features=parsed_features) + exogenous_feature_shapes = { + key: (value.get_shape(), value.dtype) for key, value + in placeholder_features.items()} + for feature_key, (batch_only_feature_shape, value_dtype) in ( + exogenous_feature_shapes.items()): + batch_only_feature_shape = ( + batch_only_feature_shape.with_rank_at_least(1).as_list()) + feature_shape = ([default_batch_size, default_series_length] + + batch_only_feature_shape[1:]) + placeholders[feature_key] = array_ops.placeholder( + dtype=value_dtype, name=feature_key, shape=feature_shape) # Models may not know the shape of their state without creating some # variables/ops. Avoid polluting the default graph by making a new one. We # use only static metadata from the returned Tensors. @@ -333,11 +340,11 @@ class StructuralEnsembleRegressor(StateSpaceRegressor): determine the model size. Learning autoregressive coefficients typically requires more steps and a smaller step size than other components. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. 
exogenous_update_condition: A function taking two Tensor arguments, `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]), and diff --git a/tensorflow/contrib/timeseries/python/timeseries/model.py b/tensorflow/contrib/timeseries/python/timeseries/model.py index bac7d1ebf5..7644764a74 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/model.py @@ -21,18 +21,17 @@ from __future__ import print_function import abc import collections -from tensorflow.contrib import layers -from tensorflow.contrib.layers import feature_column - from tensorflow.contrib.timeseries.python.timeseries import math_utils from tensorflow.contrib.timeseries.python.timeseries.feature_keys import PredictionFeatures from tensorflow.contrib.timeseries.python.timeseries.feature_keys import TrainEvalFeatures +from tensorflow.python.feature_column import feature_column from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope @@ -66,11 +65,11 @@ class TimeSeriesModel(object): Args: num_features: Number of features for the time series - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. 
+ exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not + part of the series to be predicted. Passed to + `tf.feature_column.input_layer`. dtype: The floating point datatype to use. """ if exogenous_feature_columns: @@ -86,7 +85,7 @@ class TimeSeriesModel(object): @property def exogenous_feature_columns(self): - """`FeatureColumn` objects for features which are not predicted.""" + """`tf.feature_column`s for features which are not predicted.""" return self._exogenous_feature_columns # TODO(allenl): Move more of the generic machinery for generating and @@ -265,11 +264,14 @@ class TimeSeriesModel(object): if not self._exogenous_feature_columns: return (0,) with ops.Graph().as_default(): - placeholder_features = ( - feature_column.make_place_holder_tensors_for_base_features( + parsed_features = ( + feature_column.make_parse_example_spec( self._exogenous_feature_columns)) - embedded = layers.input_from_feature_columns( - columns_to_tensors=placeholder_features, + placeholder_features = parsing_ops.parse_example( + serialized=array_ops.placeholder(shape=[None], dtype=dtypes.string), + features=parsed_features) + embedded = feature_column.input_layer( + features=placeholder_features, feature_columns=self._exogenous_feature_columns) return embedded.get_shape().as_list()[1:] @@ -308,13 +310,13 @@ class TimeSeriesModel(object): # Avoid shape warnings when embedding "scalar" exogenous features (those # with only batch and window dimensions); input_from_feature_columns # expects input ranks to match the embedded rank. 
- if tensor.get_shape().ndims == 1: + if tensor.get_shape().ndims == 1 and tensor.dtype != dtypes.string: exogenous_features_single_batch_dimension[name] = tensor[:, None] else: exogenous_features_single_batch_dimension[name] = tensor embedded_exogenous_features_single_batch_dimension = ( - layers.input_from_feature_columns( - columns_to_tensors=exogenous_features_single_batch_dimension, + feature_column.input_layer( + features=exogenous_features_single_batch_dimension, feature_columns=self._exogenous_feature_columns, trainable=True)) exogenous_regressors = array_ops.reshape( @@ -381,8 +383,8 @@ class SequentialTimeSeriesModel(TimeSeriesModel): may use _scale_back_data or _scale_back_variance to return predictions to the input scale. dtype: The floating point datatype to use. - exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects. See `TimeSeriesModel`. + exogenous_feature_columns: A list of `tf.feature_column`s. See + `TimeSeriesModel`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py index 6257002647..951c6546d5 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/state_space_model.py @@ -112,11 +112,11 @@ class StateSpaceModelConfiguration( exogenous_noise_decreases: If True, exogenous regressors can "set" model state, decreasing uncertainty. If both this parameter and exogenous_noise_increases are False, exogenous regressors are ignored. 
- exogenous_feature_columns: A list of tf.contrib.layers.FeatureColumn - objects (for example tf.contrib.layers.embedding_column) corresponding - to exogenous features which provide extra information to the model but - are not part of the series to be predicted. Passed to - tf.contrib.layers.input_from_feature_columns. + exogenous_feature_columns: A list of `tf.feature_column`s (for example + `tf.feature_column.embedding_column`) corresponding to exogenous + features which provide extra information to the model but are not part + of the series to be predicted. Passed to + `tf.feature_column.input_layer`. exogenous_update_condition: A function taking two Tensor arguments `times` (shape [batch size]) and `features` (a dictionary mapping exogenous feature keys to Tensors with shapes [batch size, ...]) and returning a -- GitLab From 7b944492cbe1ac81ea728ecb84ce4ea272627990 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Mon, 26 Feb 2018 14:11:08 -0800 Subject: [PATCH 176/884] Adding documentation for dataset/iterator checkpointing. PiperOrigin-RevId: 187078347 --- .../docs_src/programmers_guide/datasets.md | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index d19200e80c..d38fbddfa1 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -327,6 +327,35 @@ same op/node (created by `Iterator.get_next()`). Therefore, evaluating *any* of these tensors will advance the iterator for all components. A typical consumer of an iterator will include all components in a single expression. +### Saving iterator state + +The @{tf.contrib.data.make_saveable_from_iterator} function creates a +`SaveableObject` from an iterator, which can be used to save and +restore the current state of the iterator (and, effectively, the whole input +pipeline). 
A saveable object thus created can be added to @{tf.train.Saver} +variables list or the `tf.GraphKeys.SAVEABLE_OBJECTS` collection for saving and +restoring in the same manner as a @{tf.Variable}. Refer to +@{$saved_model$Saving and Restoring} for details on how to save and restore +variables. + +```python +# Create saveable object from iterator. +saveable = tf.contrib.data.make_saveable_from_iterator(iterator) + +# Save the iterator state by adding it to the saveable objects collection. +tf.add_to_collection(tf.GraphKeys.SAVEABLE_OBJECTS, saveable) +saver = tf.train.Saver() + +with tf.Session() as sess: + + if should_checkpoint: + saver.save(sess, path_to_checkpoint) + +# Restore the iterator state. +with tf.Session() as sess: + saver.restore(sess, path_to_checkpoint) +``` + ## Reading input data ### Consuming NumPy arrays -- GitLab From 10aaee0c5d83649959d8b1a6c75ee3127c205259 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 26 Feb 2018 14:19:56 -0800 Subject: [PATCH 177/884] [XLA] GTE of a certain element of the tuple does not need to keep other elements alive. This achieves two things: 1. Heap simulation runtime is no longer quadratic in the number of tuple elements (as we don't add each GetTupleElement to the liveset of each buffer defined by the tuple). 2. A reduction in the heap memory footprint. 
PiperOrigin-RevId: 187079787 --- .../compiler/xla/service/heap_simulator.cc | 135 ++++++++++-------- .../xla/service/heap_simulator_test.cc | 50 +++++++ 2 files changed, 127 insertions(+), 58 deletions(-) diff --git a/tensorflow/compiler/xla/service/heap_simulator.cc b/tensorflow/compiler/xla/service/heap_simulator.cc index a2d13c013c..3dd4c4a079 100644 --- a/tensorflow/compiler/xla/service/heap_simulator.cc +++ b/tensorflow/compiler/xla/service/heap_simulator.cc @@ -27,38 +27,6 @@ namespace xla { using tensorflow::gtl::FlatMap; using tensorflow::gtl::FlatSet; -namespace { - -// Returns the set of buffers that may be sources of all operands of the given -// instruction. The returned buffers are guaranteed to have no duplicates, and -// to be sorted in a deterministic order. -std::vector UniqueOperandSourceBuffers( - const HloInstruction* instruction, - const TuplePointsToAnalysis& points_to_analysis) { - std::vector buffers; - for (const HloInstruction* operand : instruction->operands()) { - points_to_analysis.GetPointsToSet(operand).ForEachElement( - [&](const ShapeIndex& /*index*/, - const PointsToSet::BufferList& points_to) { - buffers.insert(buffers.end(), points_to.begin(), points_to.end()); - }); - } - - // Sort and then remove duplicates from buffers. 
- std::sort(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() < b->id(); - }); - buffers.erase(std::unique(buffers.begin(), buffers.end(), - [](const LogicalBuffer* a, const LogicalBuffer* b) { - return a->id() == b->id(); - }), - buffers.end()); - return buffers; -} - -} // namespace - /*static*/ StatusOr HeapSimulator::Run( std::unique_ptr algorithm, const HloModule& module, @@ -93,6 +61,7 @@ Status HeapSimulator::RunComputation( const HloComputation& computation, const std::vector& instruction_sequence, const TuplePointsToAnalysis& points_to_analysis) { + VLOG(3) << "Computation:\n" << computation.ToString(); // The goal here is to minimize memory usage, assuming the given sequential // ordering of instructions. The strategy is to walk through the instruction // sequence, calling Alloc and Free on the underlying heap algorithm. The @@ -101,7 +70,51 @@ Status HeapSimulator::RunComputation( // 'live_buffers' tracks the liveness of each buffer that we assign, by // associating it with a set of HloInstructions that need to be visited. When // the set becomes empty, the buffer is no longer used, and can be freed. + // 'used_buffers' is the reverse map - it tracks which buffers were used by an + // instruction, so that we can remove the instructions from a buffer's live + // set after they are visited. FlatMap> live_buffers; + FlatMap> used_buffers; + auto add_user_to_buffer = [this, &live_buffers, &used_buffers]( + const HloInstruction* user, + const LogicalBuffer* buffer) { + if (!IgnoreBuffer(buffer)) { + VLOG(4) << " Adding user " << user->name() << " to buffer " + << buffer->ToString(); + live_buffers[buffer].insert(user); + used_buffers[user].insert(buffer); + } + }; + + // Initialize live_buffers for each buffer that we're going to assign. 
The + // set of instructions that need to be visited contains all users of all + // aliases, that is, all users of all instructions that have the buffer + // contained in their points-to set. + for (const HloInstruction* instruction : instruction_sequence) { + const PointsToSet& points_to = + points_to_analysis.GetPointsToSet(instruction); + const PointsToSet::BufferSet& buffer_set = points_to.CreateFlattenedSet(); + for (const HloInstruction* user : instruction->users()) { + if (user->opcode() != HloOpcode::kGetTupleElement) { + for (const LogicalBuffer* buffer : buffer_set) { + add_user_to_buffer(user, buffer); + } + } else { + // A GetTupleElement doesn't need to keep all of its operand's buffers + // alive. It only needs the buffers that relate to the element it's + // extracting, and the tuple it's extracting from, but not the buffers + // for the other elements. + for (const LogicalBuffer* buffer : points_to.element({})) { + add_user_to_buffer(user, buffer); + } + const PointsToSet& gte_points_to = + points_to_analysis.GetPointsToSet(user); + for (const LogicalBuffer* buffer : gte_points_to.CreateFlattenedSet()) { + add_user_to_buffer(user, buffer); + } + } + } + } const HloInstruction* root = computation.root_instruction(); auto output_source_buffers = @@ -114,34 +127,17 @@ Status HeapSimulator::RunComputation( buffers_defined_by_instruction = points_to_analysis.GetBuffersDefinedByInstruction(instruction); - // Initialize live_buffers for each buffer that we're going to assign. The - // set of instructions that need to be visited contains all users of all - // aliases. The alias itself is not necessary; if it has users, the users - // are necessarily scheduled after the alias. And if it has no users, it is - // either a dead value or an output, both of which are handled below. - // - // We ignore control dependencies here.
The reasoning is that the control - // dependencies have already been accounted for in the ordering of the given - // 'instruction_sequence', and should not otherwise artificially extend the - // lifetime of buffers that aren't already connected by a data dependency. + VLOG(3) << "Instruction: " << instruction->ToString(); + for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { + VLOG(4) << " Defines: " << buffer->ToString() + << (IgnoreBuffer(buffer) ? " (Ignored)" : ""); + } + dead_buffers_to_free.clear(); for (const LogicalBuffer* buffer : buffers_defined_by_instruction) { if (IgnoreBuffer(buffer)) { continue; } - FlatSet* live_set = nullptr; - for (const BufferAlias& alias : - points_to_analysis.GetBufferAliases(*buffer)) { - const std::vector& users = - alias.instruction()->users(); - if (!users.empty()) { - if (live_set == nullptr) { - live_set = &live_buffers[buffer]; - } - live_set->insert(users.begin(), users.end()); - } - } - // Add a nullptr sentry to ensure entry parameters and output source // buffers are not freed until the very end. const bool entry_parameter = @@ -165,11 +161,12 @@ Status HeapSimulator::RunComputation( // have no instructions left to visit are moved from live_buffers to // operand_buffers_to_free. operand_buffers_to_free.clear(); - for (const LogicalBuffer* operand_buffer : - UniqueOperandSourceBuffers(instruction, points_to_analysis)) { + for (const LogicalBuffer* operand_buffer : used_buffers[instruction]) { if (IgnoreBuffer(operand_buffer)) { continue; } + VLOG(4) << " Removing user " << instruction->name() << " from buffer " + << operand_buffer->ToString(); auto it = live_buffers.find(operand_buffer); FlatSet* live_set = &it->second; live_set->erase(instruction); @@ -178,6 +175,11 @@ Status HeapSimulator::RunComputation( operand_buffers_to_free.push_back(operand_buffer); } } + // Sort to get a deterministic iteration order. 
+ std::sort(operand_buffers_to_free.begin(), operand_buffers_to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); // Allocate buffers defined by this instruction. This is the latest point // that we can allocate; right before the buffer is first used. This must @@ -203,6 +205,8 @@ Status HeapSimulator::RunComputation( CanShareOperandBufferWithUser( operand_buffer->instruction(), operand_buffer->index(), buffer->instruction(), buffer->index(), points_to_analysis)) { + VLOG(3) << " Sharing: " << buffer->ToString() << " with " + << operand_buffer->ToString(); ShareBuffer(buffer, operand_buffer, instruction); shared = true; break; @@ -211,6 +215,7 @@ Status HeapSimulator::RunComputation( } if (!shared) { + VLOG(3) << " Allocating: " << buffer->ToString(); Alloc(buffer, instruction); } } @@ -244,20 +249,34 @@ Status HeapSimulator::RunComputation( // Free buffers that are no longer live. This is the earliest point that we // can de-allocate; right after the last use of the buffer. for (const LogicalBuffer* buffer : dead_buffers_to_free) { + VLOG(3) << " Freeing dead: " << buffer->ToString(); Free(buffer, instruction); } for (const LogicalBuffer* buffer : operand_buffers_to_free) { + VLOG(3) << " Freeing operand: " << buffer->ToString(); Free(buffer, instruction); } } // Any remaining live buffers must be entry parameters or output source - // buffers, which had a nullptr sentry added. Free them now. + // buffers, which had a nullptr sentry added. Free them now, in a + // deterministic order. 
+ std::vector to_free; + to_free.reserve(live_buffers.size()); for (const auto& buffer_pending : live_buffers) { const LogicalBuffer* buffer = buffer_pending.first; const FlatSet& pending = buffer_pending.second; CHECK_EQ(pending.size(), 1) << *buffer; CHECK(*pending.begin() == nullptr) << *buffer; + to_free.push_back(buffer); + } + + std::sort(to_free.begin(), to_free.end(), + [](const LogicalBuffer* x, const LogicalBuffer* y) { + return x->id() < y->id(); + }); + for (const LogicalBuffer* buffer : to_free) { + VLOG(3) << "Freeing pending: " << buffer->ToString(); Free(buffer, root); } diff --git a/tensorflow/compiler/xla/service/heap_simulator_test.cc b/tensorflow/compiler/xla/service/heap_simulator_test.cc index 387b649a73..688a271712 100644 --- a/tensorflow/compiler/xla/service/heap_simulator_test.cc +++ b/tensorflow/compiler/xla/service/heap_simulator_test.cc @@ -410,6 +410,56 @@ TEST_F(HeapSimulatorTest, MultiplyDotDotTuple) { }); } +TEST_F(HeapSimulatorTest, IndependentTupleElements) { + auto builder = HloComputation::Builder(TestName()); + auto paramA = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32scalar_, "paramA")); + auto paramB = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32scalar_, "paramB")); + auto mul = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kMultiply, paramA, paramB)); + auto add = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kAdd, paramA, paramB)); + auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({mul, add})); + auto element0 = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 0)); + auto broadcast = builder.AddInstruction( + HloInstruction::CreateBroadcast(f32vec4_, element0, {0})); + auto sub = builder.AddInstruction(HloInstruction::CreateBinary( + f32scalar_, HloOpcode::kSubtract, paramA, paramB)); + auto element1 = builder.AddInstruction( + 
HloInstruction::CreateGetTupleElement(f32scalar_, tuple, 1)); + auto output = builder.AddInstruction( + HloInstruction::CreateTuple({broadcast, sub, element1})); + + HeapSimulatorTracker tracker(TestName(), builder.Build(), + {paramA, paramB, mul, add, tuple, element0, + broadcast, sub, element1, output}); + tracker.ExpectCallSequence({ + {kAlloc, tracker.BufferAt(paramA, {})}, + {kAlloc, tracker.BufferAt(paramB, {})}, + {kAlloc, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(add, {})}, + {kAlloc, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(broadcast, {})}, + // The mul can be freed right after the broadcast happens, even though + // The other GetTupleElement is still alive. + {kFree, tracker.BufferAt(mul, {})}, + {kAlloc, tracker.BufferAt(sub, {})}, + // The temporary tuple is now dead. + {kFree, tracker.BufferAt(tuple, {})}, + {kAlloc, tracker.BufferAt(output, {})}, + // All params and outputs are freed at the end. + {kFree, tracker.BufferAt(paramA, {})}, + {kFree, tracker.BufferAt(paramB, {})}, + {kFree, tracker.BufferAt(add, {})}, + {kFree, tracker.BufferAt(broadcast, {})}, + {kFree, tracker.BufferAt(sub, {})}, + {kFree, tracker.BufferAt(output, {})}, + {kFinish, nullptr}, + }); +} + TEST_F(HeapSimulatorTest, WholeModule) { HeapSimulatorTracker tracker(TestName()); -- GitLab From c3ad72500cd714a39af5ab530ab14f477cc717c6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:30 -0800 Subject: [PATCH 178/884] 1st version of sequential feature columns. 
PiperOrigin-RevId: 187080635 --- tensorflow/contrib/feature_column/BUILD | 31 +- .../sequential_feature_column.py | 308 +++++++++++- .../sequential_feature_column_test.py | 471 ++++++++++++++++++ 3 files changed, 808 insertions(+), 2 deletions(-) create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py diff --git a/tensorflow/contrib/feature_column/BUILD b/tensorflow/contrib/feature_column/BUILD index 6fc053759c..a53e36c2d5 100644 --- a/tensorflow/contrib/feature_column/BUILD +++ b/tensorflow/contrib/feature_column/BUILD @@ -33,5 +33,34 @@ py_library( name = "sequential_feature_column", srcs = ["python/feature_column/sequential_feature_column.py"], srcs_version = "PY2AND3", - deps = [], + deps = [ + "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:parsing_ops", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:tensor_shape", + "//tensorflow/python:variable_scope", + "//tensorflow/python/feature_column", + ], +) + +py_test( + name = "sequential_feature_column_test", + srcs = ["python/feature_column/sequential_feature_column_test.py"], + srcs_version = "PY2AND3", + tags = ["no_pip"], + deps = [ + ":sequential_feature_column", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:framework_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:training", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + ], ) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py index 690a44ff43..4ed7268e7a 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py +++ 
b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py @@ -12,8 +12,314 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experimental methods for tf.feature_column sequential input.""" +"""Experimental methods for tf.feature_column sequence input.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function + + +import abc +import collections + + +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import variable_scope + +# TODO(b/73160931): Fix pydoc. +# pylint: disable=g-doc-args,missing-docstring,protected-access +# TODO(b/73827486): Support SequenceExample. + + +def sequence_input_layer( + features, + feature_columns, + weight_collections=None, + trainable=True, + scope=None): + """Builds input layer for sequence input. + + All `feature_columns` must be sequence dense columns with the same + `sequence_length`. The output of this method can be fed into sequence + networks, such as RNN. + + The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ from + batch to batch. + + If multiple `feature_columns` are given with `Di` `num_elements` each, their + outputs are concatenated. So, the final `Tensor` has shape + `[batch_size, T, D0 + D1 + ... + Dn]`.
+ + Example: + + ```python + rating = sequence_numeric_column('rating') + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [rating, watches_embedding] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Returns: + An `(input_layer, sequence_length)` tuple where: + - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ + from batch to batch. `D` is the sum of `num_elements` for all + `feature_columns`. + - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence + length for each example. + Raises: + ValueError: If any of the `feature_columns` is the wrong type. + """ + feature_columns = fc._clean_feature_columns(feature_columns) + for c in feature_columns: + if not isinstance(c, _SequenceDenseColumn): + raise ValueError( + 'All feature_columns must be of type _SequenceDenseColumn. ' + 'Given (type {}): {}'.format(type(c), c)) + + with variable_scope.variable_scope( + scope, default_name='sequence_input_layer', values=features.values()): + builder = fc._LazyBuilder(features) + output_tensors = [] + sequence_lengths = [] + ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): + ordered_columns.append(column) + with variable_scope.variable_scope( + None, default_name=column._var_scope_name): + dense_tensor, sequence_length = column._get_sequence_dense_tensor( + builder, + weight_collections=weight_collections, + trainable=trainable) + # Flattens the final dimension to produce a 3D Tensor.
+ num_elements = column._variable_shape.num_elements() + shape = array_ops.shape(dense_tensor) + output_tensors.append( + array_ops.reshape( + dense_tensor, + shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) + # TODO(b/73160931): Verify sequence_length equality. + return array_ops.concat(output_tensors, -1), sequence_lengths[0] + + +# TODO(b/73160931): Add remaining categorical columns. +def sequence_categorical_column_with_identity( + key, num_buckets, default_value=None): + return _SequenceCategoricalColumn( + fc.categorical_column_with_identity( + key=key, + num_buckets=num_buckets, + default_value=default_value)) + + +# TODO(b/73160931): Merge with embedding_column +def _sequence_embedding_column( + categorical_column, dimension, initializer=None, ckpt_to_load_from=None, + tensor_name_in_ckpt=None, max_norm=None, trainable=True): + if not isinstance(categorical_column, _SequenceCategoricalColumn): + raise ValueError( + 'categorical_column must be of type _SequenceCategoricalColumn. ' + 'Given (type {}): {}'.format( + type(categorical_column), categorical_column)) + return _SequenceEmbeddingColumn( + fc.embedding_column( + categorical_column, + dimension=dimension, + initializer=initializer, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable)) + + +def sequence_numeric_column( + key, + shape=(1,), + default_value=0., + dtype=dtypes.float32): + # TODO(b/73160931): Add validations. 
+ return _SequenceNumericColumn( + key, + shape=shape, + default_value=default_value, + dtype=dtype) + + +class _SequenceDenseColumn(fc._FeatureColumn): + """Represents dense sequence data.""" + + __metaclass__ = abc.ABCMeta + + TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name + 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) + + @abc.abstractproperty + def _variable_shape(self): + """`TensorShape` without batch and sequence dimensions.""" + pass + + @abc.abstractmethod + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + """Returns a `TensorSequenceLengthPair`.""" + pass + + +def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): + with ops.name_scope(None, 'sequence_length') as name_scope: + row_ids = sp_tensor.indices[:, 0] + column_ids = sp_tensor.indices[:, 1] + column_ids += array_ops.ones_like(column_ids) + seq_length = ( + math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) + # If the last n rows do not have ids, seq_length will have shape + # [batch_size - n]. Pad the remaining values with zeros. 
+ n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] + padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) + return array_ops.concat([seq_length, padding], axis=0, name=name_scope) + + +class _SequenceCategoricalColumn( + fc._CategoricalColumn, + collections.namedtuple( + '_SequenceCategoricalColumn', ['categorical_column'])): + + @property + def name(self): + return self.categorical_column.name + + @property + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.categorical_column._transform_feature(inputs) + + @property + def _num_buckets(self): + return self.categorical_column._num_buckets + + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + id_tensor = sparse_tensors.id_tensor + weight_tensor = sparse_tensors.weight_tensor + # Expands final dimension, so that embeddings are not combined during + # embedding lookup. + check_id_rank = check_ops.assert_equal( + array_ops.rank(id_tensor), 2, + data=[ + 'Column {} expected ID tensor of rank 2. 
'.format(self.name), + 'id_tensor shape: ', array_ops.shape(id_tensor)]) + with ops.control_dependencies([check_id_rank]): + id_tensor = sparse_ops.sparse_reshape( + id_tensor, + shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) + if weight_tensor is not None: + check_weight_rank = check_ops.assert_equal( + array_ops.rank(weight_tensor), 2, + data=[ + 'Column {} expected weight tensor of rank 2.'.format(self.name), + 'weight_tensor shape:', array_ops.shape(weight_tensor)]) + with ops.control_dependencies([check_weight_rank]): + weight_tensor = sparse_ops.sparse_reshape( + weight_tensor, + shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) + return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) + + def _sequence_length(self, inputs): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) + + +class _SequenceEmbeddingColumn( + _SequenceDenseColumn, + collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): + + @property + def name(self): + return self.embedding_column.name + + @property + def _parse_example_spec(self): + return self.embedding_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.embedding_column._transform_feature(inputs) + + @property + def _variable_shape(self): + return self.embedding_column._variable_shape + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + dense_tensor = self.embedding_column._get_dense_tensor( + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) + sequence_length = self.embedding_column.categorical_column._sequence_length( + inputs) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + + +class _SequenceNumericColumn( + _SequenceDenseColumn, + collections.namedtuple( + '_SequenceNumericColumn', + ['key', 'shape', 
'default_value', 'dtype'])): + + @property + def name(self): + return self.key + + @property + def _parse_example_spec(self): + return {self.key: parsing_ops.VarLenFeature(self.dtype)} + + def _transform_feature(self, inputs): + return inputs.get(self.key) + + @property + def _variable_shape(self): + return tensor_shape.TensorShape(self.shape) + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + # Do nothing with weight_collections and trainable since no variables are + # created in this function. + del weight_collections + del trainable + sp_tensor = inputs.get(self) + dense_tensor = sparse_ops.sparse_tensor_to_dense( + sp_tensor, default_value=self.default_value) + # Reshape into [batch_size, T, variable_shape]. + dense_shape = array_ops.concat( + [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], + axis=0) + dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) + sequence_length = _sequence_length_from_sparse_tensor( + sp_tensor, num_elements=self._variable_shape.num_elements()) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + +# pylint: enable=g-doc-args,missing-docstring,protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py new file mode 100644 index 0000000000..59674869a2 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py @@ -0,0 +1,471 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sequential_feature_column.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc +from tensorflow.python.feature_column.feature_column import _LazyBuilder +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.platform import test +from tensorflow.python.training import monitored_session + + +class SequenceInputLayerTest(test.TestCase): + + def test_embedding_column(self): + vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + + embedding_dimension_a = 2 + embedding_values_a = ( + (1., 2.), # id 0 + (3., 4.), # id 1 + (5., 6.) # id 2 + ) + embedding_dimension_b = 3 + embedding_values_b = ( + (11., 12., 13.), # id 0 + (14., 15., 16.), # id 1 + (17., 18., 19.) 
# id 2 + ) + def _get_initializer(embedding_dimension, embedding_values): + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + return _initializer + + expected_input_layer = [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], + ] + expected_sequence_length = [1, 2] + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = sfc._sequence_embedding_column( + categorical_column_a, dimension=embedding_dimension_a, + initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) + categorical_column_b = sfc.sequence_categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_b = sfc._sequence_embedding_column( + categorical_column_b, dimension=embedding_dimension_b, + initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + }, + # Test that columns are reordered alphabetically. 
+ feature_columns=[embedding_column_b, embedding_column_a]) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('sequence_input_layer/aaa_embedding/embedding_weights:0', + 'sequence_input_layer/bbb_embedding/embedding_weights:0'), + tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess)) + self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess)) + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_input_layer = [ + [[0.], [1.]], + [[10.], [0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column_multi_dim(self): + """Tests sequence_input_layer for multi-dimensional numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + # The output of numeric_column._get_dense_tensor should be flattened. 
+ expected_input_layer = [ + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +def _assert_sparse_tensor_value(test_case, expected, actual): + test_case.assertEqual(np.int64, np.array(actual.indices).dtype) + test_case.assertAllEqual(expected.indices, actual.indices) + + test_case.assertEqual( + np.array(expected.values).dtype, np.array(actual.values).dtype) + test_case.assertAllEqual(expected.values, actual.values) + + test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype) + test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) + + +class SequenceCategoricalColumnWithIdentityTest(test.TestCase): + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((1, 2, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_value( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_get_sparse_tensors_inputs3d(self): + """Tests _get_sparse_tensors when the input is already 3D Tensor.""" + column = 
sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 2, 0), + dense_shape=(2, 2, 1)) + + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'Column aaa expected ID tensor of rank 2\.\s*' + r'id_tensor shape:\s*\[2 2 1\]'): + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({'aaa': inputs})) + with monitored_session.MonitoredSession() as sess: + id_weight_pair.id_tensor.eval(session=sess) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_zeros(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((1, 0), (3, 0), (3, 1)), + values=(1, 2, 0), + dense_shape=(5, 2)) + expected_sequence_length = [0, 1, 0, 2, 0] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceEmbeddingColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)) + + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 
11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + expected_lookups = [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]], + ] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=embedding_dimension, + initializer=_initializer) + + embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('embedding_weights:0',), tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) + self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) + + def test_sequence_length(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some 
examples do not have ids.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [] + # example 1, ids [2] + # example 2, ids [0, 1] + # example 3, ids [] + # example 4, ids [1] + # example 5, ids [] + indices=((1, 0), (2, 0), (2, 1), (4, 0)), + values=(2, 0, 1, 1), + dense_shape=(6, 2)) + expected_sequence_length = [0, 1, 2, 0, 1, 0] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceNumericColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_dense_tensor = [ + [[0.], [1.]], + [[10.], [0.]], + ] + numeric_column = sfc.sequence_numeric_column('aaa') + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_sequence_dense_tensor_with_shape(self): + """Tests get_sequence_dense_tensor with shape !=(1,).""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_dense_tensor = [ + [[0., 1., 2.], [3., 4., 5.]], + [[10., 11., 12.], [0., 0., 0.]], + ] + numeric_column 
= sfc.sequence_numeric_column('aaa', shape=(3,)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_dense_tensor_multi_dim(self): + """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + expected_dense_tensor = [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], + ] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_sequence_length(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_shape(self): + """Tests _sequence_length with shape !=(1,).""" + sparse_input = 
sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some examples do not have ids.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [] + # example 1, values [[0.], [1.]] + # example 2, [[2.]] + # example 3, values [] + # example 4, [[3.]] + # example 5, values [] + indices=((1, 0), (1, 1), (2, 0), (4, 0)), + values=(0., 1., 2., 3.), + dense_shape=(6, 2)) + expected_sequence_length = [0, 2, 1, 0, 1, 0] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +if __name__ == '__main__': + test.main() -- GitLab From 26cb7de9c03a9d73703decec8c917651369ee9ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 14:25:37 -0800 Subject: [PATCH 179/884] Add a function that allows to dynamically verify whether a function is white listed for graph mode. 
PiperOrigin-RevId: 187080654 --- tensorflow/contrib/py2tf/impl/conversion.py | 18 ++++++++++++++++++ .../contrib/py2tf/impl/conversion_test.py | 11 +++++++++++ 2 files changed, 29 insertions(+) diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index 044de33568..d95469ea53 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -97,6 +97,24 @@ class ConversionMap(object): self.dependency_cache[original_entity] = converted_ast +def is_whitelisted_for_graph(o): + """Check whether an entity is whitelisted for use in graph mode. + + Examples of whitelisted entities include all members of the tensorflow + package. + + Args: + o: A Python entity. + Returns: + Boolean + """ + m = tf_inspect.getmodule(o) + for prefix, in config.DEFAULT_UNCOMPILED_MODULES: + if m.__name__.startswith(prefix): + return True + return False + + def entity_to_graph(o, conversion_map, arg_values, arg_types): """Compile a Python entity into equivalent TensorFlow. 
diff --git a/tensorflow/contrib/py2tf/impl/conversion_test.py b/tensorflow/contrib/py2tf/impl/conversion_test.py index 7816f95857..9ff256aace 100644 --- a/tensorflow/contrib/py2tf/impl/conversion_test.py +++ b/tensorflow/contrib/py2tf/impl/conversion_test.py @@ -20,12 +20,23 @@ from __future__ import print_function import gast +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl import conversion +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test class ConversionTest(test.TestCase): + def test_is_whitelisted_for_graph(self): + + def test_fn(): + return constant_op.constant(1) + + self.assertFalse(conversion.is_whitelisted_for_graph(test_fn)) + self.assertTrue(conversion.is_whitelisted_for_graph(utils)) + self.assertTrue(conversion.is_whitelisted_for_graph(constant_op.constant)) + def test_entity_to_graph_unsupported_types(self): with self.assertRaises(ValueError): conversion_map = conversion.ConversionMap(True, (), (), None) -- GitLab From f4a396bcecd8b27caba0c10a50e1f6b56dbcf6a9 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:31:29 -0800 Subject: [PATCH 180/884] [TF:XLA] Bump open source llvm revision to r326083 PiperOrigin-RevId: 187081592 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 278a225f76..9009f08163 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -476,11 +476,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", + 
"https://github.com/llvm-mirror/llvm/archive/8f7bcdf3c65b9a47e35653d525135beb18f3ac25.tar.gz", ], - sha256 = "f5721d9cc18a9109c9e9f847f48e69b710b961cee83e6691227e310cb3b5da58", - strip_prefix = "llvm-fc8ba497cd1a1af4ecae19a5b64bdbd71e065e14", + sha256 = "63d4da54dc7bc9a79e2ad266d230f4f759520cccb344a2dd49c2c6383ab75285", + strip_prefix = "llvm-8f7bcdf3c65b9a47e35653d525135beb18f3ac25", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From c1e22e9fc1b8db5390c466a2ffb5da8b1abf15b4 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 26 Feb 2018 14:32:08 -0800 Subject: [PATCH 181/884] Track DebugOptions in AotCompilationOptions In particular, I need this for supporting HLO profiling in the AOT backend. PiperOrigin-RevId: 187081674 --- tensorflow/compiler/xla/service/compile_only_service.cc | 3 +-- tensorflow/compiler/xla/service/compiler.cc | 3 +++ tensorflow/compiler/xla/service/compiler.h | 6 +++++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/compile_only_service.cc b/tensorflow/compiler/xla/service/compile_only_service.cc index dab73596e1..6664496ab6 100644 --- a/tensorflow/compiler/xla/service/compile_only_service.cc +++ b/tensorflow/compiler/xla/service/compile_only_service.cc @@ -72,8 +72,7 @@ CompileOnlyService::CompileAheadOfTime( VersionedComputationHandle versioned_handle = user_computation->GetVersionedHandle(); - // TODO(b/63773457): Track DebugOptions in AotCompilationOptions. - DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags(); + const DebugOptions& debug_options = options.debug_options(); // Dump computation proto state if flag is set. 
const string& directory_path = debug_options.xla_dump_computations_to(); diff --git a/tensorflow/compiler/xla/service/compiler.cc b/tensorflow/compiler/xla/service/compiler.cc index e2e9d2a0c0..0392d4af48 100644 --- a/tensorflow/compiler/xla/service/compiler.cc +++ b/tensorflow/compiler/xla/service/compiler.cc @@ -86,4 +86,7 @@ Compiler::GetPlatformCompilers() { return compilers->at(platform->id()).get(); } +AotCompilationOptions::AotCompilationOptions() + : debug_options_(legacy_flags::GetDebugOptionsFromFlags()) {} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/compiler.h b/tensorflow/compiler/xla/service/compiler.h index 74fd24edf8..33e19efc72 100644 --- a/tensorflow/compiler/xla/service/compiler.h +++ b/tensorflow/compiler/xla/service/compiler.h @@ -79,11 +79,15 @@ class AotCompilationOptions { device_allocator_ = device_allocator; } + const DebugOptions& debug_options() const { return debug_options_; } + DebugOptions* mutable_debug_options() { return &debug_options_; } + protected: - AotCompilationOptions() = default; + AotCompilationOptions(); private: DeviceMemoryAllocator* device_allocator_ = nullptr; + DebugOptions debug_options_; }; // Abstract compiler interface that is subclassed for compilation on a -- GitLab From 3653257c729f651c787b6fa04788084191478c3e Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 14:38:31 -0800 Subject: [PATCH 182/884] Enable de/serialization of nested control flow. This is a follow-up to the previous commit (https://github.com/tensorflow/tensorflow/commit/23851760b7b099214bdd4f1b88156d7ac2bdd2a2). It adds the new proto schemas, enables the behavior for reading and writing the new protos, and adds a test for de/serializing nested while loops. There's still a bug preventing deserializing conds, which will be addressed in another change. 
PiperOrigin-RevId: 187082713 --- tensorflow/core/protobuf/control_flow.proto | 17 ++++++- tensorflow/python/ops/control_flow_ops.py | 54 ++++++-------------- tensorflow/python/training/saver_test.py | 56 +++++++++++++++++++++ 3 files changed, 88 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/protobuf/control_flow.proto b/tensorflow/core/protobuf/control_flow.proto index 2c9476a08a..3c05b4f0e2 100644 --- a/tensorflow/core/protobuf/control_flow.proto +++ b/tensorflow/core/protobuf/control_flow.proto @@ -17,6 +17,15 @@ message ValuesDef { map external_values = 2; } +// Container for any kind of control flow context. Any other control flow +// contexts that are added below should also be added here. +message ControlFlowContextDef { + oneof ctxt { + CondContextDef cond_ctxt = 1; + WhileContextDef while_ctxt = 2; + } +} + // Protocol buffer representing a CondContext object. message CondContextDef { // Name of the context. @@ -33,6 +42,9 @@ message CondContextDef { // Values and external values in control flow context. ValuesDef values_def = 5; + + // Contexts contained inside this context (e.g. nested conds). + repeated ControlFlowContextDef nested_contexts = 6; } // Protocol buffer representing a WhileContext object. @@ -70,5 +82,8 @@ message WhileContextDef { // Optional name of the maximum_iterations tensor. string maximum_iterations_name = 11; - // Next available id: 12. + // Contexts contained inside this context (e.g. nested whiles). + repeated ControlFlowContextDef nested_contexts = 12; + + // Next available id: 13. 
} diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 152578c0c6..b16901effd 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1765,13 +1765,9 @@ class CondContext(ControlFlowContext): context_def.branch = self._branch context_def.values_def.MergeFrom(super(CondContext, self)._to_values_def( export_scope)) - # TODO(b/72868227): enable this once the corresponding control_flow.proto - # changes have been checked in (they aren't checked in and this is - # disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -1783,14 +1779,10 @@ class CondContext(ControlFlowContext): ret = CondContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is here for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def) + ret.Exit() return ret def to_control_flow_context_def(self, context_def, export_scope=None): @@ -2108,10 +2100,7 @@ def cond(pred, # Only add non-nested conds to the collection. Any nested control flow will # be encapsulated in the root context. assert context_t.outer_context == context_f.outer_context - # TODO(b/72868227): remove "if True..." 
once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or context_t.outer_context is None: + if context_t.outer_context is None: ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_t) ops.add_to_collection(ops.GraphKeys.COND_CONTEXT, context_f) @@ -2334,13 +2323,9 @@ class WhileContext(ControlFlowContext): context_def.values_def.MergeFrom( super(WhileContext, self)._to_values_def( export_scope=export_scope)) - # TODO(b/72868227): remove "if True..." once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if False: # pylint: disable=using-constant-test - for nested in self._nested_contexts: - nested_def = context_def.nested_contexts.add() - nested.to_control_flow_context_def(nested_def) + for nested in self._nested_contexts: + nested_def = context_def.nested_contexts.add() + nested.to_control_flow_context_def(nested_def) return context_def else: @@ -2362,14 +2347,10 @@ class WhileContext(ControlFlowContext): """ ret = WhileContext(context_def=context_def, import_scope=import_scope) - # TODO(b/72868227): remove "if hasattr(...)" once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if hasattr(context_def, "nested_contexts"): - ret.Enter() - for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def, import_scope=import_scope) - ret.Exit() + ret.Enter() + for nested_def in context_def.nested_contexts: + from_control_flow_context_def(nested_def, import_scope=import_scope) + ret.Exit() return ret def GetWhileContext(self): @@ -3214,10 +3195,7 @@ def while_loop(cond, swap_memory=swap_memory) # Only add non-nested loops to the collection. 
Any nested control flow will # be encapsulated in the root context. - # TODO(b/72868227): enable condition once the corresponding - # control_flow.proto changes have been checked in (they aren't checked in - # and this is disabled for now to ensure forwards compatibility). - if True or loop_context.outer_context is None: + if loop_context.outer_context is None: ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context) result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants) if maximum_iterations is not None: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index f00f98db00..b366ed30f3 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -53,6 +53,7 @@ from tensorflow.python.lib.io import file_io from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import partitioned_variables @@ -2040,6 +2041,61 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) + def testNestedWhileLoops(self): + test_dir = self._get_test_dir("nested_whiles") + filename = os.path.join(test_dir, "metafile") + saver_ckpt = os.path.join(test_dir, "saver.ckpt") + + # Create two simple nested while loops. 
+ with ops_lib.Graph().as_default(): + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + + var = variables.Variable(0) + var_name = var.name + + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + [0, var]) + output_name = output.name + + init_op = variables.global_variables_initializer() + + # Generate a MetaGraphDef containing the nested loops. + with session.Session() as sess: + sess.run(init_op) + sess.run(output) + saver = saver_module.Saver() + saver.save(sess, saver_ckpt) + saver.export_meta_graph(filename) + + # Build and run the gradients of the nested while loop. We use this below + # to verify that the gradients are correct with an imported MetaGraphDef. + grad = gradients_impl.gradients([output], [var]) + with session.Session() as sess: + sess.run(init_op) + expected_grad_value = sess.run(grad) + + # Restore the MetaGraphDef into a new Graph. + with ops_lib.Graph().as_default(): + with session.Session() as sess: + saver = saver_module.import_meta_graph(filename) + saver.restore(sess, saver_ckpt) + + # Make sure we can still build gradients and get the same result. + var = ops_lib.get_default_graph().get_tensor_by_name(var_name) + output = ops_lib.get_default_graph().get_tensor_by_name(output_name) + grad = gradients_impl.gradients([output], [var]) + + init_op = variables.global_variables_initializer() + + with session.Session() as sess: + sess.run(init_op) + actual_grad_value = sess.run(grad) + self.assertEqual(expected_grad_value, actual_grad_value) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From 854a07650f33be545441a08f5db84a0f05a8b88e Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 15:37:27 -0800 Subject: [PATCH 183/884] [XLA::Interpreter] Add support for kCall to HloEvaluator. Also enable xla/tests/call_test to run on interpreter. 
PiperOrigin-RevId: 187092587 --- .../compiler/xla/service/hlo_evaluator.cc | 20 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 3 +++ 3 files changed, 25 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 15ae53128a..fd06b19144 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2445,6 +2445,26 @@ Status HloEvaluator::HandleCopy(HloInstruction* copy) { return Status::OK(); } +Status HloEvaluator::HandleCall(HloInstruction* call) { + auto* computation = call->to_apply(); + auto operands = call->operands(); + + std::vector arg_literals; + arg_literals.reserve(operands.size()); + for (auto operand : operands) { + const Literal& arg_literal = GetEvaluatedLiteralFor(operand); + arg_literals.push_back(&arg_literal); + } + + HloEvaluator embedded_evaluator; + std::unique_ptr result = + embedded_evaluator.Evaluate(*computation, arg_literals) + .ConsumeValueOrDie(); + + evaluated_[call] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index 3b2b697e49..c65d9915e3 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCopy(HloInstruction* copy) override; + Status HandleCall(HloInstruction* call) override; + private: // Returns the already-evaluated literal result for the instruction. 
// A Constant instruction is considered evaluated and its literal will be diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 97abf217d7..33fde9737d 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1143,6 +1143,9 @@ xla_test( xla_test( name = "call_test", srcs = ["call_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", -- GitLab From acf78b20f71dd8c3a928b1f12ea4de6f5028fc48 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 26 Feb 2018 15:37:40 -0800 Subject: [PATCH 184/884] Uses a thread pool for graph functions in eager mode with inter_op_parallelism_threads. PiperOrigin-RevId: 187092622 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +++++++++++++- tensorflow/c/eager/runtime.cc | 14 ++++++++++---- tensorflow/c/eager/runtime.h | 3 +++ tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 35 insertions(+), 13 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index e55cb672e9..16a2a15072 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,6 +21,7 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ + "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index bebb63c746..b233dd5b93 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. 
tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = - tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); + status->status = tensorflow::KernelAndDevice::Init( + ndef, ctx->func_lib(device), &ctx->runner, kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 3356054cd0..29944df4c2 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,6 +31,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -45,7 +46,15 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : policy(opts.policy), + : thread_pool(new tensorflow::thread::ThreadPool( + opts.session_options.options.env, "EagerCompute", + opts.session_options.options.config + .inter_op_parallelism_threads() != 0 + ? 
opts.session_options.options.config + .inter_op_parallelism_threads() + : tensorflow::port::NumSchedulableCPUs())), + runner([this](std::function f) { thread_pool->Schedule(f); }), + policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -54,6 +63,9 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} + const std::unique_ptr thread_pool; + std::function)> runner; + const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index 4bf24fec2c..b9618420f0 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,17 +255,22 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; + out->runner_ = nullptr; + out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = flib->device(); out->kernel_.reset(k); out->flib_ = flib; + out->runner_ = runner; + out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -296,10 +301,11 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - // TODO(apassos): use a thread pool. 
- std::function)> runner = - [](std::function f) { f(); }; - params.runner = &runner; + if (runner_ == nullptr) { + params.runner = &default_runner_; + } else { + params.runner = runner_; + } OpKernelContext context(¶ms); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 7fede4dae9..fa5f839977 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,6 +169,7 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, + std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -188,6 +189,8 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; + std::function)>* runner_; + std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index 643153058c..ab0b535e1a 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); + Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &k)); } } 
BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - TF_CHECK_OK( - KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); + TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), + nullptr, &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From 260f5b8fe144cd369fde755739806449a2901252 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Mon, 26 Feb 2018 15:42:52 -0800 Subject: [PATCH 185/884] [XLA] Fix #17090 a problem in IrArray::Index::SourceIndexOfTranspose. Algebraic simplification transforms bitcast-equivalent transpose/reshape instructions to bitcast instructions before IR emission. As such, we should skip checking whether a transpose/reshape instruction is bitcast-equivalent or not during IR emission. Remove the call from IrArray::Index::SourceIndexOfTranspose to ShapeUtil::TransposeIsBitcast. Also remove the call from IrArray::Index::SourceIndexOfReshape to ShapeUtil::ReshapeIsBitcast. Remove the calls to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast from NotWorthHoistingIndividually because layout assignment hasn't been done there yet. Instead, return true when the input is a transpose or reshape instruction, to prevent it from being hoisted out of loops. Add a check to ShapeUtil::TransposeIsBitcast and ShapeUtil::ReshapeIsBitcast to make sure that both input shape and output shape have layouts. Add two test cases. 
PiperOrigin-RevId: 187093399 --- .../xla/service/layout_assignment_test.cc | 79 +++++++++++++++++++ .../compiler/xla/service/llvm_ir/ir_array.cc | 8 +- .../while_loop_invariant_code_motion.cc | 12 +-- tensorflow/compiler/xla/shape_util.cc | 14 +--- tensorflow/compiler/xla/shape_util.h | 4 + 5 files changed, 95 insertions(+), 22 deletions(-) diff --git a/tensorflow/compiler/xla/service/layout_assignment_test.cc b/tensorflow/compiler/xla/service/layout_assignment_test.cc index 88e5caaf47..62feb7c1e9 100644 --- a/tensorflow/compiler/xla/service/layout_assignment_test.cc +++ b/tensorflow/compiler/xla/service/layout_assignment_test.cc @@ -590,6 +590,85 @@ TEST_F(LayoutAssignmentTest, TransposeToBitcastToUser) { transpose->shape(), {2, 3, 0, 1})); } +// TransposeIsBitcast shouldn't be called without layout information. +TEST_F(LayoutAssignmentTest, TransposeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = builder.AddInstruction( + HloInstruction::CreateTranspose(input_shape, param, {0, 2, 1})); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH(ShapeUtil::TransposeIsBitcast(hlo->operand(0)->shape(), + hlo->shape(), hlo->dimensions()), + "LayoutUtil::HasLayout"); +} + +// ReshapeIsBitcast shouldn't be called without layout information. 
+TEST_F(LayoutAssignmentTest, ReshapeIsBitcastFail) { + auto builder = HloComputation::Builder(TestName()); + Shape input_shape = ShapeUtil::MakeShape(F32, {2, 2, 2}); + Shape input_shape_with_layout(input_shape); + *input_shape_with_layout.mutable_layout() = LayoutUtil::MakeLayout({2, 1, 0}); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, input_shape_with_layout, "param")); + auto hlo = + builder.AddInstruction(HloInstruction::CreateReshape(input_shape, param)); + // Clear the default layout assigned to the instruction. + LayoutUtil::ClearLayout(hlo->mutable_shape()); + EXPECT_DEATH( + ShapeUtil::ReshapeIsBitcast(hlo->operand(0)->shape(), hlo->shape()), + "LayoutUtil::HasLayout"); +} + +// Check that the computation below doesn't crash the compiler. +// +// Within a fusion computation, only the parameters and result get assigned a +// layout. When we run the algebraic simplifier on this computation post layout +// assignment, it should not call TransposeIsBitcast on the `transpose` node +// inside the fusion computation as TransposeIsBitcast checks both input_shape +// and output_shape have layouts. 
+TEST_F(LayoutAssignmentTest, TransposeWithinFusionDoesNotCrash) { + const char* module_str = R"( + HloModule test_module + + fused_computation { + param_1 = f32[2,2,2]{2,1,0} parameter(1) + transpose = f32[2,2,2]{2,1,0} transpose(param_1), dimensions={0,2,1} + reduce_1 = f32[] parameter(0) + broadcast_1 = f32[2,2,2]{2,1,0} broadcast(reduce_1), dimensions={} + ROOT divide_1 = f32[2,2,2]{2,1,0} divide(transpose, broadcast_1) + } + + ENTRY entry_computation { + fusion.1 = f32[2,2,2]{2,1,0} parameter(1) + reduce.1 = f32[] parameter(0) + fusion.2 = f32[2,2,2]{2,1,0} fusion(reduce.1, fusion.1), kind=kLoop, calls=fused_computation + ROOT tuple.1 = (f32[2,2,2]{2,1,0}) tuple(fusion.2) + } + )"; + + auto module = tools::Parse(module_str).ValueOrDie(); + + module = + backend() + .compiler() + ->RunHloPasses(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .ConsumeValueOrDie(); + + EXPECT_EQ( + ::tensorflow::Status::OK(), + backend() + .compiler() + ->RunBackend(std::move(module), backend().default_stream_executor(), + /*device_allocator=*/nullptr) + .status()); +} + // A GTE inside of a fusion node inherits the layout of its operand (which // should, if we keep following operands, eventually be a parameter). 
TEST_F(LayoutAssignmentTest, GTEInheritsLayoutFromOperand) { diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 6384c7f46f..f3642cf0a1 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -160,7 +160,8 @@ IrArray::Index IrArray::Index::SourceIndexOfReshape( } } - if (linear() != nullptr && + if (linear() != nullptr && LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape) && ShapeUtil::ReshapeIsBitcast(input_shape, output_shape)) { return Index(source_multidim_index, linear(), input_shape); } @@ -195,10 +196,13 @@ IrArray::Index IrArray::Index::SourceIndexOfTranspose( llvm::IRBuilder<>* builder) const { std::vector operand_multidim_index = Permute(dimension_mapping, multidim()); - if (linear() != nullptr && + + if (linear() != nullptr && LayoutUtil::HasLayout(operand_shape) && + LayoutUtil::HasLayout(shape) && ShapeUtil::TransposeIsBitcast(operand_shape, shape, dimension_mapping)) { return Index(operand_multidim_index, linear(), operand_shape); } + return Index(operand_multidim_index); } diff --git a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc index a5f9b01f01..3ef0cdff67 100644 --- a/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc +++ b/tensorflow/compiler/xla/service/while_loop_invariant_code_motion.cc @@ -106,20 +106,12 @@ static bool NotWorthHoistingIndividually(const HloInstruction& instruction) { case HloOpcode::kBitcast: case HloOpcode::kBroadcast: case HloOpcode::kConstant: + case HloOpcode::kReshape: case HloOpcode::kReverse: case HloOpcode::kSlice: + case HloOpcode::kTranspose: case HloOpcode::kTuple: return true; - - case HloOpcode::kTranspose: - return ShapeUtil::TransposeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape(), 
instruction.dimensions()); - - case HloOpcode::kReshape: - return ShapeUtil::ReshapeIsBitcast( - /*input_shape=*/instruction.operand(0)->shape(), - /*output_shape=*/instruction.shape()); } } diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 604e0173e7..3152789016 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1073,11 +1073,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping) { - // Can't insert bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) && - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) && LayoutUtil::IsPadded(output_shape)) { @@ -1106,11 +1103,8 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, /* static */ bool ShapeUtil::ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape) { - // Can't convert reshapes into bitcasts without layout information. - if (!LayoutUtil::HasLayout(input_shape) || - !LayoutUtil::HasLayout(output_shape)) { - return false; - } + CHECK(LayoutUtil::HasLayout(input_shape) && + LayoutUtil::HasLayout(output_shape)); // Padding is not handled. 
if (LayoutUtil::IsPadded(input_shape) || LayoutUtil::IsPadded(output_shape)) { diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 19b1aa93bd..8ee263fe5e 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -522,12 +522,16 @@ class ShapeUtil { // Returns whether a transpose from input_shape to output_shape with dimension // mapping "dimension_mapping" produces a result which is bit-wise identical // to its input and thus may be replaced with a bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool TransposeIsBitcast( const Shape& input_shape, const Shape& output_shape, tensorflow::gtl::ArraySlice dimension_mapping); // Returns whether a reshape from "input_shape" to "output_shape" is a // bitcast. + // + // Precondition: Both input_shape and output_shape have explicit layouts. static bool ReshapeIsBitcast(const Shape& input_shape, const Shape& output_shape); -- GitLab From 6db1b213458ea7f0acd4476f70d930e15af8f35f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 16:01:04 -0800 Subject: [PATCH 186/884] [XLA] Add more supported dtypes to the local Python client. PiperOrigin-RevId: 187096144 --- tensorflow/compiler/xla/python/xla_client.py | 38 ++++++++++++-------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/python/xla_client.py b/tensorflow/compiler/xla/python/xla_client.py index 3b8ec851d5..90cda42f32 100644 --- a/tensorflow/compiler/xla/python/xla_client.py +++ b/tensorflow/compiler/xla/python/xla_client.py @@ -30,9 +30,9 @@ from tensorflow.compiler.xla import xla_data_pb2 from tensorflow.compiler.xla.python import pywrap_xla as c_api -# Most functions are snake_case for consistency with other modules, -# whereas method names of ComputationBuilder and LocalComputation are -# CamelCase for consistency with XLA. 
+# Most functions are snake_case for consistency with other modules, whereas +# method names of ComputationBuilder and LocalComputation are CamelCase for +# consistency with XLA. # pylint: disable=invalid-name @@ -123,24 +123,34 @@ _BINARY_OPS = [ 'Pow', ] + XLA_ELEMENT_TYPE_TO_DTYPE = { - xla_data_pb2.F32: np.dtype(np.float32), - xla_data_pb2.F64: np.dtype(np.float64), - xla_data_pb2.S32: np.dtype(np.int32), - xla_data_pb2.S64: np.dtype(np.int64), - xla_data_pb2.U32: np.dtype(np.uint32), - xla_data_pb2.U64: np.dtype(np.uint64), - xla_data_pb2.PRED: np.dtype(np.bool), + xla_data_pb2.PRED: np.dtype('bool'), + xla_data_pb2.S8: np.dtype('int8'), + xla_data_pb2.S16: np.dtype('int16'), + xla_data_pb2.S32: np.dtype('int32'), + xla_data_pb2.S64: np.dtype('int64'), + xla_data_pb2.U8: np.dtype('uint8'), + xla_data_pb2.U16: np.dtype('uint16'), + xla_data_pb2.U32: np.dtype('uint32'), + xla_data_pb2.U64: np.dtype('uint64'), + xla_data_pb2.F16: np.dtype('float16'), + xla_data_pb2.F32: np.dtype('float32'), + xla_data_pb2.F64: np.dtype('float64'), + xla_data_pb2.C64: np.dtype('complex64'), xla_data_pb2.TUPLE: np.dtype(np.object), } # Note the conversion on the key. Numpy has a known issue wherein dtype hashing # doesn't work as expected (https://github.com/numpy/numpy/issues/7242). Thus, # when keying by dtype in this dict, we use the string form of dtypes. -DTYPE_TO_XLA_ELEMENT_TYPE = { - str(v): k - for k, v in XLA_ELEMENT_TYPE_TO_DTYPE.items() -} +DTYPE_TO_XLA_ELEMENT_TYPE = {str(dt): et + for et, dt in XLA_ELEMENT_TYPE_TO_DTYPE.items()} + + +def dtype_to_etype(dtype): + """Convenience function for reading DTYPE_TO_XLA_ELEMENT_TYPE.""" + return DTYPE_TO_XLA_ELEMENT_TYPE[str(np.dtype(dtype))] class LocalBuffer(object): -- GitLab From c7caa2d87daa37b66811ac99f997ad02acd4ecc8 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Mon, 26 Feb 2018 16:23:46 -0800 Subject: [PATCH 187/884] Deprecate tf.contrib.learn. RELNOTES: Deprecated tf.contrib.learn. 
Please check contrib/learn/README.md for instructions on how to convert existing code. PiperOrigin-RevId: 187099439 --- .../python/framework/experimental_test.py | 1 - tensorflow/contrib/learn/README.md | 143 ++++++++++++++++++ tensorflow/contrib/learn/__init__.py | 7 +- tensorflow/contrib/learn/python/__init__.py | 7 +- .../contrib/learn/python/learn/__init__.py | 7 +- .../python/learn/basic_session_run_hooks.py | 43 +++++- .../learn/python/learn/datasets/__init__.py | 12 +- .../learn/python/learn/datasets/base.py | 26 +++- .../learn/python/learn/datasets/mnist.py | 23 ++- .../learn/datasets/produce_small_datasets.py | 7 +- .../learn/python/learn/datasets/synthetic.py | 10 +- .../python/learn/datasets/text_datasets.py | 10 +- .../learn/python/learn/estimators/__init__.py | 7 +- .../learn/python/learn/estimators/_sklearn.py | 4 +- .../learn/estimators/composable_model.py | 17 ++- .../python/learn/estimators/constants.py | 8 +- .../learn/python/learn/estimators/debug.py | 14 +- .../learn/python/learn/estimators/dnn.py | 19 ++- .../learn/estimators/dnn_linear_combined.py | 19 ++- .../learn/estimators/dynamic_rnn_estimator.py | 13 +- .../python/learn/estimators/estimator.py | 27 +++- .../learn/estimators/estimator_test_utils.py | 7 +- .../learn/python/learn/estimators/head.py | 20 ++- .../learn/python/learn/estimators/kmeans.py | 9 +- .../learn/python/learn/estimators/linear.py | 19 ++- .../learn/estimators/logistic_regressor.py | 10 +- .../python/learn/estimators/metric_key.py | 10 +- .../learn/python/learn/estimators/model_fn.py | 22 ++- .../python/learn/estimators/prediction_key.py | 8 +- .../python/learn/estimators/rnn_common.py | 7 +- .../python/learn/estimators/run_config.py | 19 ++- .../estimators/state_saving_rnn_estimator.py | 13 +- .../learn/python/learn/estimators/svm.py | 11 +- .../learn/estimators/tensor_signature.py | 11 +- .../python/learn/estimators/test_data.py | 7 +- .../contrib/learn/python/learn/evaluable.py | 11 +- 
.../contrib/learn/python/learn/experiment.py | 24 +-- .../learn/python/learn/export_strategy.py | 14 +- .../learn/python/learn/graph_actions.py | 8 +- .../learn/python/learn/learn_io/__init__.py | 7 +- .../learn/python/learn/learn_io/dask_io.py | 11 +- .../python/learn/learn_io/data_feeder.py | 29 +++- .../python/learn/learn_io/generator_io.py | 9 +- .../learn/python/learn/learn_io/graph_io.py | 16 +- .../learn/python/learn/learn_io/numpy_io.py | 9 +- .../learn/python/learn/learn_io/pandas_io.py | 12 +- .../learn/python/learn/learn_runner.py | 10 +- .../learn/python/learn/learn_runner_lib.py | 6 +- .../contrib/learn/python/learn/metric_spec.py | 13 +- .../contrib/learn/python/learn/models.py | 14 +- .../learn/python/learn/monitored_session.py | 6 +- .../contrib/learn/python/learn/monitors.py | 68 ++++++++- .../learn/python/learn/ops/__init__.py | 7 +- .../learn/python/learn/ops/embeddings_ops.py | 6 +- .../learn/python/learn/ops/losses_ops.py | 7 +- .../learn/python/learn/ops/seq2seq_ops.py | 12 +- .../python/learn/preprocessing/__init__.py | 7 +- .../python/learn/preprocessing/categorical.py | 15 +- .../preprocessing/categorical_vocabulary.py | 13 +- .../learn/python/learn/preprocessing/text.py | 26 +++- .../learn/python/learn/session_run_hook.py | 6 +- .../python/learn/summary_writer_cache.py | 5 +- .../contrib/learn/python/learn/trainable.py | 9 +- .../learn/python/learn/utils/__init__.py | 7 +- .../learn/python/learn/utils/export.py | 9 +- .../contrib/learn/python/learn/utils/gc.py | 13 +- .../python/learn/utils/input_fn_utils.py | 16 +- .../python/learn/utils/inspect_checkpoint.py | 2 +- .../learn/utils/saved_model_export_utils.py | 30 +++- tensorflow/python/util/decorator_utils.py | 2 +- 70 files changed, 945 insertions(+), 111 deletions(-) create mode 100644 tensorflow/contrib/learn/README.md diff --git a/tensorflow/contrib/framework/python/framework/experimental_test.py b/tensorflow/contrib/framework/python/framework/experimental_test.py index 
8e54e09e04..cfdc7df7d8 100644 --- a/tensorflow/contrib/framework/python/framework/experimental_test.py +++ b/tensorflow/contrib/framework/python/framework/experimental_test.py @@ -49,7 +49,6 @@ class ExperimentalTest(test.TestCase): "\nTHIS FUNCTION IS EXPERIMENTAL. It may change or " "be removed at any time, and without warning." "\n" - "\n" "\nArgs:" "\n arg0: Arg 0." "\n arg1: Arg 1." diff --git a/tensorflow/contrib/learn/README.md b/tensorflow/contrib/learn/README.md new file mode 100644 index 0000000000..d516bffc5e --- /dev/null +++ b/tensorflow/contrib/learn/README.md @@ -0,0 +1,143 @@ +EVERYTHING IN THIS DIRECTORY IS DEPRECATED. + +Using functions or classes will result in warnings. + +Instructions for converting to current alternatives are included in the +warnings. A high-level overview is below. + +## Canned Estimators + +Many canned estimators (subclasses of `Estimator`) have equivalents in core: +`DNNClassifier`, `DNNRegressor`, `DNNEstimator`, `LinearClassifier`, +`LinearRegressor`, `DNNLinearCombinedClassifier` and +`DNNLinearCombinedRegressor`. They are exposed under `tf.estimator`. +`DNNEstimator`, `LinearEstimator` and `DNNLinearCombinedEstimator` +are exposed under `tf.contrib.estimator`. + +To migrate to the new api, users need to take the following steps: + +* Replace `tf.contrib.learn` with `tf.estimator`. +* If you subclass any of the estimators, stop doing that. You should be able to + write a factory method that returns a canned estimator instead. If this is not + possible (if you override methods from the canned estimator), consider writing + a custom estimator instead. See `tf.estimator.Estimator`. +* Set `loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE` to preserve loss + reduction as the average over batch. +* Some optimizer-related arguments are no longer passed in the estimator + constructor. Instead, we provide methods that perform the same job by wrapping + an optimizer. 
Specifically: + * `gradient_clip_norm`: Use `tf.contrib.estimator.clip_gradients_by_norm` + * `embedding_lr_multipliers`: Not supported. + Other arguments: + * `input_layer_min_slice_size`: Replaced by `input_layer_partitioner` + * `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. + * `feature_engineering_fn`: Not supported. You can call your + `feature_engineering_fn` inside your input_fn: + ```python + def new_input_fn(): + features, labels = old_input_fn() + return feature_engineering_fn(features, labels) + ``` +* Use `tf.reshape` to reshape labels in your `input_fn`. `tf.estimator` + classifiers and regressors expect labels as a 2D Tensor of shape + `[batch_size, 1]`, or `[batch_size, n_labels]`. In contrast, + `tf.contrib.learn` classifiers and regressors supported labels with shape + `[batch_size]`. +* If you pass custom metrics from the `evaluate()` method call, use + `tf.contrib.estimator.add_metrics`. +* Replace your `serving_input_fn` with a `serving_input_receiver_fn`. + Note this should be entirely distinct from your training `input_fn`, so if you + previously had one `input_fn` with different "modes", you should now factor + that apart. Where the former returned either a simple `(features, labels)` + tuple or `InputFnOps`, you should now return a `ServingInputReceiver`. + If you were generating your `serving_input_fn` using the + `build_parsing_serving_input_fn` helper, you can simply drop in the + replacement `build_parsing_serving_input_receiver_fn`. + +Some remaining estimators/classes: + +* `DynamicRnnEstimator`: Consider a custom `model_fn`. +* `KMeansClustering`: Use `tf.contrib.factorization.KMeansClustering`. +* `LogisticRegressor`: Not supported. Instead, use `binary_classification_head` + with a custom `model_fn`, or with `DNNEstimator`. +* `StateSavingRnnEstimator`: Consider a custom `model_fn`. +* SVM: Consider a custom `model_fn`. 
+* `LinearComposableModel` and `DNNComposableModel`: Not supported. + Consider `tf.contrib.estimator.DNNEstimator`, or write a custom model_fn. +* `MetricSpec`: Deprecated. For adding custom metrics to canned Estimators, use + `tf.contrib.estimator.add_metrics`. + +## Estimator +`tf.contrib.learn.Estimator` is migrated to `tf.estimator.Estimator`. + +To migrate, users need to take the following steps: + +* Replace `tf.contrib.learn.Estimator` with `tf.estimator.Estimator`. +* If you pass a `config` argument to `Estimator`, this must be + `tf.estimator.RunConfig`. You may need to edit your code accordingly. +* Edit your `model_fn` to return `tf.estimator.EstimatorSpec`. Refer to + `EstimatorSpec` for documentation of specific fields. +* If your `model_fn` uses the `mode` argument, use `tf.estimator.ModeKeys`. + +Some related classes: +* `Evaluable`, `Trainable`: Not supported, merged into `tf.estimator.Estimator`. +* ExportStrategy: Replaced by `tf.estimator.Exporter`. + +## Head/MultiHead +These classes are now supported under `tf.contrib.estimator`, e.g. +`tf.contrib.estimator.multi_class_head` and `tf.contrib.estimator.multi_head`. + +Some differences: + +* `multi_class_head`: If you use `tf.contrib.learn.multi_class_head` with + `n_classes=2`, switch to `tf.contrib.estimator.binary_classification_head`. +* `loss_only_head`: Not supported. +* `poisson_regression_head`: Not supported (yet). +* `binary_svm_head`: Not supported (yet). +* `no_op_train_fn`: Replace it with `tf.no_op`. + +Some arguments are renamed, please refer to documentation. In addition: + +* `loss_fn`: Supported for `multi_label_head`. If you need it for other heads, + please open an issue. +* `metric_class_ids`: Not supported (yet). +* `enable_centered_bias`: Not supported. Dropping this argument is unlikely to + harm your model. +* `label_name`: Not needed in `tf.estimator`. If you don’t use `multi_head`, + drop this argument. 
If you use `multi_head`, refer to + `tf.contrib.estimator.multi_head` documentation. + +## Experiment Class - Distributed Training Tooling + +Switch to `tf.estimator.train_and_evaluate`. Some differences: + +* Most of the constructor arguments, like `train_input_fn`, `eval_input_fn`, + should be wrapped into `tf.estimator.TrainSpec` and `tf.estimator.EvalSpec`. +* Remove the `experiment_fn`. Instead, create the `Estimator`, + `train_spec` and `eval_spec`, then call `tf.estimator.train_and_evaluate` + directly. +* Inside `tf.estimator.EvalSpec`, the `exporter` field is the replacement + for `export_strategy`. To be precise, `tf.estimator.LatestExporter` is the + replacement for `tf.contrib.learn.make_export_strategy`. If you want to export + only at the end of training use `tf.estimator.FinalExporter`. +* If the `TF_CONFIG` environment variable is constructed manually, please read + the `train_and_evaluate` documentation for the new requirements (in + particular, the chief node and evaluator node). + +## Other Classes and Functions + +* `tf.contrib.learn.datasets` is deprecated. We are adding ready to use datasets + to tensorflow/models. Many smaller datasets are available from other sources, + such as scikits.learn. Some Python processing may have to be written, but this + is straightforward to implement using the standard modules. +* `tf.contrib.learn.preprocessing`: Deprecated. The python-only preprocessing + functions are not a good fit for TensorFlow. Please use `tf.data`, and + consider tensorflow/transform for more complex use cases. +* `tf.contrib.learn.models`: Not supported, use canned estimators instead. +* `tf.contrib.learn.monitors`: Implement `SessionRunHook` instead. Hook + implementations are in `tf.train`. +* `tf.contrib.learn.learn_io`: Use the methods in `tf.estimator.inputs`, such as + `tf.estimator.inputs.numpy_input_fn`. Some utility functions have no + equivalent, we encourage the use of `tf.data`.
+ diff --git a/tensorflow/contrib/learn/__init__.py b/tensorflow/contrib/learn/__init__.py index 3698af027e..79bd73faaf 100644 --- a/tensorflow/contrib/learn/__init__.py +++ b/tensorflow/contrib/learn/__init__.py @@ -13,8 +13,11 @@ # limitations under the License. # ============================================================================== -# TODO(ptucker,ipolosukhin): Improve descriptions. -"""High level API for learning. +"""High level API for learning (DEPRECATED). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. See the @{$python/contrib.learn} guide. diff --git a/tensorflow/contrib/learn/python/__init__.py b/tensorflow/contrib/learn/python/__init__.py index bbebd5ab97..df23aeb2c4 100644 --- a/tensorflow/contrib/learn/python/__init__.py +++ b/tensorflow/contrib/learn/python/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/__init__.py b/tensorflow/contrib/learn/python/learn/__init__.py index cdc67c77d5..76e0e8ac8f 100644 --- a/tensorflow/contrib/learn/python/learn/__init__.py +++ b/tensorflow/contrib/learn/python/learn/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level API for learning with TensorFlow.""" +"""High level API for learning with TensorFlow (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py index 2284ec46e9..fed1c44d19 100644 --- a/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py +++ b/tensorflow/contrib/learn/python/learn/basic_session_run_hooks.py @@ -12,20 +12,47 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Some common SessionRunHook classes.""" +"""Some common SessionRunHook classes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions.
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.training import basic_session_run_hooks +from tensorflow.python.util.deprecation import deprecated_alias # pylint: disable=invalid-name -LoggingTensorHook = basic_session_run_hooks.LoggingTensorHook -StopAtStepHook = basic_session_run_hooks.StopAtStepHook -CheckpointSaverHook = basic_session_run_hooks.CheckpointSaverHook -StepCounterHook = basic_session_run_hooks.StepCounterHook -NanLossDuringTrainingError = basic_session_run_hooks.NanLossDuringTrainingError -NanTensorHook = basic_session_run_hooks.NanTensorHook -SummarySaverHook = basic_session_run_hooks.SummarySaverHook +LoggingTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.LoggingTensorHook', + 'tf.train.LoggingTensorHook', + basic_session_run_hooks.LoggingTensorHook) +StopAtStepHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StopAtStepHook', + 'tf.train.StopAtStepHook', + basic_session_run_hooks.StopAtStepHook) +CheckpointSaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.CheckpointSaverHook', + 'tf.train.CheckpointSaverHook', + basic_session_run_hooks.CheckpointSaverHook) +StepCounterHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.StepCounterHook', + 'tf.train.StepCounterHook', + basic_session_run_hooks.StepCounterHook) +NanLossDuringTrainingError = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanLossDuringTrainingError', + 'tf.train.NanLossDuringTrainingError', + basic_session_run_hooks.NanLossDuringTrainingError) +NanTensorHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.NanTensorHook', + 'tf.train.NanTensorHook', + basic_session_run_hooks.NanTensorHook) +SummarySaverHook = deprecated_alias( + 'tf.contrib.learn.basic_session_run_hooks.SummarySaverHook', + 'tf.train.SummarySaverHook', + basic_session_run_hooks.SummarySaverHook) # pylint: 
enable=invalid-name diff --git a/tensorflow/contrib/learn/python/learn/datasets/__init__.py b/tensorflow/contrib/learn/python/learn/datasets/__init__.py index 7240b0de14..3c34712ac8 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/__init__.py +++ b/tensorflow/contrib/learn/python/learn/datasets/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Dataset utilities and synthetic/reference datasets.""" +"""Dataset utilities and synthetic/reference datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.contrib.learn.python.learn.datasets import mnist from tensorflow.contrib.learn.python.learn.datasets import synthetic from tensorflow.contrib.learn.python.learn.datasets import text_datasets +from tensorflow.python.util.deprecation import deprecated # Export load_iris and load_boston. load_iris = base.load_iris @@ -51,6 +57,7 @@ SYNTHETIC = { } +@deprecated(None, 'Please use tf.data.') def load_dataset(name, size='small', test_with_fake_data=False): """Loads dataset by name. @@ -73,8 +80,9 @@ def load_dataset(name, size='small', test_with_fake_data=False): return DATASETS[name]() +@deprecated(None, 'Please use tf.data.') def make_dataset(name, n_samples=100, noise=None, seed=42, *args, **kwargs): - """Creates binary synthetic datasets + """Creates binary synthetic datasets. 
Args: name: str, name of the dataset to generate diff --git a/tensorflow/contrib/learn/python/learn/datasets/base.py b/tensorflow/contrib/learn/python/learn/datasets/base.py index ca720ae5ed..3b5c9b97c0 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/base.py +++ b/tensorflow/contrib/learn/python/learn/datasets/base.py @@ -12,7 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base utilities for loading datasets.""" + +"""Base utilities for loading datasets (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -29,11 +35,14 @@ import numpy as np from six.moves import urllib from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated + Dataset = collections.namedtuple('Dataset', ['data', 'target']) Datasets = collections.namedtuple('Datasets', ['train', 'validation', 'test']) +@deprecated(None, 'Use tf.data instead.') def load_csv_with_header(filename, target_dtype, features_dtype, @@ -53,6 +62,7 @@ def load_csv_with_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def load_csv_without_header(filename, target_dtype, features_dtype, @@ -70,6 +80,7 @@ def load_csv_without_header(filename, return Dataset(data=data, target=target) +@deprecated(None, 'Use tf.data instead.') def shrink_csv(filename, ratio): """Create a smaller dataset of only 1/ratio of original data.""" filename_small = filename.replace('.', '_small.') @@ -84,6 +95,7 @@ def shrink_csv(filename, ratio): i += 1 +@deprecated(None, 'Use scikits.learn.datasets.') def load_iris(data_path=None): """Load Iris dataset. 
@@ -100,6 +112,7 @@ def load_iris(data_path=None): data_path, target_dtype=np.int, features_dtype=np.float) +@deprecated(None, 'Use scikits.learn.datasets.') def load_boston(data_path=None): """Load Boston housing dataset. @@ -116,7 +129,12 @@ def load_boston(data_path=None): data_path, target_dtype=np.float, features_dtype=np.float) -def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): +@deprecated(None, 'Use the retry module or similar alternatives.') +def retry(initial_delay, + max_delay, + factor=2.0, + jitter=0.25, + is_retriable=None): """Simple decorator for wrapping retriable functions. Args: @@ -152,7 +170,7 @@ def retry(initial_delay, max_delay, factor=2.0, jitter=0.25, is_retriable=None): for delay in delays(): try: return fn(*args, **kwargs) - except Exception as e: # pylint: disable=broad-except) + except Exception as e: # pylint: disable=broad-except if is_retriable is None: continue @@ -176,11 +194,13 @@ def _is_retriable(e): return isinstance(e, IOError) and e.errno in _RETRIABLE_ERRNOS +@deprecated(None, 'Please use urllib or similar directly.') @retry(initial_delay=1.0, max_delay=16.0, is_retriable=_is_retriable) def urlretrieve_with_retry(url, filename=None): return urllib.request.urlretrieve(url, filename) +@deprecated(None, 'Please write your own downloading logic.') def maybe_download(filename, work_directory, source_url): """Download the data from source url, unless it's already here. diff --git a/tensorflow/contrib/learn/python/learn/datasets/mnist.py b/tensorflow/contrib/learn/python/learn/datasets/mnist.py index 37f9175015..abbb44c2f5 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/mnist.py +++ b/tensorflow/contrib/learn/python/learn/datasets/mnist.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Functions for downloading and reading MNIST data.""" +"""Functions for downloading and reading MNIST data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -27,6 +32,7 @@ from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.framework import dtypes from tensorflow.python.framework import random_seed from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated # CVDF mirror of http://yann.lecun.com/exdb/mnist/ DEFAULT_SOURCE_URL = 'https://storage.googleapis.com/cvdf-datasets/mnist/' @@ -37,6 +43,7 @@ def _read32(bytestream): return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_images(f): """Extract the images into a 4D uint8 numpy array [index, y, x, depth]. @@ -65,6 +72,7 @@ def extract_images(f): return data +@deprecated(None, 'Please use tf.one_hot on tensors.') def dense_to_one_hot(labels_dense, num_classes): """Convert class labels from scalars to one-hot vectors.""" num_labels = labels_dense.shape[0] @@ -74,6 +82,7 @@ def dense_to_one_hot(labels_dense, num_classes): return labels_one_hot +@deprecated(None, 'Please use tf.data to implement this functionality.') def extract_labels(f, one_hot=False, num_classes=10): """Extract the labels into a 1D uint8 numpy array [index]. @@ -103,7 +112,15 @@ def extract_labels(f, one_hot=False, num_classes=10): class DataSet(object): + """Container class for a dataset (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def __init__(self, images, labels, @@ -210,6 +227,8 @@ class DataSet(object): return self._images[start:end], self._labels[start:end] +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def read_data_sets(train_dir, fake_data=False, one_hot=False, @@ -275,5 +294,7 @@ def read_data_sets(train_dir, return base.Datasets(train=train, validation=validation, test=test) +@deprecated(None, 'Please use alternatives such as official/mnist/dataset.py' + ' from tensorflow/models.') def load_mnist(train_dir='MNIST-data'): return read_data_sets(train_dir) diff --git a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py index 6e0ba38941..a4848fa64a 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/produce_small_datasets.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Produce DBpedia datasets of a smaller size.""" +"""Produce DBpedia datasets of a smaller size (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py index 9a843168c2..6a0e3350b3 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/synthetic.py +++ b/tensorflow/contrib/learn/python/learn/datasets/synthetic.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Synthetic dataset generators.""" +"""Synthetic dataset generators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -21,8 +26,10 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.learn.python.learn.datasets.base import Dataset +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def circles(n_samples=100, noise=None, seed=None, @@ -93,6 +100,7 @@ def circles(n_samples=100, return Dataset(data=X[indices], target=y[indices]) +@deprecated(None, 'Consider using synthetic datasets from scikits.learn.') def spirals(n_samples=100, noise=None, seed=None, diff --git a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py index 2596a2ecaf..ce94663017 100644 --- a/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py +++ b/tensorflow/contrib/learn/python/learn/datasets/text_datasets.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Text datasets.""" +"""Text datasets (deprecated). 
+ +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -26,10 +31,12 @@ import numpy as np from tensorflow.contrib.learn.python.learn.datasets import base from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated DBPEDIA_URL = 'https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz' +@deprecated(None, 'See contrib/learn/README.md') def maybe_download_dbpedia(data_dir): """Download if DBpedia data is not present.""" train_path = os.path.join(data_dir, 'dbpedia_csv/train.csv') @@ -41,6 +48,7 @@ def maybe_download_dbpedia(data_dir): tfile.extractall(data_dir) +@deprecated(None, 'See contrib/learn/README.md') def load_dbpedia(size='small', test_with_fake_data=False): """Get DBpedia datasets from CSV files.""" if not test_with_fake_data: diff --git a/tensorflow/contrib/learn/python/learn/estimators/__init__.py b/tensorflow/contrib/learn/python/learn/estimators/__init__.py index 4981750c94..3e64595f31 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/__init__.py +++ b/tensorflow/contrib/learn/python/learn/estimators/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""An estimator is a rule for calculating an estimate of a given quantity. +"""An estimator is a rule for calculating an estimate of a given quantity (deprecated). + +These classes are deprecated and replaced with `tf.estimator`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
# Estimators diff --git a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py index 15277415a1..1f0e4663d0 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/_sklearn.py @@ -13,7 +13,7 @@ # limitations under the License. # ============================================================================== -"""sklearn cross-support.""" +"""sklearn cross-support (deprecated).""" from __future__ import absolute_import from __future__ import division @@ -132,6 +132,8 @@ class _TransformerMixin(): class NotFittedError(ValueError, AttributeError): """Exception class to raise if estimator is used before fitting. + USE OF THIS EXCEPTION IS DEPRECATED. + This class inherits from both ValueError and AttributeError to help with exception handling and backward compatibility. diff --git a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py index a02c726c74..1fa58271e2 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/composable_model.py +++ b/tensorflow/contrib/learn/python/learn/estimators/composable_model.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow composable models used as building blocks for estimators.""" +"""TensorFlow composable models used as building blocks for estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -34,6 +39,7 @@ from tensorflow.python.ops import nn from tensorflow.python.ops import partitioned_variables from tensorflow.python.ops import variable_scope from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated class _ComposableModel(object): @@ -46,6 +52,7 @@ class _ComposableModel(object): _ComposableModel and its subclasses are not part of the public tf.learn API. """ + @deprecated(None, "Please use model_fns in tf.estimator.") def __init__(self, num_label_columns, optimizer, @@ -141,6 +148,10 @@ class _ComposableModel(object): class LinearComposableModel(_ComposableModel): """A _ComposableModel that implements linear regression. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ @@ -252,6 +263,10 @@ class LinearComposableModel(_ComposableModel): class DNNComposableModel(_ComposableModel): """A _ComposableModel that implements a DNN. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Instances of this class can be used to build estimators through the use of composition. """ diff --git a/tensorflow/contrib/learn/python/learn/estimators/constants.py b/tensorflow/contrib/learn/python/learn/estimators/constants.py index fc69e81024..d2548946bc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/constants.py +++ b/tensorflow/contrib/learn/python/learn/estimators/constants.py @@ -13,9 +13,11 @@ # limitations under the License. # ============================================================================== -"""Constants regarding Estimators. +"""Constants regarding Estimators (deprecated). 
-This file is obsoleted in the move of Estimator to core. +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ from __future__ import absolute_import from __future__ import division @@ -25,6 +27,8 @@ from __future__ import print_function class ProblemType(object): """Enum-like values for the type of problem that the model solves. + THIS CLASS IS DEPRECATED. + These values are used when exporting the model to produce the appropriate signature function for serving. diff --git a/tensorflow/contrib/learn/python/learn/estimators/debug.py b/tensorflow/contrib/learn/python/learn/estimators/debug.py index 9d5f6c2bf9..24b067b7e3 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/debug.py +++ b/tensorflow/contrib/learn/python/learn/estimators/debug.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Debug estimators. +"""Debug estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Debug estimators are bias-only estimators that can be used for debugging and as simple baselines. @@ -118,6 +122,10 @@ def debug_model_fn(features, labels, mode, params, config=None): class DebugClassifier(estimator.Estimator): """A classifier for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -237,6 +245,10 @@ class DebugClassifier(estimator.Estimator): class DebugRegressor(estimator.Estimator): """A regressor for TensorFlow Debug models. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn.py b/tensorflow/contrib/learn/python/learn/estimators/dnn.py index c17b41c0f7..eabebb7e88 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Deep Neural Network estimators.""" +"""Deep Neural Network estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -212,6 +217,10 @@ def _dnn_model_fn(features, labels, mode, params, config=None): class DNNClassifier(estimator.Estimator): """A classifier for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -521,6 +530,10 @@ class DNNClassifier(estimator.Estimator): class DNNRegressor(estimator.Estimator): """A regressor for TensorFlow DNN models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python @@ -796,6 +809,10 @@ class DNNRegressor(estimator.Estimator): class DNNEstimator(estimator.Estimator): """A Estimator for TensorFlow DNN models with user specified _Head. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Example: ```python diff --git a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py index 7266122350..3d85533d92 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dnn_linear_combined.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow estimators for Linear and DNN joined training models.""" +"""TensorFlow estimators for Linear and DNN joined training models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -372,6 +377,10 @@ def _dnn_linear_combined_model_fn(features, labels, mode, params, config=None): class DNNLinearCombinedEstimator(estimator.Estimator): """An estimator for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. @@ -490,6 +499,10 @@ class DNNLinearCombinedEstimator(estimator.Estimator): class DNNLinearCombinedClassifier(estimator.Estimator): """A classifier for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. 
@@ -832,6 +845,10 @@ class DNNLinearCombinedClassifier(estimator.Estimator): class DNNLinearCombinedRegressor(estimator.Estimator): """A regressor for TensorFlow Linear and DNN joined training models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note: New users must set `fix_global_step_increment_bug=True` when creating an estimator. diff --git a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py index 69440e823e..a703dc66e9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/dynamic_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for Dynamic RNNs.""" +"""Estimator for Dynamic RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -540,6 +545,12 @@ def _get_dynamic_rnn_model_fn( class DynamicRnnEstimator(estimator.Estimator): + """Dynamically unrolled RNN (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 4b63e08ab3..5262e04e16 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Base Estimator class.""" +"""Base Estimator class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -138,6 +143,7 @@ def _get_input_fn(x, y, input_fn, feed_fn, batch_size, shuffle=False, epochs=1): return df.input_builder, df.get_feed_dict_fn() +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input_fn(input_fn): """Creates `FeatureColumn` objects for inputs defined by `input_fn`. @@ -158,6 +164,7 @@ def infer_real_valued_columns_from_input_fn(input_fn): return layers.infer_real_valued_columns(features) +@deprecated(None, 'Please specify feature columns explicitly.') def infer_real_valued_columns_from_input(x): """Creates `FeatureColumn` objects for inputs defined by input `x`. @@ -389,6 +396,10 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, trainable.Trainable): """Abstract BaseEstimator class to train and evaluate TensorFlow models. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Users should not instantiate or subclass this class. Instead, use an `Estimator`. 
""" @@ -399,6 +410,8 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, # TODO(wicke): Remove this once launcher takes over config functionality _Config = run_config.RunConfig # pylint: disable=invalid-name + @deprecated(None, 'Please replace uses of any Estimator from tf.contrib.learn' + ' with an Estimator from tf.estimator.*') def __init__(self, model_dir=None, config=None): """Initializes a BaseEstimator instance. @@ -1074,6 +1087,10 @@ def _identity_feature_engineering_fn(features, labels): class Estimator(BaseEstimator): """Estimator class is the basic TensorFlow model trainer/evaluator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ def __init__(self, @@ -1458,8 +1475,14 @@ class Estimator(BaseEstimator): # For time of deprecation x,y from Estimator allow direct access. # pylint: disable=protected-access class SKCompat(sklearn.BaseEstimator): - """Scikit learn wrapper for TensorFlow Learn Estimator.""" + """Scikit learn wrapper for TensorFlow Learn Estimator. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please switch to the Estimator interface.') def __init__(self, estimator): self._estimator = estimator diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py index fd47710e30..e4c31396ba 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator_test_utils.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Utils for Estimator.""" +"""Utils for Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/head.py b/tensorflow/contrib/learn/python/learn/estimators/head.py index 9b124b2c19..2b4b6eff39 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head.py @@ -12,8 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Abstractions for the head(s) of a model. +"""Abstractions for the head(s) of a model (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. """ + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -47,11 +52,16 @@ from tensorflow.python.summary import summary from tensorflow.python.training import training from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated class Head(object): """Interface for the head/top of a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Given logits (or output of a hidden layer), a Head knows how to compute predictions, loss, default metric and export signature. 
It is meant to, @@ -177,6 +187,7 @@ class Head(object): raise NotImplementedError("Calling an abstract method.") +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -216,6 +227,7 @@ def regression_head(label_name=None, link_fn=(link_fn if link_fn is not None else array_ops.identity)) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def poisson_regression_head(label_name=None, weight_column_name=None, label_dimension=1, @@ -254,6 +266,7 @@ def poisson_regression_head(label_name=None, # TODO(zakaria): Consider adding a _RegressionHead for logistic_regression +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_class_head(n_classes, label_name=None, weight_column_name=None, @@ -335,6 +348,7 @@ def multi_class_head(n_classes, label_keys=label_keys) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def binary_svm_head( label_name=None, weight_column_name=None, @@ -370,6 +384,7 @@ def binary_svm_head( thresholds=thresholds) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_label_head(n_classes, label_name=None, weight_column_name=None, @@ -430,6 +445,7 @@ def multi_label_head(n_classes, loss_fn=_wrap_custom_loss_fn(loss_fn) if loss_fn else None) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def loss_only_head(loss_fn, head_name=None): """Creates a Head that contains only loss terms. @@ -447,6 +463,7 @@ def loss_only_head(loss_fn, head_name=None): return _LossOnlyHead(loss_fn, head_name=head_name) +@deprecated(None, "Please switch to tf.contrib.estimator.*_head.") def multi_head(heads, loss_weights=None): """Creates a MultiHead stemming from same logits/hidden layer. 
@@ -479,6 +496,7 @@ def multi_head(heads, loss_weights=None): return _MultiHead(heads, loss_merger=_weighted_loss_merger) +@deprecated(None, "Use 'lambda _: tf.no_op()'.") def no_op_train_fn(loss): del loss return control_flow_ops.no_op() diff --git a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py index 8f9d6fc318..66ebcfd1d8 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/kmeans.py +++ b/tensorflow/contrib/learn/python/learn/estimators/kmeans.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementation of k-means clustering on top of `Estimator` API. +"""Implementation of k-means clustering on top of `Estimator` API (deprecated). This module is deprecated. Please use @{tf.contrib.factorization.KMeansClustering} instead of @@ -153,7 +153,12 @@ def _kmeans_clustering_model_fn(features, labels, mode, params, config): # TODO(agarwal,ands): support sharded input. class KMeansClustering(estimator.Estimator): - """An Estimator for K-Means clustering.""" + """An Estimator for K-Means clustering. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ SQUARED_EUCLIDEAN_DISTANCE = clustering_ops.SQUARED_EUCLIDEAN_DISTANCE COSINE_DISTANCE = clustering_ops.COSINE_DISTANCE RANDOM_INIT = clustering_ops.RANDOM_INIT diff --git a/tensorflow/contrib/learn/python/learn/estimators/linear.py b/tensorflow/contrib/learn/python/learn/estimators/linear.py index 37aa8b3396..64d7ecc68e 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/linear.py +++ b/tensorflow/contrib/learn/python/learn/estimators/linear.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Linear Estimators.""" +"""Linear Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -305,6 +310,10 @@ class _SdcaUpdateWeightsHook(session_run_hook.SessionRunHook): class LinearClassifier(estimator.Estimator): """Linear classifier model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear model to classify instances into one of multiple possible classes. When number of possible classes is 2, this is binary classification. @@ -625,6 +634,10 @@ class LinearClassifier(estimator.Estimator): class LinearRegressor(estimator.Estimator): """Linear regressor model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a linear regression model to predict label value given observation of feature values. @@ -860,6 +873,10 @@ class LinearRegressor(estimator.Estimator): class LinearEstimator(estimator.Estimator): """Linear model with user specified head. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Train a generalized linear model to predict label value given observation of feature values. 
diff --git a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py index fb339160d5..3cbcc6e98d 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py +++ b/tensorflow/contrib/learn/python/learn/estimators/logistic_regressor.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Logistic regression (aka binary classifier) class. +"""Logistic regression (aka binary classifier) class (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This defines some useful basic metrics for using logistic regression to classify a binary event (0 vs 1). @@ -75,6 +79,10 @@ def LogisticRegressor( # pylint: disable=invalid-name feature_engineering_fn=None): """Builds a logistic regression Estimator for binary classification. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This method provides a basic Estimator with some additional metrics for custom binary classification models, including AUC, precision/recall and accuracy. diff --git a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py index 99388f116b..f264248e44 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/metric_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/metric_key.py @@ -12,14 +12,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Enum for metric keys.""" +"""Enum for metric keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function class MetricKey(object): - """Metric key strings.""" + """Metric key strings (deprecated).""" + LOSS = "loss" AUC = "auc" AUC_PR = "auc_precision_recall" diff --git a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py index 44e6c7c52d..dcb161180c 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/model_fn.py +++ b/tensorflow/contrib/learn/python/learn/estimators/model_fn.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Classes and methods related to model_fn.""" +"""Classes and methods related to model_fn (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -37,10 +42,13 @@ from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.training import session_run_hook +from tensorflow.python.util.deprecation import deprecated class ModeKeys(object): - """Standard names for model modes. + """Standard names for model modes (deprecated). + + THIS CLASS IS DEPRECATED. 
The following standard keys are defined: @@ -65,8 +73,16 @@ class ModelFnOps( 'output_alternatives', 'training_chief_hooks', 'training_hooks', 'scaffold', 'mode' ])): - """Ops returned from a model_fn.""" + """Ops returned from a model_fn. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'When switching to tf.estimator.Estimator, use ' + 'tf.estimator.EstimatorSpec. You can use the `estimator_spec`' + ' method to create an equivalent one.') def __new__(cls, mode, predictions=None, diff --git a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py index f8d87b8914..6fd2fc9d59 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py +++ b/tensorflow/contrib/learn/python/learn/estimators/prediction_key.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Enum for model prediction keys. +"""Enum for model prediction keys (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This file is obsoleted in the move of Estimator to core. 
""" @@ -22,6 +26,8 @@ from __future__ import print_function class PredictionKey(object): + """THIS CLASS IS DEPRECATED.""" + CLASSES = "classes" PROBABILITIES = "probabilities" LOGITS = "logits" diff --git a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py index 2752bc2d90..215022e5d9 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py +++ b/tensorflow/contrib/learn/python/learn/estimators/rnn_common.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Common operations for RNN Estimators.""" +"""Common operations for RNN Estimators (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/estimators/run_config.py b/tensorflow/contrib/learn/python/learn/estimators/run_config.py index fd90fd1cc6..1d161093de 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/run_config.py +++ b/tensorflow/contrib/learn/python/learn/estimators/run_config.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Run Config.""" +"""Run Config (deprecated, use tf.estimator.RunConfig instead). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -29,11 +34,12 @@ from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as core_run_config from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib +from tensorflow.python.util.deprecation import deprecated # A list of the property names in RunConfig user allows to change. They will # not affect the execution framework, so when execution framework checks the -# `uid` of the RunConfig, it should be ingored. +# `uid` of the RunConfig, it should be ignored. _DEFAULT_UID_WHITE_LIST = [ 'tf_random_seed', 'save_summary_steps', @@ -47,6 +53,7 @@ _DEFAULT_UID_WHITE_LIST = [ class Environment(object): + """DEPRECATED CLASS.""" # For running general distributed training. CLOUD = 'cloud' # For running Google-internal distributed training. @@ -56,6 +63,7 @@ class Environment(object): class TaskType(object): + """DEPRECATED CLASS.""" MASTER = 'master' PS = 'ps' WORKER = 'worker' @@ -64,6 +72,8 @@ class TaskType(object): class ClusterConfig(object): """This class specifies the configurations for a distributed run. + THIS CLASS IS DEPRECATED. Use tf.estimator.RunConfig instead. + If you're using an `Estimator`, you should probably use the subclass RunConfig instead. """ @@ -211,10 +221,13 @@ class ClusterConfig(object): class RunConfig(ClusterConfig, core_run_config.RunConfig): """This class specifies the configurations for an `Estimator` run. - This class is the implementation of @{tf.estimator.RunConfig} interface. + This class is a deprecated implementation of @{tf.estimator.RunConfig} + interface. 
""" _USE_DEFAULT = 0 + @deprecated(None, 'When switching to tf.estimator.Estimator, use' + ' tf.estimator.RunConfig instead.') def __init__(self, master=None, num_cores=0, diff --git a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py index 0cea35e219..de78c72c3a 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/state_saving_rnn_estimator.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Estimator for State Saving RNNs.""" +"""Estimator for State Saving RNNs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -528,6 +533,12 @@ def _get_rnn_model_fn(cell_type, class StateSavingRnnEstimator(estimator.Estimator): + """RNN with static unrolling and state saving (deprecated). + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, problem_type, diff --git a/tensorflow/contrib/learn/python/learn/estimators/svm.py b/tensorflow/contrib/learn/python/learn/estimators/svm.py index 72920d73c0..3459997bab 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/svm.py +++ b/tensorflow/contrib/learn/python/learn/estimators/svm.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Support Vector Machine (SVM) Estimator.""" +"""Support Vector Machine (SVM) Estimator (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -36,6 +41,10 @@ def _as_iterable(preds, output): class SVM(estimator.Estimator): """Support Vector Machine (SVM) model for binary classification. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Currently, only linear SVMs are supported. For the underlying optimization problem, the `SDCAOptimizer` is used. For performance and convergence tuning, the num_loss_partitions parameter passed to `SDCAOptimizer` (see `__init__()` diff --git a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py index a120bc6cc3..71b5658dd1 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py +++ b/tensorflow/contrib/learn/python/learn/estimators/tensor_signature.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorSignature class and utilities.""" +"""TensorSignature class and utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -33,6 +38,10 @@ class TensorSignature(collections.namedtuple( "TensorSignature", ["dtype", "shape", "is_sparse"])): """Signature of the `Tensor` object. 
+ THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Useful to check compatibility of tensors. Example: diff --git a/tensorflow/contrib/learn/python/learn/estimators/test_data.py b/tensorflow/contrib/learn/python/learn/estimators/test_data.py index ed201bfc58..e4b057b4f5 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/test_data.py +++ b/tensorflow/contrib/learn/python/learn/estimators/test_data.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Test data utilities.""" +"""Test data utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/evaluable.py b/tensorflow/contrib/learn/python/learn/evaluable.py index 8f6cd39864..10881ca885 100644 --- a/tensorflow/contrib/learn/python/learn/evaluable.py +++ b/tensorflow/contrib/learn/python/learn/evaluable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Evaluable` interface.""" +"""`Evaluable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,10 @@ import abc class Evaluable(object): """Interface for objects that are evaluatable by, e.g., `Experiment`. 
+ + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/experiment.py b/tensorflow/contrib/learn/python/learn/experiment.py index 331bc11549..9a7c4cd685 100644 --- a/tensorflow/contrib/learn/python/learn/experiment.py +++ b/tensorflow/contrib/learn/python/learn/experiment.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Experiment class collecting information needed for a single training run.""" +"""Experiment class collecting information for a single training run (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -25,7 +30,6 @@ import os import time from tensorflow.contrib.framework import deprecated -from tensorflow.contrib.framework import deprecated_args from tensorflow.contrib.framework.python.framework import experimental from tensorflow.contrib.learn.python.learn import evaluable from tensorflow.contrib.learn.python.learn import export_strategy @@ -118,6 +122,10 @@ class _EvalAndExportListener(basic_session_run_hooks.CheckpointSaverListener): class Experiment(object): """Experiment is a class containing all information needed to train a model. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ After an experiment is created (by passing an Estimator and inputs for training and evaluation), an Experiment instance knows how to invoke training and eval loops in a sensible fashion for distributed training. @@ -125,16 +133,8 @@ class Experiment(object): # TODO(ispir): remove delay_workers_by_global_step and make global step based # waiting as only behavior. - @deprecated_args( - "2016-10-23", - "local_eval_frequency is deprecated as local_run will be renamed to " - "train_and_evaluate. Use min_eval_frequency and call train_and_evaluate " - "instead. Note, however, that the default for min_eval_frequency is 1, " - "meaning models will be evaluated every time a new checkpoint is " - "available. In contrast, the default for local_eval_frequency is None, " - "resulting in evaluation occurring only after training has completed. " - "min_eval_frequency is ignored when calling the deprecated local_run.", - "local_eval_frequency") + @deprecated(None, "Please switch to tf.estimator.train_and_evaluate. You will" + " also have to convert to a tf.estimator.Estimator.") def __init__(self, estimator, train_input_fn, diff --git a/tensorflow/contrib/learn/python/learn/export_strategy.py b/tensorflow/contrib/learn/python/learn/export_strategy.py index 55a8b82431..075cab536e 100644 --- a/tensorflow/contrib/learn/python/learn/export_strategy.py +++ b/tensorflow/contrib/learn/python/learn/export_strategy.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""ExportStrategy class represents different flavors of model export.""" +"""ExportStrategy class represents different flavors of model export (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,7 @@ from __future__ import print_function import collections from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated __all__ = ['ExportStrategy'] @@ -30,6 +36,10 @@ class ExportStrategy( ['name', 'export_fn', 'strip_default_attrs'])): """A class representing a type of model export. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Typically constructed by a utility function specific to the exporter, such as `saved_model_export_utils.make_export_strategy()`. @@ -56,6 +66,8 @@ class ExportStrategy( forward compatibility of the resulting `SavedModel`. """ + @deprecated(None, 'Please switch to tf.estimator.train_and_evaluate, and use ' + 'tf.estimator.Exporter.') def __new__(cls, name, export_fn, strip_default_attrs=None): return super(ExportStrategy, cls).__new__( cls, name, export_fn, strip_default_attrs) diff --git a/tensorflow/contrib/learn/python/learn/graph_actions.py b/tensorflow/contrib/learn/python/learn/graph_actions.py index 98365c05f6..a997fab723 100644 --- a/tensorflow/contrib/learn/python/learn/graph_actions.py +++ b/tensorflow/contrib/learn/python/learn/graph_actions.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""High level operations on graphs.""" +"""High level operations on graphs (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -68,6 +73,7 @@ def clear_summary_writers(): return summary_io.SummaryWriterCache.clear() +@deprecated(None, 'Use `SummaryWriterCache.get` directly.') def get_summary_writer(logdir): """Returns single SummaryWriter per logdir in current run. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py index 06c3782a47..8b133a4440 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/__init__.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/__init__.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tools to allow different io formats.""" +"""Tools to allow different io formats (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py index 7d666391ce..e0a1948d95 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/dask_io.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Methods to allow dask.DataFrame.""" +"""Methods to allow dask.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -21,6 +26,8 @@ from __future__ import print_function import numpy as np +from tensorflow.python.util.deprecation import deprecated + try: # pylint: disable=g-import-not-at-top import dask.dataframe as dd @@ -60,6 +67,7 @@ def _construct_dask_df_with_divisions(df): return dd.Series(merge(dsk, df.dask), name, df.name, divisions) +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_data(data): """Extract data from dask.Series or dask.DataFrame for predictors. @@ -81,6 +89,7 @@ def extract_dask_data(data): return data +@deprecated(None, 'Please feed input to tf.data to support dask.') def extract_dask_labels(labels): """Extract data from dask.Series or dask.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py index 96be8b1bc4..c45b1d1864 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/data_feeder.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Implementations of different data feeders to provide data for TF trainer.""" +"""Implementations of different data feeders to provide data for TF trainer (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" # TODO(ipolosukhin): Replace this module with feed-dict queue runners & queues. 
@@ -31,6 +36,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # pylint: disable=g-multiple-import,g-bad-import-order from .pandas_io import HAS_PANDAS, extract_pandas_data, extract_pandas_matrix, extract_pandas_labels @@ -101,6 +107,7 @@ def _is_iterable(x): return hasattr(x, 'next') or hasattr(x, '__next__') +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_train_data_feeder(x, y, n_classes, @@ -188,6 +195,7 @@ def _batch_data(x, batch_size=None): yield np.matrix(chunk) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_predict_data_feeder(x, batch_size=None): """Returns an iterable for feeding into predict step. @@ -219,6 +227,7 @@ def setup_predict_data_feeder(x, batch_size=None): return [x] +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def setup_processor_data_feeder(x): """Sets up processor iterable. @@ -233,6 +242,7 @@ def setup_processor_data_feeder(x): return x +@deprecated(None, 'Please convert numpy dtypes explicitly.') def check_array(array, dtype): """Checks array on dtype and converts it if different. @@ -275,8 +285,14 @@ def _check_dtype(dtype): class DataFeeder(object): - """Data feeder is an example class to sample data for TF trainer.""" + """Data feeder is an example class to sample data for TF trainer. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, x, y, @@ -563,6 +579,10 @@ class DataFeeder(object): class StreamingDataFeeder(DataFeeder): """Data feeder for TF trainer that reads data from iterator. + THIS CLASS IS DEPRECATED. 
See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Streaming data feeder allows to read data as it comes it from disk or somewhere else. It's custom to have this iterators rotate infinetly over the dataset, to allow control of how much to learn on the trainer side. @@ -771,11 +791,16 @@ class StreamingDataFeeder(DataFeeder): class DaskDataFeeder(object): """Data feeder for that reads data from dask.Series and dask.DataFrame. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Numpy arrays can be serialized to disk and it's possible to do random seeks into them. DaskDataFeeder will remove requirement to have full dataset in the memory and still do random seeks for sampling of batches. """ + @deprecated(None, 'Please feed input to tf.data to support dask.') def __init__(self, x, y, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py index 884faf8335..f8aaa0c9e3 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/generator_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to allow generator of dict with numpy arrays.""" +"""Methods to allow generator of dict with numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -23,8 +28,10 @@ from types import FunctionType from types import GeneratorType from tensorflow.python.estimator.inputs.queues.feeding_functions import _enqueue_data as enqueue_data +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.data.') def generator_input_fn(x, target_key=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py index 3a46c23968..9e816f54b6 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/graph_io.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Methods to read data in the graph.""" +"""Methods to read data in the graph (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -34,11 +39,13 @@ from tensorflow.python.platform import gfile from tensorflow.python.summary import summary from tensorflow.python.training import input as input_ops from tensorflow.python.training import queue_runner +from tensorflow.python.util.deprecation import deprecated # Default name for key in the feature dict. 
KEY_FEATURE_NAME = '__key__' +@deprecated(None, 'Use tf.data.') def read_batch_examples(file_pattern, batch_size, reader, @@ -106,6 +113,7 @@ def read_batch_examples(file_pattern, return examples +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples(file_pattern, batch_size, reader, @@ -175,6 +183,7 @@ def read_keyed_batch_examples(file_pattern, seed=seed) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_examples_shared_queue(file_pattern, batch_size, reader, @@ -452,6 +461,7 @@ def _read_keyed_batch_examples_helper(file_pattern, return queued_examples_with_keys +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features(file_pattern, batch_size, features, @@ -540,6 +550,7 @@ def read_keyed_batch_features(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def read_keyed_batch_features_shared_queue(file_pattern, batch_size, features, @@ -620,6 +631,7 @@ def read_keyed_batch_features_shared_queue(file_pattern, name=scope) +@deprecated(None, 'Use tf.data.') def queue_parsed_features(parsed_features, keys=None, feature_queue_capacity=100, @@ -742,6 +754,7 @@ def queue_parsed_features(parsed_features, return dequeued_keys, dequeued_parsed_features +@deprecated(None, 'Use tf.data.') def read_batch_features(file_pattern, batch_size, features, @@ -821,6 +834,7 @@ def read_batch_features(file_pattern, return features +@deprecated(None, 'Use tf.data.') def read_batch_record_features(file_pattern, batch_size, features, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py index 692438807f..29552d24f1 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/numpy_io.py @@ -12,15 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Methods to allow dict of numpy arrays.""" +"""Methods to allow dict of numpy arrays (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.numpy_io import numpy_input_fn as core_numpy_input_fn +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Use tf.estimator.inputs.numpy_input_fn.') def numpy_input_fn(x, y=None, batch_size=128, diff --git a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py index ede7558eaf..b4ef055f5a 100644 --- a/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py +++ b/tensorflow/contrib/learn/python/learn/learn_io/pandas_io.py @@ -13,13 +13,19 @@ # limitations under the License. # ============================================================================== -"""Methods to allow pandas.DataFrame.""" +"""Methods to allow pandas.DataFrame (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.python.estimator.inputs.pandas_io import pandas_input_fn as core_pandas_input_fn +from tensorflow.python.util.deprecation import deprecated try: # pylint: disable=g-import-not-at-top @@ -47,6 +53,7 @@ PANDAS_DTYPES = { } +@deprecated(None, 'Please use tf.estimator.inputs.pandas_input_fn') def pandas_input_fn(x, y=None, batch_size=128, @@ -66,6 +73,7 @@ def pandas_input_fn(x, target_column=target_column) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_data(data): """Extract data from pandas.DataFrame for predictors. @@ -96,6 +104,7 @@ def extract_pandas_data(data): 'float, or bool. Found: ' + ', '.join(error_report)) +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_matrix(data): """Extracts numpy matrix from pandas DataFrame. @@ -111,6 +120,7 @@ def extract_pandas_matrix(data): return data.as_matrix() +@deprecated(None, 'Please access pandas data directly.') def extract_pandas_labels(labels): """Extract data from pandas.DataFrame for labels. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner.py b/tensorflow/contrib/learn/python/learn/learn_runner.py index 2af723a0d6..d719a3e488 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Runs an Experiment.""" +"""Runs an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ from tensorflow.contrib.learn.python.learn.estimators import run_config as run_c from tensorflow.contrib.learn.python.learn.experiment import Experiment from tensorflow.contrib.training.python.training import hparam as hparam_lib from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util.deprecation import deprecated # TODO(xiejw): Refactor the learn_runner to make code reusable. @@ -99,6 +105,7 @@ def _wrapped_experiment_fn_with_uid_check(experiment_fn, require_hparams=False): return wrapped_experiment_fn +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def run(experiment_fn, output_dir=None, schedule=None, run_config=None, hparams=None): """Make and run an experiment. @@ -218,6 +225,7 @@ def run(experiment_fn, output_dir=None, schedule=None, run_config=None, return _execute_schedule(experiment, schedule) +@deprecated(None, 'Use tf.estimator.train_and_evaluate.') def tune(experiment_fn, tuner): """Tune an experiment with hyper-parameters. diff --git a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py index 7d9b1c7716..ba2d067787 100644 --- a/tensorflow/contrib/learn/python/learn/learn_runner_lib.py +++ b/tensorflow/contrib/learn/python/learn/learn_runner_lib.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities to run and tune an Experiment. +"""Utilities to run and tune an Experiment (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
@@run @@tune diff --git a/tensorflow/contrib/learn/python/learn/metric_spec.py b/tensorflow/contrib/learn/python/learn/metric_spec.py index 6440bc204b..97220365d5 100644 --- a/tensorflow/contrib/learn/python/learn/metric_spec.py +++ b/tensorflow/contrib/learn/python/learn/metric_spec.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""The metric spec class to flexibly connect models and metrics.""" +"""The metric spec class to flexibly connect models and metrics (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,7 @@ import six from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect +from tensorflow.python.util.deprecation import deprecated def _assert_named_args(sentinel): @@ -223,6 +229,10 @@ def _adapt_metric_fn( class MetricSpec(object): """MetricSpec connects a model to metric functions. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + The MetricSpec class contains all information necessary to connect the output of a `model_fn` to the metrics (usually, streaming metrics) that are used in evaluation. 
@@ -284,6 +294,7 @@ class MetricSpec(object): """ + @deprecated(None, 'Use tf.estimator.EstimatorSpec.eval_metric_ops.') def __init__(self, metric_fn, prediction_key=None, diff --git a/tensorflow/contrib/learn/python/learn/models.py b/tensorflow/contrib/learn/python/learn/models.py index 4283240d01..bd4bbf9f8c 100644 --- a/tensorflow/contrib/learn/python/learn/models.py +++ b/tensorflow/contrib/learn/python/learn/models.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Various high level TF models.""" +"""Various high level TF models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -28,8 +33,10 @@ from tensorflow.python.ops import array_ops as array_ops_ from tensorflow.python.ops import init_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.summary import summary +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Consider using a tf.estimator.LinearRegressor') def linear_regression_zero_init(x, y): """Linear regression subgraph with zero-value initial weights and bias. @@ -43,6 +50,7 @@ def linear_regression_zero_init(x, y): return linear_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.LinearClassifier') def logistic_regression_zero_init(x, y): """Logistic regression subgraph with zero-value initial weights and bias. 
@@ -56,6 +64,7 @@ def logistic_regression_zero_init(x, y): return logistic_regression(x, y, init_mean=0.0, init_stddev=0.0) +@deprecated(None, 'Consider using a class from tf.estimator.') def linear_regression(x, y, init_mean=None, init_stddev=1.0): """Creates linear regression TensorFlow subgraph. @@ -107,6 +116,7 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0): return losses_ops.mean_squared_error_regressor(x, y, weights, bias) +@deprecated(None, 'Consider using a class from tf.estimator.') def logistic_regression(x, y, class_weight=None, @@ -203,6 +213,7 @@ def _reverse_seq(input_seq, lengths): return result +@deprecated(None, 'Please consider `tf.nn.bidirectional_dynamic_rnn`.') def bidirectional_rnn(cell_fw, cell_bw, inputs, @@ -283,6 +294,7 @@ def bidirectional_rnn(cell_fw, # End of TensorFlow 0.7 +@deprecated(None, 'Please consider tensorflow/tensor2tensor.') def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional, target_predictor_fn, sequence_length, initial_state, attn_length, attn_size, attn_vec_size): diff --git a/tensorflow/contrib/learn/python/learn/monitored_session.py b/tensorflow/contrib/learn/python/learn/monitored_session.py index 22602e9f69..ac0433f177 100644 --- a/tensorflow/contrib/learn/python/learn/monitored_session.py +++ b/tensorflow/contrib/learn/python/learn/monitored_session.py @@ -13,7 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A wrapper of Session API which runs hooks.""" +"""A wrapper of Session API which runs hooks (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. 
+""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/monitors.py b/tensorflow/contrib/learn/python/learn/monitors.py index 9457a73ecf..77f7c73d54 100644 --- a/tensorflow/contrib/learn/python/learn/monitors.py +++ b/tensorflow/contrib/learn/python/learn/monitors.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Monitors instrument the training process. +"""Monitors instrument the training process (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. @@get_default_monitors @@BaseMonitor @@ -59,6 +63,10 @@ from tensorflow.python.util import tf_inspect class BaseMonitor(object): """Base class for Monitors. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Defines basic interfaces of Monitors. Monitors can either be run on all workers or, more commonly, restricted to run exclusively on the elected chief worker. @@ -229,6 +237,10 @@ def _extract_output(outputs, request): class EveryN(BaseMonitor): """Base class for monitors that execute callbacks every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This class adds three new callbacks: - every_n_step_begin - every_n_step_end @@ -418,6 +430,10 @@ class StopAtStep(BaseMonitor): class PrintTensor(EveryN): """Prints given tensors every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ This is an `EveryN` monitor and has consistent semantic for `every_n` and `first_n`. @@ -455,9 +471,12 @@ class PrintTensor(EveryN): class LoggingTrainable(EveryN): """Writes trainable variable values into log every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Write the tensors in trainable variables `every_n` steps, starting with the `first_n`th step. - """ def __init__(self, scope=None, every_n=100, first_n=1): @@ -493,7 +512,12 @@ class LoggingTrainable(EveryN): class SummarySaver(EveryN): - """Saves summaries every N steps.""" + """Saves summaries every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, summary_op, @@ -554,6 +578,10 @@ class SummarySaver(EveryN): class ValidationMonitor(EveryN): """Runs evaluation of a given estimator, at most every N steps. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note that the evaluation is done based on the saved checkpoint, which will usually be older than the current step. @@ -756,6 +784,10 @@ class ValidationMonitor(EveryN): class CaptureVariable(EveryN): """Captures a variable's values into a collection. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + This monitor is useful for unit testing. You should exercise caution when using this monitor in production, since it never discards values. 
@@ -794,6 +826,7 @@ class CaptureVariable(EveryN): self._var_values[step] = _extract_output(outputs, self._var_name) +@deprecation.deprecated(None, "Use tf.train.MonitoredTrainingSession.") def get_default_monitors(loss_op=None, summary_op=None, save_summary_steps=100, @@ -828,6 +861,10 @@ def get_default_monitors(loss_op=None, class GraphDump(BaseMonitor): """Dumps almost all tensors in the graph at every step. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Note, this is very expensive, prefer `PrintTensor` in production. """ @@ -917,7 +954,12 @@ class GraphDump(BaseMonitor): class ExportMonitor(EveryN): - """Monitor that exports Estimator every N steps.""" + """Monitor that exports Estimator every N steps. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ @deprecation.deprecated("2017-03-25", "ExportMonitor is deprecated. Please pass an " @@ -1040,7 +1082,12 @@ class ExportMonitor(EveryN): class CheckpointSaver(BaseMonitor): - """Saves checkpoints every N steps or N seconds.""" + """Saves checkpoints every N steps or N seconds. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ def __init__(self, checkpoint_dir, @@ -1125,7 +1172,12 @@ class CheckpointSaver(BaseMonitor): class StepCounter(EveryN): - """Steps per second monitor.""" + """Steps per second monitor. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ def __init__(self, every_n_steps=100, output_dir=None, summary_writer=None): super(StepCounter, self).__init__(every_n_steps=every_n_steps) @@ -1165,6 +1217,10 @@ class NanLossDuringTrainingError(RuntimeError): class NanLoss(EveryN): """NaN Loss monitor. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Monitors loss and stops training if loss is NaN. Can either fail with exception or just stop training. """ diff --git a/tensorflow/contrib/learn/python/learn/ops/__init__.py b/tensorflow/contrib/learn/python/learn/ops/__init__.py index 33962e34cc..efb1f47cf5 100644 --- a/tensorflow/contrib/learn/python/learn/ops/__init__.py +++ b/tensorflow/contrib/learn/python/learn/ops/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Various TensorFlow Ops.""" +"""Various TensorFlow Ops (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py index fa3b7323e3..b3b067b8e1 100644 --- a/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/embeddings_ops.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops to work with embeddings. +"""TensorFlow Ops to work with embeddings (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
Note: categorical variables are handled via embeddings in many cases. For example, in case of words. diff --git a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py index b040ab3bb6..92976d1539 100644 --- a/tensorflow/contrib/learn/python/learn/ops/losses_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/losses_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for loss computation.""" +"""TensorFlow Ops for loss computation (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py index 45727faab4..aa37cb4a76 100644 --- a/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py +++ b/tensorflow/contrib/learn/python/learn/ops/seq2seq_ops.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Ops for Sequence to Sequence models.""" +"""TensorFlow Ops for Sequence to Sequence models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. 
+""" from __future__ import absolute_import from __future__ import division @@ -26,8 +31,10 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import variable_scope as vs +from tensorflow.python.util.deprecation import deprecated +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): """Returns predictions and loss for sequence of predictions. @@ -57,6 +64,7 @@ def sequence_classifier(decoding, labels, sampling_decoding=None, name=None): return array_ops.stack(predictions, axis=1), loss +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): """Processes inputs for Sequence to Sequence models. @@ -87,6 +95,7 @@ def seq2seq_inputs(x, y, input_length, output_length, sentinel=None, name=None): return in_x, in_y, out_y +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): """RNN Decoder that creates training and sampling sub-graphs. @@ -123,6 +132,7 @@ def rnn_decoder(decoder_inputs, initial_state, cell, scope=None): return outputs, states, sampling_outputs, sampling_states +@deprecated(None, 'Please use tf.nn/tf.layers directly.') def rnn_seq2seq(encoder_inputs, decoder_inputs, encoder_cell, diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py index 7bcc177d4e..e8c6e1acf8 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. 
# ============================================================================== -"""Preprocessing tools useful for building models.""" +"""Preprocessing tools useful for building models (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py index 154739d497..faba3b2025 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements preprocessing transformers for categorical variables.""" +"""Implements preprocessing transformers for categorical variables (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -22,6 +27,8 @@ from __future__ import print_function import math import numpy as np +from tensorflow.python.util.deprecation import deprecated + # pylint: disable=g-bad-import-order from . import categorical_vocabulary from ..learn_io.data_feeder import setup_processor_data_feeder @@ -31,10 +38,16 @@ from ..learn_io.data_feeder import setup_processor_data_feeder class CategoricalProcessor(object): """Maps documents to sequences of word ids. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ As a common convention, Nan values are handled as unknown tokens. Both float('nan') and np.nan are accepted. """ + @deprecated(None, 'Please use tensorflow/transform or tf.data for sequence ' + 'processing.') def __init__(self, min_frequency=0, share=False, vocabularies=None): """Initializes a CategoricalProcessor instance. diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py index 5709955c49..3ac370a6ab 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/categorical_vocabulary.py @@ -13,7 +13,11 @@ # limitations under the License. # ============================================================================== -"""Categorical vocabulary classes to map categories to indexes. +"""Categorical vocabulary classes to map categories to indexes (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Can be used for categorical variables, sparse variables and words. """ @@ -25,14 +29,21 @@ from __future__ import print_function import collections import six +from tensorflow.python.util.deprecation import deprecated + class CategoricalVocabulary(object): """Categorical variables vocabulary class. + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + Accumulates and provides mapping from classes to indexes. Can be easily used for words. 
""" + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, unknown_token="", support_reverse=True): self._unknown_token = unknown_token self._mapping = {unknown_token: 0} diff --git a/tensorflow/contrib/learn/python/learn/preprocessing/text.py b/tensorflow/contrib/learn/python/learn/preprocessing/text.py index 3af2074c2a..f2b6776be7 100644 --- a/tensorflow/contrib/learn/python/learn/preprocessing/text.py +++ b/tensorflow/contrib/learn/python/learn/preprocessing/text.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""Implements a number of text preprocessing utilities.""" +"""Implements a number of text preprocessing utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -24,6 +29,7 @@ import numpy as np import six from tensorflow.python.platform import gfile +from tensorflow.python.util.deprecation import deprecated from .categorical_vocabulary import CategoricalVocabulary # pylint: disable=g-bad-import-order @@ -38,6 +44,7 @@ TOKENIZER_RE = re.compile(r"[A-Z]{2,}(?![a-z])|[A-Z][a-z]+(?=[A-Z])|[\'\w\-]+", re.UNICODE) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') def tokenizer(iterator): """Tokenizer generator. @@ -51,9 +58,16 @@ def tokenizer(iterator): yield TOKENIZER_RE.findall(value) +@deprecated(None, 'Please use tensorflow/transform or tf.data.') class ByteProcessor(object): - """Maps documents into sequence of ids for bytes.""" + """Maps documents into sequence of ids for bytes. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. 
+ """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length): self.max_document_length = max_document_length @@ -108,8 +122,14 @@ class ByteProcessor(object): class VocabularyProcessor(object): - """Maps documents to sequences of word ids.""" + """Maps documents to sequences of word ids. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Please use tensorflow/transform or tf.data.') def __init__(self, max_document_length, min_frequency=0, diff --git a/tensorflow/contrib/learn/python/learn/session_run_hook.py b/tensorflow/contrib/learn/python/learn/session_run_hook.py index a8ba2be972..87edc9b720 100644 --- a/tensorflow/contrib/learn/python/learn/session_run_hook.py +++ b/tensorflow/contrib/learn/python/learn/session_run_hook.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""This file is deprecated. Use tensorflow.python.training.session_run_hook.""" +"""This file is deprecated. Use `tensorflow.python.training.session_run_hook`. + +See [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py index 919d415c30..d663cf5fb7 100644 --- a/tensorflow/contrib/learn/python/learn/summary_writer_cache.py +++ b/tensorflow/contrib/learn/python/learn/summary_writer_cache.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================== -"""Wrapper for a Session-like object that handles threads and recovery. +"""Wrapper for a Session-like object that handles threads and recovery (deprecated). + +These are deprecated aliases for classes and functions in `tf.train`. Please use +those directly. Based on an original design of Illia Polosukhin. """ diff --git a/tensorflow/contrib/learn/python/learn/trainable.py b/tensorflow/contrib/learn/python/learn/trainable.py index 429b6040be..a1a3f20dcd 100644 --- a/tensorflow/contrib/learn/python/learn/trainable.py +++ b/tensorflow/contrib/learn/python/learn/trainable.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""`Trainable` interface.""" +"""`Trainable` interface (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division @@ -23,6 +28,8 @@ import abc class Trainable(object): """Interface for objects that are trainable by, e.g., `Experiment`. + + THIS CLASS IS DEPRECATED. """ __metaclass__ = abc.ABCMeta diff --git a/tensorflow/contrib/learn/python/learn/utils/__init__.py b/tensorflow/contrib/learn/python/learn/utils/__init__.py index 48978d0ac3..66d8dc6fd4 100644 --- a/tensorflow/contrib/learn/python/learn/utils/__init__.py +++ b/tensorflow/contrib/learn/python/learn/utils/__init__.py @@ -13,7 +13,12 @@ # limitations under the License. # ============================================================================== -"""TensorFlow Learn Utils.""" +"""TensorFlow Learn Utils (deprecated). + +This module and all its submodules are deprecated. 
See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/export.py b/tensorflow/contrib/learn/python/learn/utils/export.py index cb34cb1d26..3eacac7a3d 100644 --- a/tensorflow/contrib/learn/python/learn/utils/export.py +++ b/tensorflow/contrib/learn/python/learn/utils/export.py @@ -13,14 +13,18 @@ # limitations under the License. # ============================================================================== -"""Export utilities.""" +"""Export utilities (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. +""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.framework import deprecated -from tensorflow.python.training import training_util from tensorflow.contrib.session_bundle import exporter from tensorflow.contrib.session_bundle import gc from tensorflow.python.client import session as tf_session @@ -32,6 +36,7 @@ from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import variables from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import saver as tf_saver +from tensorflow.python.training import training_util @deprecated('2017-03-25', 'Please use Estimator.export_savedmodel() instead.') diff --git a/tensorflow/contrib/learn/python/learn/utils/gc.py b/tensorflow/contrib/learn/python/learn/utils/gc.py index 226915987a..916aecbea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/gc.py +++ b/tensorflow/contrib/learn/python/learn/utils/gc.py @@ -13,7 +13,11 @@ # limitations under the License. 
# ============================================================================== -r"""System for specifying garbage collection (GC) of path based data. +r"""System for specifying garbage collection (GC) of path based data (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. This framework allows for GC of data specified by path names, for example files on disk. gc.Path objects each represent a single item stored at a path and may @@ -73,10 +77,12 @@ import os from tensorflow.python.platform import gfile from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated Path = collections.namedtuple('Path', 'path export_version') +@deprecated(None, 'Please implement your own file management or use Saver.') def largest_export_versions(n): """Creates a filter that keeps the largest n export versions. @@ -97,6 +103,7 @@ def largest_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def one_of_every_n_export_versions(n): """Creates a filter that keeps one of every n export versions. @@ -128,6 +135,7 @@ def one_of_every_n_export_versions(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def mod_export_version(n): """Creates a filter that keeps every export that is a multiple of n. @@ -146,6 +154,7 @@ def mod_export_version(n): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def union(lf, rf): """Creates a filter that keeps the union of two filters. @@ -163,6 +172,7 @@ def union(lf, rf): return keep +@deprecated(None, 'Please implement your own file management or use Saver.') def negation(f): """Negate a filter. 
@@ -179,6 +189,7 @@ def negation(f): return keep +@deprecated(None, 'Please implement your own file name management.') def get_paths(base_dir, parser): """Gets a list of Paths in a given directory. diff --git a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py index b2521933e5..b92eb9fea8 100644 --- a/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/input_fn_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities for creating input_fns. +"""Utilities for creating input_fns (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Contents of this file are moved to tensorflow/python/estimator/export.py. InputFnOps is renamed to ServingInputReceiver. @@ -32,13 +36,17 @@ from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import parsing_ops +from tensorflow.python.util.deprecation import deprecated class InputFnOps(collections.namedtuple('InputFnOps', ['features', 'labels', 'default_inputs'])): - """A return type for an input_fn. + """A return type for an input_fn (deprecated). + + THIS CLASS IS DEPRECATED. Please use tf.estimator.export.ServingInputReceiver + instead. This return type is currently only supported for serving input_fn. Training and eval input_fn should return a `(features, labels)` tuple. 
@@ -56,6 +64,8 @@ class InputFnOps(collections.namedtuple('InputFnOps', """ +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_parsing_serving_input_receiver_fn.') def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): """Build an input_fn appropriate for serving, expecting fed tf.Examples. @@ -84,6 +94,8 @@ def build_parsing_serving_input_fn(feature_spec, default_batch_size=None): return input_fn +@deprecated(None, 'Please use ' + 'tf.estimator.export.build_raw_serving_input_receiver_fn.') def build_default_serving_input_fn(features, default_batch_size=None): """Build an input_fn appropriate for serving, expecting feature Tensors. diff --git a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py index 6a63fb545a..6dbaa15f83 100644 --- a/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py +++ b/tensorflow/contrib/learn/python/learn/utils/inspect_checkpoint.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""A simple script for inspect checkpoint files.""" +"""A simple script for inspect checkpoint files (deprecated).""" from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 1593380007..213619a187 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -12,7 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Utilities supporting export to SavedModel. 
+"""Utilities supporting export to SavedModel (deprecated). + +This module and all its submodules are deprecated. See +[contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) +for migration instructions. Some contents of this file are moved to tensorflow/python/estimator/export.py: @@ -52,8 +56,9 @@ from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils from tensorflow.python.summary import summary_iterator from tensorflow.python.training import saver - from tensorflow.python.util import compat +from tensorflow.python.util.deprecation import deprecated + # A key for use in the input_alternatives dict indicating the default input. # This is the input that will be expected when a serving request does not @@ -77,6 +82,7 @@ FEATURES_INPUT_ALTERNATIVE_KEY = 'features_input_alternative' _FALLBACK_DEFAULT_OUTPUT_ALTERNATIVE_KEY = 'default_output_alternative' +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_standardized_signature_def(input_tensors, output_tensors, problem_type): """Build a SignatureDef using problem type and input and output Tensors. @@ -156,6 +162,7 @@ def _is_regression_problem(problem_type, input_tensors, output_tensors): len(input_tensors) == 1 and len(output_tensors) == 1) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_input_alternatives(input_ops): """Obtain all input alternatives using the input_fn output and heuristics.""" input_alternatives = {} @@ -181,6 +188,7 @@ def get_input_alternatives(input_ops): return input_alternatives, features +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): """Obtain all output alternatives using the model_fn output and heuristics. 
@@ -246,6 +254,7 @@ def get_output_alternatives(model_fn_ops, default_output_alternative_key=None): sorted(output_alternatives.keys()))) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def build_all_signature_defs(input_alternatives, output_alternatives, actual_default_output_alternative_key): """Build `SignatureDef`s from all pairs of input and output alternatives.""" @@ -279,6 +288,7 @@ def build_all_signature_defs(input_alternatives, output_alternatives, MAX_DIRECTORY_CREATION_ATTEMPTS = 10 +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_timestamped_export_dir(export_dir_base): """Builds a path to a new subdirectory within the base directory. @@ -317,6 +327,7 @@ def get_timestamped_export_dir(export_dir_base): '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_temp_export_dir(timestamped_export_dir): """Builds a directory name based on the argument but starting with 'temp-'. @@ -344,6 +355,7 @@ def _export_version_parser(path): return path._replace(export_version=int(filename)) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def get_most_recent_export(export_dir_base): """Locate the most recent SavedModel export in a directory of many exports. @@ -363,6 +375,7 @@ def get_most_recent_export(export_dir_base): return next(iter(results or []), None) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def garbage_collect_exports(export_dir_base, exports_to_keep): """Deletes older exports, retaining only a given number of the most recent. 
@@ -387,6 +400,7 @@ def garbage_collect_exports(export_dir_base, exports_to_keep): logging.warn('Can not delete %s recursively: %s', p.path, e) +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_export_strategy(serving_input_fn, default_output_alternative_key=None, assets_extra=None, @@ -469,6 +483,8 @@ def make_export_strategy(serving_input_fn, return export_strategy.ExportStrategy('Servo', export_fn, strip_default_attrs) +@deprecated(None, + 'Use tf.estimator.export.build_parsing_serving_input_receiver_fn') def make_parsing_export_strategy(feature_columns, default_output_alternative_key=None, assets_extra=None, @@ -555,8 +571,14 @@ def _default_compare_fn(curr_best_eval_result, cand_eval_result): class BestModelSelector(object): - """A helper that keeps track of export selection candidates.""" + """A helper that keeps track of export selection candidates. + + THIS CLASS IS DEPRECATED. See + [contrib/learn/README.md](https://www.tensorflow.org/code/tensorflow/contrib/learn/README.md) + for general migration instructions. + """ + @deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def __init__(self, event_file_pattern=None, compare_fn=None): """Constructor of this class. @@ -622,6 +644,7 @@ class BestModelSelector(object): return best_eval_result +@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def make_best_model_export_strategy( serving_input_fn, exports_to_keep=1, @@ -707,6 +730,7 @@ def make_best_model_export_strategy( # TODO(b/67013778): Revisit this approach when corresponding changes to # TF Core are finalized. 
+@deprecated(None, 'Switch to tf.estimator.Exporter and associated utilities.') def extend_export_strategy(base_export_strategy, post_export_fn, post_export_name=None): diff --git a/tensorflow/python/util/decorator_utils.py b/tensorflow/python/util/decorator_utils.py index df259c7f7c..7b4363c0e4 100644 --- a/tensorflow/python/util/decorator_utils.py +++ b/tensorflow/python/util/decorator_utils.py @@ -82,7 +82,7 @@ def add_notice_to_docstring( lines = _normalize_docstring(doc).splitlines() lines[0] += ' ' + suffix_str - notice = [''] + notice + [instructions] + notice = [''] + notice + ([instructions] if instructions else []) if len(lines) > 1: # Make sure that we keep our distance from the main body -- GitLab From d1ba271902a91a044e7515e248cd9f384a91067b Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 16:24:54 -0800 Subject: [PATCH 188/884] [XLA] In HloEvaluator, fix an issue for HandleAbs to handle complex numbers more correctly: - abs([complex numbers]) would yield floats. However since the specialization for HandleAbs is based on the return type (float), we'd CHECK fail due to float != complex when accessing the elements of the operand (complex). - enable unary_op_test for interpreter.
PiperOrigin-RevId: 187099576 --- .../compiler/xla/service/hlo_evaluator.cc | 32 +++++++++++++++++-- tensorflow/compiler/xla/tests/BUILD | 1 + 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index fd06b19144..cf8b35908f 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -57,6 +57,12 @@ struct is_complex_t : public std::false_type {}; template <> struct is_complex_t : public std::true_type {}; +template +struct is_complex64_t : public std::false_type {}; + +template <> +struct is_complex64_t : public std::true_type {}; + template StatusOr> Compare(const Shape& shape, HloOpcode opcode, const Literal& lhs_literal, @@ -248,17 +254,37 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { template < typename NativeT, - typename std::enable_if::value || - is_complex_t::value>::type* = nullptr> + typename std::enable_if::value>::type* = nullptr> Status HandleAbs(HloInstruction* abs) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[abs], - ElementWiseUnaryOp(abs, [](ElementwiseT elem_operand) { + ElementWiseUnaryOp(abs, [](NativeT elem_operand) { return std::abs(elem_operand); })); return Status::OK(); } + template < + typename NativeT, + typename std::enable_if::value>::type* = nullptr> + Status HandleAbs(HloInstruction* abs) { + const Literal& operand_literal = + parent_->GetEvaluatedLiteralFor(abs->operand(0)); + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[abs], + (ElementWiseUnaryOpImpl( + abs, [](NativeT elem_operand) { return std::abs(elem_operand); }, + operand_literal))); + + return Status::OK(); + } + Status HandleAbs(HloInstruction* abs) override { + // If the operand is of C64 type, the return type of abs will be F32. + // However, ElementwiseT would still be the return type, F32, and thus + // specifying the ElementwiseT explicitly as C64 is needed below. 
+ if (abs->operand(0)->shape().element_type() == C64) { + return HandleAbs(abs); + } return HandleAbs(abs); } diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 33fde9737d..f3ecfc1604 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -494,6 +494,7 @@ xla_test( xla_test( name = "unary_op_test", srcs = ["unary_op_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From 60ff3890e98f53c1037440d5e535f6f79ad42d7d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 26 Feb 2018 17:01:24 -0800 Subject: [PATCH 189/884] Only link the swapping code when compiling TensorFlow with CUDA support. PiperOrigin-RevId: 187104273 --- tensorflow/core/grappler/optimizers/BUILD | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 908e58bcc7..a52d1c8df2 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -3,6 +3,7 @@ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "tf_cc_test") load("//tensorflow:tensorflow.bzl", "tf_cc_test_gpu") load("//tensorflow:tensorflow.bzl", "tf_kernel_library") +load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") filegroup( name = "all_files", @@ -319,8 +320,6 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ - ":gpu_swapping_kernels", - ":gpu_swapping_ops", ":graph_optimizer", ":graph_rewriter", ":static_schedule", @@ -336,7 +335,10 @@ cc_library( "//tensorflow/core/grappler/costs:graph_properties", "//tensorflow/core/grappler/utils:topological_sort", "//tensorflow/core/grappler/utils:traversal", - ], + ] + if_cuda([ + ":gpu_swapping_kernels", + ":gpu_swapping_ops", + ]), ) tf_cc_test_gpu( -- GitLab From 7bcc7ee1a9da4ec55395a935123a46b4ecb2364f Mon 
Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:04:09 -0800 Subject: [PATCH 190/884] Consolidate the builtin function overrides into a single module, and use a generic `dynamic_builtin` function to dispatch between implementations. Use the generic dispatcher in the generated code. PiperOrigin-RevId: 187104685 --- .../py2tf/converters/builtin_functions.py | 13 ++++--- tensorflow/contrib/py2tf/utils/BUILD | 12 +----- tensorflow/contrib/py2tf/utils/__init__.py | 4 +- .../py2tf/utils/{printing.py => builtins.py} | 32 +++++++++++++-- .../{printing_test.py => builtins_test.py} | 39 +++++++++++++++---- tensorflow/contrib/py2tf/utils/misc.py | 13 ------- tensorflow/contrib/py2tf/utils/misc_test.py | 27 +------------ 7 files changed, 72 insertions(+), 68 deletions(-) rename tensorflow/contrib/py2tf/utils/{printing.py => builtins.py} (62%) rename tensorflow/contrib/py2tf/utils/{printing_test.py => builtins_test.py} (56%) diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions.py b/tensorflow/contrib/py2tf/converters/builtin_functions.py index e69038aced..b5aa9756da 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions.py +++ b/tensorflow/contrib/py2tf/converters/builtin_functions.py @@ -36,23 +36,24 @@ class BuiltinFunctionTransformer(transformer.Base): # pylint:disable=invalid-name - def _convert_len(self, node): + def _convert_builtin(self, node): template = """ - py2tf_utils.dynamic_len(args) + py2tf_utils.dynamic_builtin(func, args) """ - return templates.replace(template, args=node.args)[0].value + return templates.replace(template, func=node.func, args=node.args)[0].value def _convert_print(self, node): template = """ - py2tf_utils.call_print(args) + py2tf_utils.dynamic_print(args) """ return templates.replace(template, args=node.args)[0].value def visit_Call(self, node): self.generic_visit(node) # TODO(mdan): This won't work if the function was hidden. 
- if isinstance(node.func, gast.Name) and node.func.id == 'len': - return self._convert_len(node) + if isinstance(node.func, gast.Name) and node.func.id in ('len',): + return self._convert_builtin(node) + # Print needs to be handled separately because it can be read as statement. if isinstance(node.func, gast.Name) and node.func.id == 'print': return self._convert_print(node) return node diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index c2fdd40707..2086a9ef60 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -20,10 +20,10 @@ py_library( name = "utils", srcs = [ "__init__.py", + "builtins.py", "context_managers.py", "misc.py", "multiple_dispatch.py", - "printing.py", "py_func.py", "tensor_list.py", "type_check.py", @@ -76,16 +76,6 @@ py_test( ], ) -py_test( - name = "printing_test", - srcs = ["printing_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":utils", - "//tensorflow/python:client_testlib", - ], -) - py_test( name = "type_check_test", srcs = ["type_check_test.py"], diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index d931322bf3..19bf2272bc 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -18,11 +18,11 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf.utils.builtins import dynamic_builtin +from tensorflow.contrib.py2tf.utils.builtins import dynamic_print from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_on_returns from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while -from tensorflow.contrib.py2tf.utils.printing import call_print 
from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/printing.py b/tensorflow/contrib/py2tf/utils/builtins.py similarity index 62% rename from tensorflow/contrib/py2tf/utils/printing.py rename to tensorflow/contrib/py2tf/utils/builtins.py index 95a62bd80b..0a50b80b60 100644 --- a/tensorflow/contrib/py2tf/utils/printing.py +++ b/tensorflow/contrib/py2tf/utils/builtins.py @@ -12,14 +12,40 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""TensorFlow printing support utilities.""" +"""Builtin conversion utilities.""" from __future__ import absolute_import from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.util import tf_inspect + + +def dynamic_builtin(f, *args, **kwargs): + """Converts a builtin function call inline.""" + if not tf_inspect.isbuiltin(f): + return f(*args, **kwargs) + + if f is len: + return dynamic_len(*args, **kwargs) + + raise NotImplementedError('The "%s" builtin is not yet supported.' % f) + + +def dynamic_len(list_or_tensor): + """Implementation of len using dynamic dispatch.""" + if tensor_util.is_tensor(list_or_tensor): + shape = list_or_tensor.shape + if not shape: + raise ValueError( + 'len requires non-zero rank for tensor "%s"' % list_or_tensor) + return array_ops.shape(list_or_tensor)[0] + + return len(list_or_tensor) def is_tf_print_compatible(value): @@ -30,8 +56,8 @@ def is_tf_print_compatible(value): return False -def call_print(*values): - """Compiled counterpart of the print builtin. 
+def dynamic_print(*values): + """Implementation of print using dynamic dispatch. The function attempts to use tf.Print if all the values are compatible. Otherwise, it will fall back to py_func. diff --git a/tensorflow/contrib/py2tf/utils/printing_test.py b/tensorflow/contrib/py2tf/utils/builtins_test.py similarity index 56% rename from tensorflow/contrib/py2tf/utils/printing_test.py rename to tensorflow/contrib/py2tf/utils/builtins_test.py index 2070deb304..19a72c63ec 100644 --- a/tensorflow/contrib/py2tf/utils/printing_test.py +++ b/tensorflow/contrib/py2tf/utils/builtins_test.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Tests for printing module.""" +"""Tests for builtins module.""" from __future__ import absolute_import from __future__ import division @@ -22,28 +22,53 @@ import sys import six -from tensorflow.contrib.py2tf.utils import printing +from tensorflow.contrib.py2tf.utils import builtins +from tensorflow.python.framework import constant_op from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): +class BuiltinsTest(test.TestCase): - def test_call_print_tf(self): + def test_dynamic_len_tf_scalar(self): + a = constant_op.constant(1) + + with self.assertRaises(ValueError): + with self.test_session() as sess: + sess.run(builtins.dynamic_builtin(len, a)) + + def test_dynamic_len_tf_array(self): + a = constant_op.constant([1, 2, 3]) + + with self.test_session() as sess: + self.assertEqual(3, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_tf_matrix(self): + a = constant_op.constant([[1, 2], [3, 4]]) + + with self.test_session() as sess: + self.assertEqual(2, sess.run(builtins.dynamic_builtin(len, a))) + + def test_dynamic_len_py_list(self): + a = [3] * 5 + + self.assertEqual(5, builtins.dynamic_builtin(len, a)) + + def 
test_dynamic_print_tf(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', 1)) + sess.run(builtins.dynamic_print('test message', 1)) self.assertEqual(out_capturer.getvalue(), 'test message 1\n') finally: sys.stdout = sys.__stdout__ - def test_call_print_py_func(self): + def test_dynamic_print_complex(self): try: out_capturer = six.StringIO() sys.stdout = out_capturer with self.test_session() as sess: - sess.run(printing.call_print('test message', [1, 2])) + sess.run(builtins.dynamic_print('test message', [1, 2])) self.assertEqual(out_capturer.getvalue(), 'test message [1, 2]\n') finally: sys.stdout = sys.__stdout__ diff --git a/tensorflow/contrib/py2tf/utils/misc.py b/tensorflow/contrib/py2tf/utils/misc.py index 7548048388..1b06caf0bd 100644 --- a/tensorflow/contrib/py2tf/utils/misc.py +++ b/tensorflow/contrib/py2tf/utils/misc.py @@ -19,22 +19,9 @@ from __future__ import division from __future__ import print_function from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops -def dynamic_len(list_or_tensor): - """Implementation of len using dynamic dispatch.""" - if tensor_util.is_tensor(list_or_tensor): - shape = list_or_tensor.shape - if not shape: - raise ValueError( - 'len requires non-zero rank for tensor "%s"' % list_or_tensor) - return array_ops.shape(list_or_tensor)[0] - - return len(list_or_tensor) - - def alias_tensors(*args): """Wrap any Tensor arguments with an identity op. 
diff --git a/tensorflow/contrib/py2tf/utils/misc_test.py b/tensorflow/contrib/py2tf/utils/misc_test.py index ec88e7cb74..8aedd4cd64 100644 --- a/tensorflow/contrib/py2tf/utils/misc_test.py +++ b/tensorflow/contrib/py2tf/utils/misc_test.py @@ -19,37 +19,12 @@ from __future__ import division from __future__ import print_function from tensorflow.contrib.py2tf.utils.misc import alias_tensors -from tensorflow.contrib.py2tf.utils.misc import dynamic_len from tensorflow.python.framework.constant_op import constant from tensorflow.python.ops.variables import Variable from tensorflow.python.platform import test -class ContextManagersTest(test.TestCase): - - def test_dynamic_len_tf_scalar(self): - a = constant(1) - - with self.assertRaises(ValueError): - with self.test_session() as sess: - sess.run(dynamic_len(a)) - - def test_dynamic_len_tf_array(self): - a = constant([1, 2, 3]) - - with self.test_session() as sess: - self.assertEqual(3, sess.run(dynamic_len(a))) - - def test_dynamic_len_tf_matrix(self): - a = constant([[1, 2], [3, 4]]) - - with self.test_session() as sess: - self.assertEqual(2, sess.run(dynamic_len(a))) - - def test_dynamic_len_py_list(self): - a = [3] * 5 - - self.assertEqual(5, dynamic_len(a)) +class MiscTest(test.TestCase): def test_alias_single_tensor(self): a = constant(1) -- GitLab From cb0984df5549c077621049416f69b914635208ce Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 17:27:20 -0800 Subject: [PATCH 191/884] Fix buffer assignment for conditional instruction. 
PiperOrigin-RevId: 187107432 --- .../compiler/xla/service/buffer_assignment.cc | 358 +++++++++--------- .../compiler/xla/service/copy_insertion.cc | 72 +++- 2 files changed, 241 insertions(+), 189 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index b1e693da9d..d44d3d71d9 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -48,6 +48,183 @@ using ::tensorflow::strings::HumanReadableNumBytes; using ::tensorflow::strings::Printf; using ::tensorflow::strings::StrAppend; +namespace { + +template +string ColocatedBufferSetsToString(const T& container, const char* title) { + string result; + StrAppend(&result, title, "\n"); + for (const auto& it : container) { + StrAppend(&result, "\t", it->ToString(), "\n"); + } + return result; +} + +// Walk the call graph of the HLO module and place each computation into either +// thread_local_computations or global_computations depending upon whether the +// computation requires thread-local allocations or global allocations. The +// elements in thread_local_computations and global_computations are in post +// order (if computation A has an instruction which calls computation B, then A +// will appear after B in the vector). +Status GatherComputationsByAllocationType( + const HloModule* module, + std::vector* thread_local_computations, + std::vector* global_computations) { + // Create a worklist of computations paired with whether the allocation must + // be thread-local. + std::deque> worklist; + worklist.push_back(std::make_pair(module->entry_computation(), + /*is_thread_local*/ false)); + + // Sets for quickly checking membership. Computations are returned in vectors + // for stable iteration. 
+ FlatSet thread_local_set; + FlatSet global_set; + + while (!worklist.empty()) { + auto worklist_front = worklist.front(); + worklist.pop_front(); + const HloComputation* computation = worklist_front.first; + bool is_thread_local = worklist_front.second; + bool in_thread_local_set = thread_local_set.count(computation) > 0; + bool in_global_set = global_set.count(computation) > 0; + + // If the computation has already been added to the respective set, then + // nothing to do. + if ((is_thread_local && in_thread_local_set) || + (!is_thread_local && in_global_set)) { + continue; + } + + // If the computation has already been added to the other set this is an + // error condition because the global call to the computation (eg, + // while/call) may return a reference to one of the thread-local buffers to + // the calling computation which will become a dangling reference when the + // thread-local is deallocated with the call return. + if ((is_thread_local && in_global_set) || + (!is_thread_local && in_thread_local_set)) { + return InvalidArgument( + "computation %s has conflicting allocation requirements (global " + "and thread-local)", + computation->name().c_str()); + } + + if (is_thread_local) { + thread_local_set.insert(computation); + } else { + global_set.insert(computation); + } + + for (auto* instruction : computation->instructions()) { + for (HloComputation* subcomputation : + instruction->called_computations()) { + switch (instruction->opcode()) { + case HloOpcode::kCall: + case HloOpcode::kConditional: + case HloOpcode::kWhile: + // Call and while must be called from a computation with global + // allocations as they may return references to buffers inside the + // called computation which cannot be thread-local. 
+ if (is_thread_local) { + return InvalidArgument( + "computation %s cannot contain call/while op because it " + "requires thread-local buffer allocations", + computation->name().c_str()); + } + worklist.push_back(std::make_pair(subcomputation, + false)); // Not thread local. + break; + case HloOpcode::kMap: + case HloOpcode::kReduce: + case HloOpcode::kReduceWindow: + case HloOpcode::kSelectAndScatter: + case HloOpcode::kFusion: + // Map/reduce etc computations are always thread-local. + worklist.push_back(std::make_pair(subcomputation, + true)); // Thread local. + break; + default: + return InternalError( + "Unexpected calling opcode: %s", + HloOpcodeString(instruction->opcode()).c_str()); + } + } + } + } + + // Add the computations to the vectors in post order. + for (auto* computation : module->MakeComputationPostOrder()) { + if (thread_local_set.count(computation) > 0) { + thread_local_computations->push_back(computation); + } else if (global_set.count(computation) > 0) { + global_computations->push_back(computation); + } + // If the computation is not reachable from the entry computation, then it + // will not appear in either thread_local_set or global_set. We don't bother + // assigning buffers for these. + } + return Status::OK(); +} + +// Checks that points-to set of 'instruction' is unambiguous and distinct +// (ensured by CopyInsertion), then adds the buffer from the points-to set at +// 'index' to 'colocated_set'. +const LogicalBuffer* AddBufferToColocatedSet( + const HloInstruction* instruction, const ShapeIndex& index, + const TuplePointsToAnalysis& points_to_analysis, + std::vector* colocated_set) { + // CopyInsertion ensures root points-to set is unambiguous and distinct. 
+ const auto& points_to = points_to_analysis.GetPointsToSet(instruction); + DCHECK(!points_to.IsAmbiguous()); + colocated_set->push_back(points_to.element(index)[0]); + return colocated_set->back(); +} + +// Given the interference map of a graph (the list of interfering node indices +// for each node), perform graph coloring such that interfering nodes are +// assigned to different colors. Returns the assigned color of the nodes, where +// the colors are represented as integer values [0, color_count). +std::vector ColorInterferenceGraph( + const std::vector>& interference_map) { + const int64 node_count = interference_map.size(); + + // Sort the nodes such that we assign nodes with more interference first. This + // relies on the common heuristic of assigning the most constrained node + // first, but it would be good to investigate other ordering heuristics too. + std::vector nodes(node_count); + std::iota(nodes.begin(), nodes.end(), 0); + std::sort(nodes.begin(), nodes.end(), + [&interference_map](const int64 i, const int64 j) { + return interference_map[i].size() > interference_map[j].size(); + }); + + const int64 kColorUnassigned = -1; + std::vector assigned_colors(node_count, kColorUnassigned); + for (int64 node : nodes) { + // Mark the colors that are already assigned to the neighbors. + std::vector available_colors(node_count, true); + for (int64 neighbor : interference_map[node]) { + int64 color = assigned_colors[neighbor]; + if (color != kColorUnassigned) { + available_colors[color] = false; + } + } + + // Find the color that is not yet assigned to the neighbors. 
+ int64 color = kColorUnassigned; + for (color = 0; color < available_colors.size(); ++color) { + if (available_colors[color]) { + break; + } + } + CHECK_NE(color, kColorUnassigned); + assigned_colors[node] = color; + } + return assigned_colors; +} + +} // namespace + size_t BufferAllocation::Slice::Hasher::operator()(Slice s) const { uint64 h = std::hash()(s.index()); h = tensorflow::Hash64Combine(h, std::hash()(s.offset())); @@ -523,116 +700,6 @@ BufferAssignmentProto BufferAssignment::ToProto() const { return proto; } -namespace { - -// Walk the call graph of the HLO module and place each computation into either -// thread_local_computations or global_computations depending upon whether the -// computation requires thread-local allocations or global allocations. The -// elements in thread_local_computations and global_computations are in post -// order (if computation A has an instruction which calls computation B, then A -// will appear after B in the vector). -Status GatherComputationsByAllocationType( - const HloModule* module, - std::vector* thread_local_computations, - std::vector* global_computations) { - // Create a worklist of computations paired with whether the allocation must - // be thread-local. - std::deque> worklist; - worklist.push_back(std::make_pair(module->entry_computation(), - /*is_thread_local*/ false)); - - // Sets for quickly checking membership. Computations are returned in vectors - // for stable iteration. - FlatSet thread_local_set; - FlatSet global_set; - - while (!worklist.empty()) { - auto worklist_front = worklist.front(); - worklist.pop_front(); - const HloComputation* computation = worklist_front.first; - bool is_thread_local = worklist_front.second; - bool in_thread_local_set = thread_local_set.count(computation) > 0; - bool in_global_set = global_set.count(computation) > 0; - - // If the computation has already been added to the respective set, then - // nothing to do. 
- if ((is_thread_local && in_thread_local_set) || - (!is_thread_local && in_global_set)) { - continue; - } - - // If the computation has already been added to the other set this is an - // error condition because the global call to the computation (eg, - // while/call) may return a reference to one of the thread-local buffers to - // the calling computation which will become a dangling reference when the - // thread-local is deallocated with the call return. - if ((is_thread_local && in_global_set) || - (!is_thread_local && in_thread_local_set)) { - return InvalidArgument( - "computation %s has conflicting allocation requirements (global " - "and thread-local)", - computation->name().c_str()); - } - - if (is_thread_local) { - thread_local_set.insert(computation); - } else { - global_set.insert(computation); - } - - for (auto* instruction : computation->instructions()) { - for (HloComputation* subcomputation : - instruction->called_computations()) { - switch (instruction->opcode()) { - case HloOpcode::kCall: - case HloOpcode::kConditional: - case HloOpcode::kWhile: - // Call and while must be called from a computation with global - // allocations as they may return references to buffers inside the - // called computation which cannot be thread-local. - if (is_thread_local) { - return InvalidArgument( - "computation %s cannot contain call/while op because it " - "requires thread-local buffer allocations", - computation->name().c_str()); - } - worklist.push_back(std::make_pair(subcomputation, - false)); // Not thread local. - break; - case HloOpcode::kMap: - case HloOpcode::kReduce: - case HloOpcode::kReduceWindow: - case HloOpcode::kSelectAndScatter: - case HloOpcode::kFusion: - // Map/reduce etc computations are always thread-local. - worklist.push_back(std::make_pair(subcomputation, - true)); // Thread local. 
- break; - default: - return InternalError( - "Unexpected calling opcode: %s", - HloOpcodeString(instruction->opcode()).c_str()); - } - } - } - } - - // Add the computations to the vectors in post order. - for (auto* computation : module->MakeComputationPostOrder()) { - if (thread_local_set.count(computation) > 0) { - thread_local_computations->push_back(computation); - } else if (global_set.count(computation) > 0) { - global_computations->push_back(computation); - } - // If the computation is not reachable from the entry computation, then it - // will not appear in either thread_local_set or global_set. We don't bother - // assigning buffers for these. - } - return Status::OK(); -} - -} // namespace - /* static */ StatusOr> BufferAssigner::Run( const HloModule* module, std::unique_ptr hlo_ordering, @@ -1085,7 +1152,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( if (colocated_set.empty()) { return; } - + VLOG(5) << ColocatedBufferSetsToString(colocated_set, + "Adding colocated buffer set"); // Find existing sets that overlap with at least one buffer from the // colocated_set. The resulting 'overlap_set_indices' will have at most // colocated_buffer_sets->size() entries, and will be in increasing order. 
@@ -1093,6 +1161,10 @@ void BufferAssigner::AddSetToColocatedBufferSets( for (size_t index = 0; index < colocated_buffer_sets->size(); ++index) { for (const LogicalBuffer* buffer : colocated_set) { if ((*colocated_buffer_sets)[index].count(buffer) > 0) { + VLOG(5) << "Found overlap with existing set on buffer " + << buffer->ToString() << "\n" + << ColocatedBufferSetsToString((*colocated_buffer_sets)[index], + "Overlapping set"); overlap_set_indices.push_back(index); break; } @@ -1104,6 +1176,7 @@ void BufferAssigner::AddSetToColocatedBufferSets( colocated_buffer_sets->emplace_back(); colocated_buffer_sets->back().insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << "No overlap found, new group created"; return; } @@ -1115,6 +1188,8 @@ void BufferAssigner::AddSetToColocatedBufferSets( first->insert(overlap_set.begin(), overlap_set.end()); } first->insert(colocated_set.begin(), colocated_set.end()); + VLOG(5) << ColocatedBufferSetsToString( + *first, "Result of the colocated buffer set merging"); // Remove overlap sets that we just merged. The offset accounts for the fact // that as elements are erased, the indices need to be adjusted. Keep in mind @@ -1125,67 +1200,6 @@ void BufferAssigner::AddSetToColocatedBufferSets( } } -namespace { - -// Checks that points-to set of 'instruction' is unambiguous and distinct -// (ensured by CopyInsertion), then adds the buffer from the points-to set at -// 'index' to 'colocated_set'. -const LogicalBuffer* AddBufferToColocatedSet( - const HloInstruction* instruction, const ShapeIndex& index, - const TuplePointsToAnalysis& points_to_analysis, - std::vector* colocated_set) { - // CopyInsertion ensures root points-to set is unambiguous and distinct. 
- const auto& points_to = points_to_analysis.GetPointsToSet(instruction); - DCHECK(!points_to.IsAmbiguous()); - colocated_set->push_back(points_to.element(index)[0]); - return colocated_set->back(); -} - -// Given the interference map of a graph (the list of interfering node indices -// for each node), perform graph coloring such that interfering nodes are -// assigned to different colors. Returns the assigned color of the nodes, where -// the colors are represented as integer values [0, color_count). -std::vector ColorInterferenceGraph( - const std::vector>& interference_map) { - const int64 node_count = interference_map.size(); - - // Sort the nodes such that we assign nodes with more interference first. This - // relies on the common heuristic of assigning the most constrained node - // first, but it would be good to investigate other ordering heuristics too. - std::vector nodes(node_count); - std::iota(nodes.begin(), nodes.end(), 0); - std::sort(nodes.begin(), nodes.end(), - [&interference_map](const int64 i, const int64 j) { - return interference_map[i].size() > interference_map[j].size(); - }); - - const int64 kColorUnassigned = -1; - std::vector assigned_colors(node_count, kColorUnassigned); - for (int64 node : nodes) { - // Mark the colors that are already assigned to the neighbors. - std::vector available_colors(node_count, true); - for (int64 neighbor : interference_map[node]) { - int64 color = assigned_colors[neighbor]; - if (color != kColorUnassigned) { - available_colors[color] = false; - } - } - - // Find the color that is not yet assigned to the neighbors. 
- int64 color = kColorUnassigned; - for (color = 0; color < available_colors.size(); ++color) { - if (available_colors[color]) { - break; - } - } - CHECK_NE(color, kColorUnassigned); - assigned_colors[node] = color; - } - return assigned_colors; -} - -} // namespace - std::vector BufferAssigner::MergeColocatedBufferSets( const std::vector& colocated_buffer_sets, diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index cc195879a6..df73c28597 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -58,6 +58,45 @@ bool ValueIsReadOnly(const HloValue& value) { return IsConstantValue(value) || IsEntryParameterValue(value); } +// Data structure describing the action which should be taken on parts of a +// computation's buffers, with respect to the adding of special case copies. +struct SpecialCaseCopyPolicy { + // Insert a copy if the same buffer is found at multiple indices within the + // output tuple. + bool copy_root_replicated_buffers = false; + // If true, insert a copy if a buffer coming from a constant or a parameter + // is found within the output tuple. + bool copy_parameters_and_constants = false; +}; + +SpecialCaseCopyPolicy GetSpecialCaseCopyPolicy(const CallGraphNode& node, + HloModule* module, + HloComputation* computation) { + SpecialCaseCopyPolicy policy; + if (computation == module->entry_computation()) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + for (const CallSite& site : node.caller_callsites()) { + // The kWhile instruction does not have any handling here, as the + // AddCopiesForWhile() API takes care of adding its own copies. 
+ if (site.instruction()->opcode() == HloOpcode::kConditional) { + policy.copy_parameters_and_constants = true; + policy.copy_root_replicated_buffers = true; + } + } + return policy; +} + +bool ShouldCopyRootValue(const HloValue& value, + const SpecialCaseCopyPolicy& policy) { + if (policy.copy_parameters_and_constants) { + return IsConstantValue(value) || + value.defining_instruction()->opcode() == HloOpcode::kParameter; + } + return false; +} + // Deep copy the given instructions 'from' and 'to' at the ShapeIndexes given in // 'indices_to_copy'. Add control edges from the respective kCopy instructions // in deep copy of 'from' to the respective kCopy instruction in the deep copy @@ -957,7 +996,8 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { } TF_RET_CHECK(node.context() == CallContext::kSequential); - const bool is_entry = computation == module->entry_computation(); + SpecialCaseCopyPolicy policy = + GetSpecialCaseCopyPolicy(node, module, computation); HloInstruction* root = computation->root_instruction(); // Mark nondistinct/ambiguous indices. @@ -970,27 +1010,26 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { for (const HloBuffer* buffer : buffers_at_index) { buffer_seen_before |= !seen.insert(buffer).second; } - if (buffers_at_index.size() > 1 || (buffer_seen_before && is_entry)) { - VLOG(2) << "Index " << index << " of root of computation " + if (buffers_at_index.size() > 1 || + (buffer_seen_before && policy.copy_root_replicated_buffers)) { + VLOG(2) << "Index " << index << " of computation " << computation->name() << " (" << root->name() << ") has ambiguous or non-distinct buffer. Copying."; add_index_to_copy(root, index); } }); - // For entry instructions, mark any parameter or constant values. 
- if (is_entry) { - for (const auto& pair : - alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { - const ShapeIndex& index = pair.first; - const HloValueSet& value_set = pair.second; - for (const HloValue* value : value_set.values()) { - if (ValueIsReadOnly(*value)) { - VLOG(2) << "Root of entry computation (" << root->name() - << ") has constant or entry parameter value at index " - << index << ". Copying."; - add_index_to_copy(root, index); - } + for (const auto& pair : + alias_analysis->dataflow_analysis().GetInstructionValueSet(root)) { + const ShapeIndex& index = pair.first; + const HloValueSet& value_set = pair.second; + for (const HloValue* value : value_set.values()) { + if (ShouldCopyRootValue(*value, policy)) { + VLOG(2) << "Root of (" << root->name() << ") of computation(" + << computation->name() + << ") has constant or parameter value at index " << index + << ". Copying."; + add_index_to_copy(root, index); } } } @@ -1012,7 +1051,6 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { instruction->parent()->set_root_instruction(deep_copy); } } - return Status::OK(); } -- GitLab From ef7c481b0aa563ab8a3bf387e97121382cbaa588 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Mon, 26 Feb 2018 17:55:31 -0800 Subject: [PATCH 192/884] [XLA::Interpreter] Add support for kConditional to HloEvaluator. Also enable xla/tests/conditional_tests to run on interpreter. 
PiperOrigin-RevId: 187110438 --- .../compiler/xla/service/hlo_evaluator.cc | 28 +++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 ++ tensorflow/compiler/xla/tests/BUILD | 1 + 3 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index cf8b35908f..afbfdac05e 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2491,6 +2491,34 @@ Status HloEvaluator::HandleCall(HloInstruction* call) { return Status::OK(); } +Status HloEvaluator::HandleConditional(HloInstruction* conditional) { + const auto& pred = GetEvaluatedLiteralFor(conditional->operand(0)); + const auto& true_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(1)); + const auto& false_computation_arg = + GetEvaluatedLiteralFor(conditional->operand(2)); + + auto* true_computation = conditional->true_computation(); + auto* false_computation = conditional->false_computation(); + + auto result = Literal::CreateFromShape(conditional->shape()); + HloEvaluator embedded_evaluator; + if (pred.Get({})) { + result = embedded_evaluator + .Evaluate(*true_computation, + {&true_computation_arg}) + .ConsumeValueOrDie(); + } else { + result = embedded_evaluator + .Evaluate(*false_computation, + {&false_computation_arg}) + .ConsumeValueOrDie(); + } + + evaluated_[conditional] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index c65d9915e3..fc82011630 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -153,6 +153,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCopy(HloInstruction* copy) override; + Status 
HandleConditional(HloInstruction* conditional) override; + Status HandleCall(HloInstruction* call) override; private: diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index f3ecfc1604..19b3dfae4e 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -478,6 +478,7 @@ xla_test( xla_test( name = "conditional_test", srcs = ["conditional_test.cc"], + tags = ["enable_for_xla_interpreter"], deps = [ "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/client:computation_builder", -- GitLab From d888a77dc31bb45dfd0416fa9202c83206f2d07e Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 17:56:15 -0800 Subject: [PATCH 193/884] Support configurable stats publishers in the grpc server. PiperOrigin-RevId: 187110497 --- .../distributed_runtime/rpc/grpc_server_lib.cc | 15 ++++++++++++--- .../distributed_runtime/rpc/grpc_server_lib.h | 6 ++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc index c4ac92d809..a6f4be3eaf 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.cc @@ -106,7 +106,8 @@ GrpcServer::~GrpcServer() { Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, - const WorkerCreationFunction& worker_func) { + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory) { mutex_lock l(mu_); CHECK_EQ(state_, NEW); master_env_.env = env_; @@ -218,7 +219,7 @@ Status GrpcServer::Init( master_env_.ops = OpRegistry::Global(); master_env_.worker_cache = worker_cache; master_env_.master_session_factory = - [config]( + [config, stats_factory]( SessionOptions options, const MasterEnv* env, std::unique_ptr>> remote_devs, std::unique_ptr worker_cache, @@ -226,7 +227,7 
@@ Status GrpcServer::Init( options.config.MergeFrom(config); return new MasterSession(options, env, std::move(remote_devs), std::move(worker_cache), std::move(device_set), - CreateNoOpStatsPublisher); + stats_factory); }; master_env_.worker_cache_factory = [this](const WorkerCacheFactoryOptions& options, @@ -241,6 +242,14 @@ Status GrpcServer::Init( return Status::OK(); } +Status GrpcServer::Init( + ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func) { + return Init(std::move(service_func), rendezvous_mgr_func, worker_func, + CreateNoOpStatsPublisher); +} + Status GrpcServer::Init( ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func) { diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h index 8b12ac1461..7c2f06f618 100644 --- a/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h +++ b/tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h @@ -22,6 +22,7 @@ limitations under the License. 
#include "grpc++/security/credentials.h" #include "tensorflow/core/common_runtime/process_util.h" +#include "tensorflow/core/common_runtime/stats_publisher_interface.h" #include "tensorflow/core/distributed_runtime/master_env.h" #include "tensorflow/core/distributed_runtime/rpc/async_service_interface.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_channel.h" @@ -68,6 +69,11 @@ class GrpcServer : public ServerInterface { const string target() const override; protected: + Status Init(ServiceInitFunction service_func, + const RendezvousMgrCreationFunction& rendezvous_mgr_func, + const WorkerCreationFunction& worker_func, + const StatsPublisherFactory& stats_factory); + Status Init(ServiceInitFunction service_func, const RendezvousMgrCreationFunction& rendezvous_mgr_func, const WorkerCreationFunction& worker_func); -- GitLab From 7a2ba8edbaa6491ff33ae1412d9ba45e80c2cc3c Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Mon, 26 Feb 2018 18:04:55 -0800 Subject: [PATCH 194/884] Modify retrain script to output TFLite compatible quantized models. -Also fix flaky input name selection introduced by last PR. -Also rely on tf.contrib.quantize to do graph transformations. -Also, update retrain script to use new float mobilenet_v1 and quantized mobilenet_v1 models. 
PiperOrigin-RevId: 187111533 --- .../examples/image_retraining/retrain.py | 317 +++++++++++------- .../examples/image_retraining/retrain_test.py | 44 ++- 2 files changed, 229 insertions(+), 132 deletions(-) diff --git a/tensorflow/examples/image_retraining/retrain.py b/tensorflow/examples/image_retraining/retrain.py index 25e09fecbf..99a71206ac 100644 --- a/tensorflow/examples/image_retraining/retrain.py +++ b/tensorflow/examples/image_retraining/retrain.py @@ -75,13 +75,16 @@ python tensorflow/examples/image_retraining/retrain.py \ --image_dir ~/flower_photos --architecture mobilenet_1.0_224 ``` -Run quantized version of mobilenet: +Run mobilenet, instrumented for quantization: ```bash python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quantized + --image_dir ~/flower_photos/ --architecture mobilenet_1.0_224_quant ``` +These instrumented models can be converted to fully quantized mobile models via +TensorFlow Lite. + There are 32 different Mobilenet models to choose from, with a variety of file size and latency options. The first number can be '1.0', '0.75', '0.50', or '0.25' to control the size, and the second controls the input image size, either @@ -121,7 +124,6 @@ import numpy as np from six.moves import urllib import tensorflow as tf -from tensorflow.contrib.quantize.python import quant_ops from tensorflow.python.framework import graph_util from tensorflow.python.framework import tensor_shape from tensorflow.python.platform import gfile @@ -135,6 +137,9 @@ FLAGS = None # need to update these to reflect the values in the network you're using. MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1 # ~134M +# The location where variable checkpoints will be stored. +CHECKPOINT_NAME = '/tmp/_retrain_checkpoint' + def create_image_lists(image_dir, testing_percentage, validation_percentage): """Builds a list of training images from the file system. 
@@ -745,9 +750,9 @@ def variable_summaries(var): tf.summary.histogram('histogram', var) -def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, - bottleneck_tensor_size, quantize_layer): - """Adds a new softmax and fully-connected layer for training. +def add_final_retrain_ops(class_count, final_tensor_name, bottleneck_tensor, + bottleneck_tensor_size, quantize_layer, is_training): + """Adds a new softmax and fully-connected layer for training and eval. We need to retrain the top layer to identify our new classes, so this function adds the right operations to the graph, along with some variables to hold the @@ -763,7 +768,9 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, bottleneck_tensor: The output of the main CNN graph. bottleneck_tensor_size: How many entries in the bottleneck vector. quantize_layer: Boolean, specifying whether the newly added layer should be - quantized. + instrumented for quantization. + is_training: Boolean, specifying whether the newly added layer is for training + or eval. Returns: The tensors for the training and cross entropy results, and tensors for the @@ -778,50 +785,41 @@ def add_final_training_ops(class_count, final_tensor_name, bottleneck_tensor, ground_truth_input = tf.placeholder( tf.int64, [None], name='GroundTruthInput') - # Organizing the following ops as `final_training_ops` so they're easier - # to see in TensorBoard - layer_name = 'final_training_ops' + # Organizing the following ops so they are easier to see in TensorBoard. 
+ layer_name = 'final_retrain_ops' with tf.name_scope(layer_name): with tf.name_scope('weights'): initial_value = tf.truncated_normal( [bottleneck_tensor_size, class_count], stddev=0.001) layer_weights = tf.Variable(initial_value, name='final_weights') - if quantize_layer: - quantized_layer_weights = quant_ops.MovingAvgQuantize( - layer_weights, is_training=True) - variable_summaries(quantized_layer_weights) - variable_summaries(layer_weights) + with tf.name_scope('biases'): layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases') - if quantize_layer: - quantized_layer_biases = quant_ops.MovingAvgQuantize( - layer_biases, is_training=True) - variable_summaries(quantized_layer_biases) - variable_summaries(layer_biases) with tf.name_scope('Wx_plus_b'): - if quantize_layer: - logits = tf.matmul(bottleneck_input, - quantized_layer_weights) + quantized_layer_biases - logits = quant_ops.MovingAvgQuantize( - logits, - init_min=-32.0, - init_max=32.0, - is_training=True, - num_bits=8, - narrow_range=False, - ema_decay=0.5) - tf.summary.histogram('pre_activations', logits) - else: - logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases - tf.summary.histogram('pre_activations', logits) + logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases + tf.summary.histogram('pre_activations', logits) final_tensor = tf.nn.softmax(logits, name=final_tensor_name) + # The tf.contrib.quantize functions rewrite the graph in place for + # quantization. The imported model graph has already been rewritten, so upon + # calling these rewrites, only the newly added final layer will be + # transformed. + if quantize_layer: + if is_training: + tf.contrib.quantize.create_training_graph() + else: + tf.contrib.quantize.create_eval_graph() + tf.summary.histogram('activations', final_tensor) + # If this is an eval graph, we don't need to add loss ops or an optimizer. 
+ if not is_training: + return None, None, bottleneck_input, ground_truth_input, final_tensor + with tf.name_scope('cross_entropy'): cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy( labels=ground_truth_input, logits=logits) @@ -857,13 +855,91 @@ def add_evaluation_step(result_tensor, ground_truth_tensor): return evaluation_step, prediction -def save_graph_to_file(sess, graph, graph_file_name): +def run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor): + """Runs a final evaluation on an eval graph using the test data set. + + Args: + sess: Session for the train graph. + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + image_lists: Dictionary of training images for each label. + jpeg_data_tensor: The layer to feed jpeg image data into. + decoded_image_tensor: The output of decoding and resizing the image. + resized_image_tensor: The input node of the recognition graph. + bottleneck_tensor: The bottleneck output layer of the CNN graph. 
+ """ + (sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) = build_eval_session(model_info, class_count) + + test_bottlenecks, test_ground_truth, test_filenames = ( + get_random_cached_bottlenecks(sess, image_lists, FLAGS.test_batch_size, + 'testing', FLAGS.bottleneck_dir, + FLAGS.image_dir, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor, FLAGS.architecture)) + test_accuracy, predictions = sess.run( + [evaluation_step, prediction], + feed_dict={ + bottleneck_input: test_bottlenecks, + ground_truth_input: test_ground_truth + }) + tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % + (test_accuracy * 100, len(test_bottlenecks))) + + if FLAGS.print_misclassified_test_images: + tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') + for i, test_filename in enumerate(test_filenames): + if predictions[i] != test_ground_truth[i]: + tf.logging.info('%70s %s' % (test_filename, + list(image_lists.keys())[predictions[i]])) + + +def build_eval_session(model_info, class_count): + """Builds an restored eval session without train operations for exporting. + + Args: + model_info: Model info dictionary from create_model_info() + class_count: Number of classes + + Returns: + Eval session containing the restored eval graph. + The bottleneck input, ground truth, eval step, and prediction tensors. + """ + # If quantized, we need to create the correct eval graph for exporting. + eval_graph, bottleneck_tensor, _ = create_model_graph(model_info) + + eval_sess = tf.Session(graph=eval_graph) + with eval_graph.as_default(): + # Add the new layer for exporting. + (_, _, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + False) + + # Now we need to restore the values from the training graph to the eval + # graph. 
+ tf.train.Saver().restore(eval_sess, CHECKPOINT_NAME) + + evaluation_step, prediction = add_evaluation_step(final_tensor, + ground_truth_input) + + return (eval_sess, bottleneck_input, ground_truth_input, evaluation_step, + prediction) + + +def save_graph_to_file(graph, graph_file_name, model_info, class_count): + """Saves a graph to file, creating a valid quantized one if necessary.""" + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + output_graph_def = graph_util.convert_variables_to_constants( sess, graph.as_graph_def(), [FLAGS.final_tensor_name]) with gfile.FastGFile(graph_file_name, 'wb') as f: f.write(output_graph_def.SerializeToString()) - return def prepare_file_system(): @@ -916,11 +992,10 @@ def create_model_info(architecture): return None version_string = parts[1] if (version_string != '1.0' and version_string != '0.75' and - version_string != '0.50' and version_string != '0.25'): + version_string != '0.5' and version_string != '0.25'): tf.logging.error( - """"The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25', - but found '%s' for architecture '%s'""", - version_string, architecture) + """"The Mobilenet version should be '1.0', '0.75', '0.5', or '0.25', + but found '%s' for architecture '%s'""", version_string, architecture) return None size_string = parts[2] if (size_string != '224' and size_string != '192' and @@ -933,35 +1008,26 @@ def create_model_info(architecture): if len(parts) == 3: is_quantized = False else: - if parts[3] != 'quantized': + if parts[3] != 'quant': tf.logging.error( "Couldn't understand architecture suffix '%s' for '%s'", parts[3], architecture) return None is_quantized = True + data_url = 'http://download.tensorflow.org/models/mobilenet_v1_2018_02_22/' + model_name = 'mobilenet_v1_' + version_string + '_' + size_string if is_quantized: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_quantized_frozen.tgz' 
- bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'Placeholder:0' - model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + - '_quantized_frozen') - model_base_name = 'quantized_frozen_graph.pb' - - else: - data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' - data_url += version_string + '_' + size_string + '_frozen.tgz' - bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' - resized_input_tensor_name = 'input:0' - model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string - model_base_name = 'frozen_graph.pb' + model_name += '_quant' + data_url += model_name + '.tgz' + bottleneck_tensor_name = 'MobilenetV1/Predictions/Reshape:0' + resized_input_tensor_name = 'input:0' + model_file_name = model_name + '_frozen.pb' bottleneck_tensor_size = 1001 input_width = int(size_string) input_height = int(size_string) input_depth = 3 - model_file_name = os.path.join(model_dir_name, model_base_name) input_mean = 127.5 input_std = 127.5 else: @@ -1011,43 +1077,45 @@ def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, return jpeg_data, mul_image -def export_model(sess, architecture, saved_model_dir): +def export_model(model_info, class_count, saved_model_dir): """Exports model for serving. Args: - sess: Current active TensorFlow Session. - architecture: Model architecture. + model_info: The modelinfo for the current model. + class_count: The number of classes. saved_model_dir: Directory in which to save exported model and variables. 
""" - if architecture == 'inception_v3': - input_tensor = 'DecodeJpeg/contents:0' - elif architecture.startswith('mobilenet_'): - input_tensor = 'input:0' - else: - raise ValueError('Unknown architecture', architecture) - in_image = sess.graph.get_tensor_by_name(input_tensor) - inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} - - out_classes = sess.graph.get_tensor_by_name('final_result:0') - outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)} + # The SavedModel should hold the eval graph. + sess, _, _, _, _ = build_eval_session(model_info, class_count) + graph = sess.graph + with graph.as_default(): + input_tensor = model_info['resized_input_tensor_name'] + in_image = sess.graph.get_tensor_by_name(input_tensor) + inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)} + + out_classes = sess.graph.get_tensor_by_name('final_result:0') + outputs = { + 'prediction': tf.saved_model.utils.build_tensor_info(out_classes) + } - signature = tf.saved_model.signature_def_utils.build_signature_def( - inputs=inputs, - outputs=outputs, - method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) + signature = tf.saved_model.signature_def_utils.build_signature_def( + inputs=inputs, + outputs=outputs, + method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME) - legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') + legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op') - # Save out the SavedModel. - builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) - builder.add_meta_graph_and_variables( - sess, [tf.saved_model.tag_constants.SERVING], - signature_def_map={ - tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: - signature - }, - legacy_init_op=legacy_init_op) - builder.save() + # Save out the SavedModel. 
+ builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir) + builder.add_meta_graph_and_variables( + sess, [tf.saved_model.tag_constants.SERVING], + signature_def_map={ + tf.saved_model.signature_constants. + DEFAULT_SERVING_SIGNATURE_DEF_KEY: + signature + }, + legacy_init_op=legacy_init_op) + builder.save() def main(_): @@ -1064,11 +1132,6 @@ def main(_): tf.logging.error('Did not recognize architecture flag') return -1 - # Set up the pre-trained graph. - maybe_download_and_extract(model_info['data_url']) - graph, bottleneck_tensor, resized_image_tensor = ( - create_model_graph(model_info)) - # Look at the folder structure, and create lists of all the images. image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage, FLAGS.validation_percentage) @@ -1087,6 +1150,19 @@ def main(_): FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale, FLAGS.random_brightness) + # Set up the pre-trained graph. + maybe_download_and_extract(model_info['data_url']) + graph, bottleneck_tensor, resized_image_tensor = ( + create_model_graph(model_info)) + + # Add the new layer that we'll be training. + with graph.as_default(): + (train_step, cross_entropy, bottleneck_input, + ground_truth_input, final_tensor) = add_final_retrain_ops( + class_count, FLAGS.final_tensor_name, bottleneck_tensor, + model_info['bottleneck_tensor_size'], model_info['quantize_layer'], + True) + with tf.Session(graph=graph) as sess: # Set up the image decoding sub-graph. jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( @@ -1110,15 +1186,8 @@ def main(_): decoded_image_tensor, resized_image_tensor, bottleneck_tensor, FLAGS.architecture) - # Add the new layer that we'll be training. 
- (train_step, cross_entropy, bottleneck_input, ground_truth_input, - final_tensor) = add_final_training_ops( - len(image_lists.keys()), FLAGS.final_tensor_name, bottleneck_tensor, - model_info['bottleneck_tensor_size'], model_info['quantize_layer']) - # Create the operations we need to evaluate the accuracy of our new layer. - evaluation_step, prediction = add_evaluation_step( - final_tensor, ground_truth_input) + evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input) # Merge all the summaries and write them out to the summaries_dir merged = tf.summary.merge_all() @@ -1128,6 +1197,10 @@ def main(_): validation_writer = tf.summary.FileWriter( FLAGS.summaries_dir + '/validation') + # Create a train saver that is used to restore values into an eval graph + # when exporting models. + train_saver = tf.train.Saver() + # Set up all our weights to their initial default values. init = tf.global_variables_initializer() sess.run(init) @@ -1168,6 +1241,9 @@ def main(_): (datetime.now(), i, train_accuracy * 100)) tf.logging.info('%s: Step %d: Cross entropy = %f' % (datetime.now(), i, cross_entropy_value)) + # TODO(suharshs): Make this use an eval graph, to avoid quantization + # moving averages being updated by the validation set, though in + # practice this makes a negligible difference. validation_bottlenecks, validation_ground_truth, _ = ( get_random_cached_bottlenecks( sess, image_lists, FLAGS.validation_batch_size, 'validation', @@ -1190,42 +1266,32 @@ def main(_): if (intermediate_frequency > 0 and (i % intermediate_frequency == 0) and i > 0): + # If we want to do an intermediate save, save a checkpoint of the train + # graph, to restore into the eval graph. 
+ train_saver.save(sess, CHECKPOINT_NAME) intermediate_file_name = (FLAGS.intermediate_output_graphs_dir + 'intermediate_' + str(i) + '.pb') tf.logging.info('Save intermediate result to : ' + intermediate_file_name) - save_graph_to_file(sess, graph, intermediate_file_name) + save_graph_to_file(graph, intermediate_file_name, model_info, + class_count) + + # After training is complete, force one last save of the train checkpoint. + train_saver.save(sess, CHECKPOINT_NAME) # We've completed all our training, so run a final test evaluation on # some new images we haven't used before. - test_bottlenecks, test_ground_truth, test_filenames = ( - get_random_cached_bottlenecks( - sess, image_lists, FLAGS.test_batch_size, 'testing', - FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor, - decoded_image_tensor, resized_image_tensor, bottleneck_tensor, - FLAGS.architecture)) - test_accuracy, predictions = sess.run( - [evaluation_step, prediction], - feed_dict={bottleneck_input: test_bottlenecks, - ground_truth_input: test_ground_truth}) - tf.logging.info('Final test accuracy = %.1f%% (N=%d)' % - (test_accuracy * 100, len(test_bottlenecks))) - - if FLAGS.print_misclassified_test_images: - tf.logging.info('=== MISCLASSIFIED TEST IMAGES ===') - for i, test_filename in enumerate(test_filenames): - if predictions[i] != test_ground_truth[i]: - tf.logging.info('%70s %s' % - (test_filename, - list(image_lists.keys())[predictions[i]])) + run_final_eval(sess, model_info, class_count, image_lists, jpeg_data_tensor, + decoded_image_tensor, resized_image_tensor, + bottleneck_tensor) # Write out the trained graph and labels with the weights stored as # constants. 
- save_graph_to_file(sess, graph, FLAGS.output_graph) + save_graph_to_file(graph, FLAGS.output_graph, model_info, class_count) with gfile.FastGFile(FLAGS.output_labels, 'w') as f: f.write('\n'.join(image_lists.keys()) + '\n') - export_model(sess, FLAGS.architecture, FLAGS.saved_model_dir) + export_model(model_info, class_count, FLAGS.saved_model_dir) if __name__ == '__main__': @@ -1406,8 +1472,9 @@ if __name__ == '__main__': form 'mobilenet__[_quantized]'. For example, 'mobilenet_1.0_224' will pick a model that is 17 MB in size and takes 224 pixel input images, while 'mobilenet_0.25_128_quantized' will choose a much - less accurate, but smaller and faster network that's 920 KB on disk and - takes 128x128 images. See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html + smaller and less accurate model, taking 128x128 images, and instrumented + for eventual quantization via TensorFlow Lite. + See https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html for more information on Mobilenet.\ """) parser.add_argument( diff --git a/tensorflow/examples/image_retraining/retrain_test.py b/tensorflow/examples/image_retraining/retrain_test.py index 8b8dd45fd7..fb7324c58a 100644 --- a/tensorflow/examples/image_retraining/retrain_test.py +++ b/tensorflow/examples/image_retraining/retrain_test.py @@ -67,22 +67,52 @@ class ImageRetrainingTest(test_util.TensorFlowTestCase): self.assertIsNotNone(sess.graph.get_tensor_by_name('DistortResult:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOps(self, flags_mock): + def testAddFinalRetrainOps(self, flags_mock): with tf.Graph().as_default(): with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, False) + # Test creating final training op with quantization. 
+ retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, False, + False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) @tf.test.mock.patch.object(retrain, 'FLAGS', learning_rate=0.01) - def testAddFinalTrainingOpsQuantized(self, flags_mock): - with tf.Graph().as_default(): + def testAddFinalRetrainOpsQuantized(self, flags_mock): + # Ensure that the training and eval graph for quantized models are correctly + # created. + with tf.Graph().as_default() as g: + with tf.Session() as sess: + bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') + # Test creating final training op with quantization, set is_training to + # true. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, True) + self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + # Ensure that the inputs of each FakeQuant operations has 2 Assign + # operations in the training graph (Assign[Min,Max]Last, + # Assign[Min,Max]Ema) + self.assertEqual(2, + len([i for i in op.inputs if 'Assign' in i.name])) + self.assertEqual(found_fake_quant, 2) + with tf.Graph().as_default() as g: with tf.Session() as sess: bottleneck = tf.placeholder(tf.float32, [1, 1024], name='bottleneck') - # Test creating final training op with quantization - retrain.add_final_training_ops(5, 'final', bottleneck, 1024, True) + # Test creating final training op with quantization, set is_training to + # false. + retrain.add_final_retrain_ops(5, 'final', bottleneck, 1024, True, False) self.assertIsNotNone(sess.graph.get_tensor_by_name('final:0')) + found_fake_quant = 0 + for op in g.get_operations(): + if op.type == 'FakeQuantWithMinMaxVars': + found_fake_quant += 1 + for i in op.inputs: + # Ensure that no operations are Assign operation since this is the + # evaluation graph. 
+ self.assertTrue('Assign' not in i.name) + self.assertEqual(found_fake_quant, 2) def testAddEvaluationStep(self): with tf.Graph().as_default(): -- GitLab From 9139a571f852d06541b0c9f2343c701ac4b7d4ff Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 18:05:59 -0800 Subject: [PATCH 195/884] Remove old implementation of the adaptive shared batcher, the in flight batches implementation delivers similar performance but is simpler and requires less tuning. PiperOrigin-RevId: 187111685 --- .../adaptive_shared_batch_scheduler.h | 172 +----- .../adaptive_shared_batch_scheduler_test.cc | 488 +++++------------- 2 files changed, 140 insertions(+), 520 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 25c5f9cf42..661ed239d3 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -50,43 +50,26 @@ class ASBSQueue; // track of a number of queues (one per model or model version) which are // continuously enqueuing requests. The scheduler groups the requests into // batches which it periodically sends off for processing (see -// shared_batch_scheduler.h for more details). The AdaptiveSharedBatchScheduler -// prioritizes batches by age (i.e. the batch's oldest request) irrespective of -// queue or batch size. +// shared_batch_scheduler.h for more details). AdaptiveSharedBatchScheduler +// (ASBS) prioritizes batches by age (i.e. the batch's oldest request) +// irrespective of queue or batch size. // -// The scheduling decision currently exists in two flavors, controlled by the -// option use_in_flight_batches_implementation. It is expected that setting this -// option to true will give universally better results; after a period of -// testing to confirm, the old implementation will be removed. 
-// -// If use_in_flight_batches_implementation is set to true, the scheduler -// limits the number of batches which can be processed concurrently. If a new -// batch is created, and the number of in flight batches is below the limit, -// the next (i.e. oldest) batch is immediately scheduled. Similarly, when a -// batch finishes processing, the limit is rechecked, and another batch may be -// scheduled. To avoid the need to carefully tune the limit for workload, -// model type, platform, etc, it is dynamically adjusted in order to provide the -// lowest latency. -// -// If use_in_flight_batches_implementation is set to false, the scheduler will -// process the oldest batch at an adjustable rate, regardless of batch size. -// The user can provide feedback to help set this rate to achieve some goal -// (i.e. minimize overall latency, limit cpu usage, etc). The rate (or rather, -// the corresponding period) is adjusted each time a batch is processed, using -// an exponentially weighted moving average to smooth noisy feedback: -// ewma_feedback = ((N - 1) * ewma_feedback + feedback()) / N -// period *= (1 + K * emwa_feedback) +// ASBS tries to keep the system busy by maintaining an adjustable number of +// concurrently processed batches. If a new batch is created, and the number of +// in flight batches is below the target, the next (i.e. oldest) batch is +// immediately scheduled. Similarly, when a batch finishes processing, the +// target is rechecked, and another batch may be scheduled. To avoid the need +// to carefully tune the target for workload, model type, platform, etc, it is +// dynamically adjusted in order to provide the lowest average latency. // // Some potential use cases: // Hardware Accelerators (GPUs & TPUs) - If some phase of batch processing // involves serial processing by a device, from a latency perspective it is // desirable to keep the device evenly loaded, avoiding the need to wait for // the device to process prior batches. 
-// feedback = num_pending_on_device() - desired_pending. // CPU utilization - If the batch processing is cpu dominated, you can reap // latency gains when underutilized by increasing the processing rate, but // back the rate off when the load increases to avoid overload. -// feedback = cpu_rate() - desired_cpu_rate. template class AdaptiveSharedBatchScheduler @@ -101,13 +84,17 @@ class AdaptiveSharedBatchScheduler struct Options { // The name to use for the pool of batch threads. string thread_pool_name = {"batch_threads"}; - // Number of batch processing threads; equivalently the maximum number of - // concurrently running batches. + // Number of batch processing threads - the maximum value of + // in_flight_batches_limit_. It is recommended that this value be set by + // running the system under load, observing the learned value for + // in_flight_batches_limit_, and setting this maximum to ~ 2x the value. + // Under low load, in_flight_batches_limit_ has no substantial effect on + // latency and therefore undergoes a random walk. Unreasonably large values + // for num_batch_threads allows for large in_flight_batches_limit_, which + // will harm latency for some time once load increases again. int64 num_batch_threads = port::NumSchedulableCPUs(); // The environment to use (typically only overridden by test code). Env* env = Env::Default(); - // Which implementation to use (described in class comments above). - bool use_in_flight_batches_implementation = false; // Initial limit for number of batches being concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of // 3.2 results in an actual cap of 3 80% of the time, and 4 20% of the time. @@ -116,28 +103,6 @@ class AdaptiveSharedBatchScheduler // numbers will give less noisy latency measurements, but will be less // responsive to changes in workload. 
int64 batches_to_average_over = 1000; - - // TODO(kte): remove the rate based implementation and corresponding options - // below once testing confirms the superiority of the in flight batches - // implementation. - // Initial batch scheduling period in microseconds. Will be altered for - // non-zero rate_feedback. - double initial_scheduling_period_micros = 500; - // Minimum batch scheduling period in microseconds. Recommend setting this - // value greater than 0, otherwise it may take a while to recover from a - // sustained time of negative scheduling_period_feedback (which may occur - // under low load). - double min_scheduling_period_micros = 100; - // Maximum batch scheduling period in microseconds. - double max_scheduling_period_micros = 10000; - // Feedback function used to modify the scheduling period each time a batch - // is scheduled. Should return values roughly O(1), with positive values - // resulting in an increased period. - std::function scheduling_period_feedback{[] { return 0.; }}; - // To handle potentially noisy scheduling_period_feedback, the period is - // adjusted using an exponentially weighted moving average over the previous - // feedback_smoothing_batches batches. Must be greater than 0. - int64 feedback_smoothing_batches = 10; }; // Ownership is shared between the caller of Create() and any queues created @@ -171,17 +136,11 @@ class AdaptiveSharedBatchScheduler explicit AdaptiveSharedBatchScheduler(const Options& options); - // Batch scheduling function which runs every scheduling_period_ microseconds. - // Only used when options_.use_in_flight_batches_implementation == false. - void ProcessOneBatch(); - // Tracks processing latency and adjusts in_flight_batches_limit to minimize. - // Only used when options_.use_in_flight_batches_implementation == true. void CallbackWrapper(const internal::ASBSBatch* batch, BatchProcessor callback); // Schedules batch if in_flight_batches_limit_ is not met. 
- // Only used when options_.use_in_flight_batches_implementation == true. void MaybeScheduleNextBatch() EXCLUSIVE_LOCKS_REQUIRED(mu_); // Notifies scheduler of non-empty batch which is eligible for processing. @@ -212,41 +171,22 @@ class AdaptiveSharedBatchScheduler mutex mu_; - // Responsible for running ProcessOneBatch. PeriodicFunction was used in order - // to check for deletion so that the thread can be shut down. - // Only used when options_.use_in_flight_batches_implementation == false. - std::unique_ptr scheduling_thread_; - // Responsible for running the batch processing callbacks. std::unique_ptr batch_thread_pool_; - // Time interval in microseconds between successive ProcessOneBatch calls. - // Only used when options_.use_in_flight_batches_implementation == false. - double scheduling_period_; - - // Exponentially weighted moving average of - // options_.scheduling_period_feedback() evaluated in each ProcessOneBatch - // call. - // Only used when options_.use_in_flight_batches_implementation == false. - double ewma_feedback_ = 0; - // Limit on number of batches which can be concurrently processed. // Non-integer values correspond to probabilistic limits - i.e. a value of 3.2 // results in an actual cap of 3 80% of the time, and 4 20% of the time. - // Only used when options_.use_in_flight_batches_implementation == true. double in_flight_batches_limit_ GUARDED_BY(mu_); // Number of batches currently being processed. - // Only used when options_.use_in_flight_batches_implementation == true. int64 in_flight_batches_ GUARDED_BY(mu_) = 0; // RNG engine and distribution. - // Only used when options_.use_in_flight_batches_implementation == true. std::default_random_engine rand_engine_; std::uniform_real_distribution rand_double_; // Fields controlling the dynamic adjustment of in_flight_batches_limit_. - // Only used when options_.use_in_flight_batches_implementation == true. // Number of batches since the last in_flight_batches_limit_ adjustment. 
int64 batch_count_ GUARDED_BY(mu_) = 0; // Sum of processing latency for batches counted by batch_count_. @@ -348,32 +288,6 @@ Status AdaptiveSharedBatchScheduler::Create( return errors::InvalidArgument("num_batch_threads must be positive; was ", options.num_batch_threads); } - if (options.min_scheduling_period_micros < 0) { - return errors::InvalidArgument( - "min_scheduling_period_micros must be >= 0; was ", - options.min_scheduling_period_micros); - } - if (options.min_scheduling_period_micros > - options.initial_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be >= min_scheduling_period_micros (", - options.min_scheduling_period_micros, ")"); - } - if (options.initial_scheduling_period_micros > - options.max_scheduling_period_micros) { - return errors::InvalidArgument( - "initial_scheduling_period_micros (", - options.initial_scheduling_period_micros, - ") must be <= max_scheduling_period_micros (", - options.max_scheduling_period_micros, ")"); - } - if (options.feedback_smoothing_batches < 1) { - return errors::InvalidArgument( - "feedback_smoothing_batches must be positive; was ", - options.feedback_smoothing_batches); - } if (options.initial_in_flight_batches_limit > options.num_batch_threads) { return errors::InvalidArgument( "initial_in_flight_batches_limit (", @@ -401,20 +315,12 @@ template AdaptiveSharedBatchScheduler::AdaptiveSharedBatchScheduler( const Options& options) : options_(options), - scheduling_period_(options.initial_scheduling_period_micros), in_flight_batches_limit_(options.initial_in_flight_batches_limit), rand_double_(0.0, 1.0) { std::random_device device; rand_engine_.seed(device()); - PeriodicFunction::Options opts; - opts.thread_name_prefix = "scheduling_thread"; - opts.env = GetEnv(); batch_thread_pool_.reset(new thread::ThreadPool( GetEnv(), options.thread_pool_name, options.num_batch_threads)); - if 
(!options.use_in_flight_batches_implementation) { - scheduling_thread_.reset( - new PeriodicFunction([this] { ProcessOneBatch(); }, 0, opts)); - } } template @@ -443,9 +349,7 @@ void AdaptiveSharedBatchScheduler::AddBatch( const internal::ASBSBatch* batch) { mutex_lock l(mu_); batches_.push(batch); - if (options_.use_in_flight_batches_implementation) { - MaybeScheduleNextBatch(); - } + MaybeScheduleNextBatch(); } template @@ -523,44 +427,6 @@ void AdaptiveSharedBatchScheduler::CallbackWrapper( MaybeScheduleNextBatch(); } -template -void AdaptiveSharedBatchScheduler::ProcessOneBatch() { - static const double kFeedbackMultiplier = .001; - const internal::ASBSBatch* batch = nullptr; - BatchProcessor callback; - const int64 start_time_micros = GetEnv()->NowMicros(); - { - mutex_lock l(mu_); - if (!batches_.empty()) { - batch = batches_.top(); - batches_.pop(); - callback = queues_and_callbacks_[batch->queue()]; - } - } - if (batch != nullptr) { - double feedback = options_.scheduling_period_feedback(); - const int64 N = options_.feedback_smoothing_batches; - ewma_feedback_ = ((N - 1) * ewma_feedback_ + feedback) / N; - scheduling_period_ *= (1 + kFeedbackMultiplier * ewma_feedback_); - if (scheduling_period_ < options_.min_scheduling_period_micros) { - scheduling_period_ = options_.min_scheduling_period_micros; - } else if (scheduling_period_ > options_.max_scheduling_period_micros) { - scheduling_period_ = options_.max_scheduling_period_micros; - } - // Queue may destroy itself after ReleaseBatch is called. 
- batch->queue()->ReleaseBatch(batch); - batch_thread_pool_->Schedule([callback, batch] { - callback(std::unique_ptr>( - const_cast*>(batch))); - }); - } - const int64 sleep_time = - scheduling_period_ - (GetEnv()->NowMicros() - start_time_micros); - if (sleep_time > 0) { - GetEnv()->SleepForMicroseconds(sleep_time); - } -} - template bool AdaptiveSharedBatchScheduler::BatchCompare::operator()( const internal::ASBSBatch* a, diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc index 8ae8ca02ec..109234287e 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc @@ -64,59 +64,6 @@ std::unique_ptr CreateFakeClockAdvancerThread( })); } -TEST(AdaptiveSharedBatchSchedulerTest, Basic) { - for (const bool delete_scheduler_early : {false, true}) { - for (const bool delete_queue_1_early : {false, true}) { - int queue_0_tasks = 0; - auto queue_0_callback = - [&queue_0_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - }; - int queue_1_tasks = 0; - auto queue_1_callback = - [&queue_1_tasks](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - }; - { - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create({}, &scheduler)); - - // Create two queues. - std::unique_ptr> queue_0; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_0_callback, &queue_0)); - std::unique_ptr> queue_1; - TF_ASSERT_OK(scheduler->AddQueue({}, queue_1_callback, &queue_1)); - - if (delete_scheduler_early) { - // Delete our copy of the scheduler. 
The queues should keep it alive - // under the covers. - scheduler = nullptr; - } - // Submit tasks to the two queues, and (optionally) remove the queues. - TF_ASSERT_OK(ScheduleTask(1, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(2, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(3, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(4, queue_1.get())); - if (delete_queue_1_early) { - queue_1 = nullptr; - } - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - } - EXPECT_EQ(queue_0_tasks, 9); - EXPECT_EQ(queue_1_tasks, 6); - } - } -} - TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { using Scheduler = AdaptiveSharedBatchScheduler; std::shared_ptr scheduler; @@ -124,24 +71,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { options.num_batch_threads = 0; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 50; - options.max_scheduling_period_micros = 100; - options.initial_scheduling_period_micros = 1000; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.min_scheduling_period_micros = 100; - options.max_scheduling_period_micros = 50; - options.initial_scheduling_period_micros = 75; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); - options.feedback_smoothing_batches = 0; - EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); - options = Scheduler::Options(); options.initial_in_flight_batches_limit = 0.5; EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); options = Scheduler::Options(); @@ -153,301 +82,8 @@ TEST(AdaptiveSharedBatchSchedulerTest, BadOptions) { EXPECT_FALSE(Scheduler::Create(options, &scheduler).ok()); } 
-TEST(AdaptiveSharedBatchSchedulerTest, ObeysQueueOptions) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue_0; - std::unique_ptr> queue_1; - int queue_0_tasks = 0; - int queue_1_tasks = 0; - auto queue_0_callback = [&queue_0_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_0_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - auto queue_1_callback = [&queue_1_tasks, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - for (int i = 0; i < batch->num_tasks(); i++) { - queue_1_tasks += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 0; - // Queue must have max_enqueued_batchs > 1. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0).ok()); - queue_options.max_enqueued_batches = 2; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_0_callback, &queue_0)); - EXPECT_EQ(10, queue_0->max_task_size()); - queue_options.max_batch_size = 0; - // Queue must have max_batch_size > 0. - EXPECT_FALSE( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1).ok()); - queue_options.max_batch_size = 2; - queue_options.max_enqueued_batches = 1; - TF_ASSERT_OK( - scheduler->AddQueue(queue_options, queue_1_callback, &queue_1)); - - // Wait for scheduling_thread to sleep. 
- env.BlockUntilThreadsAsleep(1); - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(15, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - env.AdvanceByMicroseconds(1); - - // Task larger than max_batch_size shouldn't schedule. - EXPECT_FALSE(ScheduleTask(3, queue_1.get()).ok()); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - TF_ASSERT_OK(ScheduleTask(1, queue_1.get())); - env.AdvanceByMicroseconds(1); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(1, queue_1.get()).ok()); - - TF_ASSERT_OK(ScheduleTask(5, queue_0.get())); - // Exceeds max_enqueued_batches, shouldn't schedule. - EXPECT_FALSE(ScheduleTask(6, queue_0.get()).ok()); - TF_ASSERT_OK(ScheduleTask(4, queue_0.get())); - - // Batches should be processed in order from oldest to newest. - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 0); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 10); - EXPECT_EQ(queue_1_tasks, 2); - - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(queue_0_tasks, 19); - EXPECT_EQ(queue_1_tasks, 2); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, RateFeedback) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.min_scheduling_period_micros = 200; - options.max_scheduling_period_micros = 2000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 1; - std::shared_ptr> scheduler; 
- TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 6 batches. - for (int i = 0; i < 6; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -500; - env.AdvanceByMicroseconds(994); - env.BlockUntilThreadsAsleep(2); // scheduling period = 500 usec. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(500); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 901); - feedback = 0; - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 250 usec. - EXPECT_EQ(scheduled_items, 902); - feedback = 10000; // large feedback should hit max_scheduling_period. - env.AdvanceByMicroseconds(250); - env.BlockUntilThreadsAsleep(2); // scheduling period = 2000 usec. - EXPECT_EQ(scheduled_items, 903); - feedback = -10000; // large feedback should hit min_scheduling_period. - env.AdvanceByMicroseconds(1999); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 903); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); // scheduling period = 200 usec. 
- EXPECT_EQ(scheduled_items, 904); - env.AdvanceByMicroseconds(200); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 905); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, FeedbackSmoothing) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - double feedback = 0; - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - options.scheduling_period_feedback = [&feedback] { return feedback; }; - options.feedback_smoothing_batches = 3; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - - TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 4 batches. - for (int i = 0; i < 4; i++) { - TF_ASSERT_OK(ScheduleTask(900 + i, queue.get())); - env.AdvanceByMicroseconds(1); - } - feedback = -300; - env.AdvanceByMicroseconds(996); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 100, scheduling_period = 900. - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(899); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 900); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // ewma_feedback = 167, scheduling_period = 750. 
- EXPECT_EQ(scheduled_items, 901); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 901); - feedback = 1000 / 3.; - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - // emwa_feedback = 0, scheduling_period = 750. - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(749); - // No callback scheduled, only scheduling thread sleeping. - env.BlockUntilThreadsAsleep(1); - EXPECT_EQ(scheduled_items, 902); - env.AdvanceByMicroseconds(1); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 903); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { - test_util::FakeClockEnv env(Env::Default()); - Notification start_teardown, stop_teardown; - std::unique_ptr teardown_thread = - CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); - { - AdaptiveSharedBatchScheduler::Options options; - options.initial_scheduling_period_micros = 1000; - options.env = &env; - std::shared_ptr> scheduler; - TF_ASSERT_OK( - AdaptiveSharedBatchScheduler::Create(options, &scheduler)); - std::unique_ptr> queue; - int scheduled_items = 0; - auto queue_callback = [&scheduled_items, - &env](std::unique_ptr> batch) { - ASSERT_TRUE(batch->IsClosed()); - EXPECT_GT(batch->num_tasks(), 0); - scheduled_items = 0; - for (int i = 0; i < batch->num_tasks(); i++) { - scheduled_items += batch->task(i).size(); - } - env.SleepForMicroseconds(1); - }; - AdaptiveSharedBatchScheduler::QueueOptions queue_options; - queue_options.max_batch_size = 10; - queue_options.max_enqueued_batches = 10; - TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue)); - - // Wait for scheduling_thread to sleep. - env.BlockUntilThreadsAsleep(1); - // Enqueue 3 tasks. 
- EXPECT_EQ(queue->NumEnqueuedTasks(), 0); - EXPECT_EQ(queue->SchedulingCapacity(), 100); - TF_ASSERT_OK(ScheduleTask(5, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 1); - EXPECT_EQ(queue->SchedulingCapacity(), 95); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(6, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 2); - EXPECT_EQ(queue->SchedulingCapacity(), 84); - env.AdvanceByMicroseconds(1); - TF_ASSERT_OK(ScheduleTask(1, queue.get())); - EXPECT_EQ(queue->NumEnqueuedTasks(), 3); - EXPECT_EQ(queue->SchedulingCapacity(), 83); - - env.AdvanceByMicroseconds(998); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 5); - env.AdvanceByMicroseconds(1000); - env.BlockUntilThreadsAsleep(2); - EXPECT_EQ(scheduled_items, 7); - start_teardown.Notify(); - } - stop_teardown.Notify(); -} - -TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { +TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimit) { AdaptiveSharedBatchScheduler::Options options; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1000; mutex mu; @@ -476,7 +112,7 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesImplementation) { std::unique_ptr> queue; TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); - // Enqueue 3 batches. + // Enqueue 3 tasks, should result in 3 batches. 
for (int i = 0; i < 3; i++) { TF_ASSERT_OK(ScheduleTask(100, queue.get())); } @@ -490,7 +126,6 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { { AdaptiveSharedBatchScheduler::Options options; options.env = &env; - options.use_in_flight_batches_implementation = true; options.initial_in_flight_batches_limit = 2; options.batches_to_average_over = 1; auto queue_callback = [&env](std::unique_ptr> batch) { @@ -544,6 +179,125 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { } stop_teardown.Notify(); } + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteQueue) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::unique_ptr queue_deleter; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete queue, should be kept alive until empty. + queue_deleter.reset(Env::Default()->StartThread( + {}, "QueueDeleterThread", [&queue, &mu, &processed_batches] { + queue.reset(); + mutex_lock l(mu); + EXPECT_EQ(processed_batches, 2); + })); + // Give queue_deleter thread time to delete queue. 
+ Env::Default()->SleepForMicroseconds(1000); + finish_processing.Notify(); +} + +TEST(AdaptiveSharedBatchSchedulerTest, DeleteScheduler) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + finish_processing.WaitForNotification(); + mu.lock(); + processed_batches++; + mu.unlock(); + }; + + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. + for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // Delete scheduler, should be kept alive until queues are empty. + scheduler.reset(); + finish_processing.Notify(); + while (true) { + mutex_lock l(mu); + if (processed_batches == 2) break; + } +} + +TEST(AdaptiveSharedBatchSchedulerTest, QueueCapacityInfo) { + AdaptiveSharedBatchScheduler::Options options; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + EXPECT_GT(batch->num_tasks(), 0); + mu.lock(); + int batch_num = ++processed_batches; + mu.unlock(); + if (batch_num == 1) { + finish_processing.WaitForNotification(); + } + }; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + std::unique_ptr> queue; + TF_ASSERT_OK(scheduler->AddQueue({}, queue_callback, &queue)); + + // Enqueue 2 tasks, should result in 2 batches. 
+ for (int i = 0; i < 2; i++) { + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + } + // First batch was immediately processed, no longer counts as enqueued. + EXPECT_EQ(queue->NumEnqueuedTasks(), 1); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 900); + // Enqueue 2 more tasks, should fall in same batch. + TF_ASSERT_OK(ScheduleTask(100, queue.get())); + TF_ASSERT_OK(ScheduleTask(200, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 3); + EXPECT_EQ(queue->SchedulingCapacity(), 9 * 1000 + 600); + // Enqueue 1 more task, should create new batch. + TF_ASSERT_OK(ScheduleTask(700, queue.get())); + EXPECT_EQ(queue->NumEnqueuedTasks(), 4); + EXPECT_EQ(queue->SchedulingCapacity(), 8 * 1000 + 300); + finish_processing.Notify(); +} } // namespace anonymous } // namespace serving } // namespace tensorflow -- GitLab From 9ba9cf259b38af8425f4ee3b8967b811575fd149 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 19:46:27 -0800 Subject: [PATCH 196/884] Make sure rounding and handling of denormals in Grappler is the same as in TensorFlow. Enable constant folding for more types, particularly on GPUs. 
PiperOrigin-RevId: 187120456 --- tensorflow/core/grappler/op_types.cc | 6 +- .../grappler/optimizers/constant_folding.cc | 96 ++++++++++++------- tensorflow/core/kernels/constant_op.cc | 11 +++ 3 files changed, 74 insertions(+), 39 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index e225e99a9e..9b3755ddce 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -354,7 +354,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { return false; } const OpDef* op_def = nullptr; - Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); + const string& op_name = node.op(); + Status status = OpRegistry::Global()->LookUpOpDef(op_name, &op_def); if (!status.ok()) { return false; } @@ -368,7 +369,8 @@ bool IsFreeOfSideEffect(const NodeDef& node) { } } // Some nodes do in-place updates on regular tensor inputs. - if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace")) { + if (GetBoolAttr(node, "in_place") || GetBoolAttr(node, "inplace") || + StringPiece(op_name).starts_with("Inplace")) { return false; } return true; diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 10ca7dcce0..a5417aaa51 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -35,7 +35,9 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/strings/numbers.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/denormal.h" #include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/setround.h" #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/public/version.h" #include "tensorflow/core/util/bcast.h" @@ -51,7 +53,14 @@ class EigenThreadPoolWrapper : public Eigen::ThreadPoolInterface { explicit EigenThreadPoolWrapper(thread::ThreadPool* pool) : pool_(pool) {} ~EigenThreadPoolWrapper() override {} void Schedule(std::function fn) override { - pool_->Schedule(std::move(fn)); + auto wrapped = [=]() { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + fn(); + }; + pool_->Schedule(std::move(wrapped)); } int NumThreads() const override { return pool_->NumThreads(); } int CurrentThreadId() const override { return pool_->CurrentThreadId(); } @@ -292,16 +301,16 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // graph. const int node_count = graph_->node_size(); for (int i = 0; i < node_count; ++i) { - NodeDef& node = *graph_->mutable_node(i); - const string op = node.op(); + NodeDef* node = graph_->mutable_node(i); + const string op = node->op(); if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") { continue; } const std::vector& output = - properties.GetOutputProperties(node.name()); + properties.GetOutputProperties(node->name()); const std::vector& input = - properties.GetInputProperties(node.name()); + properties.GetInputProperties(node->name()); if (input.empty() || output.empty()) { continue; } @@ -328,35 +337,35 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // could have multiple outputs). 
if (op == "Shape" || op == "Size" || op == "Rank") { // Replace the node with the corresponding constant. - node.set_op("Const"); - node.clear_attr(); - (*node.mutable_attr())["dtype"].set_type(type); + node->set_op("Const"); + node->clear_attr(); + (*node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( - (*node.mutable_attr())["value"].mutable_tensor()); + (*node->mutable_attr())["value"].mutable_tensor()); // Turn the data input into a control dependency: this is needed to // ensure that the constant value will only be run in the // cases where the shape/rank/size would have been run in // the original graph. Additional inputs are extra control string ctrl_dep = - AddControlDependency(node.input(0), graph_, node_map_.get()); - node.set_input(0, ctrl_dep); - node_map_->AddOutput(NodeName(ctrl_dep), node.name()); + AddControlDependency(node->input(0), graph_, node_map_.get()); + node->set_input(0, ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), node->name()); } else { - auto outputs = node_map_->GetOutputs(node.name()); + auto outputs = node_map_->GetOutputs(node->name()); for (const auto& output : outputs) { for (int k = 0; k < output->input_size(); ++k) { int port; string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node.name() && port == j) { + if (node_name == node->name() && port == j) { // Create a const node as ShapeN's output if not already. 
const string const_name = - OptimizedNodeName(node, strings::StrCat("-matshapes-", j)); + OptimizedNodeName(*node, strings::StrCat("-matshapes-", j)); if (node_map_->GetNode(const_name) == nullptr) { NodeDef* added_node = graph_->add_node(); added_node->set_name(const_name); added_node->set_op("Const"); - added_node->set_device(node.device()); + added_node->set_device(node->device()); node_map_->AddNode(added_node->name(), added_node); (*added_node->mutable_attr())["dtype"].set_type(type); value.AsProtoTensorContent( @@ -364,7 +373,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { // We add a control dependency to the original ShapeN node, // so that the node will only be run if all inputs of the // original ShapeN node are run. - string ctrl_dep = AddControlDependency(node.name(), graph_, + string ctrl_dep = AddControlDependency(node->name(), graph_, node_map_.get()); *added_node->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); @@ -679,7 +688,7 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { nodes_whitelist_.find(node.name()) == nodes_whitelist_.end()) { return false; } - // Skip control flow nodes, they can't be folded + // Skip control flow nodes, they can't be folded. if (ModifiesFrameInfo(node)) { return false; } @@ -688,12 +697,16 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Skips ops that don't benefit from folding. - const string& op = node.op(); + // Don't fold stateful ops such as TruncatedNormal. + if (!IsFreeOfSideEffect(node)) { + return false; + } - if (op.find("Placeholder") == 0) { + // Skips ops that don't benefit from folding. 
+ if (IsPlaceholder(node)) { return false; } + const string& op = node.op(); if (op.find("Save") != string::npos || op.find("Restore") != string::npos || op.find("Reader") != string::npos) { return false; @@ -705,16 +718,12 @@ bool ConstantFolding::IsFoldable(const NodeDef& node) const { return false; } - // Don't fold stateful ops such as TruncatedNormal. const OpDef* op_def = nullptr; Status status = OpRegistry::Global()->LookUpOpDef(node.op(), &op_def); if (!status.ok()) { return false; } - if (op_def->is_stateful()) { - return false; - } - + // Don't fold ops without outputs. if (op_def->output_arg_size() == 0) { return false; } @@ -779,8 +788,11 @@ Status CreateConstantTensorAttrValue(DataType type, double value, SET_TENSOR_VAL_CASE(DT_FLOAT, float, float); SET_TENSOR_VAL_CASE(DT_DOUBLE, double, double); SET_TENSOR_VAL_CASE(DT_INT64, int64, int64); + SET_TENSOR_VAL_CASE(DT_UINT64, int64, int64); SET_TENSOR_VAL_CASE(DT_INT32, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT32, int32, int); SET_TENSOR_VAL_CASE(DT_INT16, int32, int); + SET_TENSOR_VAL_CASE(DT_UINT16, int32, int); SET_TENSOR_VAL_CASE(DT_INT8, int32, int); SET_TENSOR_VAL_CASE(DT_UINT8, int32, int); SET_TENSOR_VAL_CASE(DT_BOOL, bool, bool); @@ -843,10 +855,16 @@ Status ConstantFolding::CreateNodeDef(const string& name, POPULATE_TENSOR_PROTO(tensor, t, double, double); case DT_INT64: POPULATE_TENSOR_PROTO(tensor, t, int64, int64); + case DT_UINT64: + POPULATE_TENSOR_PROTO(tensor, t, uint64, int64); case DT_INT32: POPULATE_TENSOR_PROTO(tensor, t, int32, int); + case DT_UINT32: + POPULATE_TENSOR_PROTO(tensor, t, uint32, int); case DT_INT16: POPULATE_TENSOR_PROTO(tensor, t, int16, int); + case DT_UINT16: + POPULATE_TENSOR_PROTO(tensor, t, uint16, int); case DT_INT8: POPULATE_TENSOR_PROTO(tensor, t, int8, int); case DT_UINT8: @@ -1166,9 +1184,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { std::unordered_set processed_nodes; std::deque queue; for (int i = 0; i < graph_->node_size(); i++) { - auto 
node = graph_->mutable_node(i); - if (IsFoldable(*node)) { - queue.push_back(node); + if (IsFoldable(graph_->node(i))) { + queue.push_back(graph_->mutable_node(i)); } } while (!queue.empty()) { @@ -1203,8 +1220,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { int last = output->node_size() - 1; for (int i = output->node_size() - 1; i >= 0; --i) { const NodeDef& node = output->node(i); - auto outputs = node_map_->GetOutputs(node.name()); - if (outputs.empty()) { + auto fanout = node_map_->GetOutputs(node.name()); + if (fanout.empty()) { output->mutable_node()->SwapElements(i, last); last--; } @@ -1216,8 +1233,8 @@ Status ConstantFolding::FoldGraph(GraphDef* output) { // If no fetch nodes is provided, we conservatively // keep all nodes in the original graph in case users need to fetch // their values. - auto outputs = node_map_->GetOutputs(node.name()); - if (!outputs.empty() || !has_fetch_ || + auto fanout = node_map_->GetOutputs(node.name()); + if (!fanout.empty() || !has_fetch_ || nodes_to_preserve_.find(node.name()) != nodes_to_preserve_.end()) { auto added_node = output->add_node(); *added_node = node; @@ -1331,14 +1348,14 @@ bool ConstantFolding::IsOnes(const NodeDef& node) const { // IS_ONES_CASE(DT_HALF); IS_ONES_CASE(DT_FLOAT); IS_ONES_CASE(DT_DOUBLE); + IS_ONES_CASE(DT_COMPLEX64); + IS_ONES_CASE(DT_COMPLEX128); IS_ONES_CASE(DT_UINT8); IS_ONES_CASE(DT_INT8); IS_ONES_CASE(DT_UINT16); IS_ONES_CASE(DT_INT16); IS_ONES_CASE(DT_INT32); IS_ONES_CASE(DT_INT64); - IS_ONES_CASE(DT_COMPLEX64); - IS_ONES_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1362,14 +1379,14 @@ bool ConstantFolding::IsZeros(const NodeDef& node) const { // IS_ZEROS_CASE(DT_HALF); IS_ZEROS_CASE(DT_FLOAT); IS_ZEROS_CASE(DT_DOUBLE); + IS_ZEROS_CASE(DT_COMPLEX64); + IS_ZEROS_CASE(DT_COMPLEX128); IS_ZEROS_CASE(DT_UINT8); IS_ZEROS_CASE(DT_INT8); IS_ZEROS_CASE(DT_UINT16); IS_ZEROS_CASE(DT_INT16); IS_ZEROS_CASE(DT_INT32); 
IS_ZEROS_CASE(DT_INT64); - IS_ZEROS_CASE(DT_COMPLEX64); - IS_ZEROS_CASE(DT_COMPLEX128); default: VLOG(1) << "Unsupported type " << DataTypeString(dtype); return false; @@ -1869,6 +1886,11 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* output) { + // TensorFlow flushes denormals to zero and rounds to nearest, so we do + // the same here. + port::ScopedFlushDenormal flush; + port::ScopedSetRound round(FE_TONEAREST); + nodes_to_preserve_ = item.NodesToPreserve(); for (const auto& feed : item.feed) { feed_nodes_.insert(NodeName(feed.first)); diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index fdb03a5aae..312c1a41d3 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -105,7 +105,12 @@ REGISTER_KERNEL(GPU, int8); REGISTER_KERNEL(GPU, qint8); REGISTER_KERNEL(GPU, uint16); REGISTER_KERNEL(GPU, int16); +REGISTER_KERNEL(GPU, qint16); +REGISTER_KERNEL(GPU, quint16); +REGISTER_KERNEL(GPU, uint32); +REGISTER_KERNEL(GPU, qint32); REGISTER_KERNEL(GPU, int64); +REGISTER_KERNEL(GPU, uint64); REGISTER_KERNEL(GPU, complex64); REGISTER_KERNEL(GPU, complex128); REGISTER_KERNEL(GPU, bool); @@ -122,9 +127,15 @@ REGISTER_SYCL_KERNEL(SYCL, float); REGISTER_SYCL_KERNEL(SYCL, double); REGISTER_SYCL_KERNEL(SYCL, uint8); REGISTER_SYCL_KERNEL(SYCL, int8); +REGISTER_SYCL_KERNEL(SYCL, qint8); REGISTER_SYCL_KERNEL(SYCL, uint16); REGISTER_SYCL_KERNEL(SYCL, int16); +REGISTER_SYCL_KERNEL(SYCL, qint16); +REGISTER_SYCL_KERNEL(SYCL, quint16); +REGISTER_SYCL_KERNEL(SYCL, uint32); +REGISTER_SYCL_KERNEL(SYCL, qint32); REGISTER_SYCL_KERNEL(SYCL, int64); +REGISTER_SYCL_KERNEL(SYCL, uint64); REGISTER_SYCL_KERNEL(SYCL, bool); #undef REGISTER_SYCL_KERNEL #endif -- GitLab From ccefd0a1307ac5dd39d0a254c49ce71f8c2b93e2 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 26 Feb 2018 19:57:42 -0800 Subject: 
[PATCH 197/884] Fixes and simplification in the Keras training engine. - Explicitly disallow sample/class weighting in eager (it was never supported) - Remove tests for it (which were actually ignoring sample/class weights) - Make sample weight placeholders placeholder_with_default, and do not create all-ones numpy arrays to feed them when no sample weights are provided (this might lead to better performance) PiperOrigin-RevId: 187121215 --- .../python/keras/_impl/keras/backend.py | 11 +- .../python/keras/_impl/keras/callbacks.py | 20 +- .../keras/_impl/keras/engine/training.py | 151 +++--- .../_impl/keras/engine/training_eager.py | 17 +- .../_impl/keras/engine/training_eager_test.py | 436 ------------------ .../keras/_impl/keras/engine/training_test.py | 8 - 6 files changed, 110 insertions(+), 533 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index a2db05f6cf..2b75666b9e 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -2749,7 +2749,7 @@ class Function(object): self.updates_op = control_flow_ops.group(*updates_ops) self.name = name # additional tensor substitutions - self.feed_dict = session_kwargs.pop('feed_dict', {}) + self.feed_dict = session_kwargs.pop('feed_dict', None) # additional operations self.fetches = session_kwargs.pop('fetches', []) if not isinstance(self.fetches, list): @@ -2759,8 +2759,15 @@ class Function(object): def __call__(self, inputs): if not isinstance(inputs, (list, tuple)): raise TypeError('`inputs` should be a list or tuple.') - feed_dict = self.feed_dict.copy() + + if self.feed_dict: + feed_dict = self.feed_dict.copy() + else: + feed_dict = {} + for tensor, value in zip(self.inputs, inputs): + if value is None: + continue if is_sparse(tensor): sparse_coo = value.tocoo() indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), diff --git a/tensorflow/python/keras/_impl/keras/callbacks.py 
b/tensorflow/python/keras/_impl/keras/callbacks.py index f6c4661425..deb1e8867d 100644 --- a/tensorflow/python/keras/_impl/keras/callbacks.py +++ b/tensorflow/python/keras/_impl/keras/callbacks.py @@ -778,16 +778,24 @@ class TensorBoard(Callback): while i < val_size: step = min(self.batch_size, val_size - i) batch_val = [] - batch_val.append(val_data[0][i:i + step]) - batch_val.append(val_data[1][i:i + step]) - batch_val.append(val_data[2][i:i + step]) + batch_val.append(val_data[0][i:i + step] + if val_data[0] is not None else None) + batch_val.append(val_data[1][i:i + step] + if val_data[1] is not None else None) + batch_val.append(val_data[2][i:i + step] + if val_data[2] is not None else None) if self.model.uses_learning_phase: # do not slice the learning phase - batch_val = [x[i:i + step] for x in val_data[:-1]] + batch_val = [x[i:i + step] if x is not None else None + for x in val_data[:-1]] batch_val.append(val_data[-1]) else: - batch_val = [x[i:i + step] for x in val_data] - feed_dict = dict(zip(tensors, batch_val)) + batch_val = [x[i:i + step] if x is not None else None + for x in val_data] + feed_dict = {} + for key, val in zip(tensors, batch_val): + if val is not None: + feed_dict[key] = val result = self.sess.run([self.merged], feed_dict=feed_dict) summary_str = result[0] self.writer.add_summary(summary_str, epoch) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 57451ad470..63bea08ac5 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -40,6 +40,7 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.layers.base import _DeferredTensor +from tensorflow.python.ops import array_ops from 
tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.util.tf_export import tf_export @@ -225,9 +226,9 @@ def _check_array_lengths(inputs, targets, weights=None): # return a set with the variation between # different shapes, with None => 0 if x is None: - return {0} + return {} else: - return set([0 if y is None else y.shape[0] for y in x]) + return set([y.shape[0] for y in x if y is not None]) set_x = set_of_lengths(inputs) set_y = set_of_lengths(targets) @@ -259,7 +260,8 @@ def _check_array_lengths(inputs, targets, weights=None): def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): """Does validation on the compatibility of targets and loss functions. - This helps prevent users from using loss functions incorrectly. + This helps prevent users from using loss functions incorrectly. This check + is purely for UX purposes. Arguments: targets: list of Numpy arrays of targets. 
@@ -275,7 +277,7 @@ def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): losses.categorical_crossentropy } for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None: + if y is None or loss is None or tensor_util.is_tensor(y): continue if loss is losses.categorical_crossentropy: if y.shape[-1] == 1: @@ -507,10 +509,7 @@ def _standardize_weights(y, (existing_classes - existing_class_weight)) return weights else: - if sample_weight_mode is None: - return np.ones((y.shape[0],), dtype=K.floatx()) - else: - return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx()) + return None @tf_export('keras.models.Model', 'keras.Model') @@ -862,12 +861,12 @@ class Model(Network): sample_weights.append(None) else: if sample_weight_mode == 'temporal': - sample_weights.append( - K.placeholder(ndim=2, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [[1.]], shape=[None, None], name=name + '_sample_weights')) sample_weight_modes.append('temporal') else: - sample_weights.append( - K.placeholder(ndim=1, name=name + '_sample_weights')) + sample_weights.append(array_ops.placeholder_with_default( + [1.], shape=[None], name=name + '_sample_weights')) sample_weight_modes.append(None) self.sample_weight_modes = sample_weight_modes self._feed_sample_weight_modes = [] @@ -1314,7 +1313,7 @@ class Model(Network): for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. 
ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1424,7 +1423,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): + if ins and isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -1518,7 +1517,7 @@ class Model(Network): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): + if isinstance(ins[-1], int): # Do not slice the training phase flag. ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] else: @@ -2070,10 +2069,6 @@ class Model(Network): val_y, sample_weight=val_sample_weight, batch_size=batch_size) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights elif validation_split and 0. < validation_split < 1.: do_validation = True @@ -2085,36 +2080,34 @@ class Model(Network): y, val_y = (slice_arrays(y, 0, split_at), slice_arrays(y, split_at)) sample_weights, val_sample_weights = (slice_arrays( sample_weights, 0, split_at), slice_arrays(sample_weights, split_at)) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0.] - else: - val_ins = val_x + val_y + val_sample_weights - elif validation_steps: + val_x = [] + val_y = [] + val_sample_weights = [] do_validation = True - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = [0.] - - # Prepare input arrays and training function. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] 
- else: - ins = x + y + sample_weights # Prepare display labels. out_labels = self.metrics_names if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + if do_validation: + if any([w is not None for w in val_sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported' + ' when eager execution is enabled, for now.') callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + val_ins = val_x + val_y else: callback_metrics = copy.copy(out_labels) return training_eager.fit_loop( self, - ins, + x + y, out_labels=out_labels, batch_size=batch_size, epochs=epochs, @@ -2127,18 +2120,25 @@ class Model(Network): steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) else: + # Prepare input arrays and training function. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() f = self.train_function if do_validation: - if context.in_graph_mode(): - self._make_test_function() - val_f = self.test_function - else: - val_f = None + self._make_test_function() + val_f = self.test_function callback_metrics = copy.copy(out_labels) + [ 'val_' + n for n in out_labels ] + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + val_ins = val_x + val_y + val_sample_weights + [0] + else: + val_ins = val_x + val_y + val_sample_weights else: val_f = None callback_metrics = copy.copy(out_labels) @@ -2229,16 +2229,20 @@ class Model(Network): y, sample_weight=sample_weight, batch_size=batch_size) - # Prepare inputs, delegate logic to `_test_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] 
- else: - ins = x + y + sample_weights if context.in_eager_mode(): + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') return training_eager.test_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x + y, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_test_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights + self._make_test_function() f = self.test_function return self._test_loop( @@ -2276,16 +2280,16 @@ class Model(Network): 'argument.') x, _, _ = self._standardize_user_data(x) - # Prepare inputs, delegate logic to `_predict_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] - else: - ins = x - if context.in_eager_mode(): return training_eager.predict_loop( - self, ins, batch_size=batch_size, verbose=verbose, steps=steps) + self, x, batch_size=batch_size, verbose=verbose, steps=steps) else: + # Prepare inputs, delegate logic to `_predict_loop`. + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() f = self.predict_function @@ -2327,20 +2331,26 @@ class Model(Network): and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. + Raises: + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight, class_weight=class_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1.] 
- else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.train_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.train_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [1] + else: + ins = x + y + sample_weights + self._make_train_function() outputs = self.train_function(ins) @@ -2377,18 +2387,21 @@ class Model(Network): the display labels for the scalar outputs. Raises: - ValueError: in case of invalid arguments. + ValueError: In case of invalid user-provided arguments. """ x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0.] - else: - ins = x + y + sample_weights if context.in_eager_mode(): - outputs = training_eager.test_on_batch(self, ins) + if any([w is not None for w in sample_weights]): + raise ValueError('`sample_weight` and `class_weight` is not supported ' + 'when eager execution is enabled, for now.') + outputs = training_eager.test_on_batch(self, x + y) else: + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + y + sample_weights + [0] + else: + ins = x + y + sample_weights self._make_test_function() outputs = self.test_function(ins) @@ -2408,14 +2421,9 @@ class Model(Network): """ x, _, _ = self._standardize_user_data(x) - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0.] 
- else: - ins = x - if context.in_eager_mode(): ins_batch_converted = [] - for ib in ins: + for ib in x: ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] @@ -2426,6 +2434,11 @@ class Model(Network): return outs if context.in_graph_mode(): + if self.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = x + [0] + else: + ins = x + self._make_predict_function() outputs = self.predict_function(ins) if len(outputs) == 1: @@ -2643,7 +2656,7 @@ class Model(Network): val_data = val_x + val_y + val_sample_weights if self.uses_learning_phase and not isinstance( K.learning_phase(), int): - val_data += [0.] + val_data += [0] for cbk in callbacks: cbk.validation_data = val_data diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 282dd0dc0d..cdf189adef 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -139,6 +139,8 @@ def _model_loss(model, inputs, targets, training=False): model.output_names[i]) loss_metrics.append(K.mean(output_loss)) + # TODO(fchollet): support masking; in practice `_keras_mask` is never + # set in this context currently. mask = outs[i]._keras_mask # adapted from weighted_loss_fn if mask is not None: @@ -148,17 +150,7 @@ def _model_loss(model, inputs, targets, training=False): # to the number of unmasked samples. 
output_loss /= K.mean(mask) - # adapted from weighted_loss_fn - # apply sample weighting - if model.sample_weights: - # reduce score_array to same ndim as weight array - ndim = K.ndim(output_loss) - weight_ndim = K.ndim(model.sample_weights) - output_loss = K.mean(output_loss, axis=list(range(weight_ndim, ndim))) - output_loss *= model.sample_weights - output_loss /= K.mean(K.cast(K.not_equal(model.sample_weights, 0), - K.floatx())) - output_loss = K.mean(output_loss) + # TODO(fchollet): support sample weighting loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -231,7 +223,8 @@ def train_on_batch(model, ins): """ ins_batch_converted = [] for ib in ins: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) + if ib is not None: + ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) eager_model_inputs = [] eager_model_outputs = [] for i in range(len(model.inputs)): diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 3d94b7537f..550b86a71d 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -24,9 +24,7 @@ import numpy as np from tensorflow.python.framework import ops from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils -from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test -from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training.rmsprop import RMSPropOptimizer @@ -311,440 +309,6 @@ class TrainingTest(test.TestCase): optimizer='rms') -class LossWeightingTest(test.TestCase): - - def test_class_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = 
keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_test = y_test.copy() - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) - test_ids = np.where(int_y_test == np.array(weighted_class))[0] - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - class_weight=class_weight, - validation_data=(x_train, y_train, sample_weight)) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 2, - verbose=0, - class_weight=class_weight, - validation_split=0.1) - - model.train_on_batch( - x_train[:batch_size], y_train[:batch_size], class_weight=class_weight) - ref_score = model.evaluate(x_test, y_test, verbose=0) - score = model.evaluate( - x_test[test_ids, :], y_test[test_ids, :], verbose=0) - self.assertLess(score, ref_score) - - def test_sample_weights(self): - num_classes = 5 - batch_size = 5 - epochs = 5 - weighted_class = 3 - train_samples = 3000 - test_samples = 3000 - input_dim = 5 - - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_shape=(input_dim,))) - model.add(keras.layers.Activation('relu')) - model.add(keras.layers.Dense(num_classes)) - model.add(keras.layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - np.random.seed(43) - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight) - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs // 3, - verbose=0, - sample_weight=sample_weight, - validation_split=0.1) - model.train_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - model.test_on_batch( - x_train[:batch_size], - y_train[:batch_size], - sample_weight=sample_weight[:batch_size]) - - def test_temporal_sample_weights(self): - num_classes = 5 - weighted_class = 3 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - - np.random.seed(1337) - (_, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - - class_weight = dict([(i, 1.) for i in range(num_classes)]) - class_weight[weighted_class] = 2. - - sample_weight = np.ones((y_train.shape[0])) - sample_weight[int_y_train == weighted_class] = 2. 
- with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode='temporal') - - def test_class_weight_invalid_use_case(self): - num_classes = 5 - train_samples = 1000 - test_samples = 1000 - input_dim = 5 - timesteps = 3 - - model = keras.models.Sequential() - model.add( - keras.layers.TimeDistributed( - keras.layers.Dense(num_classes), - input_shape=(timesteps, input_dim))) - model.add(keras.layers.Activation('softmax')) - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001)) - - (x_train, y_train), _ = testing_utils.get_test_data( - train_samples=train_samples, - test_samples=test_samples, - input_shape=(input_dim,), - num_classes=num_classes) - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(y_train, num_classes) - class_weight = dict([(i, 1.) for i in range(num_classes)]) - - del class_weight[1] - with self.assertRaises(ValueError): - model.fit(x_train, y_train, - epochs=0, verbose=0, class_weight=class_weight) - - with self.assertRaises(ValueError): - model.compile( - loss='binary_crossentropy', - optimizer=RMSPropOptimizer(learning_rate=0.001), - sample_weight_mode=[]) - - # Build multi-output model - x = keras.Input((3,)) - y1 = keras.layers.Dense(4, name='1')(x) - y2 = keras.layers.Dense(4, name='2')(x) - model = keras.models.Model(x, [y1, y2]) - model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse') - x_np = np.random.random((10, 3)) - y_np = np.random.random((10, 4)) - w_np = np.random.random((10,)) - # This will work - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': w_np}) - # These will not - with self.assertRaises(ValueError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=[w_np]) - with self.assertRaises(TypeError): - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=w_np) - with self.assertRaises(ValueError): - bad_w_np = 
np.random.random((11,)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - with self.assertRaises(ValueError): - bad_w_np = np.random.random((10, 2, 2)) - model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) - - -class TestDynamicTrainability(test.TestCase): - - def test_trainable_warning(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3)) - model.trainable = False - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - model.trainable = True - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - - def test_trainable_argument(self): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=3, trainable=False)) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - # test with nesting - inputs = keras.layers.Input(shape=(3,)) - output = model(inputs) - model = keras.models.Model(inputs, output) - model.compile(RMSPropOptimizer(learning_rate=0.001), 'mse') - out = model.predict(x) - with test.mock.patch.object(logging, 'warning') as mock_log: - model.train_on_batch(x, y) - self.assertRegexpMatches(str(mock_log.call_args), - 'trainable weights is empty') - out_2 = model.predict(x) - self.assertAllClose(out, out_2) - - def test_layer_trainability_switch(self): - # with constructor argument, in 
Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, trainable=False, input_dim=1)) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Sequential - model = keras.models.Sequential() - layer = keras.layers.Dense(2, input_dim=1) - model.add(layer) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # with constructor argument, in Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2, trainable=False)(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, []) - - # by setting the `trainable` argument, in Model - x = keras.layers.Input(shape=(1,)) - layer = keras.layers.Dense(2) - y = layer(x) - model = keras.models.Model(x, y) - self.assertListEqual(model.trainable_weights, layer.trainable_weights) - layer.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_model_trainability_switch(self): - # a non-trainable model has no trainable weights - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - model = keras.models.Model(x, y) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - # same for Sequential - model = keras.models.Sequential() - model.add(keras.layers.Dense(2, input_dim=1)) - model.trainable = False - self.assertListEqual(model.trainable_weights, []) - - def test_nested_model_trainability(self): - - # a Sequential inside a Model - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = 
False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Sequential inside a Sequential - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(2, input_dim=1)) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Model - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - x = keras.layers.Input(shape=(1,)) - y = inner_model(x) - outer_model = keras.models.Model(x, y) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - # a Model inside a Sequential - x = keras.layers.Input(shape=(1,)) - y = keras.layers.Dense(2)(x) - inner_model = keras.models.Model(x, y) - outer_model = keras.models.Sequential() - outer_model.add(inner_model) - self.assertListEqual(outer_model.trainable_weights, - inner_model.trainable_weights) - inner_model.trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - inner_model.trainable = True - inner_model.layers[-1].trainable = False - self.assertListEqual(outer_model.trainable_weights, []) - - -class TestTrainingUtils(test.TestCase): - - def test_check_array_lengths(self): - keras.engine.training._check_array_lengths(None, None, None) - a_np = np.random.random((4, 3, 3)) - keras.engine.training._check_array_lengths(a_np, a_np, a_np) - keras.engine.training._check_array_lengths( - [a_np, a_np], [a_np, a_np], [a_np, a_np]) - 
keras.engine.training._check_array_lengths([None], [None], [None]) - - b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) - - def test_slice_arrays(self): - input_a = np.random.random((10, 3)) - slice_arrays(None) - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None, [1, 1], None, [1, 1]] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = None - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - - def test_fit_with_BatchNorm(self): - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, input_dim=4)) - model.add(keras.layers.BatchNormalization()) - model.add(keras.layers.Activation('tanh')) - model.add(keras.layers.Dropout(0.2)) - - input_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 10)) - - model.compile(loss='binary_crossentropy', optimizer=RMSPropOptimizer(0.001)) - model.fit(input_a_np, output_b_np, epochs=1, batch_size=5, verbose=0) - - def test_fit_with_regularization(self): - model = keras.models.Sequential() - with self.assertRaises(ValueError): - model.add( - keras.layers.Dense(4, input_dim=3, - kernel_regularizer=keras.regularizers.l2(0.01), - activity_regularizer=keras.regularizers.l1(0.01))) - - if __name__ == '__main__': # Bazel sets these environment variables to very long 
paths. # Tempfile uses them to create long paths, and in turn multiprocessing diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index 9651eb9f14..6ca5941e9a 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -1045,16 +1045,8 @@ class TestTrainingUtils(test.TestCase): keras.engine.training._check_array_lengths([None], [None], [None]) b_np = np.random.random((3, 4)) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, None, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths(a_np, a_np, None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [None], None) with self.assertRaises(ValueError): keras.engine.training._check_array_lengths([a_np], [b_np], None) - with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], None, [b_np]) def test_slice_arrays(self): input_a = np.random.random((10, 3)) -- GitLab From 78d10e5800a058c6d1865c5282aaa4094f7bc36d Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 26 Feb 2018 19:58:18 -0800 Subject: [PATCH 198/884] Fix bug in deserializing CondContexts. 
PiperOrigin-RevId: 187121244 --- tensorflow/python/ops/control_flow_ops.py | 11 ++++- tensorflow/python/training/saver_test.py | 49 ++++++++++++++++------- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index b16901effd..0815527c96 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1716,8 +1716,15 @@ class CondContext(ControlFlowContext): self._pivot = g.as_graph_element( ops.prepend_name_scope(context_def.pivot_name, import_scope)) self._branch = context_def.branch - super(CondContext, self).__init__( - values_def=context_def.values_def, import_scope=import_scope) + super(CondContext, self).__init__(values_def=context_def.values_def, + import_scope=import_scope) + # The predicate and pivot ops appear in self._values, but don't have self + # set as their control context. The __init__ call above will set self for + # all values, so manually override the predicate and pivot contexts here. + # pylint: disable=protected-access + self._pred.op._set_control_flow_context(self.outer_context) + self._pivot.op._set_control_flow_context(self.outer_context) + # pylint: enable=protected-access @property def pred(self): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index b366ed30f3..b758ceaab0 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2041,29 +2041,24 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) - def testNestedWhileLoops(self): - test_dir = self._get_test_dir("nested_whiles") + def _testWhileLoopAndGradientSerDes(self, outer_body_fn): + # Build a while loop with `outer_body_fn`, export it, and verify that it can + # be imported and the gradient can be built and run correctly. 
+ + test_dir = self._get_test_dir("nested_control_flow") filename = os.path.join(test_dir, "metafile") saver_ckpt = os.path.join(test_dir, "saver.ckpt") - # Create two simple nested while loops. + # Create while loop using `outer_body_fn`. with ops_lib.Graph().as_default(): - def body(i, x): - _, r = control_flow_ops.while_loop(lambda j, y: j < 3, - lambda j, y: (j + 1, y + x), - [0, 0]) - return i + 1, x + r - var = variables.Variable(0) var_name = var.name - - _, output = control_flow_ops.while_loop(lambda i, x: i < 5, body, + _, output = control_flow_ops.while_loop(lambda i, x: i < 5, outer_body_fn, [0, var]) output_name = output.name - init_op = variables.global_variables_initializer() - # Generate a MetaGraphDef containing the nested loops. + # Generate a MetaGraphDef containing the while loop. with session.Session() as sess: sess.run(init_op) sess.run(output) @@ -2071,8 +2066,8 @@ class MetaGraphTest(test.TestCase): saver.save(sess, saver_ckpt) saver.export_meta_graph(filename) - # Build and run the gradients of the nested while loop. We use this below - # to verify that the gradients are correct with an imported MetaGraphDef. + # Build and run the gradients of the while loop. We use this below to + # verify that the gradients are correct with an imported MetaGraphDef. grad = gradients_impl.gradients([output], [var]) with session.Session() as sess: sess.run(init_op) @@ -2096,6 +2091,30 @@ class MetaGraphTest(test.TestCase): actual_grad_value = sess.run(grad) self.assertEqual(expected_grad_value, actual_grad_value) + def testNestedWhileLoopsSerDes(self): + # Test two simple nested while loops. + def body(i, x): + _, r = control_flow_ops.while_loop(lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0]) + return i + 1, x + r + self._testWhileLoopAndGradientSerDes(body) + + def testNestedControlFlowSerDes(self): + # Test while loop in a cond in a while loop. 
+ # pylint: disable=g-long-lambda + def body(i, x): + cond_result = control_flow_ops.cond( + i > 0, + lambda: control_flow_ops.while_loop( + lambda j, y: j < 3, + lambda j, y: (j + 1, y + x), + [0, 0])[1], + lambda: x) + return i + 1, cond_result + # pylint: enable=g-long-lambda + self._testWhileLoopAndGradientSerDes(body) + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From 7b15f7a55dcd5e908211e86ec42b49136b1ccc25 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 26 Feb 2018 20:21:07 -0800 Subject: [PATCH 199/884] Add helpers to stream data from the GCE VM to a Cloud TPU. PiperOrigin-RevId: 187122870 --- tensorflow/contrib/tpu/BUILD | 28 +++ tensorflow/contrib/tpu/python/tpu/datasets.py | 192 ++++++++++++++++++ .../contrib/tpu/python/tpu/datasets_test.py | 181 +++++++++++++++++ 3 files changed, 401 insertions(+) create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets.py create mode 100644 tensorflow/contrib/tpu/python/tpu/datasets_test.py diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index c48e84ddfa..095b4821f1 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -163,6 +163,7 @@ py_library( ], srcs_version = "PY2AND3", deps = [ + ":datasets", ":profiler", ":tpu_py", "//tensorflow/contrib/tpu/proto:topology_proto_py", @@ -181,6 +182,33 @@ py_library( ], ) +py_library( + name = "datasets", + srcs = [ + "python/tpu/datasets.py", + ], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:dtypes", + "//tensorflow/python:function", + "//tensorflow/python:functional_ops", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:iterator_ops", + "//tensorflow/python/data/ops:readers", + ], +) + +tf_py_test( + name = "datasets_test", + srcs = ["python/tpu/datasets_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + ":datasets", + ], + grpc_enabled = 
True, +) + tf_py_test( name = "tpu_test", size = "small", diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py new file mode 100644 index 0000000000..29aea98542 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -0,0 +1,192 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ====================================== +"""Library of Cloud TPU helper functions for data loading.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.data.python.ops import batching +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function +from tensorflow.python.framework import ops +from tensorflow.python.ops import functional_ops + + +def _TextLineDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TextLineDataset(filename, buffer_size=buffer_size) + return dataset + + +def _TFRecordDataset(filename): + buffer_size = 8 * 1024 * 1024 # 8 MiB per file + dataset = readers.TFRecordDataset(filename, buffer_size=buffer_size) + return dataset + + +_FILETYPE_MAP = { + 'tfrecord': 
_TFRecordDataset, + 'textline': _TextLineDataset, + 'text': _TextLineDataset, +} + + +def StreamingFilesDataset(files, + filetype=None, + file_reader_job=None, + worker_job=None, + num_epochs=None, + filename_shuffle_buffer_size=None, + num_parallel_reads=None, + batch_transfer_size=None, + sloppy=None): + """StreamingFilesDataset constructs a dataset to stream from workers (GCE VM). + + Because Cloud TPUs are allocated over the network, a Cloud TPU cannot read + files local to your GCE VM. In order to train using files stored on your local + VM (e.g. on local SSD for extreme performance), use the StreamingFilesDataset + helper to generate a dataset to feed your Cloud TPU with files from your GCE + VM. + + The resulting dataset may return an OutOfRangeError if there are no files + found as a result of the fileglob expansion. + + Note: StreamingFilesDataset assumes that the session is using a + TPUClusterResolver and therefore has a worker and a coordinator job. File + loading will be done on the coordinator job. + + Args: + files: A string glob to match files, or a `tf.data.Dataset` generating file + names. + filetype: A string (one of 'tfrecord', 'textline', or 'text') or a + single-argument TensorFlow function that maps a filename to a dataset. + file_reader_job: An optional string that corresponds to the job that should + perform the file reads. + worker_job: An optional string that corresponds to the job that should + process the tensors (i.e. your GPU or TPU worker). + num_epochs: The number of epochs through the training set that should be + generated. By default, it will repeat infinitely. + filename_shuffle_buffer_size: An optional integer whose value controls the + shuffling of the file names. If you would like to read from the files in + the same order, set to 0 or False. + num_parallel_reads: An optional integer controlling the number of files to + read from concurrently. (Set to 1 for no parallelism.) 
+ batch_transfer_size: An optional integer controlling the batching used to + amortize the remote function invocation overhead. Set to a very large + number to increase throughput. Set to a very small number to reduce memory + consumption. Set to False to skip batching. + sloppy: (Optional.) If `True`, read input data as fast as possible, without + maintaining a deterministic order. Defaults to `False`. + Returns: + A `tf.data.Dataset` with an infinite stream of elements generated by a + parallel interleaving of the set of files matched (or generated) by `files` + with a type that is the output of the dataset specified by `filetype`. + + Raises: + ValueError: if any argument is not of the expected type. + """ + if filetype is None: + filetype = 'tfrecord' + + if isinstance(filetype, str): + if filetype not in _FILETYPE_MAP: + raise ValueError('Unexpected filetype: %s' % filetype) + reader_fn = _FILETYPE_MAP[filetype] + elif callable(filetype): + reader_fn = filetype + else: + raise ValueError('filetype should be a string or a callable') + + file_reader_job = file_reader_job or 'coordinator' + + worker_job = worker_job or 'worker' + + if filename_shuffle_buffer_size is None: + filename_shuffle_buffer_size = 4096 + + num_parallel_reads = num_parallel_reads or 8 + + if batch_transfer_size is None: + batch_transfer_size = 1024 + + if sloppy is None: + sloppy = False + + with ops.device('/job:%s' % file_reader_job): + if isinstance(files, str): + source_dataset = dataset_ops.Dataset.list_files(files) + elif isinstance(files, dataset_ops.Dataset): + source_dataset = files + else: + raise ValueError('files was not a string or a dataset: %s' % files) + + if filename_shuffle_buffer_size: + source_dataset = source_dataset.shuffle( + buffer_size=filename_shuffle_buffer_size) + + # NOTE: We perform the `repeat` on the source dataset, because the output + # dataset does not currently have enough information to recreate an iterator + # over the source dataset when it reaches the end. 
+ source_dataset = source_dataset.repeat(num_epochs) + + source_dataset = source_dataset.apply( + interleave_ops.parallel_interleave( + reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy)) + + if batch_transfer_size: + # Note: we can safely call batch_and_drop_remainder because we have an + # infinite stream of TFRecords. + source_dataset = source_dataset.apply( + batching.batch_and_drop_remainder(batch_transfer_size)) + + source_dataset = source_dataset.prefetch(1) + + source_iterator = source_dataset.make_one_shot_iterator() + source_handle = source_iterator.string_handle() + + @function.Defun(dtypes.string) + def LoadingFunc(h): + remote_iterator = iterator_ops.Iterator.from_string_handle( + h, source_dataset.output_types, source_dataset.output_shapes) + return remote_iterator.get_next() + + def MapFn(unused_input): + return functional_ops.remote_call( + args=[source_handle], + Tout=[dtypes.string], + f=LoadingFunc, + target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) + + with ops.device('/job:%s' % worker_job): + # TODO(saeta,mrry): Switch to using _GeneratorDataset. + + # identity = lambda x: x + # dummy = constant_op.constant(0) + # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn, + # identity) + + output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn) + output_dataset = output_dataset.prefetch(1) + + if batch_transfer_size: + # Undo the batching used during the transfer. + output_dataset = output_dataset.apply(batching.unbatch()).prefetch(1) + + return output_dataset diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py new file mode 100644 index 0000000000..2c40797792 --- /dev/null +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -0,0 +1,181 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TPU datasets tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +from tensorflow.contrib.tpu.python.tpu import datasets +from tensorflow.core.protobuf import cluster_pb2 +from tensorflow.core.protobuf import config_pb2 +from tensorflow.python.client import session +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import readers +from tensorflow.python.lib.io import python_io +from tensorflow.python.platform import test +from tensorflow.python.training import server_lib +from tensorflow.python.util import compat + +_NUM_FILES = 10 +_NUM_ENTRIES = 200 + + +class DatasetsTest(test.TestCase): + + def setUp(self): + super(DatasetsTest, self).setUp() + self._coord = server_lib.Server.create_local_server() + self._worker = server_lib.Server.create_local_server() + + self._cluster_def = cluster_pb2.ClusterDef() + worker_job = self._cluster_def.job.add() + worker_job.name = 'worker' + worker_job.tasks[0] = self._worker.target[len('grpc://'):] + coord_job = self._cluster_def.job.add() + coord_job.name = 'coordinator' + coord_job.tasks[0] = self._coord.target[len('grpc://'):] + + session_config = config_pb2.ConfigProto(cluster_def=self._cluster_def) + + self._sess = session.Session(self._worker.target, config=session_config) + 
+ def testTextLineDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'text_line.%d.txt' % i) + contents = [] + for j in range(_NUM_ENTRIES): + contents.append(compat.as_bytes('%d: %d' % (i, j))) + with open(filename, 'wb') as f: + f.write(b'\n'.join(contents)) + all_contents.extend(contents) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'text_line.*.txt'), filetype='text') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDataset(self): + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) + writer.close() + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'tf_record*'), filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testTFRecordDatasetFromDataset(self): + filenames = [] + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'tf_record.%d' % i) + filenames.append(filename) + writer = python_io.TFRecordWriter(filename) + for j in range(_NUM_ENTRIES): + record = compat.as_bytes('Record %d of file %d' % (j, i)) + writer.write(record) + all_contents.append(record) 
+ writer.close() + + filenames = dataset_ops.Dataset.from_tensor_slices(filenames) + + dataset = datasets.StreamingFilesDataset(filenames, filetype='tfrecord') + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testArbitraryReaderFunc(self): + + def MakeRecord(i, j): + return compat.as_bytes('%04d-%04d' % (i, j)) + + record_bytes = len(MakeRecord(10, 200)) + + all_contents = [] + for i in range(_NUM_FILES): + filename = os.path.join(self.get_temp_dir(), 'fixed_length.%d' % i) + with open(filename, 'wb') as f: + for j in range(_NUM_ENTRIES): + record = MakeRecord(i, j) + f.write(record) + all_contents.append(record) + + def FixedLengthFile(filename): + return readers.FixedLengthRecordDataset(filename, record_bytes) + + dataset = datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), 'fixed_length*'), + filetype=FixedLengthFile) + + iterator = dataset.make_initializable_iterator() + self._sess.run(iterator.initializer) + get_next = iterator.get_next() + + retrieved_values = [] + for _ in range(2 * len(all_contents)): + retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) + + self.assertEqual(set(all_contents), set(retrieved_values)) + + def testUnexpectedFiletypeString(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype='foo') + + def testUnexpectedFiletypeType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset( + os.path.join(self.get_temp_dir(), '*'), filetype=3) + + def testUnexpectedFilesType(self): + with self.assertRaises(ValueError): + datasets.StreamingFilesDataset(123, filetype='tfrecord') + + +if __name__ == '__main__': + test.main() -- GitLab From 
557611cefba99a7c94dc7dd0932723c0a9f96087 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:09:30 -0800 Subject: [PATCH 200/884] Automated g4 rollback of changelist 187092622 PiperOrigin-RevId: 187125995 --- tensorflow/c/eager/BUILD | 1 - tensorflow/c/eager/c_api.cc | 4 ++-- tensorflow/c/eager/c_api_internal.h | 14 +------------- tensorflow/c/eager/runtime.cc | 14 ++++---------- tensorflow/c/eager/runtime.h | 3 --- tensorflow/c/eager/runtime_test.cc | 12 ++++++------ 6 files changed, 13 insertions(+), 35 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index 16a2a15072..e55cb672e9 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -21,7 +21,6 @@ tf_cuda_library( visibility = ["//visibility:public"], deps = select({ "//tensorflow:android": [ - "//tensorflow/core:lib", "//tensorflow/core:android_tensorflow_lib_lite", ], "//conditions:default": [ diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index b233dd5b93..bebb63c746 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -818,8 +818,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // See WARNING comment below - would be nice to rework to avoid this // subtlety. tensorflow::tf_shared_lock l(ctx->functions_mu); - status->status = tensorflow::KernelAndDevice::Init( - ndef, ctx->func_lib(device), &ctx->runner, kernel); + status->status = + tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); if (!status->status.ok()) { delete kernel; return; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 29944df4c2..3356054cd0 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -31,7 +31,6 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" -#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" @@ -46,15 +45,7 @@ struct TFE_ContextOptions { struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : thread_pool(new tensorflow::thread::ThreadPool( - opts.session_options.options.env, "EagerCompute", - opts.session_options.options.config - .inter_op_parallelism_threads() != 0 - ? opts.session_options.options.config - .inter_op_parallelism_threads() - : tensorflow::port::NumSchedulableCPUs())), - runner([this](std::function f) { thread_pool->Schedule(f); }), - policy(opts.policy), + : policy(opts.policy), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -63,9 +54,6 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} - const std::unique_ptr thread_pool; - std::function)> runner; - const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index b9618420f0..4bf24fec2c 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -255,22 +255,17 @@ Status KernelAndDevice::InitOp(Device* device, const NodeDef& ndef, out->device_ = device; out->kernel_.reset(k); out->flib_ = nullptr; - out->runner_ = nullptr; - out->default_runner_ = [](std::function f) { f(); }; return s; } // static Status KernelAndDevice::Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out) { OpKernel* k = nullptr; Status s = flib->CreateKernel(ndef, &k); out->device_ = 
flib->device(); out->kernel_.reset(k); out->flib_ = flib; - out->runner_ = runner; - out->default_runner_ = [](std::function f) { f(); }; return s; } @@ -301,11 +296,10 @@ Status KernelAndDevice::Run(std::vector* input_tensors, if (stats != nullptr) { params.track_allocations = true; } - if (runner_ == nullptr) { - params.runner = &default_runner_; - } else { - params.runner = runner_; - } + // TODO(apassos): use a thread pool. + std::function)> runner = + [](std::function f) { f(); }; + params.runner = &runner; OpKernelContext context(¶ms); device_->Compute(kernel_.get(), &context); diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index fa5f839977..7fede4dae9 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -169,7 +169,6 @@ class KernelAndDevice { // the FunctionLibraryRuntime is pushed on to the caller (see locking in // c_api.cc). static Status Init(const NodeDef& ndef, FunctionLibraryRuntime* flib, - std::function)>* runner, KernelAndDevice* out); // TODO(ashankar): Remove this static Status InitOp(Device* device, const NodeDef& ndef, @@ -189,8 +188,6 @@ class KernelAndDevice { private: std::unique_ptr kernel_; Device* device_; - std::function)>* runner_; - std::function)> default_runner_; FunctionLibraryRuntime* flib_; checkpoint::TensorSliceReaderCacheWrapper slice_reader_cache_; Rendezvous* rendez_; diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index ab0b535e1a..643153058c 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -92,8 +92,8 @@ TEST(KernelAndDevice, Run) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - Status s = KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel); + Status s = + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel); ASSERT_TRUE(s.ok()) << s; std::vector outputs; s = kernel.Run(&inputs, &outputs, nullptr); @@ -158,8 +158,8 @@ void 
BM_KernelAndDeviceInit(int iters) { KernelAndDevice k(nullptr); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &k)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &k)); } } BENCHMARK(BM_KernelAndDeviceInit); @@ -179,8 +179,8 @@ void BM_KernelAndDeviceRun(int iters) { .BuildNodeDef()); TestEnv env; KernelAndDevice kernel(nullptr); - TF_CHECK_OK(KernelAndDevice::Init(ndef, env.function_library_runtime(), - nullptr, &kernel)); + TF_CHECK_OK( + KernelAndDevice::Init(ndef, env.function_library_runtime(), &kernel)); tensorflow::testing::StartTiming(); for (int i = 0; i < iters; ++i) { TF_CHECK_OK(kernel.Run(&inputs, &outputs, nullptr)); -- GitLab From 46306ad7bd02c613a59aa6074f830f0de011cfbf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 26 Feb 2018 21:25:22 -0800 Subject: [PATCH 201/884] Improve error handling in strided_slice_op to fail more gracefully and return an error status instead of crashing. PiperOrigin-RevId: 187126888 --- tensorflow/core/kernels/strided_slice_op.cc | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/strided_slice_op.cc b/tensorflow/core/kernels/strided_slice_op.cc index 7745effe2a..1e3e92a68a 100644 --- a/tensorflow/core/kernels/strided_slice_op.cc +++ b/tensorflow/core/kernels/strided_slice_op.cc @@ -109,17 +109,27 @@ class StridedSliceOp : public OpKernel { if (is_identity) { VLOG(1) << "Strided slice identity "; Tensor tmp; - CHECK(tmp.CopyFrom(input, final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(input, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } // Optimization #2, slice is memory contiguous (only occurs in dim 0) if (slice_dim0 && IsDim0SliceAligned(input.shape(), begin[0], end[0])) { - CHECK_GE(input.dims(), 1); // Otherwise, is_identity should be true. 
+ OP_REQUIRES(context, input.dims() >= 1, + errors::InvalidArgument( + "Input must have rank at least 1, got: ", input.dims())); + // Otherwise, is_identity should be true. VLOG(1) << "Strided slice dim 0: " << input.shape().DebugString(); + OP_REQUIRES( + context, begin[0] <= end[0], + errors::InvalidArgument("begin[0] (", begin[0], + ") must less or equal to end[0] (", end[0])); + Tensor slice = input.Slice(begin[0], end[0]); Tensor tmp; - CHECK(tmp.CopyFrom(input.Slice(begin[0], end[0]), final_shape)); + OP_REQUIRES(context, tmp.CopyFrom(slice, final_shape), + errors::Internal("Copy failed")); context->set_output(0, tmp); return; } @@ -238,7 +248,8 @@ class StridedSliceGradOp : public OpKernel { if (processing_shape.dims() == 0) { auto in = context->input(4); - CHECK(result->CopyFrom(in, processing_shape)); + OP_REQUIRES(context, result->CopyFrom(in, processing_shape), + errors::Internal("Copy failed")); return; } -- GitLab From 129bb5400e20b322016c4a8f378da63be8d58e5e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 01:02:36 -0800 Subject: [PATCH 202/884] Add documentation to Grappler RewriterConfig to give a short description for each of the optimizer on what they do. PiperOrigin-RevId: 187143156 --- tensorflow/core/protobuf/rewriter_config.proto | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 504ed5d819..875e4663db 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -30,12 +30,17 @@ message RewriterConfig { } // Optimize tensor layouts (default is ON) + // e.g. This will try to use NCHW layout on GPU which is faster. Toggle layout_optimizer = 1; // Fold constants (default is ON) + // Statically infer the value of tensors when possible, and materialize the + // result using constants. 
Toggle constant_folding = 3; // Arithmetic optimizations (default is ON) + // e.g. Simplify arithmetic ops; merge ops with same value (like constants). Toggle arithmetic_optimization = 7; // Control dependency optimizations (default is ON). + // Remove redundant control dependencies, which may enable other optimization. Toggle dependency_optimization = 8; // Loop optimizations (default is OFF). Toggle loop_optimization = 9; @@ -49,12 +54,20 @@ message RewriterConfig { NO_MEM_OPT = 1; // Driven by manual op-level annotations. MANUAL = 2; + // Driven by heuristics. The behavior of these heuristics is subject to // change. Currently includes an experimental recomputation and swapping // heuristics. Manual annotations are respected, but additional nodes are // selected automatically. + + // Swapping heuristic will move a tensor from the GPU to the CPU and move + // it back when needed to reduce peak memory usage. SWAPPING_HEURISTICS = 4; + // Recomputation heuristics will recompute ops (such as Relu activation) + // during backprop instead of storing them, reducing peak memory usage. RECOMPUTATION_HEURISTICS = 5; + // Scheduling will split big ops such as AddN and try to enforce a schedule + // of the new computations that decreases peak memory usage. SCHEDULING_HEURISTICS = 6; // Use any combination of swapping and recomputation heuristics. HEURISTICS = 3; -- GitLab From efa9a8ec649c72887cd286a78b3a2bf95e34f924 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 06:00:21 -0800 Subject: [PATCH 203/884] Enable dynamic function calls. These are compiled just in time by inserting a call to compile. 
PiperOrigin-RevId: 187165096 --- tensorflow/contrib/py2tf/__init__.py | 4 +- tensorflow/contrib/py2tf/converters/BUILD | 13 +-- .../contrib/py2tf/converters/call_trees.py | 76 +++++++------- .../py2tf/converters/call_trees_test.py | 16 +++ .../py2tf/converters/converter_test_base.py | 32 ++++-- tensorflow/contrib/py2tf/impl/api.py | 99 ++++++++++++++----- 6 files changed, 163 insertions(+), 77 deletions(-) diff --git a/tensorflow/contrib/py2tf/__init__.py b/tensorflow/contrib/py2tf/__init__.py index 379fa7fd5c..6531183cb5 100644 --- a/tensorflow/contrib/py2tf/__init__.py +++ b/tensorflow/contrib/py2tf/__init__.py @@ -23,6 +23,7 @@ from __future__ import print_function from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl.api import convert +from tensorflow.contrib.py2tf.impl.api import converted_call from tensorflow.contrib.py2tf.impl.api import graph_ready from tensorflow.contrib.py2tf.impl.api import to_code from tensorflow.contrib.py2tf.impl.api import to_graph @@ -30,7 +31,8 @@ from tensorflow.contrib.py2tf.pyct.transformer import PyFlowParseError from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'to_graph', 'to_code', 'convert', 'graph_ready', 'utils', 'PyFlowParseError' + 'to_graph', 'to_code', 'convert', 'graph_ready', 'converted_call', 'utils', + 'PyFlowParseError' ] remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 42baaaaba7..78f46bc05f 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -46,6 +46,7 @@ py_library( visibility = ["//tensorflow:__subpackages__"], deps = [ ":converters", + "//tensorflow/contrib/py2tf/pyct", "//tensorflow/contrib/py2tf/pyct/static_analysis", "//tensorflow/contrib/py2tf/utils", "@gast_archive//:gast", @@ -59,7 +60,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", 
"//tensorflow/python:client_testlib", ], ) @@ -70,7 +70,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -81,7 +80,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -92,7 +90,7 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/py2tf/impl", "//tensorflow/python:client_testlib", ], ) @@ -103,7 +101,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -114,7 +111,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -125,7 +121,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -136,7 +131,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -157,7 +151,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -168,7 +161,6 @@ py_test( srcs_version = "PY2AND3", deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) @@ -184,7 +176,6 @@ py_test( ], deps = [ ":test_lib", - "//tensorflow/contrib/py2tf/pyct", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index 1050ba654c..f18f9f6086 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -27,6 +27,7 @@ import types import gast from tensorflow.contrib.py2tf.pyct import anno +from 
tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser from tensorflow.contrib.py2tf.pyct import templates from tensorflow.contrib.py2tf.pyct import transformer @@ -72,9 +73,8 @@ class CallTreeTransformer(transformer.Base): self.uncompiled_modules = uncompiled_modules self.nocompile_decorators = nocompile_decorators - # pylint:disable=invalid-name - def _resolve_name(self, node): + """Used to resolve decorator info.""" if isinstance(node, gast.Call): return self._resolve_name(node.func) if isinstance(node, gast.Name): @@ -99,7 +99,13 @@ class CallTreeTransformer(transformer.Base): (owner_type, node.attr)) return None + def _function_is_compilable(self, target_entity): + """Determines whether an entity can be compiled at all.""" + # TODO(mdan): This is just a placeholder. Implement. + return not isinstance(target_entity, types.BuiltinFunctionType) + def _should_compile(self, node, fqn): + """Determines whether an entity should be compiled in the context.""" for i in range(1, len(fqn)): if fqn[:i] in self.uncompiled_modules: return False @@ -141,33 +147,6 @@ class CallTreeTransformer(transformer.Base): return True - def _determine_function_owner(self, m): - # TODO(mdan): The parent type should be known at analysis. Use that instead. - if hasattr(m, 'im_class'): # Python 2 - return m.im_class - if hasattr(m, '__qualname__'): # Python 3 - # Object attributes: should be bound to "self". - if hasattr(m, '__self__'): - return type(m.__self__) - - # Class attributes: should have the owner name in their namespace. - qn = m.__qualname__.split('.') - if len(qn) < 2: - return None - owner_name, func_name = qn[-2:] - if func_name != m.__name__: - raise ValueError('Inconsistent names detected ' - '(__qualname__[1] = "%s", __name__ = "%s") for %s.' % - (func_name, m.__name__, m)) - if owner_name == '': - return None - if owner_name not in self.context.namespace: - raise ValueError( - 'Could not resolve name "%s" while analyzing %s. 
Namespace:\n%s' % - (owner_name, m, self.context.namespace)) - return self.context.namespace[owner_name] - return None - def _rename_compilable_function(self, node): assert anno.hasanno(node.func, 'live_val') assert anno.hasanno(node.func, 'fqn') @@ -182,7 +161,11 @@ class CallTreeTransformer(transformer.Base): target_fqn, live_entity=target_entity) do_rename = True else: - owner_type = self._determine_function_owner(target_entity) + if anno.hasanno(node.func, 'parent_type'): + owner_type = anno.getanno(node.func, 'parent_type') + else: + # Fallback - not reliable. + owner_type = inspect_utils.getmethodclass(target_entity) new_name, do_rename = self.context.namer.compiled_function_name( target_fqn, live_entity=target_entity, owner_type=owner_type) @@ -202,9 +185,32 @@ class CallTreeTransformer(transformer.Base): """ return templates.replace(template, func=node.func, original_args=node.args) - def _function_is_compilable(self, target_entity): - # TODO(mdan): This is just a placeholder. Implement. - return not isinstance(target_entity, types.BuiltinFunctionType) + def _converted_call(self, node): + """Inlines a dynamic conversion for a dynamic function.""" + # TODO(mdan): Pass information on the statically compiled functions. + # Having access to the statically compiled functions can help avoid + # unnecessary compilation. + # For example, this would lead to function `a` being compiled twice: + # + # def a(): + # v = b + # b() + # def b(): + # a() + # + # This is really a problem with recursive calls, which currently can + # only be gated by a static condition, and should be rare. + # TODO(mdan): It probably makes sense to use dynamic conversion every time. + # Before we could convert all the time though, we'd need a reasonable + # caching mechanism. 
+ template = """ + py2tf_api.converted_call(func, True, False, {}, original_args) + """ + call_expr = templates.replace( + template, func=node.func, original_args=node.args) + return call_expr[0].value + + # pylint:disable=invalid-name def visit_Expr(self, node): if isinstance(node.value, gast.Call): @@ -245,9 +251,9 @@ class CallTreeTransformer(transformer.Base): raise NotImplementedError('py_func with return values') else: if self.context.recursive: - raise NotImplementedError('Could not resolve target function.') + node = self._converted_call(node) else: - # TODO(mdan): Double check. Is this reachable code? + # Unresolved functions are allowed in non-recursive mode. pass return node diff --git a/tensorflow/contrib/py2tf/converters/call_trees_test.py b/tensorflow/contrib/py2tf/converters/call_trees_test.py index 777648dc0b..d482a9ef78 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees_test.py +++ b/tensorflow/contrib/py2tf/converters/call_trees_test.py @@ -47,6 +47,21 @@ class CallTreesTest(converter_test_base.TestCase): result.renamed_test_fn_1 = renamed_test_fn_1 self.assertEquals(3, result.test_fn_2(1)) + def test_dynamic_function(self): + + def test_fn_1(): + raise ValueError('This should be masked by the mock.') + + def test_fn_2(f): + return f() + 3 + + node = self.parse_and_analyze(test_fn_2, {}) + node = call_trees.transform(node, self.ctx, (), ()) + + with self.compiled(node) as result: + # 10 = 7 (from the mock) + 3 (from test_fn_2) + self.assertEquals(10, result.test_fn_2(test_fn_1)) + def test_simple_methods(self): class TestClass(object): @@ -59,6 +74,7 @@ class CallTreesTest(converter_test_base.TestCase): node = self.parse_and_analyze( TestClass.test_fn_2, {'TestClass': TestClass}, + namer=converter_test_base.FakeNoRenameNamer(), arg_types={'self': (TestClass.__name__, TestClass)}) node = call_trees.transform(node, self.ctx, (), ()) diff --git a/tensorflow/contrib/py2tf/converters/converter_test_base.py 
b/tensorflow/contrib/py2tf/converters/converter_test_base.py index afa5c2f96f..1f98d8469c 100644 --- a/tensorflow/contrib/py2tf/converters/converter_test_base.py +++ b/tensorflow/contrib/py2tf/converters/converter_test_base.py @@ -25,6 +25,7 @@ from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.pyct import compiler from tensorflow.contrib.py2tf.pyct import context from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import pretty_printer from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct.static_analysis import activity from tensorflow.contrib.py2tf.pyct.static_analysis import live_values @@ -52,26 +53,43 @@ class FakeNamer(object): return ('renamed_%s' % '_'.join(original_fqn)), True +class FakeNoRenameNamer(FakeNamer): + + def compiled_function_name(self, original_fqn, **_): + return str(original_fqn), False + + class TestCase(test.TestCase): """Base class for unit tests in this module. Contains relevant utilities.""" @contextlib.contextmanager def compiled(self, node, *symbols): - source = '' + source = None + + self.dynamic_calls = [] + def converted_call(*args): + """Mock version of api.converted_call.""" + self.dynamic_calls.append(args) + return 7 + try: result, source = compiler.ast_to_object(node) - result.tf = self.make_fake_tf(*symbols) + result.tf = self.make_fake_mod('fake_tf', *symbols) result.py2tf_utils = utils + result.py2tf_api = self.make_fake_mod('fake_api', converted_call) yield result except Exception: # pylint:disable=broad-except - print('Offending compiled code:\n%s' % source) + if source is None: + print('Offending AST:\n%s' % pretty_printer.fmt(node, color=False)) + else: + print('Offending compiled code:\n%s' % source) raise - def make_fake_tf(self, *symbols): - fake_tf = imp.new_module('fake_tf') + def make_fake_mod(self, name, *symbols): + fake_mod = imp.new_module(name) for s in symbols: - setattr(fake_tf, s.__name__, s) - return fake_tf + 
setattr(fake_mod, s.__name__, s) + return fake_mod def attach_namespace(self, module, **ns): for k, v in ns.items(): diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py index 29d2e038a7..48100aac32 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/py2tf/impl/api.py @@ -26,7 +26,9 @@ import six from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.impl import conversion from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.py2tf.pyct import inspect_utils from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.utils import builtins from tensorflow.python.platform import tf_logging as logging from tensorflow.python.util import tf_inspect @@ -110,28 +112,7 @@ def convert(recursive=False, verbose=False, arg_types=None): @wraps(f) def wrapper(*args, **kwargs): - """Wrapper that calls the compiled version of the wrapped function.""" - partial_types = () - arg_values = {} - arg_names = tf_inspect.getargspec(f)[0] - for name, arg in zip(arg_names, args): - arg_values[name] = arg - arg_class = arg.__class__ - # If arg_value_hints specifies any name, use that instead. - if name not in arg_types: - arg_types[name] = (arg_class.__name__, arg_class) - if name == 'self' and tf_inspect.isclass(arg_class): - # Annotated methods need to specify that their owner type is partial, - # otherwise other members they call will not be converted. - partial_types = (arg_class,) - wrapped = to_graph( - f, - recursive=recursive, - verbose=verbose, - arg_values=arg_values, - arg_types=arg_types, - partial_types=partial_types) - return wrapped(*args, **kwargs) + return converted_call(f, recursive, verbose, arg_types, *args, **kwargs) # Sometimes the decorator is just desugared, making it impossible to detect. # This attribute makes detection easier. 
@@ -141,6 +122,78 @@ def convert(recursive=False, verbose=False, arg_types=None): return decorator +def converted_call(f, recursive, verbose, arg_types, *args, **kwargs): + """Compiles a function call inline.""" + # TODO(mdan): This needs cleanup. + # In particular, we may want to avoid renaming functions altogether. + + if conversion.is_whitelisted_for_graph(f): + return f(*args, **kwargs) + + unknown_arg_value = object() # Sentinel for arguments of unknown value + + if tf_inspect.isbuiltin(f): + return builtins.dynamic_builtin(f, *args, **kwargs) + + if tf_inspect.isfunction(f) or tf_inspect.ismethod(f): + # Regular functions + target_entity = f + arg_map_target = f + effective_args = args + f_class = inspect_utils.getmethodclass(f) + + if f_class is not None: + partial_types = (f_class,) + else: + partial_types = () + + elif tf_inspect.isclass(f): + # Constructors + target_entity = f + arg_map_target = f.__init__ + effective_args = (unknown_arg_value,) + args + partial_types = () + + elif hasattr(f, '__call__') and hasattr(f, '__class__'): + # Callable objects + target_entity = f.__call__ + arg_map_target = f.__call__ + effective_args = (f,) + args + partial_types = (f.__class__,) + + else: + NotImplementedError('unknown callable type "%s"' % type(f)) + + arg_values = tf_inspect.getcallargs(arg_map_target, *args, **kwargs) + for name, arg in arg_values.items(): + if arg is unknown_arg_value: + continue + arg_class = arg.__class__ + # If arg_value_hints specifies any name, use that instead. + if name not in arg_types: + arg_types[name] = (arg_class.__name__, arg_class) + + # When called from within a decorator, this is the only indication that + # the function is a method - it appears that the decorator is applied + # before the method is bound. 
+ if not partial_types: + if 'self' in arg_values: + if tf_inspect.isclass(arg_values['self'].__class__): + partial_types = (arg_values['self'].__class__,) + elif 'cls' in arg_values: + if tf_inspect.isclass(arg_values['cls']): + partial_types = (arg_values['cls'],) + + converted_f = to_graph( + target_entity, + recursive=recursive, + verbose=verbose, + arg_values=arg_values, + arg_types=arg_types, + partial_types=partial_types) + return converted_f(*effective_args, **kwargs) + + def to_graph(e, recursive=True, verbose=False, @@ -189,7 +242,7 @@ def to_graph(e, # The compiled code should see everything the entry function saw. # TODO(mdan): This might not work well if the call tree spans modules? if tf_inspect.isfunction(e): - compiled_node.__dict__.update(six.get_function_globals(e)) + compiled_node.__dict__.update(inspect_utils.getnamespace(e)) compiled_fn = getattr(compiled_node, name) if verbose: -- GitLab From 7f53659bc67bba5567ea3f0b69710329843e0228 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 27 Feb 2018 10:19:08 -0800 Subject: [PATCH 204/884] Bump the version of CUB in cmake build. 
--- tensorflow/contrib/cmake/external/cub.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake index 8368898955..98a8c7e736 100644 --- a/tensorflow/contrib/cmake/external/cub.cmake +++ b/tensorflow/contrib/cmake/external/cub.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip) -set(cub_HASH SHA256=20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31) +set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip) +set(cub_HASH SHA256=6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3) set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive) -- GitLab From 246cad289498357523517b67a3f214960dfa0f92 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 27 Feb 2018 14:32:57 -0800 Subject: [PATCH 205/884] "soft placement" for eager PiperOrigin-RevId: 187233434 --- tensorflow/c/eager/c_api.cc | 69 ++++++++++++++++++++++++++--- tensorflow/c/eager/c_api.h | 6 ++- tensorflow/c/eager/c_api_internal.h | 8 +++- tensorflow/c/eager/runtime.h | 2 + tensorflow/python/eager/ops_test.py | 20 +++++++++ 5 files changed, 96 insertions(+), 9 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index bebb63c746..29c709b06d 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "tensorflow/core/common_runtime/copy_tensor.h" #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/device_set.h" #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/node_def_util.h" @@ -68,6 +69,18 @@ std::atomic_int_fast64_t func_id_generator(0); #endif // TENSORFLOW_EAGER_USE_XLA } // namespace +TFE_ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy) { + if (!soft_placement) { + return original_policy; + } + if (original_policy == TFE_DEVICE_PLACEMENT_EXPLICIT || + original_policy == TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32) { + return TFE_DEVICE_PLACEMENT_SILENT; + } + return original_policy; +} + extern "C" { TFE_ContextOptions* TFE_NewContextOptions() { return new TFE_ContextOptions; } @@ -777,15 +790,38 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { return launch_op; } #endif // TENSORFLOW_EAGER_USE_XLA + +tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, + TFE_Context* ctx, TF_Status* status) { + tensorflow::DeviceSet ds; + for (tensorflow::Device* d : ctx->devices()) { + ds.AddDevice(d); + } + tensorflow::DeviceTypeVector final_devices; + status->status = tensorflow::SupportedDeviceTypesForNode( + ds.PrioritizedDeviceTypeList(), ndef, &final_devices); + if (!status->status.ok()) { + return nullptr; + } + if (final_devices.empty()) { + status->status = tensorflow::errors::Internal( + "Could not find valid device for node ", ndef.DebugString()); + return nullptr; + } + for (tensorflow::Device* d : ctx->devices()) { + if (d->device_type() == final_devices[0].type_string()) { + return d; + } + } + status->status = tensorflow::errors::Unknown( + "Could not find a device for node ", ndef.DebugString()); + return nullptr; +} + } // namespace void 
TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { - TFE_Context* ctx = op->ctx; - // TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU - tensorflow::Device* device = - (op->device == nullptr) ? ctx->devices()[0] : op->device; - #ifdef TENSORFLOW_EAGER_USE_XLA std::unique_ptr xla_launch_op; if (op->use_xla && op->name != "_XlaLaunch") { @@ -797,9 +833,17 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } #endif // TENSORFLOW_EAGER_USE_XLA + TFE_Context* ctx = op->ctx; + tensorflow::Device* device = op->device; + if (!ctx->soft_placement && device == nullptr) { + // TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU + device = ctx->devices()[0]; + } + std::vector outputs(1); const tensorflow::MemoryTypeVector* output_memory_types = nullptr; - tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device->name()); + tensorflow::Fprint128 cache_key = + op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); tensorflow::KernelAndDevice* kernel; { tensorflow::tf_shared_lock l(ctx->cache_mu); @@ -807,6 +851,13 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } if (kernel == nullptr) { const tensorflow::NodeDef& ndef = op->attrs.BuildNodeDef(); + if (ctx->soft_placement && device == nullptr) { + device = SelectDevice(ndef, ctx, status); + if (!status->status.ok()) { + return; + } + } + CHECK(device != nullptr); if (ctx->log_device_placement) { LOG(INFO) << "Executing op " << ndef.op() << " in device " << device->name(); @@ -846,6 +897,12 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } + if (device == nullptr) { + // TODO(apassos) debug how the assignment below might return a different + // device from the one requested above. 
+ device = kernel->device(); + } + std::vector copied_tensors; status->status = ValidateInputTypeAndPlacement( ctx, ctx->devices()[0], device, op, kernel->kernel(), &copied_tensors); diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 90cfb7500e..9610ca1b3b 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -61,7 +61,8 @@ TF_CAPI_EXPORT extern void TFE_ContextOptionsSetConfig( // Controls how to act when we try to run an operation on a given device but // some input tensors are not on that device. typedef enum TFE_ContextDevicePlacementPolicy { - // Running operations with input tensors on the wrong device will fail. + // Running operations with input tensors on the wrong device will fail. When + // soft placement is enabled acts like TFE_DEVICE_PLACEMENT_SILENT. TFE_DEVICE_PLACEMENT_EXPLICIT = 0, // Copy the tensor to the right device but log a warning. TFE_DEVICE_PLACEMENT_WARN = 1, @@ -69,7 +70,8 @@ typedef enum TFE_ContextDevicePlacementPolicy { // operation will be blocked till the copy completes. TFE_DEVICE_PLACEMENT_SILENT = 2, // Default placement policy which silently copies int32 tensors but not other - // dtypes. + // dtypes. When soft placement is enabled acts like + // TFE_DEVICE_PLACEMENT_SILENT. 
TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, } TFE_ContextDevicePlacementPolicy; diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 3356054cd0..53c21b64cb 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -43,9 +43,14 @@ struct TFE_ContextOptions { TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32}; }; +TFE_ContextDevicePlacementPolicy PlacementPolicy( + bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy); + struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) - : policy(opts.policy), + : soft_placement( + opts.session_options.options.config.allow_soft_placement()), + policy(PlacementPolicy(soft_placement, opts.policy)), session(s), rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), pflr(new tensorflow::ProcessFunctionLibraryRuntime( @@ -54,6 +59,7 @@ struct TFE_Context { log_device_placement( opts.session_options.options.config.log_device_placement()) {} + const bool soft_placement; const TFE_ContextDevicePlacementPolicy policy; // Note: we cannot use C++11 thread_local here as there is no concept of a diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 7fede4dae9..985ed96735 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -183,6 +183,8 @@ class KernelAndDevice { const OpKernel* kernel() const { return kernel_.get(); } + Device* device() const { return device_; } + DataTypeVector* output_dtypes() { return &output_dtypes_; } private: diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index f2e70341d9..553571d267 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -19,6 +19,7 @@ from __future__ import print_function import numpy as np +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.eager import context from tensorflow.python.eager import execute from 
tensorflow.python.eager import test @@ -277,6 +278,25 @@ class OpsTest(test_util.TensorFlowTestCase): context._context = context.Context() # pylint: enable=protected-access + def testSoftPlacement(self): + if not context.context().num_gpus(): + self.skipTest('No GPUs found') + # Temporarily replace the context + # pylint: disable=protected-access + del context._context + try: + context._context = context.Context( + device_policy=context.DEVICE_PLACEMENT_SILENT, + config=config_pb2.ConfigProto(allow_soft_placement=True)) + cpu_tensor = constant_op.constant(1.0) + result = cpu_tensor + cpu_tensor + self.assertEqual(result.device, + '/job:localhost/replica:0/task:0/device:GPU:0') + finally: + del context._context + context._context = context.Context() + # pylint: enable=protected-access + def testRandomUniform(self): scalar_shape = constant_op.constant([], dtype=dtypes.int32) -- GitLab From 80b6956b7cf4a092ff0780d133cd2faad4cda704 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 14:37:14 -0800 Subject: [PATCH 206/884] Added a TFLite Java API to get last inference latency in nanoseconds. 
PiperOrigin-RevId: 187234119 --- .../lite/NativeInterpreterWrapper.java | 16 +++++++- .../src/main/native/duration_utils_jni.cc | 38 +++++++++++++++++ .../native/nativeinterpreterwrapper_jni.cc | 12 +++++- .../native/nativeinterpreterwrapper_jni.h | 9 +++- .../lite/NativeInterpreterWrapperTest.java | 41 +++++++++++++++++++ .../java/org/tensorflow/lite/TestHelper.java | 15 +++++++ 6 files changed, 126 insertions(+), 5 deletions(-) create mode 100644 tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index 5ee594dec4..7612be0ddd 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -91,8 +91,9 @@ final class NativeInterpreterWrapper implements AutoCloseable { i, inputs.length)); } } + inferenceDurationNanoseconds = -1; long[] outputsHandles = - run(interpreterHandle, errorHandle, sizes, dataTypes, numsOfBytes, inputs); + run(interpreterHandle, errorHandle, sizes, dataTypes, numsOfBytes, inputs, this); if (outputsHandles == null || outputsHandles.length == 0) { throw new IllegalStateException("Interpreter has no outputs."); } @@ -109,7 +110,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { Object[] sizes, int[] dtypes, int[] numsOfBytes, - Object[] values); + Object[] values, + NativeInterpreterWrapper wrapper); /** Resizes dimensions of a specific input. */ void resizeInput(int idx, int[] dims) { @@ -236,6 +238,14 @@ final class NativeInterpreterWrapper implements AutoCloseable { } } + /** + * Gets the last inference duration in nanoseconds. It returns null if there is no previous + * inference run or the last inference run failed. 
+ */ + Long getLastNativeInferenceDurationNanoseconds() { + return (inferenceDurationNanoseconds < 0) ? null : inferenceDurationNanoseconds; + } + private static final int ERROR_BUFFER_SIZE = 512; private long errorHandle; @@ -246,6 +256,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { private int inputSize; + private long inferenceDurationNanoseconds = -1; + private MappedByteBuffer modelByteBuffer; private Map inputsIndexes; diff --git a/tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc b/tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc new file mode 100644 index 0000000000..0e08a04370 --- /dev/null +++ b/tensorflow/contrib/lite/java/src/main/native/duration_utils_jni.cc @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include + +namespace tflite { + +// Gets the elapsed wall-clock timespec. +timespec getCurrentTime() { + timespec time; + clock_gettime(CLOCK_MONOTONIC, &time); + return time; +} + +// Computes the time diff from two timespecs. Returns '-1' if 'stop' is earlier +// than 'start'. 
+jlong timespec_diff_nanoseconds(struct timespec* start, struct timespec* stop) { + jlong result = stop->tv_sec - start->tv_sec; + if (result < 0) return -1; + result = 1000000000 * result + (stop->tv_nsec - start->tv_nsec); + if (result < 0) return -1; + return result; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index c346f9f92e..e405df0745 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -353,7 +353,7 @@ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values) { + jobjectArray values, jobject wrapper) { tflite::Interpreter* interpreter = convertLongToInterpreter(env, interpreter_handle); if (interpreter == nullptr) return nullptr; @@ -384,6 +384,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( status = setInputs(env, interpreter, input_size, data_types, nums_of_bytes, values); if (status != kTfLiteOk) return nullptr; + timespec beforeInference = ::tflite::getCurrentTime(); // runs inference if (interpreter->Invoke() != kTfLiteOk) { throwException(env, kIllegalArgumentException, @@ -391,6 +392,15 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( error_reporter->CachedErrorMessage()); return nullptr; } + timespec afterInference = ::tflite::getCurrentTime(); + jclass wrapper_clazz = env->GetObjectClass(wrapper); + jfieldID fid = + env->GetFieldID(wrapper_clazz, "inferenceDurationNanoseconds", "J"); + if (fid != 0) { + env->SetLongField( + wrapper, fid, + ::tflite::timespec_diff_nanoseconds(&beforeInference, &afterInference)); + } // returns outputs const std::vector& results = 
interpreter->outputs(); if (results.empty()) { diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index c52a7e4e43..31c8f1bc88 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/interpreter.h" @@ -28,6 +29,9 @@ limitations under the License. namespace tflite { // This is to be provided at link-time by a library. extern std::unique_ptr CreateOpResolver(); +extern timespec getCurrentTime(); +extern jlong timespec_diff_nanoseconds(struct timespec* start, + struct timespec* stop); } // namespace tflite #ifdef __cplusplus @@ -104,13 +108,14 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: - * Signature: (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;)[J + * Signature: + * (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;Lorg/tensorflow/lite/NativeInterpreterWrapper;)[J */ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values); + jobjectArray values, jobject wrapper); /* * Class: org_tensorflow_lite_NativeInterpreterWrapper diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index 90323555d8..8c1f2406f7 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ 
b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -417,4 +417,45 @@ public final class NativeInterpreterWrapperTest { assertThat(shape[1]).isEqualTo(3); assertThat(shape[2]).isEqualTo(1); } + + @Test + public void testGetInferenceLatency() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + assertThat(wrapper.getLastNativeInferenceDurationNanoseconds()).isGreaterThan(0L); + wrapper.close(); + } + + @Test + public void testGetInferenceLatencyWithNewWrapper() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + assertThat(wrapper.getLastNativeInferenceDurationNanoseconds()).isNull(); + wrapper.close(); + } + + @Test + public void testGetLatencyAfterFailedInference() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + try { + wrapper.run(inputs); + fail(); + } catch (IllegalArgumentException e) { + assertThat(e) + .hasMessageThat() + .contains("0-th input dimension should be [?,8,8,3], but found [?,8,7,3]"); + } + assertThat(wrapper.getLastNativeInferenceDurationNanoseconds()).isNull(); + wrapper.close(); + } } diff --git a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java index 8660cabf70..a5c13053d7 100644 --- 
a/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java +++ b/tensorflow/contrib/lite/java/src/testhelper/java/org/tensorflow/lite/TestHelper.java @@ -32,4 +32,19 @@ public class TestHelper { throw new IllegalArgumentException("Interpreter has not initialized; Failed to setUseNNAPI."); } } + + /** + * Gets the last inference duration in nanoseconds. It returns null if there is no previous + * inference run or the last inference run failed. + * + * @param interpreter an instance of {@code Interpreter}. If it is not initialized, an {@code + * IllegalArgumentException} will be thrown. + */ + public static Long getLastNativeInferenceDurationNanoseconds(Interpreter interpreter) { + if (interpreter != null && interpreter.wrapper != null) { + return interpreter.wrapper.getLastNativeInferenceDurationNanoseconds(); + } else { + throw new IllegalArgumentException("Interpreter has not initialized; Failed to get latency."); + } + } } -- GitLab From e101ce9c1c8399fecd6679293d8cb2065ce8d47f Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 27 Feb 2018 14:55:13 -0800 Subject: [PATCH 207/884] Properly handle inlining failures PiperOrigin-RevId: 187237044 --- .../core/grappler/optimizers/function_optimizer.cc | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 3c96ff869b..ba8a76ad5f 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -27,12 +27,15 @@ namespace tensorflow { namespace grappler { Status InlineFunction(const NodeDef& node, const FunctionDef& func, - GraphDef* graph) { + const FunctionDefLibrary& library, GraphDef* graph) { const std::unordered_map attr(node.attr().begin(), node.attr().end()); - FunctionDefLibrary library; std::unique_ptr item = GrapplerItemFromFunctionDef(func, attr, library); + if 
(!item) { + return errors::InvalidArgument("Failed to inline function ", node.op(), + " instantiated by ", node.name()); + } std::unordered_map input_nodes; for (int i = 0; i < func.signature().input_arg_size(); ++i) { @@ -129,7 +132,8 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, if (it == functions.end()) { *optimized_graph->add_node() = node; } else { - TF_RETURN_IF_ERROR(InlineFunction(node, *it->second, optimized_graph)); + TF_RETURN_IF_ERROR(InlineFunction(node, *it->second, item.graph.library(), + optimized_graph)); } } -- GitLab From a3bcaec316306c07aa1718ce06efd5fd0e525d58 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Tue, 27 Feb 2018 15:17:39 -0800 Subject: [PATCH 208/884] Set oplib visibility to public --- tensorflow/contrib/tensorrt/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index dd83c34dfb..d62bca353a 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -93,6 +93,7 @@ cc_library( "@local_config_tensorrt//:nv_infer", ]) + tf_custom_op_library_additional_deps(), alwayslink=1, + visibility=["//visibility:public"], ) tf_gen_op_libs( -- GitLab From 2c25f08b6f97155bd5ce95aada5a3cc9b916176f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 15:19:47 -0800 Subject: [PATCH 209/884] Implement support for unpartitioning tf.nn.embedding_lookup into a single gather. 
PiperOrigin-RevId: 187241089 --- tensorflow/contrib/lite/toco/BUILD | 1 + .../graph_transformations.h | 1 + .../propagate_fixed_sizes.cc | 6 + .../remove_trivial_passthrough.cc | 4 +- .../unpartition_embedding_lookup.cc | 237 ++++++++++++++++++ .../contrib/lite/toco/import_tensorflow.cc | 41 +++ tensorflow/contrib/lite/toco/model.h | 26 ++ tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + tensorflow/contrib/lite/toco/tooling_util.cc | 11 + tensorflow/contrib/lite/toco/tooling_util.h | 6 +- 10 files changed, 331 insertions(+), 3 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 17407f3db2..845bc0460f 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -240,6 +240,7 @@ cc_library( "graph_transformations/resolve_tensorflow_tile.cc", "graph_transformations/resolve_transpose_attributes.cc", "graph_transformations/unfuse_activation_functions.cc", + "graph_transformations/unpartition_embedding_lookup.cc", "graph_transformations/unroll_batch_matmul.cc", ], hdrs = [ diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index f2c81ebc81..f0739990ad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -177,6 +177,7 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStridedSlice) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFill) DECLARE_GRAPH_TRANSFORMATION(ResolveMultiplyByZero) DECLARE_GRAPH_TRANSFORMATION(Dequantize) +DECLARE_GRAPH_TRANSFORMATION(UnpartitionEmbeddingLookup) class ResolveReshapeAttributes : public GraphTransformation { public: diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc 
b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index 0e2e5ecf30..fc26f997a6 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -1542,6 +1542,12 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { case OperatorType::kTranspose: ProcessTransposeOperator(model, static_cast(op)); break; + case OperatorType::kDynamicPartition: + case OperatorType::kDynamicStitch: + // DynamicPartition/DynamicStitch are currently only supported for + // transforms that remove them, so we avoid propagating shapes through + // them and let things settle once they've been removed. + break; default: // Unimplemented, another graph transformation should drop it. LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(op->type); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc index 587f171bbf..aa93ace03a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.cc @@ -60,7 +60,9 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation, for (int i = 0; i < passthru_op->inputs.size(); i++) { if (!model->GetArray(passthru_op->inputs[i]).buffer) { count_nonconstant_input_arrays++; - main_input_array_index = i; + if (count_nonconstant_input_arrays == 1) { + main_input_array_index = i; + } } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc new file mode 100644 index 0000000000..419fb9a799 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc @@ -0,0 +1,237 @@ +/* Copyright 2018 The 
TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" + +namespace toco { + +bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { + // Collapses a partitioned tf.nn.embedding_lookup back into a single Gather. + // https://www.tensorflow.org/api_docs/python/tf/nn/embedding_lookup + // This transform attempts to identify the len(params) > 1 case and collapse + // it to the len(params) = 1 case by concatenating the original params and + // reversing the partitioning. + // + // If len(params) to the tf.nn.embedding_lookup == 1, the whole op becomes + // simply a gather: + // https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/python/ops/embedding_ops.py#L150 + // + // Notes on this implementation: + // - only supports partition_strategy='mod' + // + // A rough graph of a partitioned embedding_lookup looks like: + // (ids)--+-->FloorDiv--+-->DynamicPartition-->[[Gather]]--\ + // \-->FloorMod--/ | + // V | + // Range-->DynamicPartition-------->DynamicStitch<---------/ + // (const) V + // (embeddings) + + // First look for the final DynamicStitch. 
+ auto op_it = model->operators.begin() + op_index; + if (op_it->get()->type != OperatorType::kDynamicStitch) { + return false; + } + auto* stitch_op = static_cast(op_it->get()); + + // Split up the DynamicStitch inputs into the indices and data. + std::vector stitch_indices_inputs; + std::vector stitch_data_inputs; + for (size_t i = 0; i < stitch_op->num_partitions; ++i) { + stitch_indices_inputs.push_back(stitch_op->inputs[i]); + } + for (size_t i = stitch_op->num_partitions; i < stitch_op->num_partitions * 2; + ++i) { + stitch_data_inputs.push_back(stitch_op->inputs[i]); + } + + // Validate all indices come from the same DynamicPartition. + DynamicPartitionOperator* indices_partition_op = nullptr; + for (const string& indices_partition_output_name : stitch_indices_inputs) { + auto* op = GetOpWithOutput(*model, indices_partition_output_name); + CHECK(op) << "Source of " << indices_partition_output_name << " not found"; + if (op->type != OperatorType::kDynamicPartition) { + AddMessageF( + "Skipping because indices input %s into " + "%s is unexpected", + LogName(*op), LogName(*stitch_op)); + return false; + } + if (!indices_partition_op) { + indices_partition_op = static_cast(op); + } else { + // Ensure this is the same op as previous ones. + if (op != indices_partition_op) { + AddMessageF( + "Skipping because indices input %s into " + "%s is from a different source op than others", + LogName(*op), LogName(*stitch_op)); + return false; + } + } + } + CHECK(indices_partition_op) << "No indices inputs"; + + // The data for the indices must be a constant range of the array shape. + if (!IsConstantParameterArray(*model, indices_partition_op->inputs[0])) { + AddMessageF("Skipping because indices partition data is non-constant"); + return false; + } + auto& indices_data_array = model->GetArray(indices_partition_op->inputs[0]); + if (indices_data_array.data_type == ArrayDataType::kNone) { + // Yield until data types are propagated. 
+ return false; + } + CHECK(indices_data_array.data_type == ArrayDataType::kInt32) + << "Indices partition inputs must be int32"; + const auto& indices_data_buffer = + indices_data_array.GetBuffer().data; + for (size_t i = 0; i < indices_data_buffer.size(); ++i) { + CHECK_EQ(indices_data_buffer[i], i) << "Indices range must be identity"; + } + + // Find all of the gathers used for the data inputs. + std::vector gather_ops; + for (const string& gather_output_name : stitch_data_inputs) { + auto* op = GetOpWithOutput(*model, gather_output_name); + CHECK(op) << "Source of " << gather_output_name << " not found"; + if (op->type != OperatorType::kGather) { + AddMessageF( + "Skipping because data input %s into %s " + "is unexpected", + LogName(*op), LogName(*stitch_op)); + return false; + } + gather_ops.push_back(static_cast(op)); + } + + // Validate all gathers come from the same DynamicPartition. + DynamicPartitionOperator* data_partition_op = nullptr; + for (auto* gather_op : gather_ops) { + auto* op = GetOpWithOutput(*model, gather_op->inputs[1]); + CHECK(op) << "Source of " << gather_op->inputs[1] << " not found"; + if (op->type != OperatorType::kDynamicPartition) { + AddMessageF( + "Skipping because data input %s into " + "%s is unexpected", + LogName(*op), LogName(*gather_op)); + return false; + } + if (!data_partition_op) { + data_partition_op = static_cast(op); + } else { + // Ensure this is the same op as previous ones. + if (op != data_partition_op) { + AddMessageF( + "Skipping because data input %s into " + "%s is from a different source op than others", + LogName(*op), LogName(*gather_op)); + return false; + } + } + } + CHECK(data_partition_op) << "No data inputs"; + + // Validate the partition ops have the same sizes. 
+ CHECK_EQ(indices_partition_op->num_partitions, + data_partition_op->num_partitions) + << "Indices and data partition ops have differing dimensions"; + int num_partitions = indices_partition_op->num_partitions; + + // Partition strategy of 'mod' gives us a FloorMod and FloorDiv. + // The gather partition uses the FloorDiv as the data and FloorMod as the + // partitions and the indices use the FloorMod as their partitions. + Operator* div_op = GetOpWithOutput(*model, data_partition_op->inputs[0]); + Operator* mod_op = GetOpWithOutput(*model, data_partition_op->inputs[1]); + CHECK(div_op && div_op->type == OperatorType::kFloorDiv) + << "Unsupported partition strategy"; + CHECK(mod_op && mod_op->type == OperatorType::kFloorMod) + << "Unsupported partition strategy"; + CHECK_EQ(mod_op, GetOpWithOutput(*model, indices_partition_op->inputs[1])) + << "Indices and data parition ops require the same partition strategy " + "and inputs"; + + // Glob together all of the gather data. This is not yet in the correct order. + auto* gather_params_concat_op = new ConcatenationOperator; + for (const auto& gather_op : gather_ops) { + gather_params_concat_op->inputs.push_back(gather_op->inputs[0]); + } + gather_params_concat_op->outputs.push_back( + AvailableArrayName(*model, gather_ops[0]->inputs[0] + "_unpartitioned")); + op_it = model->operators.emplace(op_it, gather_params_concat_op) + 1; + model->GetOrCreateArray(gather_params_concat_op->outputs[0]); + + // Permute the gather params to undo the partitioning that was originally + // done. 
+ auto* gather_params_permute_op = new GatherOperator; + gather_params_permute_op->inputs.push_back( + gather_params_concat_op->outputs[0]); + gather_params_permute_op->inputs.push_back( + AvailableArrayName(*model, gather_ops[0]->inputs[0] + "_permuted/perm")); + gather_params_permute_op->outputs.push_back( + AvailableArrayName(*model, gather_ops[0]->inputs[0] + "_permuted")); + op_it = model->operators.emplace(op_it, gather_params_permute_op) + 1; + model->GetOrCreateArray(gather_params_permute_op->outputs[0]); + const auto& partition_array = model->GetArray(gather_ops[0]->inputs[0]); + const auto& partition_array_dims = partition_array.shape().dims(); + auto& perm_array = + model->GetOrCreateArray(gather_params_permute_op->inputs[1]); + perm_array.data_type = ArrayDataType::kInt32; + perm_array.mutable_shape()->ReplaceDims( + {num_partitions * partition_array_dims[0]}); + auto& perm_data = perm_array.GetMutableBuffer().data; + perm_data.resize(RequiredBufferSizeForShape(perm_array.shape())); + // NOTE: this is what relies on the partition_strategy. + for (int i = 0; i < num_partitions * partition_array_dims[0]; ++i) { + int p = i % num_partitions; + perm_data[i] = p * partition_array_dims[0] + i / num_partitions; + } + + // Insert the new unpartitioned gather op. + auto* merged_gather_op = new GatherOperator; + merged_gather_op->inputs = {gather_params_permute_op->outputs[0], + mod_op->inputs[0]}; + merged_gather_op->outputs = {stitch_op->outputs[0]}; + model->operators.emplace(op_it, merged_gather_op); + + AddMessageF( + "Replacing suspected partitioned tf.nn.embedding_lookup (starting at %s " + "+ %s and ending at %s) with a single unpartitioned gather %s", + LogName(*div_op), LogName(*mod_op), LogName(*stitch_op), + LogName(*merged_gather_op)); + + // Ensure the stitch output array is dead, as we don't want whatever was in it + // previously now that we've redefined it. It'll be recreated when needed. 
+ model->EraseArray(stitch_op->outputs[0]); + model->GetOrCreateArray(merged_gather_op->outputs[0]); + + // Erase all the original ops. + DeleteOpAndArraysIfUnused(model, div_op); + DeleteOpAndArraysIfUnused(model, mod_op); + for (auto* gather_op : gather_ops) { + DeleteOpAndArraysIfUnused(model, gather_op); + } + DeleteOpAndArraysIfUnused(model, indices_partition_op); + DeleteOpAndArraysIfUnused(model, data_partition_op); + DeleteOpAndArraysIfUnused(model, stitch_op); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 52a0512e23..41abca864d 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -1896,6 +1896,42 @@ void ConvertTopKV2Operator(const NodeDef& node, op->outputs.push_back(node.name() + ":1"); model->operators.emplace_back(op.release()); } + +void ConvertDynamicPartitionOperator( + const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, + Model* model) { + auto op = absl::make_unique(); + CHECK(HasAttr(node, "num_partitions")); + op->num_partitions = GetIntAttr(node, "num_partitions"); + CheckInputsCount(node, tf_import_flags, 2); + op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + CHECK_GT(op->num_partitions, 1); + op->outputs.push_back(node.name()); // Implicit :0. + for (int i = 1; i < op->num_partitions; ++i) { + op->outputs.push_back(node.name() + ":" + std::to_string(i)); + } + model->operators.emplace_back(op.release()); +} + +void ConvertDynamicStitchOperator(const NodeDef& node, + const TensorFlowImportFlags& tf_import_flags, + Model* model) { + // The parallel and non-parallel variants are the same besides whether they + // have a parallel loop; there are no behavioral differences. 
+ CHECK(node.op() == "DynamicStitch" || node.op() == "ParallelDynamicStitch"); + auto op = absl::make_unique(); + CHECK(HasAttr(node, "N")); + op->num_partitions = GetIntAttr(node, "N"); + // Expect all ID partitions + all value partitions. + CheckInputsCount(node, tf_import_flags, op->num_partitions * 2); + for (int i = 0; i < op->num_partitions * 2; ++i) { + op->inputs.push_back(node.input(i)); + } + op->outputs.push_back(node.name()); + model->operators.emplace_back(op.release()); +} + } // namespace std::unique_ptr ImportTensorFlowGraphDef( @@ -2081,6 +2117,11 @@ std::unique_ptr ImportTensorFlowGraphDef( ConvertExpOperator(node, tf_import_flags, model); } else if (node.op() == "TopK" || node.op() == "TopKV2") { ConvertTopKV2Operator(node, tf_import_flags, model); + } else if (node.op() == "DynamicPartition") { + ConvertDynamicPartitionOperator(node, tf_import_flags, model); + } else if (node.op() == "DynamicStitch" || + node.op() == "ParallelDynamicStitch") { + ConvertDynamicStitchOperator(node, tf_import_flags, model); } else { ConvertUnsupportedOperator(node, tf_import_flags, model); } diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index d5df0fb951..ed0dedc003 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -115,6 +115,8 @@ enum class OperatorType { kTensorFlowTile, kTranspose, kTopK_V2, + kDynamicPartition, + kDynamicStitch, // An unsupported TF operation. It's only needed to be able to represent TF // graph internally and is expected to be dropped by graph transformations. kTensorFlowUnsupported, @@ -1414,6 +1416,30 @@ struct TopKV2Operator : Operator { TopKV2Operator() : Operator(OperatorType::kTopK_V2) {} }; +// DynamicPartition operator: +// +// Inputs: +// inputs[0]: required: data. +// inputs[1]: required: partitions. 
+// +// TensorFlow equivalent: DynamicPartition +struct DynamicPartitionOperator : Operator { + DynamicPartitionOperator() : Operator(OperatorType::kDynamicPartition) {} + int num_partitions; +}; + +// DynamicStitch operator: +// +// Inputs: +// inputs[0,N): required: indices. +// inputs[N,2N): required: data. +// +// TensorFlow equivalent: DynamicStitch/ParallelDynamicStitch +struct DynamicStitchOperator : Operator { + DynamicStitchOperator() : Operator(OperatorType::kDynamicStitch) {} + int num_partitions; +}; + // Alloc's are used for transient arrays only. An Alloc specifies which interval // of the "transient_data" workspace buffer passed to inference functions, is to // be used for the transient array at hand. The 'start' and 'end' values are diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index a09a3c4ef5..42e0a89017 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -102,6 +102,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveConstantShapeOrRank); transformations->Add(new MakeInitialDequantizeOperator); transformations->Add(new ResolveConstantFakeQuant); + transformations->Add(new UnpartitionEmbeddingLookup); } bool SupportsQuantization(FileFormat format) { diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index d23b3737fc..f92e10752d 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -159,6 +159,15 @@ bool DeleteArrayIfUsedOnce(const string& array_name, Model* model) { return false; } +void DeleteOpAndArraysIfUnused(Model* model, Operator* op) { + for (const string& array_name : op->inputs) { + DeleteArrayIfUsedOnce(array_name, model); + } + auto op_it = FindOp(*model, op); + CHECK(op_it != model->operators.end()); + model->operators.erase(op_it); +} + std::vector>::const_iterator FindOpWithOutput( 
const Model& model, const string& array_name) { for (auto it = model.operators.begin(); it != model.operators.end(); ++it) { @@ -347,6 +356,8 @@ const char* OperatorTypeName(OperatorType type) { HANDLE_OPERATORTYPENAME_CASE(TopK_V2) HANDLE_OPERATORTYPENAME_CASE(TensorFlowUnsupported) HANDLE_OPERATORTYPENAME_CASE(Exp) + HANDLE_OPERATORTYPENAME_CASE(DynamicPartition) + HANDLE_OPERATORTYPENAME_CASE(DynamicStitch) default: LOG(FATAL) << "Unhandled op type"; #undef HANDLE_OPERATORTYPENAME_CASE diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index 11208ed667..01917b29de 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -64,6 +64,10 @@ int CountOpsWithInput(const Model& model, const string& array_name); bool DeleteArrayIfUnused(const string& array_name, Model* model); bool DeleteArrayIfUsedOnce(const string& array_name, Model* model); +// Deletes the op and any of its input and output arrays if they are unused +// after the op has been deleted. +void DeleteOpAndArraysIfUnused(Model* model, Operator* op); + std::vector>::const_iterator FindOpWithOutput( const Model& model, const string& array_name); Operator* GetOpWithOutput(const Model& model, const string& array_name); @@ -71,8 +75,6 @@ Operator* GetOpWithOutput(const Model& model, const string& array_name); std::vector>::iterator FindOpWithOutput( Model& model, const string& array_name); -Operator* GetOpWithOutput(const Model& model, const string& array_name); - std::vector>::const_iterator FindOpWithInput( const Model& model, const string& array_name); -- GitLab From 53b2181ea5cff054d40c583f05da942a9a56a283 Mon Sep 17 00:00:00 2001 From: Jeremy Lau Date: Tue, 27 Feb 2018 15:32:16 -0800 Subject: [PATCH 210/884] Make RecentRequestIds more efficient. 
PiperOrigin-RevId: 187242940 --- tensorflow/core/distributed_runtime/BUILD | 1 + .../core/distributed_runtime/recent_request_ids.cc | 9 ++++++--- .../core/distributed_runtime/recent_request_ids.h | 6 ++++-- .../distributed_runtime/recent_request_ids_test.cc | 13 +++++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/distributed_runtime/BUILD b/tensorflow/core/distributed_runtime/BUILD index 9e152aa082..434626bd2d 100644 --- a/tensorflow/core/distributed_runtime/BUILD +++ b/tensorflow/core/distributed_runtime/BUILD @@ -595,6 +595,7 @@ tf_cc_test( srcs = ["recent_request_ids_test.cc"], deps = [ ":recent_request_ids", + ":request_id", "//tensorflow/core:lib", "//tensorflow/core:test", "//tensorflow/core:test_main", diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.cc b/tensorflow/core/distributed_runtime/recent_request_ids.cc index c30879406c..4f6866c5d1 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids.cc +++ b/tensorflow/core/distributed_runtime/recent_request_ids.cc @@ -15,6 +15,8 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/recent_request_ids.h" +#include + #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" @@ -29,12 +31,14 @@ RecentRequestIds::RecentRequestIds(int num_tracked_request_ids) Status RecentRequestIds::TrackUnique(int64 request_id, const string& method_name, const protobuf::Message& request) { - mutex_lock l(mu_); if (request_id == 0) { // For backwards compatibility, allow all requests with request_id 0. return Status::OK(); } - if (set_.count(request_id) > 0) { + + mutex_lock l(mu_); + const bool inserted = set_.insert(request_id).second; + if (!inserted) { // Note: RecentRequestIds is not strict LRU because we don't update // request_id's age in the circular_buffer_ if it's tracked again. 
Strict // LRU is not useful here because returning this error will close the @@ -49,7 +53,6 @@ Status RecentRequestIds::TrackUnique(int64 request_id, // when the buffer is not yet full. set_.erase(circular_buffer_[next_index_]); circular_buffer_[next_index_] = request_id; - set_.insert(request_id); next_index_ = (next_index_ + 1) % circular_buffer_.size(); return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/recent_request_ids.h b/tensorflow/core/distributed_runtime/recent_request_ids.h index e8e45331dd..11cf937c94 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids.h +++ b/tensorflow/core/distributed_runtime/recent_request_ids.h @@ -16,11 +16,13 @@ limitations under the License. #ifndef TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RECENT_REQUEST_IDS_H_ #define TENSORFLOW_CORE_DISTRIBUTED_RUNTIME_RECENT_REQUEST_IDS_H_ +#include +#include #include #include "tensorflow/core/lib/core/status.h" -#include "tensorflow/core/lib/gtl/flatset.h" #include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/worker.pb.h" @@ -64,7 +66,7 @@ class RecentRequestIds { // request_id. int next_index_ GUARDED_BY(mu_) = 0; std::vector circular_buffer_ GUARDED_BY(mu_); - gtl::FlatSet set_ GUARDED_BY(mu_); + std::unordered_set set_ GUARDED_BY(mu_); }; } // namespace tensorflow diff --git a/tensorflow/core/distributed_runtime/recent_request_ids_test.cc b/tensorflow/core/distributed_runtime/recent_request_ids_test.cc index 9a0facf540..8910a50e9c 100644 --- a/tensorflow/core/distributed_runtime/recent_request_ids_test.cc +++ b/tensorflow/core/distributed_runtime/recent_request_ids_test.cc @@ -17,8 +17,10 @@ limitations under the License. 
#include +#include "tensorflow/core/distributed_runtime/request_id.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" +#include "tensorflow/core/platform/test_benchmark.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/worker.pb.h" @@ -93,4 +95,15 @@ TEST(RecentRequestIds, Ordered3) { TestOrdered(3); } TEST(RecentRequestIds, Ordered4) { TestOrdered(4); } TEST(RecentRequestIds, Ordered5) { TestOrdered(5); } +void BM_TrackUnique(int iters) { + RecentRequestIds recent_request_ids(100000); + RecvTensorRequest request; + for (int i = 0; i < iters; ++i) { + TF_CHECK_OK(recent_request_ids.TrackUnique(GetUniqueRequestId(), + "BM_TrackUnique", request)); + } +} + +BENCHMARK(BM_TrackUnique); + } // namespace tensorflow -- GitLab From c54a6ce4b53172569caa19991ec36be04121a359 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 15:39:58 -0800 Subject: [PATCH 211/884] tf.contrib.data.bucket_by_sequence_length for variable length inputs PiperOrigin-RevId: 187244061 --- tensorflow/contrib/data/__init__.py | 2 + .../python/kernel_tests/bucketing_test.py | 90 ++++++++++++++ .../contrib/data/python/ops/grouping.py | 115 ++++++++++++++++++ 3 files changed, 207 insertions(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index fcdccdd26c..1777727de8 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -25,6 +25,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. 
@@Counter @@batch_and_drop_remainder +@@bucket_by_sequence_length @@dense_to_sparse_batch @@enumerate_dataset @@group_by_window @@ -58,6 +59,7 @@ from tensorflow.contrib.data.python.ops.counter import Counter from tensorflow.contrib.data.python.ops.enumerate_ops import enumerate_dataset from tensorflow.contrib.data.python.ops.error_ops import ignore_errors from tensorflow.contrib.data.python.ops.get_single_element import get_single_element +from tensorflow.contrib.data.python.ops.grouping import bucket_by_sequence_length from tensorflow.contrib.data.python.ops.grouping import group_by_window from tensorflow.contrib.data.python.ops.interleave_ops import parallel_interleave from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave diff --git a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py index f1b494e1a6..94f800e8a5 100644 --- a/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/bucketing_test.py @@ -17,6 +17,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import random + import numpy as np from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base @@ -379,5 +381,93 @@ class BucketTest(test.TestCase): self.assertEqual(batches, 15) +class BucketBySequenceLength(test.TestCase): + + def testBucket(self): + + boundaries = [10, 20, 30] + batch_sizes = [10, 8, 4, 2] + lengths = [8, 13, 25, 35] + + def element_gen(): + # Produce 1 batch for each bucket + elements = [] + for batch_size, length in zip(batch_sizes, lengths): + for _ in range(batch_size): + elements.append([1] * length) + random.shuffle(elements) + for el in elements: + yield (el,) + + element_len = lambda el: array_ops.shape(el)[0] + dataset = dataset_ops.Dataset.from_generator( + element_gen, (dtypes.int64,), ([None],)).apply( + 
grouping.bucket_by_sequence_length( + element_len, boundaries, batch_sizes)) + batch, = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + batches = [] + for _ in range(4): + batches.append(sess.run(batch)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(batch) + batch_sizes_val = [] + lengths_val = [] + for batch in batches: + batch_size = batch.shape[0] + length = batch.shape[1] + batch_sizes_val.append(batch_size) + lengths_val.append(length) + self.assertEqual(sum(batch_sizes_val), sum(batch_sizes)) + self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) + self.assertEqual(sorted(lengths), sorted(lengths_val)) + + def testPadToBoundary(self): + + boundaries = [10, 20, 30] + batch_sizes = [10, 8, 4, 2] + lengths = [8, 13, 25] + + def element_gen(): + # Produce 1 batch for each bucket + elements = [] + for batch_size, length in zip(batch_sizes[:-1], lengths): + for _ in range(batch_size): + elements.append([1] * length) + random.shuffle(elements) + for el in elements: + yield (el,) + for _ in range(batch_sizes[-1]): + el = [1] * (boundaries[-1] + 5) + yield (el,) + + element_len = lambda el: array_ops.shape(el)[0] + dataset = dataset_ops.Dataset.from_generator( + element_gen, (dtypes.int64,), ([None],)).apply( + grouping.bucket_by_sequence_length( + element_len, boundaries, batch_sizes, + pad_to_bucket_boundary=True)) + batch, = dataset.make_one_shot_iterator().get_next() + + with self.test_session() as sess: + batches = [] + for _ in range(3): + batches.append(sess.run(batch)) + with self.assertRaisesOpError("bucket_boundaries"): + sess.run(batch) + batch_sizes_val = [] + lengths_val = [] + for batch in batches: + batch_size = batch.shape[0] + length = batch.shape[1] + batch_sizes_val.append(batch_size) + lengths_val.append(length) + batch_sizes = batch_sizes[:-1] + self.assertEqual(sum(batch_sizes_val), sum(batch_sizes)) + self.assertEqual(sorted(batch_sizes), sorted(batch_sizes_val)) + 
self.assertEqual(sorted(boundaries), sorted(lengths_val)) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/grouping.py b/tensorflow/contrib/data/python/ops/grouping.py index 67b085002a..a19be22254 100644 --- a/tensorflow/contrib/data/python/ops/grouping.py +++ b/tensorflow/contrib/data/python/ops/grouping.py @@ -17,13 +17,20 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import math_ops def group_by_window(key_func, @@ -85,6 +92,114 @@ def group_by_window(key_func, return _apply_fn +def bucket_by_sequence_length(element_length_func, + bucket_boundaries, + bucket_batch_sizes, + padded_shapes=None, + padding_values=None, + pad_to_bucket_boundary=False): + """A transformation that buckets elements in a `Dataset` by length. + + Elements of the `Dataset` are grouped together by length and then are padded + and batched. + + This is useful for sequence tasks in which the elements have variable length. + Grouping together elements that have similar lengths reduces the total + fraction of padding in a batch which increases training step efficiency. + + Args: + element_length_func: function from element in `Dataset` to `tf.int64`, + determines the length of the element, which will determine the bucket it + goes into. + bucket_boundaries: `list`, upper length boundaries of the buckets. 
+ bucket_batch_sizes: `list`, batch size per bucket. Length should be + `len(bucket_boundaries) + 1`. + padded_shapes: Nested structure of `tf.TensorShape` to pass to + @{tf.data.Dataset.padded_batch}. If not provided, will use + `dataset.output_shapes`, which will result in variable length dimensions + being padded out to the maximum length in each batch. + padding_values: Values to pad with, passed to + @{tf.data.Dataset.padded_batch}. Defaults to padding with 0. + pad_to_bucket_boundary: bool, if `False`, will pad dimensions with unknown + size to maximum length in batch. If `True`, will pad dimensions with + unknown size to bucket boundary, and caller must ensure that the source + `Dataset` does not contain any elements with length longer than + `max(bucket_boundaries)`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. + + Raises: + ValueError: if `len(bucket_batch_sizes) != len(bucket_boundaries) + 1`. + """ + with ops.name_scope("bucket_by_seq_length"): + if len(bucket_batch_sizes) != (len(bucket_boundaries) + 1): + raise ValueError( + "len(bucket_batch_sizes) must equal len(bucket_boundaries) + 1") + + batch_sizes = constant_op.constant(bucket_batch_sizes, dtype=dtypes.int64) + + def element_to_bucket_id(element): + """Return int64 id of the length bucket for this element.""" + seq_length = element_length_func(element) + + boundaries = list(bucket_boundaries) + buckets_min = [np.iinfo(np.int32).min] + boundaries + buckets_max = boundaries + [np.iinfo(np.int32).max] + conditions_c = math_ops.logical_and( + math_ops.less_equal(buckets_min, seq_length), + math_ops.less(seq_length, buckets_max)) + bucket_id = math_ops.reduce_min(array_ops.where(conditions_c)) + + return bucket_id + + def window_size_fn(bucket_id): + # The window size is set to the batch size for this bucket + window_size = batch_sizes[bucket_id] + return window_size + + def make_padded_shapes(shapes, none_filler=None): + padded = [] + for 
shape in nest.flatten(shapes): + shape = tensor_shape.TensorShape(shape) + shape = [ + none_filler if d.value is None else d + for d in shape + ] + padded.append(shape) + return nest.pack_sequence_as(shapes, padded) + + def batching_fn(bucket_id, grouped_dataset): + """Batch elements in dataset.""" + batch_size = batch_sizes[bucket_id] + none_filler = None + if pad_to_bucket_boundary: + err_msg = ("When pad_to_bucket_boundary=True, elements must have " + "length <= max(bucket_boundaries).") + check = check_ops.assert_less( + bucket_id, + constant_op.constant(len(bucket_batch_sizes) - 1, + dtype=dtypes.int64), + message=err_msg) + with ops.control_dependencies([check]): + boundaries = constant_op.constant(bucket_boundaries, + dtype=dtypes.int64) + bucket_boundary = boundaries[bucket_id] + none_filler = bucket_boundary + shapes = make_padded_shapes( + padded_shapes or grouped_dataset.output_shapes, + none_filler=none_filler) + return grouped_dataset.padded_batch(batch_size, shapes, padding_values) + + def _apply_fn(dataset): + return dataset.apply( + group_by_window(element_to_bucket_id, batching_fn, + window_size_func=window_size_fn)) + + return _apply_fn + + class _VariantDataset(dataset_ops.Dataset): """A Dataset wrapper for a tf.variant-typed function argument.""" -- GitLab From 64d98b3803e3d53e53f14fadd70fa0332de987a0 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 27 Feb 2018 15:41:18 -0800 Subject: [PATCH 212/884] Bump the version of CUB in cmake build. 
PiperOrigin-RevId: 187244251 --- tensorflow/contrib/cmake/external/cub.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake index 8368898955..98a8c7e736 100644 --- a/tensorflow/contrib/cmake/external/cub.cmake +++ b/tensorflow/contrib/cmake/external/cub.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip) -set(cub_HASH SHA256=20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31) +set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip) +set(cub_HASH SHA256=6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3) set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive) -- GitLab From 3ba1f72f8829c566372208062fcea04ab5695dc6 Mon Sep 17 00:00:00 2001 From: vihanjain Date: Tue, 27 Feb 2018 16:05:26 -0800 Subject: [PATCH 213/884] Pull request for fixing warm-starting device placement (#17312) * Update checkpoint_utils.py Fix device allocation bug for warm-starting op * Update checkpoint_utils_test.py Fix test --- tensorflow/python/training/checkpoint_utils.py | 6 +++++- tensorflow/python/training/checkpoint_utils_test.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/training/checkpoint_utils.py b/tensorflow/python/training/checkpoint_utils.py index 0af1cdecfa..8384d0ae94 100644 --- a/tensorflow/python/training/checkpoint_utils.py +++ b/tensorflow/python/training/checkpoint_utils.py @@ -289,7 +289,11 @@ def _set_checkpoint_initializer(variable, name: Name of the operation. 
""" base_type = variable.dtype.base_dtype - with ops.colocate_with(variable): + # Do not colocate with variable since RestoreV2 op only runs on CPU and + # colocation will force variable (and other ops that colocate with variable) + # to be on CPU as well. It is okay to place the variable's initializer op on + # CPU since it will only be run once at the start. + with ops.device(variable.device), ops.device("/cpu:0"): restore_op = io_ops.restore_v2( ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0] variable._initializer_op = state_ops.assign(variable, restore_op) # pylint:disable=protected-access diff --git a/tensorflow/python/training/checkpoint_utils_test.py b/tensorflow/python/training/checkpoint_utils_test.py index a461b24cbb..f564871315 100644 --- a/tensorflow/python/training/checkpoint_utils_test.py +++ b/tensorflow/python/training/checkpoint_utils_test.py @@ -206,7 +206,9 @@ class CheckpointsTest(test.TestCase): checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"useful_scope/": "useful_scope/"}) - self.assertEqual(my4._initializer_op.op.inputs[1].device, "/job:ps") + # initializer runs on the same task but always on CPU. + self.assertEqual(my4._initializer_op.op.inputs[1].device, + "/job:ps/device:CPU:0") def testInitFromRootCheckpoint(self): checkpoint_dir = self.get_temp_dir() -- GitLab From 72b5d12847764d74dd026d97d663c9101a7ff58a Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 27 Feb 2018 16:13:08 -0800 Subject: [PATCH 214/884] Bump the version of CUB in cmake build. 
(#17310) --- tensorflow/contrib/cmake/external/cub.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cmake/external/cub.cmake b/tensorflow/contrib/cmake/external/cub.cmake index 8368898955..98a8c7e736 100644 --- a/tensorflow/contrib/cmake/external/cub.cmake +++ b/tensorflow/contrib/cmake/external/cub.cmake @@ -14,8 +14,8 @@ # ============================================================================== include (ExternalProject) -set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.7.4.zip) -set(cub_HASH SHA256=20a1a39fd97e5da7f40f5f2e7fd73fd2ea59f9dc4bb8a6c5f228aa543e727e31) +set(cub_URL https://mirror.bazel.build/github.com/NVlabs/cub/archive/1.8.0.zip) +set(cub_HASH SHA256=6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3) set(cub_BUILD ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/cub/src/cub) set(cub_ARCHIVE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/cub_archive) -- GitLab From e7e63d8b2386f2b3ddd234da77c15125516c65b6 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Tue, 27 Feb 2018 16:41:38 -0800 Subject: [PATCH 215/884] [XLA] Remove an unused function with a typo in its name. PiperOrigin-RevId: 187252967 --- tensorflow/compiler/xla/service/hlo_module.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index 06d92f94fd..ca94118763 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -187,11 +187,6 @@ class HloModule { // Returns a randomly generated uint64. uint64 RandomNew64() const; - // Returns the unique name for a computation in this module. - string GetUniqueCompuationName(const string& prefix) { - return computation_name_uniquer_.GetUniqueName(prefix); - } - // Returns the NameUniquer for uniquing instruction names in this module. 
NameUniquer& instruction_name_uniquer() { return instruction_name_uniquer_; } -- GitLab From 0f52f44bbd1fe0f1a7c97517fbe13f2eff5c2d0d Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Tue, 27 Feb 2018 16:53:54 -0800 Subject: [PATCH 216/884] Pull request for fixing warm-starting device placement (#17312) (#17314) * Update checkpoint_utils.py Fix device allocation bug for warm-starting op * Update checkpoint_utils_test.py Fix test --- tensorflow/python/training/checkpoint_utils.py | 6 +++++- tensorflow/python/training/checkpoint_utils_test.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/training/checkpoint_utils.py b/tensorflow/python/training/checkpoint_utils.py index fa3de6fad2..97f82ff23f 100644 --- a/tensorflow/python/training/checkpoint_utils.py +++ b/tensorflow/python/training/checkpoint_utils.py @@ -289,7 +289,11 @@ def _set_checkpoint_initializer(variable, name: Name of the operation. """ base_type = variable.dtype.base_dtype - with ops.colocate_with(variable): + # Do not colocate with variable since RestoreV2 op only runs on CPU and + # colocation will force variable (and other ops that colocate with variable) + # to be on CPU as well. It is okay to place the variable's initializer op on + # CPU since it will only be run once at the start. 
+ with ops.device(variable.device), ops.device("/cpu:0"): restore_op = io_ops.restore_v2( ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0] variable._initializer_op = state_ops.assign(variable, restore_op) # pylint:disable=protected-access diff --git a/tensorflow/python/training/checkpoint_utils_test.py b/tensorflow/python/training/checkpoint_utils_test.py index cd17faa040..710f00b9da 100644 --- a/tensorflow/python/training/checkpoint_utils_test.py +++ b/tensorflow/python/training/checkpoint_utils_test.py @@ -176,7 +176,9 @@ class CheckpointsTest(test.TestCase): checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"useful_scope/": "useful_scope/"}) - self.assertEqual(my4._initializer_op.op.inputs[1].device, "/job:ps") + # initializer runs on the same task but always on CPU. + self.assertEqual(my4._initializer_op.op.inputs[1].device, + "/job:ps/device:CPU:0") def testInitFromRootCheckpoint(self): checkpoint_dir = self.get_temp_dir() -- GitLab From 944423c12057e4a5215fade57c286237dca2b48c Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Tue, 27 Feb 2018 17:02:47 -0800 Subject: [PATCH 217/884] Move security.md into the right place. PiperOrigin-RevId: 187255784 --- tensorflow/SECURITY.md => SECURITY.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tensorflow/SECURITY.md => SECURITY.md (100%) diff --git a/tensorflow/SECURITY.md b/SECURITY.md similarity index 100% rename from tensorflow/SECURITY.md rename to SECURITY.md -- GitLab From 681327cd00822f9e7620cf8d95141a75447132f1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 17:13:19 -0800 Subject: [PATCH 218/884] Changed back to Shard for SplitV to get better performance. 
PiperOrigin-RevId: 187257148 --- tensorflow/core/kernels/split_v_op.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 16fa890780..51d96a17b3 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -236,8 +236,9 @@ class SplitVOpCPUImpl { }; if (use_parallelism_between_outputs) { // Run in parallel, disabling parallelism in functor. - context->device()->tensorflow_cpu_worker_threads()->workers->ParallelFor( - num_split, input_element_count / num_split, range_output_func); + Shard(num_split, + context->device()->tensorflow_cpu_worker_threads()->workers, + num_split, input_element_count / num_split, range_output_func); } else { // Run sequentially, but allow internal parallelism in functor. range_output_func(0, num_split); -- GitLab From 6585008f3dc3ca0f9163a0588b09379eab46c78a Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Tue, 27 Feb 2018 17:32:27 -0800 Subject: [PATCH 219/884] Add unit tests for context propagation in ThreadPool and a benchmark for ParallelFor. 
PiperOrigin-RevId: 187259233 --- tensorflow/core/BUILD | 1 + tensorflow/core/lib/core/threadpool_test.cc | 57 ++++++++++++++++++--- tensorflow/core/platform/default/context.h | 2 + 3 files changed, 53 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 1893967cdd..08832b58da 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -339,6 +339,7 @@ cc_library( "lib/strings/strcat.h", "lib/strings/stringprintf.h", "platform/abi.h", + "platform/context.h", "platform/cpu_feature_guard.h", "platform/cpu_info.h", "platform/dynamic_annotations.h", diff --git a/tensorflow/core/lib/core/threadpool_test.cc b/tensorflow/core/lib/core/threadpool_test.cc index 627ef5a892..320f3ebb83 100644 --- a/tensorflow/core/lib/core/threadpool_test.cc +++ b/tensorflow/core/lib/core/threadpool_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include +#include "tensorflow/core/platform/context.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/test.h" @@ -35,6 +36,7 @@ TEST(ThreadPool, Empty) { } TEST(ThreadPool, DoWork) { + Context outer_context(ContextKind::kThread); for (int num_threads = 1; num_threads < kNumThreads; num_threads++) { fprintf(stderr, "Testing with %d threads\n", num_threads); const int kWorkItems = 15; @@ -45,7 +47,9 @@ TEST(ThreadPool, DoWork) { { ThreadPool pool(Env::Default(), "test", num_threads); for (int i = 0; i < kWorkItems; i++) { - pool.Schedule([&work, i]() { + pool.Schedule([&outer_context, &work, i]() { + Context inner_context(ContextKind::kThread); + ASSERT_EQ(outer_context, inner_context); ASSERT_FALSE(work[i]); work[i] = true; }); @@ -58,6 +62,7 @@ TEST(ThreadPool, DoWork) { } TEST(ThreadPool, ParallelFor) { + Context outer_context(ContextKind::kThread); // Make ParallelFor use as many threads as possible. 
int64 kHugeCost = 1 << 30; for (int num_threads = 1; num_threads < kNumThreads; num_threads++) { @@ -68,12 +73,15 @@ TEST(ThreadPool, ParallelFor) { for (int i = 0; i < kWorkItems; i++) { work[i] = false; } - pool.ParallelFor(kWorkItems, kHugeCost, [&work](int64 begin, int64 end) { - for (int64 i = begin; i < end; ++i) { - ASSERT_FALSE(work[i]); - work[i] = true; - } - }); + pool.ParallelFor(kWorkItems, kHugeCost, + [&outer_context, &work](int64 begin, int64 end) { + Context inner_context(ContextKind::kThread); + ASSERT_EQ(outer_context, inner_context); + for (int64 i = begin; i < end; ++i) { + ASSERT_FALSE(work[i]); + work[i] = true; + } + }); for (int i = 0; i < kWorkItems; i++) { ASSERT_TRUE(work[i]); } @@ -167,5 +175,40 @@ static void BM_Parallel(int iters) { } BENCHMARK(BM_Parallel); +static void BM_ParallelFor(int iters, int total, int cost_per_unit) { + ThreadPool pool(Env::Default(), "test", kNumThreads); + // Decrement count concurrently until 0. + std::atomic_int_fast32_t count(iters); + mutex done_lock; + condition_variable done; + bool done_flag = false; + for (int i = 0; i < iters; ++i) { + pool.ParallelFor( + total, cost_per_unit, + [&count, &done_lock, &done, &done_flag](int64 begin, int64 end) { + for (int64 i = begin; i < end; ++i) { + if (count.fetch_sub(1) == 1) { + mutex_lock l(done_lock); + done_flag = true; + done.notify_all(); + } + } + }); + } + mutex_lock l(done_lock); + if (!done_flag) { + done.wait(l); + } +} +BENCHMARK(BM_ParallelFor) + ->ArgPair(1 << 10, 1) + ->ArgPair(1 << 20, 1) + ->ArgPair(1 << 10, 1 << 10) + ->ArgPair(1 << 20, 1 << 10) + ->ArgPair(1 << 10, 1 << 20) + ->ArgPair(1 << 20, 1 << 20) + ->ArgPair(1 << 10, 1 << 30) + ->ArgPair(1 << 20, 1 << 30); + } // namespace thread } // namespace tensorflow diff --git a/tensorflow/core/platform/default/context.h b/tensorflow/core/platform/default/context.h index d8afeb47a9..682f64c26d 100644 --- a/tensorflow/core/platform/default/context.h +++ 
b/tensorflow/core/platform/default/context.h @@ -22,6 +22,8 @@ class Context { public: Context() {} Context(const ContextKind kind) {} + + bool operator==(const Context& other) const { return true; } }; class WithContext { -- GitLab From 72bbc7f03b6bbd996f5bc4e14c29429612978974 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 18:01:13 -0800 Subject: [PATCH 220/884] Add fields to TfOpStats to store step-related information of some host operations. Also include the starting time of a device step in StepInfoResult. PiperOrigin-RevId: 187262025 --- .../contrib/tpu/profiler/tf_op_stats.proto | 52 +++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto index 2094294baa..e5c798aa2f 100644 --- a/tensorflow/contrib/tpu/profiler/tf_op_stats.proto +++ b/tensorflow/contrib/tpu/profiler/tf_op_stats.proto @@ -77,6 +77,8 @@ message StepInfoResult { // The infeed duration in picoseconds. // Can turn into a map if we want a variable number of ops. optional uint64 infeed_duration_ps = 3; + // The start time of this step in picoseconds. + optional uint64 begin_ps = 4; } // Result proto for a sequence of steps. @@ -155,6 +157,54 @@ message RunEnvironmentResult { repeated HostDependentJobInfoResult host_dependent_job_info = 6; } +// The types of host operations that are tracked. +enum HostOp { + // Invalid host op. + kINVALIDHostOp = 0; + // Each host op type has two parts: + // (1) the stage where the op happens and (2) the op name. + // stage = Input Data Producer, op = Get Next Batch. + kInputDataProducerGetNextBatch = 1; + // stage = Input Data Producer, op = Session Run. + kInputDataProducerSessionRun = 2; + // stage = Input Data Producer, op = Forward Batch. + kInputDataProducerForwardBatch = 3; + // stage = Infeed Thread, op = Get Next Batch. + kInfeedThreadGetNextBatch = 4; + // stage = Infeed Thread, op = Session Run.
+ kInfeedThreadSessionRun = 5; + // stage = Infeed Thread, op = Forward Batch. + kInfeedThreadForwardBatch = 6; + // stage = Outfeed Thread, op = Get Next Batch. + kOutfeedThreadGetNextBatch = 7; + // stage = Outfeed Thread, op = Session Run. + kOutfeedThreadSessionRun = 8; + // stage = Outfeed Thread, op = Forward Batch. + kOutfeedThreadForwardBatch = 9; +} + +// Result proto for the host ops per TPU step. +message HostOpsPerTpuStep { + // Whether the data in this message is valid. + optional bool valid = 1 [default = false]; + // The current TPU step number. + optional uint32 tpu_step_num = 2; + // The beginning time of the current TPU step on the device in picoseconds. + optional uint64 tpu_step_begin_ps = 3; + // The ending time of the current TPU step on the device in picoseconds. + optional uint64 tpu_step_end_ps = 4; + // For each possible host operation, maps to the difference between the TPU + // step number that the host op targets and the current TPU step number. + // The key is HostOp, value is the step difference. + map step_diffs = 5; +} + +// Result proto for the host ops for all TPU steps. +message HostOpsResult { + // A sequence of HostOpsPerTpuStep (one for each TPU step) + repeated HostOpsPerTpuStep host_op_sequence = 1; +} + // Result proto for TfStatsHelper. message TfOpStats { // The result for the TF-metric database. @@ -171,4 +221,6 @@ message TfOpStats { optional double matrix_unit_utilization_percent = 6; // The run environment of this profiling session. optional RunEnvironmentResult run_environment = 7; + // The result for the host operations. + optional HostOpsResult host_ops = 8; } -- GitLab From 887c54728f713ec76ea486c94c25dfca791a10c1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 19:09:38 -0800 Subject: [PATCH 221/884] Adopt Eigen::DenseIndex in lieu of int64 for a few variables (to appease compiler warnings/errors). 
PiperOrigin-RevId: 187268113 --- tensorflow/core/kernels/split_op.cc | 8 ++++---- tensorflow/core/kernels/split_v_op.cc | 4 ++-- tensorflow/core/kernels/unpack_op.cc | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/kernels/split_op.cc b/tensorflow/core/kernels/split_op.cc index 1bc92a4f70..7cc3c532c9 100644 --- a/tensorflow/core/kernels/split_op.cc +++ b/tensorflow/core/kernels/split_op.cc @@ -231,10 +231,10 @@ class SplitOpCPU : public SplitOpBase { if (prefix_dim_size == 1) { auto input_reshaped = input.shaped({split_dim_size, suffix_dim_size}); - auto make_sizes = [&](int64 split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return Eigen::DSizes{split_size, suffix_dim_size}; }; - auto reshape_result = [&](Tensor* result, int64 split_size) { + auto reshape_result = [&](Tensor* result, Eigen::DenseIndex split_size) { return result->shaped({split_size, suffix_dim_size}); }; SplitOpCPUImpl{}( @@ -244,11 +244,11 @@ class SplitOpCPU : public SplitOpBase { } else { auto input_reshaped = input.shaped( {prefix_dim_size, split_dim_size, suffix_dim_size}); - auto make_sizes = [&](int64 split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return Eigen::DSizes{prefix_dim_size, split_size, suffix_dim_size}; }; - auto reshape_result = [&](Tensor* result, int64 split_size) { + auto reshape_result = [&](Tensor* result, Eigen::DenseIndex split_size) { return result->shaped( {prefix_dim_size, split_size, suffix_dim_size}); }; diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 51d96a17b3..0681ff1198 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -293,7 +293,7 @@ class SplitVOpCPU : public SplitVOpBase { if (prefix_dim_size == 1) { auto input_reshaped = input.shaped({split_dim_size, suffix_dim_size}); - auto make_sizes = [&](Tlen split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return 
Eigen::DSizes{split_size, suffix_dim_size}; }; auto reshape_result = [&](Tensor* result, Tlen split_size) { @@ -306,7 +306,7 @@ class SplitVOpCPU : public SplitVOpBase { } else { auto input_reshaped = input.shaped( {prefix_dim_size, split_dim_size, suffix_dim_size}); - auto make_sizes = [&](Tlen split_size) { + auto make_sizes = [&](Eigen::DenseIndex split_size) { return Eigen::DSizes{prefix_dim_size, split_size, suffix_dim_size}; }; diff --git a/tensorflow/core/kernels/unpack_op.cc b/tensorflow/core/kernels/unpack_op.cc index 4376df34be..1e1647db5c 100644 --- a/tensorflow/core/kernels/unpack_op.cc +++ b/tensorflow/core/kernels/unpack_op.cc @@ -90,16 +90,16 @@ class UnpackOp : public OpKernel { } #endif // TENSORFLOW_USE_SYCL - int64 before_dim = 1; + Eigen::DenseIndex before_dim = 1; for (int i = 0; i < axis; ++i) { before_dim *= input_shape.dim_size(i); } - int64 after_dim = 1; + Eigen::DenseIndex after_dim = 1; for (int i = axis + 1; i < input_shape.dims(); ++i) { after_dim *= input_shape.dim_size(i); } - const int64 axis_dim = input_shape.dim_size(axis); + const Eigen::DenseIndex axis_dim = input_shape.dim_size(axis); // Except for shape, unpack is a special case of split, so we reuse the // same computational kernels. -- GitLab From f6bda409206dc642d7a6f02842e76b0be7234491 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Tue, 27 Feb 2018 19:11:43 -0800 Subject: [PATCH 222/884] [tf.data] Unify behavior for `Dataset.shuffle(..., seed=0)` and `Dataset.shuffle(..., seed=tf.constant(0, dtype=tf.int64))`. Previously, the Python integer argument would give a deterministic seeding, and the tf.Tensor argument would give a non-deterministic seeding when the graph seed was not set. This change fixes the behavior so that both versions give the same deterministic seeding. This change also applies the same fix to `tf.contrib.data.shuffle_and_repeat()` and `RandomDataset`. Fixes #17284. 
PiperOrigin-RevId: 187268252 --- .../contrib/data/python/ops/random_ops.py | 14 +--- .../contrib/data/python/ops/shuffle_ops.py | 14 +--- .../kernel_tests/shuffle_dataset_op_test.py | 27 ++++++ tensorflow/python/data/ops/BUILD | 1 + tensorflow/python/data/ops/dataset_ops.py | 13 +-- tensorflow/python/data/util/BUILD | 24 ++++++ tensorflow/python/data/util/random_seed.py | 58 +++++++++++++ .../python/data/util/random_seed_test.py | 83 +++++++++++++++++++ 8 files changed, 199 insertions(+), 35 deletions(-) create mode 100644 tensorflow/python/data/util/random_seed.py create mode 100644 tensorflow/python/data/util/random_seed_test.py diff --git a/tensorflow/contrib/data/python/ops/random_ops.py b/tensorflow/contrib/data/python/ops/random_ops.py index 7d727165fe..28ef5e50f3 100644 --- a/tensorflow/contrib/data/python/ops/random_ops.py +++ b/tensorflow/contrib/data/python/ops/random_ops.py @@ -19,11 +19,10 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import random_seed from tensorflow.python.data.util import sparse -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_dataset_ops @@ -34,16 +33,7 @@ class RandomDataset(dataset_ops.Dataset): def __init__(self, seed=None): """A `Dataset` of pseudorandom values.""" super(RandomDataset, self).__init__() - seed, seed2 = random_seed.get_seed(seed) - if seed is None: - self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") - else: - self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") - if seed2 is None: - self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") - else: - self._seed2 = ops.convert_to_tensor( - seed2, 
dtype=dtypes.int64, name="seed2") + self._seed, self._seed2 = random_seed.get_seed(seed) def _as_variant_tensor(self): return gen_dataset_ops.random_dataset( diff --git a/tensorflow/contrib/data/python/ops/shuffle_ops.py b/tensorflow/contrib/data/python/ops/shuffle_ops.py index 99bb79bc06..f35795abd3 100644 --- a/tensorflow/contrib/data/python/ops/shuffle_ops.py +++ b/tensorflow/contrib/data/python/ops/shuffle_ops.py @@ -19,11 +19,11 @@ from __future__ import print_function from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import random_seed from tensorflow.python.data.util import sparse from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.ops import gen_dataset_ops @@ -45,17 +45,7 @@ class _ShuffleAndRepeatDataset(dataset_ops.Dataset): else: self._count = ops.convert_to_tensor( count, dtype=dtypes.int64, name="count") - - seed, seed2 = random_seed.get_seed(seed) - if seed is None: - self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") - else: - self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") - if seed2 is None: - self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") - else: - self._seed2 = ops.convert_to_tensor( - seed2, dtype=dtypes.int64, name="seed2") + self._seed, self._seed2 = random_seed.get_seed(seed) def _as_variant_tensor(self): # pylint: disable=protected-access diff --git a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py index c089fb08c1..5fcc48831f 100644 --- a/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/shuffle_dataset_op_test.py @@ -132,6 +132,33 @@ class ShuffleDatasetTest(test.TestCase): with 
self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testSeedZero(self): + """Test for same behavior when the seed is a Python or Tensor zero.""" + iterator = ( + dataset_ops.Dataset.range(10).shuffle(10, seed=0) + .make_one_shot_iterator()) + get_next = iterator.get_next() + + elems = [] + with self.test_session() as sess: + for _ in range(10): + elems.append(sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + seed_placeholder = array_ops.placeholder(dtypes.int64, shape=[]) + iterator = ( + dataset_ops.Dataset.range(10).shuffle(10, seed=seed_placeholder) + .make_initializable_iterator()) + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(iterator.initializer, feed_dict={seed_placeholder: 0}) + for elem in elems: + self.assertEqual(elem, sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + def testDefaultArguments(self): components = [0, 1, 2, 3, 4] iterator = (dataset_ops.Dataset.from_tensor_slices(components).shuffle(5) diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index f12b358a7d..dc293562ab 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -23,6 +23,7 @@ py_library( "//tensorflow/python:tensor_util", "//tensorflow/python:util", "//tensorflow/python/data/util:nest", + "//tensorflow/python/data/util:random_seed", "//tensorflow/python/data/util:sparse", "//third_party/py/numpy", ], diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 3fb1f8d547..5751f35fe1 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -26,13 +26,13 @@ import six from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.util import nest +from tensorflow.python.data.util import random_seed from tensorflow.python.data.util import sparse from tensorflow.python.eager 
import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import function from tensorflow.python.framework import ops -from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util @@ -1484,16 +1484,7 @@ class ShuffleDataset(Dataset): self._input_dataset = input_dataset self._buffer_size = ops.convert_to_tensor( buffer_size, dtype=dtypes.int64, name="buffer_size") - seed, seed2 = random_seed.get_seed(seed) - if seed is None: - self._seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") - else: - self._seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") - if seed2 is None: - self._seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") - else: - self._seed2 = ops.convert_to_tensor( - seed2, dtype=dtypes.int64, name="seed2") + self._seed, self._seed2 = random_seed.get_seed(seed) if reshuffle_each_iteration is None: self._reshuffle_each_iteration = True else: diff --git a/tensorflow/python/data/util/BUILD b/tensorflow/python/data/util/BUILD index e32c7b54a4..b1bdbdab37 100644 --- a/tensorflow/python/data/util/BUILD +++ b/tensorflow/python/data/util/BUILD @@ -86,6 +86,30 @@ py_test( ], ) +py_library( + name = "random_seed", + srcs = ["random_seed.py"], + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:framework", + ], +) + +py_test( + name = "random_seed_test", + size = "small", + srcs = ["random_seed_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":random_seed", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:util", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git 
a/tensorflow/python/data/util/random_seed.py b/tensorflow/python/data/util/random_seed.py new file mode 100644 index 0000000000..e2c9d8672f --- /dev/null +++ b/tensorflow/python/data/util/random_seed.py @@ -0,0 +1,58 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utilities for generating Tensor-valued random seeds.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops + + +def get_seed(seed): + """Returns the local seeds an operation should use given an op-specific seed. + + See @{tf.get_seed} for more details. This wrapper adds support for the case + where `seed` may be a tensor. + + Args: + seed: An integer or a @{tf.int64} scalar tensor. + + Returns: + A tuple of two @{tf.int64} scalar tensors that should be used for the local + seed of the calling dataset. 
+ """ + seed, seed2 = random_seed.get_seed(seed) + if seed is None: + seed = constant_op.constant(0, dtype=dtypes.int64, name="seed") + else: + seed = ops.convert_to_tensor(seed, dtype=dtypes.int64, name="seed") + if seed2 is None: + seed2 = constant_op.constant(0, dtype=dtypes.int64, name="seed2") + else: + with ops.name_scope("seed2") as scope: + seed2 = ops.convert_to_tensor(seed2, dtype=dtypes.int64) + seed2 = array_ops.where( + math_ops.logical_and( + math_ops.equal(seed, 0), math_ops.equal(seed2, 0)), + constant_op.constant(2**31 - 1, dtype=dtypes.int64), + seed2, + name=scope) + return seed, seed2 diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py new file mode 100644 index 0000000000..c3a2dc0537 --- /dev/null +++ b/tensorflow/python/data/util/random_seed_test.py @@ -0,0 +1,83 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for utilities for generating Tensor-valued random seeds.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.util import random_seed as data_random_seed +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import random_seed +from tensorflow.python.framework import test_util +from tensorflow.python.platform import test + + +class RandomSeedTest(test.TestCase): + + @test_util.run_in_graph_and_eager_modes() + def testRandomSeed(self): + zero_t = constant_op.constant(0, dtype=dtypes.int64, name='zero') + one_t = constant_op.constant(1, dtype=dtypes.int64, name='one') + intmax_t = constant_op.constant( + 2**31 - 1, dtype=dtypes.int64, name='intmax') + test_cases = [ + # Each test case is a tuple with input to get_seed: + # (input_graph_seed, input_op_seed) + # and output from get_seed: + # (output_graph_seed, output_op_seed) + ((None, None), (0, 0)), + ((None, 1), (random_seed.DEFAULT_GRAPH_SEED, 1)), + ((1, 1), (1, 1)), + ((0, 0), (0, 2**31 - 1)), # Avoid nondeterministic (0, 0) output + ((2**31 - 1, 0), (0, 2**31 - 1)), # Don't wrap to (0, 0) either + ((0, 2**31 - 1), (0, 2**31 - 1)), # Wrapping for the other argument + # Once more, with tensor-valued arguments + ((None, one_t), (random_seed.DEFAULT_GRAPH_SEED, 1)), + ((1, one_t), (1, 1)), + ((0, zero_t), (0, 2**31 - 1)), # Avoid nondeterministic (0, 0) output + ((2**31 - 1, zero_t), (0, 2**31 - 1)), # Don't wrap to (0, 0) either + ((0, intmax_t), (0, 2**31 - 1)), # Wrapping for the other argument + ] + for tc in test_cases: + tinput, toutput = tc[0], tc[1] + random_seed.set_random_seed(tinput[0]) + g_seed, op_seed = data_random_seed.get_seed(tinput[1]) + g_seed
= self.evaluate(g_seed) + op_seed = self.evaluate(op_seed) + msg = 'test_case = {0}, got {1}, want {2}'.format( + tinput, (g_seed, op_seed), toutput) + self.assertEqual((g_seed, op_seed), toutput, msg=msg) + random_seed.set_random_seed(None) + + if context.in_graph_mode(): + random_seed.set_random_seed(1) + tinput = (1, None) + toutput = (1, ops.get_default_graph()._last_id) # pylint: disable=protected-access + random_seed.set_random_seed(tinput[0]) + g_seed, op_seed = data_random_seed.get_seed(tinput[1]) + g_seed = self.evaluate(g_seed) + op_seed = self.evaluate(op_seed) + msg = 'test_case = {0}, got {1}, want {2}'.format(1, (g_seed, op_seed), + toutput) + self.assertEqual((g_seed, op_seed), toutput, msg=msg) + random_seed.set_random_seed(None) + + +if __name__ == '__main__': + test.main() -- GitLab From 891bf22087c271b26325c3f81e4ef08b6b8af6c1 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 27 Feb 2018 19:31:17 -0800 Subject: [PATCH 223/884] Cleanup post moving record gradient to C - Remove unnecessary tuple build (when not needed) - Stop passing record gradient from python PiperOrigin-RevId: 187269557 --- .../python/eager/python_eager_op_gen.cc | 6 +-- tensorflow/python/eager/pywrap_tfe.h | 9 ++-- tensorflow/python/eager/pywrap_tfe_src.cc | 46 ++++++++--------- tensorflow/python/eager/pywrap_tfe_test.py | 49 +++++++++---------- 4 files changed, 49 insertions(+), 61 deletions(-) diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index e6d03297e0..554e29c7e0 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -712,9 +712,9 @@ bool GenEagerPythonOp::AddEagerFallbackCode( } void GenEagerPythonOp::AddEagerFastPathExecute() { - string fastpath_execute_params = strings::StrCat( - "_ctx._handle, _ctx.device_name, \"", op_def_.name(), "\", ", - "_execute.record_gradient, name, _ctx._post_execution_callbacks"); + string fastpath_execute_params = 
+ strings::StrCat("_ctx._handle, _ctx.device_name, \"", op_def_.name(), + "\", ", "name, _ctx._post_execution_callbacks"); string fallback_params; for (int i = 0; i < api_def_.in_arg_size(); i++) { diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index f9692a8910..b1b4a6b214 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -160,13 +160,10 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, // Item 2: device_name: Name of the device on which to execute the operation, // or NULL for automatic selection. // Item 3: op_name: Name of the TensorFlow op to execute. -// Item 4: record_gradient_callback: Callback that records the gradient of the -// result. The callback takes (op_name, inputs, attrs, result, name) -// - all sequences and records the gradient. -// Item 5: name: An optional name for the operation. -// Item 6: List representing all callbacks to execute after successful +// Item 4: name: An optional name for the operation. +// Item 5: List representing all callbacks to execute after successful // op execute. -// Item 7 onwards: inputs - This is a list of inputs followed by a list of +// Item 6 onwards: inputs - This is a list of inputs followed by a list of // attrs. It is not necessary for type attrs to be present. // // This is named _C since there doesn't seem to be any way to make it visible diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 30e08c8e65..42d97dfe3f 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -31,6 +31,7 @@ limitations under the License. 
#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/python/eager/pywrap_tensor.h" +#include "tensorflow/python/lib/core/safe_ptr.h" using tensorflow::string; using tensorflow::strings::Printf; @@ -1364,7 +1365,7 @@ PyObject* TFE_Py_TapeGradient(PyObject* tape, PyObject* vspace, } namespace { -static const int kFastPathExecuteInputStartIndex = 6; +static const int kFastPathExecuteInputStartIndex = 5; PyObject* GetPythonObjectFromString(const char* s) { #if PY_MAJOR_VERSION >= 3 @@ -1621,46 +1622,43 @@ bool RunCallbacks(bool run_gradient_callback, bool run_post_exec_callbacks, const std::vector& flattened_inputs, const std::vector& flattened_attrs, PyObject* flattened_result, PyObject* op_name, PyObject* name, - PyObject* record_gradient_callback, PyObject* callbacks) { - PyObject* inputs = PyTuple_New(flattened_inputs.size()); + PyObject* callbacks) { + tensorflow::Safe_PyObjectPtr inputs = + tensorflow::make_safe(PyTuple_New(flattened_inputs.size())); for (int i = 0; i < flattened_inputs.size(); i++) { PyObject* input = flattened_inputs[i]; Py_INCREF(input); - PyTuple_SET_ITEM(inputs, i, input); + PyTuple_SET_ITEM(inputs.get(), i, input); } int num_non_inferred_attrs = PyTuple_GET_SIZE(args) - op_def->input_arg_size() - kFastPathExecuteInputStartIndex; int num_attrs = flattened_attrs.size() + num_non_inferred_attrs; - PyObject* attrs = PyTuple_New(num_attrs); + tensorflow::Safe_PyObjectPtr attrs = + tensorflow::make_safe(PyTuple_New(num_attrs)); for (int i = 0; i < num_non_inferred_attrs; i++) { auto* attr = PyTuple_GET_ITEM( args, kFastPathExecuteInputStartIndex + op_def->input_arg_size() + i); Py_INCREF(attr); - PyTuple_SET_ITEM(attrs, i, attr); + PyTuple_SET_ITEM(attrs.get(), i, attr); } for (int i = num_non_inferred_attrs; i < num_attrs; i++) { // Not INCREFing anything in flattened_attrs as each of those is a new // reference, so allow the attrs tuple to steal the reference. 
- PyTuple_SET_ITEM(attrs, i, flattened_attrs.at(i - num_non_inferred_attrs)); + PyTuple_SET_ITEM(attrs.get(), i, + flattened_attrs.at(i - num_non_inferred_attrs)); } - PyObject* callback_args = - Py_BuildValue("OOOOO", op_name, inputs, attrs, flattened_result, name); - - auto cleaner = tensorflow::gtl::MakeCleanup([inputs, attrs, callback_args] { - Py_DECREF(inputs); - Py_DECREF(attrs); - Py_DECREF(callback_args); - }); - if (run_gradient_callback) { - RecordGradient(op_name, inputs, attrs, flattened_result, name); + RecordGradient(op_name, inputs.get(), attrs.get(), flattened_result, name); } if (run_post_exec_callbacks) { + tensorflow::Safe_PyObjectPtr callback_args = tensorflow::make_safe( + Py_BuildValue("OOOOO", op_name, inputs.get(), attrs.get(), + flattened_result, name)); for (Py_ssize_t i = 0; i < PyList_Size(callbacks); i++) { PyObject* callback_fn = PyList_GET_ITEM(callbacks, i); if (!PyCallable_Check(callback_fn)) { @@ -1673,7 +1671,7 @@ bool RunCallbacks(bool run_gradient_callback, bool run_post_exec_callbacks, return false; } PyObject* callback_result = - PyObject_CallObject(callback_fn, callback_args); + PyObject_CallObject(callback_fn, callback_args.get()); if (!callback_result) { return false; } @@ -1703,9 +1701,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyObject* op_name = PyTuple_GET_ITEM(args, 2); const tensorflow::OpDef* op_def = GetOpDef(op_name); if (op_def == nullptr) return nullptr; - PyObject* record_gradient_callback = PyTuple_GET_ITEM(args, 3); - PyObject* name = PyTuple_GET_ITEM(args, 4); - PyObject* callbacks = PyTuple_GET_ITEM(args, 5); + PyObject* name = PyTuple_GET_ITEM(args, 3); + PyObject* callbacks = PyTuple_GET_ITEM(args, 4); if (args_size < kFastPathExecuteInputStartIndex + op_def->input_arg_size()) { PyErr_SetString( @@ -1775,9 +1772,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { // (similar to benchmark_tf_gradient_function_*). 
Also consider using an // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks // point out problems with heap allocs. - bool run_gradient_callback = !*ThreadTapeIsStopped() && - !GetTapeSet()->empty() && - record_gradient_callback != Py_None; + bool run_gradient_callback = + !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); bool run_post_exec_callbacks = callbacks != Py_None && PyList_Size(callbacks) > 0; bool run_callbacks = run_gradient_callback || run_post_exec_callbacks; @@ -1916,7 +1912,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { if (run_callbacks && !RunCallbacks(run_gradient_callback, run_post_exec_callbacks, op_def, args, *flattened_inputs, *flattened_attrs, flat_result, - op_name, name, record_gradient_callback, callbacks)) { + op_name, name, callbacks)) { return nullptr; } diff --git a/tensorflow/python/eager/pywrap_tfe_test.py b/tensorflow/python/eager/pywrap_tfe_test.py index 49323e6640..418ed75178 100644 --- a/tensorflow/python/eager/pywrap_tfe_test.py +++ b/tensorflow/python/eager/pywrap_tfe_test.py @@ -21,7 +21,6 @@ from __future__ import print_function from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context -from tensorflow.python.eager import execute from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import test_util @@ -46,15 +45,13 @@ class Tests(test.TestCase): self.assertAllClose( math_ops.matmul(a_2_by_2, b_2_by_2), pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, - None, None, a_2_by_2, b_2_by_2, "transpose_a", False, "transpose_b", - False)) + ctx._handle, ctx.device_name, "MatMul", None, None, a_2_by_2, + b_2_by_2, "transpose_a", False, "transpose_b", False)) self.assertAllClose( math_ops.matmul(a_100_by_784, b_100_by_784, transpose_b=True), pywrap_tensorflow.TFE_Py_FastPathExecute( 
- ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, - None, None, a_100_by_784, b_100_by_784, "transpose_a", False, - "transpose_b", True)) + ctx._handle, ctx.device_name, "MatMul", None, None, a_100_by_784, + b_100_by_784, "transpose_a", False, "transpose_b", True)) @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created @@ -64,8 +61,8 @@ class Tests(test.TestCase): a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) tape.watch(a_2_by_2) z = pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, None, - None, a_2_by_2, a_2_by_2, "transpose_a", False, "transpose_b", False) + ctx._handle, ctx.device_name, "MatMul", None, None, a_2_by_2, + a_2_by_2, "transpose_a", False, "transpose_b", False) dz_dy = tape.gradient(z, [a_2_by_2])[0] self.assertAllEqual(dz_dy.numpy(), constant_op.constant(4.0, shape=[2, 2]).numpy()) @@ -80,9 +77,9 @@ class Tests(test.TestCase): self.assertAllClose( math_ops.add_n([a_2_by_2, b_2_by_2]), - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "AddN", execute.record_gradient, None, - None, [a_2_by_2, b_2_by_2])) + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx._handle, ctx.device_name, + "AddN", None, None, + [a_2_by_2, b_2_by_2])) # Tests homogeneous list op @test_util.assert_no_new_tensors @@ -96,8 +93,8 @@ class Tests(test.TestCase): tape.watch(a_2_by_2) tape.watch(b_2_by_2) z1 = pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "AddN", execute.record_gradient, None, - None, [a_2_by_2, b_2_by_2]) + ctx._handle, ctx.device_name, "AddN", None, None, + [a_2_by_2, b_2_by_2]) z2 = math_ops.add_n([a_2_by_2, b_2_by_2]) dz1_dy = tape.gradient(z1, [a_2_by_2])[0] dz2_dy = tape.gradient(z2, [a_2_by_2])[0] @@ -113,9 +110,9 @@ class Tests(test.TestCase): self.assertAllClose( array_ops.identity_n([a_2_by_2, b_2_by_2]), - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "IdentityN", 
execute.record_gradient, - None, None, [a_2_by_2, b_2_by_2])) + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx._handle, ctx.device_name, + "IdentityN", None, None, + [a_2_by_2, b_2_by_2])) # Tests heterogeneous list op @test_util.assert_no_new_tensors @@ -129,8 +126,8 @@ class Tests(test.TestCase): tape.watch(a_2_by_2) tape.watch(b_2_by_2) z1 = pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "IdentityN", execute.record_gradient, - None, None, [a_2_by_2, b_2_by_2]) + ctx._handle, ctx.device_name, "IdentityN", None, None, + [a_2_by_2, b_2_by_2]) z2 = array_ops.identity_n([a_2_by_2, b_2_by_2]) dz1_dy = tape.gradient(z1[0], [a_2_by_2])[0] dz2_dy = tape.gradient(z2[0], [a_2_by_2])[0] @@ -147,22 +144,20 @@ class Tests(test.TestCase): # Not enough base params with self.assertRaisesRegexp(ValueError, - "at least 6 items in the input tuple"): + "at least 5 items in the input tuple"): pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name, "Identity") # Not enough inputs with self.assertRaisesRegexp(ValueError, - "Expected to be at least 7, was 6"): - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx_handle, ctx_handle, "Identity", backprop._record_gradient, None, - []) + "Expected to be at least 6, was 5"): + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx_handle, + "Identity", None, []) # Bad type with self.assertRaisesRegexp(TypeError, "expected a string for op_name"): - pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx_handle, ctx.device_name, ctx_handle, backprop._record_gradient, - None, [], a_2_by_2) + pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name, + ctx_handle, None, [], a_2_by_2) if __name__ == "__main__": -- GitLab From ae4c23db58c6436786bbcdea4a15aa814d642220 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 20:16:16 -0800 Subject: [PATCH 224/884] Improve handling of undefined split_dim_tensor in the split_v op. 
PiperOrigin-RevId: 187272486 --- tensorflow/core/kernels/split_v_op.cc | 7 ++++++- tensorflow/python/kernel_tests/split_op_test.py | 14 ++++++++++++++ tensorflow/python/ops/array_ops.py | 4 +++- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 0681ff1198..0ce0b552e6 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -55,8 +55,13 @@ class SplitVOpBase : public OpKernel { const Tensor& input = context->input(0); const TensorShape& input_shape = input.shape(); const Tensor& split_tensor = context->input(1); + const Tensor& split_dim_tensor = context->input(2); - const int32 split_dim_orig = context->input(2).flat()(0); + OP_REQUIRES(context, split_dim_tensor.NumElements() == 1, + errors::InvalidArgument("split_dim_tensor must have " + "exactly one element.")); + + const int32 split_dim_orig = split_dim_tensor.flat()(0); const int32 split_dim = split_dim_orig < 0 ? 
split_dim_orig + input.dims() : split_dim_orig; diff --git a/tensorflow/python/kernel_tests/split_op_test.py b/tensorflow/python/kernel_tests/split_op_test.py index 6171793b14..8cfee3eb93 100644 --- a/tensorflow/python/kernel_tests/split_op_test.py +++ b/tensorflow/python/kernel_tests/split_op_test.py @@ -336,6 +336,20 @@ class SplitOpTest(test.TestCase): for s in splits: self.assertEqual(None, s.get_shape().ndims) + def testNonexistentDimTensor(self): + x = array_ops.placeholder(dtypes.int32) + values = np.zeros([5, 30]) + splits = array_ops.placeholder(dtypes.int32) + with self.assertRaisesRegexp(ValueError, "Cannot infer"): + y = array_ops.split(values, splits, axis=x) + + splits = array_ops.placeholder(dtypes.int32, [3]) + y = array_ops.split(values, splits, axis=x) + with self.test_session(use_gpu=True) as sess: + with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, + "must have exactly one element"): + sess.run(y, {x: np.array([], dtype=np.int32), splits: [4, 11, 15]}) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index cdfb955f54..3db3d84475 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1380,7 +1380,9 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"): axis=axis, num_split=num_or_size_splits, value=value, name=name) if num is None: - num = size_splits._shape_tuple()[0] + size_splits_shape = size_splits._shape_tuple() + if size_splits_shape: + num = size_splits_shape[0] if num is None: raise ValueError("Cannot infer num from shape %s" % num_or_size_splits) -- GitLab From c38a16dbcc5de5fa5579a3e48ec12be316a2cb3f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 27 Feb 2018 21:24:24 -0800 Subject: [PATCH 225/884] Adds poisson_regression_head. 
PiperOrigin-RevId: 187277651 --- tensorflow/contrib/estimator/BUILD | 2 + .../estimator/python/estimator/head.py | 61 ++++++++++++++++ .../estimator/python/estimator/head_test.py | 71 +++++++++++++++++++ 3 files changed, 134 insertions(+) diff --git a/tensorflow/contrib/estimator/BUILD b/tensorflow/contrib/estimator/BUILD index ddccfce3c0..773c6ab6c7 100644 --- a/tensorflow/contrib/estimator/BUILD +++ b/tensorflow/contrib/estimator/BUILD @@ -170,6 +170,7 @@ py_library( "//tensorflow/python:lookup_ops", "//tensorflow/python:math_ops", "//tensorflow/python:metrics", + "//tensorflow/python:nn", "//tensorflow/python:sparse_ops", "//tensorflow/python:sparse_tensor", "//tensorflow/python:summary", @@ -192,6 +193,7 @@ py_test( ":head", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", + "//tensorflow/python:check_ops", "//tensorflow/python:client_testlib", "//tensorflow/python:constant_op", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/contrib/estimator/python/estimator/head.py b/tensorflow/contrib/estimator/python/estimator/head.py index a45f6934cc..f95fcc8039 100644 --- a/tensorflow/contrib/estimator/python/estimator/head.py +++ b/tensorflow/contrib/estimator/python/estimator/head.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import lookup_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib +from tensorflow.python.ops import nn from tensorflow.python.ops import sparse_ops from tensorflow.python.ops.losses import losses from tensorflow.python.saved_model import signature_constants @@ -237,6 +238,66 @@ def regression_head(weight_column=None, name=name) +def poisson_regression_head( + weight_column=None, + label_dimension=1, + loss_reduction=losses.Reduction.SUM, + compute_full_loss=True, + name=None): + """Creates a `_Head` for poisson regression using `tf.nn.log_poisson_loss`. + + The loss is the weighted sum over all input dimensions. 
Namely, if the input + labels have shape `[batch_size, label_dimension]`, the loss is the weighted + sum over both `batch_size` and `label_dimension`. + + The head expects `logits` with shape `[D0, D1, ... DN, label_dimension]`. + In many applications, the shape is `[batch_size, label_dimension]`. + + The `labels` shape must match `logits`, namely + `[D0, D1, ... DN, label_dimension]`. If `label_dimension=1`, shape + `[D0, D1, ... DN]` is also supported. + + If `weight_column` is specified, weights must be of shape + `[D0, D1, ... DN]`, `[D0, D1, ... DN, 1]` or + `[D0, D1, ... DN, label_dimension]`. + + This is implemented as a generalized linear model, see + https://en.wikipedia.org/wiki/Generalized_linear_model. + + Args: + weight_column: A string or a `_NumericColumn` created by + `tf.feature_column.numeric_column` defining feature column representing + weights. It is used to down weight or boost examples during training. It + will be multiplied by the loss of the example. + label_dimension: Number of regression labels per example. This is the size + of the last dimension of the labels `Tensor` (typically, this has shape + `[batch_size, label_dimension]`). + loss_reduction: One of `tf.losses.Reduction` except `NONE`. Describes how to + reduce training loss over batch. Defaults to `SUM`. + compute_full_loss: Whether to include the constant `log(z!)` term in + computing the poisson loss. See `tf.nn.log_poisson_loss` for the full + documentation. + name: name of the head. If provided, summary and metrics keys will be + suffixed by `"/" + name`. Also used as `name_scope` when creating ops. + + Returns: + An instance of `_Head` for poisson regression. + + Raises: + ValueError: If `label_dimension` or `loss_reduction` is invalid. 
+ """ + def _poisson_loss(labels, logits): + return nn.log_poisson_loss( + targets=labels, log_input=logits, compute_full_loss=compute_full_loss) + return head_lib._regression_head_with_mean_squared_error_loss( # pylint:disable=protected-access + weight_column=weight_column, + label_dimension=label_dimension, + loss_reduction=loss_reduction, + loss_fn=_poisson_loss, + inverse_link_fn=math_ops.exp, + name=name) + + def multi_label_head(n_classes, weight_column=None, thresholds=None, diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index 1411635228..76d050cb28 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import string_ops @@ -1106,5 +1107,75 @@ class MultiLabelHead(test.TestCase): expected_metrics=expected_metrics) +class PoissonRegressionHead(test.TestCase): + + def setUp(self): + ops.reset_default_graph() + + def test_train(self): + head = head_lib.poisson_regression_head() + + # Create estimator spec. + logits = np.array([[0], [-1], [1]], dtype=np.float32) + labels = np.array([[1], [2], [3]], dtype=np.int32) + # With x = exp(logits), z = labels. + # loss = -ln(exp(-x) * (x^z) / z!) + # = x - z * ln(x) + ln(z!) + # = exp(logits) - labels * logits + ln(labels!) + # But for ln(z!) and z > 1, the Stirling approximation is used + # ln(z!)
= z*ln(z) - z + 0.5*ln(2*pi*z) + # loss = [exp(0) - 1 * 0 + ln(1!), + # exp(-1) - 2 * (-1) + 2*ln(2) - 2 + 0.5*ln(2*pi*2), + # exp(1) - 3 * 1 + 3*ln(3) - 3 + 0.5*ln(2*pi*3)] + # = [1.0, 3.020, 1.482] + # sum_loss = 5.502 + expected_loss = 5.502 + atol = 0.001 + expected_train_result = b'my_train_op' + def _train_op_fn(loss): + with ops.control_dependencies((check_ops.assert_near( + math_ops.to_float(expected_loss), math_ops.to_float(loss), + atol=atol, name='assert_loss'),)): + return constant_op.constant(expected_train_result) + + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.TRAIN, + logits=logits, + labels=labels, + train_op_fn=_train_op_fn) + + with self.test_session() as sess: + _initialize_variables(self, spec.scaffold) + loss, train_result = sess.run([spec.loss, spec.train_op]) + self.assertAlmostEqual(expected_loss, loss, delta=atol) + self.assertEqual(expected_train_result, train_result) + + def test_predict(self): + head = head_lib.poisson_regression_head() + + # Create estimator spec. + logits = np.array([[0], [-1], [1]], dtype=np.float32) + expected_predictions = np.exp(logits) + spec = head.create_estimator_spec( + features={'x': np.array(((42.,),), dtype=np.int32)}, + mode=model_fn.ModeKeys.PREDICT, + logits=logits) + + # Assert spec contains expected tensors. + keys = prediction_keys.PredictionKeys + self.assertItemsEqual( + (keys.PREDICTIONS, keys.LOGITS), spec.predictions.keys()) + self.assertEqual(dtypes.float32, spec.predictions[keys.PREDICTIONS].dtype) + self.assertEqual(dtypes.float32, spec.predictions[keys.LOGITS].dtype) + + # Assert predictions. 
+ with self.test_session(): + _initialize_variables(self, spec.scaffold) + self.assertAllClose( + expected_predictions, spec.predictions[keys.PREDICTIONS].eval()) + self.assertAllClose(logits, spec.predictions[keys.LOGITS].eval()) + + if __name__ == '__main__': test.main() -- GitLab From 503d9b522e28272e032bc45a10e3c0f21398a16e Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Wed, 28 Feb 2018 00:07:55 -0800 Subject: [PATCH 226/884] [XLA:Evaluator] Handle while loop. * Add while loop support to HloEvaluator; * Add a max_loop_iteration argument to the interpreter's constructor to limit the number of loop iterations that will be evaluated (or no bound if -1). Maintain current constant propagation behavior by setting limit to 0 for evaluators used for CP. PiperOrigin-RevId: 187287574 --- .../xla/service/hlo_constant_folding.cc | 5 ++- .../compiler/xla/service/hlo_evaluator.cc | 41 ++++++++++++++++--- .../compiler/xla/service/hlo_evaluator.h | 10 ++++- .../xla/service/while_loop_simplifier.cc | 2 +- tensorflow/compiler/xla/tests/BUILD | 3 ++ tensorflow/compiler/xla/tests/while_test.cc | 4 +- 6 files changed, 55 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_constant_folding.cc b/tensorflow/compiler/xla/service/hlo_constant_folding.cc index 53450991b6..35ecd4428d 100644 --- a/tensorflow/compiler/xla/service/hlo_constant_folding.cc +++ b/tensorflow/compiler/xla/service/hlo_constant_folding.cc @@ -35,7 +35,10 @@ limitations under the License. namespace xla { StatusOr HloConstantFolding::Run(HloModule* module) { - auto evaluator = MakeUnique(); + // Limit the constant folding to 0 iterations to skip folding loops. This + // retains the behavior from before while loop support in HloEvaluator and may + // be revised. 
+ auto evaluator = MakeUnique(/*max_loop_iterations=*/0); XLA_VLOG_LINES(2, "HloConstantFolding::Run(), before:\n" + module->ToString()); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 8c7459099d..c3a3251b7d 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1372,7 +1372,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = Literal::CreateFromShape(map->shape()); - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); TF_RETURN_IF_ERROR( result->Populate([&](ArraySlice multi_index) { std::vector> arg_literals; @@ -1507,7 +1507,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { } } - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); // For each resulting dimension, calculate and assign computed value. TF_RETURN_IF_ERROR( result->Populate([&](ArraySlice multi_index) { @@ -1581,7 +1581,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { int64 rank = ShapeUtil::Rank(operand_literal.shape()); - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); DimensionVector source_index(rank); std::fill(source_index.begin(), source_index.end(), 0); @@ -1692,7 +1692,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { DimensionVector window_index(window.dimensions_size()); DimensionVector operand_index(ShapeUtil::Rank(operand_literal.shape())); - HloEvaluator embedded_evaluator; + HloEvaluator embedded_evaluator(parent_->max_loop_iterations_); // For each resulting dimension, calculate and assign computed value. 
TF_RETURN_IF_ERROR( result->Populate([&](ArraySlice output_index) { @@ -2069,7 +2069,8 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { HloEvaluator* parent_; }; // class HloEvaluator::TypedVisitor -HloEvaluator::HloEvaluator() { +HloEvaluator::HloEvaluator(int64 max_loop_iterations) + : max_loop_iterations_(max_loop_iterations) { typed_visitors_[PRED] = MakeUnique>(this); typed_visitors_[U8] = MakeUnique>(this); typed_visitors_[U16] = MakeUnique([](HloInstruction*) { @@ -2511,6 +2512,36 @@ Status HloEvaluator::HandleConditional(HloInstruction* conditional) { return Status::OK(); } +Status HloEvaluator::HandleWhile(HloInstruction* while_hlo) { + HloComputation* cond_comp = while_hlo->while_condition(); + HloComputation* body_comp = while_hlo->while_body(); + // Initialize the loop-carried value with the input to the While instruction. + auto lcv = GetEvaluatedLiteralFor(while_hlo->operand(0)).CloneToUnique(); + bool keep_going = true; + int64 iteration_count = 0; + HloEvaluator cond_evaluator(max_loop_iterations_); + HloEvaluator loop_body_evaluator(max_loop_iterations_); + while (keep_going) { + if (max_loop_iterations_ >= 0 && iteration_count++ > max_loop_iterations_) { + return InvalidArgument("Loop %s exceeded loop iteration limit (%lld).", + while_hlo->name().c_str(), max_loop_iterations_); + } + TF_ASSIGN_OR_RETURN(auto cond_val, cond_evaluator.Evaluate( + *cond_comp, {lcv.get()})); + keep_going = cond_val->GetFirstElement(); + if (keep_going) { + TF_ASSIGN_OR_RETURN(auto body_val, loop_body_evaluator.Evaluate( + *body_comp, {lcv.get()})); + VLOG(3) << "Loop iteration result: " << body_val->ToString(); + lcv = std::move(body_val); + cond_evaluator.ResetVisitStates(); + loop_body_evaluator.ResetVisitStates(); + } + } + evaluated_[while_hlo] = std::move(lcv); + return Status::OK(); +} + Status HloEvaluator::Preprocess(HloInstruction* hlo) { VLOG(2) << "About to visit HLO: " << hlo->ToString(); return Status::OK(); diff --git
a/tensorflow/compiler/xla/service/hlo_evaluator.h b/tensorflow/compiler/xla/service/hlo_evaluator.h index fc82011630..8a27cf9a3a 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -36,7 +36,10 @@ namespace xla { // This class is not thread-safe. class HloEvaluator : public DfsHloVisitorWithDefault { public: - HloEvaluator(); + // Only evaluate up to max_loop_iterations per while-loop execution if + // specified. + explicit HloEvaluator(int64 max_loop_iterations = -1); + // Evaluates an HLO module and an array of pointers to literals. // Returns the evaluated result as a literal if successful. // Precondition: The indices of arg_literals correspond to the parameter @@ -157,6 +160,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleCall(HloInstruction* call) override; + Status HandleWhile(HloInstruction* while_hlo) override; + private: // Returns the already-evaluated literal result for the instruction. // A Constant instruction is considered evaluated and its literal will be @@ -194,6 +199,9 @@ class HloEvaluator : public DfsHloVisitorWithDefault { // Must be cleared for each evaluation. std::vector arg_literals_; + // Max loop iterations to execute with no maximum if negative. + int64 max_loop_iterations_; + TF_DISALLOW_COPY_AND_ASSIGN(HloEvaluator); }; diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 981de9b220..c9d77c9376 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -212,7 +212,7 @@ static optional GetLoopTripCount(HloInstruction* while_op) { // Now that we know the index of the induction variable, we can we can try to // compute how many times the loop executes. Start by computing the induction // variable's initial value. 
- HloEvaluator evaluator; + HloEvaluator evaluator(/*max_loop_iterations=*/0); auto* while_init = while_op->mutable_operand(0); auto* indvar_init = while_init->mutable_operand(*indvar_tuple_idx); StatusOr> indvar_init_result = diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 19b3dfae4e..dc282f2440 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -334,6 +334,9 @@ xla_test( xla_test( name = "while_test", srcs = ["while_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", diff --git a/tensorflow/compiler/xla/tests/while_test.cc b/tensorflow/compiler/xla/tests/while_test.cc index 52157b837c..33d457c70b 100644 --- a/tensorflow/compiler/xla/tests/while_test.cc +++ b/tensorflow/compiler/xla/tests/while_test.cc @@ -910,7 +910,7 @@ XLA_TEST_F(WhileTest, WhileWithDynamicUpdateSlice) { // Per backend the values generated can be different as the different backends // use different random number generators. // TODO(b/32240857): Extend test to verify outputs. -TEST_F(WhileTest, WhileWithPrngScalarResult) { +TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithPrngScalarResult)) { auto v6s32 = ShapeUtil::MakeShape(S32, {6}); // Create a computation for the condition: repeat for count iterations. @@ -1166,7 +1166,7 @@ XLA_TEST_F(WhileTest, NestedWhileWithScalarResult) { // while (f(result).get<0>()) { // result = result + 1; // } -TEST_F(WhileTest, WhileWithCallInsideCondition) { +TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithCallInsideCondition)) { auto result_shape = ShapeUtil::MakeShape(S32, {}); // Create a computation for the condition: repeat for 5 iterations. -- GitLab From 6ac343bdfc942678d64dcbfc4d4fc90c0df6a4a0 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 28 Feb 2018 03:39:04 -0800 Subject: [PATCH 227/884] [TF:XLA] Fix SplitV implementation to support negative split_dim. Mirror behavior of Split op when a negative split_dim is used. PiperOrigin-RevId: 187304771 --- tensorflow/compiler/tests/binary_ops_test.py | 14 +++++++++++ .../compiler/tf2xla/kernels/split_op.cc | 23 ++++++++----------- 2 files changed, 24 insertions(+), 13 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 30a6d3a74d..0e4efaed86 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -1045,6 +1045,20 @@ class BinaryOpsTest(XLATestCase): ], equality_test=self.ListsAreClose) + def splitvOp(x, y): # pylint: disable=invalid-name + return array_ops.split(value=y, num_or_size_splits=[2, 3], axis=x) + for axis in [1, -1]: + self._testBinary( + splitvOp, + np.int32(axis), + np.array([[0, 1, 2, 3, 4], [5, 6, 7, 8, 9]], + dtype=dtype), + expected=[ + np.array([[0, 1], [5, 6]], dtype=dtype), + np.array([[2, 3, 4], [7, 8, 9]], dtype=dtype), + ], + equality_test=self.ListsAreClose) + def testTile(self): for dtype in self.numeric_types: self._testBinary( diff --git a/tensorflow/compiler/tf2xla/kernels/split_op.cc b/tensorflow/compiler/tf2xla/kernels/split_op.cc index 79c435c90a..43c15e7538 100644 --- a/tensorflow/compiler/tf2xla/kernels/split_op.cc +++ b/tensorflow/compiler/tf2xla/kernels/split_op.cc @@ -111,27 +111,24 @@ class SplitVOp : public XlaOpKernel { void Compile(XlaOpKernelContext* ctx) override { const int32 num_split = num_outputs(); + const TensorShape input_shape = ctx->InputShape(0); const TensorShape index_shape = ctx->InputShape(2); - xla::Literal literal_index; - OP_REQUIRES_OK(ctx, ctx->ConstantInput(2, &literal_index)); - int32 split_dim; - OP_REQUIRES(ctx, index_shape.dims() == 0, - errors::InvalidArgument("split_dim input to Split Op must be a " - "scalar")); - split_dim = 
literal_index.Get({}); + int64 split_dim_orig; + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntScalar(2, &split_dim_orig)); + int64 split_dim = split_dim_orig < 0 ? split_dim_orig + input_shape.dims() + : split_dim_orig; + OP_REQUIRES(ctx, 0 <= split_dim && split_dim < input_shape.dims(), + errors::InvalidArgument("-input rank(-", input_shape.dims(), + ") <= split_dim < input rank (", + input_shape.dims(), "), but got ", + split_dim_orig)); xla::ComputationDataHandle input = ctx->Input(0); - const TensorShape input_shape = ctx->InputShape(0); OP_REQUIRES(ctx, input_shape.dims() > 0, errors::InvalidArgument("Can't split a 0 dimensional input")); - OP_REQUIRES( - ctx, 0 <= split_dim && split_dim < input_shape.dims(), - errors::InvalidArgument("0 <= split_dim < number of input dimensions (", - input_shape.dims(), "), but got ", split_dim)); - OP_REQUIRES( ctx, num_split > 0, errors::InvalidArgument( -- GitLab From 19538075bb174ba315a8b2711e60238b5fb92805 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 04:17:53 -0800 Subject: [PATCH 228/884] Clarify tutorials/image_retraining regarding the use of Mobilenets: - The feature depth multiplier controls the dimension of intermediate activations ("number of neurons"). The size of weight matrices depends on its square. - Quantization with TF-Lite only occurs when its TOCO tool is run on the module. That is out of scope here, so discussion of quantization gets replaced by links to TF-Lite and part 2 of the "Poets" codelab. 
PiperOrigin-RevId: 187307400 --- .../docs_src/tutorials/image_retraining.md | 31 ++++++++++++------- 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tensorflow/docs_src/tutorials/image_retraining.md b/tensorflow/docs_src/tutorials/image_retraining.md index df15bc0a9c..246a420400 100644 --- a/tensorflow/docs_src/tutorials/image_retraining.md +++ b/tensorflow/docs_src/tutorials/image_retraining.md @@ -349,31 +349,32 @@ results, but if you intend to deploy your model on mobile devices or other resource-constrained environments you may want to trade off a little accuracy for much smaller file sizes or faster speeds. To help with that, the [retrain.py script](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/image_retraining/retrain.py) -supports 32 different variations on the [Mobilenet architecture](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html). +supports different variations on the [Mobilenet architecture](https://research.googleblog.com/2017/06/mobilenets-open-source-models-for.html). These are a little less precise than Inception v3, but can result in far -smaller file sizes (down to less than a megabyte) and can be many times faster +smaller file sizes (a few megabytes) and can be many times faster to run. To train with one of these models, pass in the `--architecture` flag, for example: ``` python tensorflow/examples/image_retraining/retrain.py \ - --image_dir ~/flower_photos --architecture mobilenet_0.25_128_quantized + --image_dir ~/flower_photos --architecture mobilenet_0.25_128 ``` -This will create a 941KB model file in `/tmp/output_graph.pb`, with 25% of the -parameters of the full Mobilenet, taking 128x128 sized input images, and with -its weights quantized down to eight bits on disk. 
You can choose '1.0', '0.75', -'0.50', or '0.25' to control the number of weight parameters, and so the file -size (and to some extent the speed), '224', '192', '160', or '128' for the input -image size, with smaller sizes giving faster speeds, and an optional -'_quantized' at the end to indicate whether the file should contain 8-bit or -32-bit float weights. +This will create a 1.9MB model file in `/tmp/output_graph.pb`, with only 25% of +the number of neurons of the full Mobilenet, and trained to take 128x128 sized +input images. + +You can choose '1.0', '0.75', '0.50', or '0.25' to control the number of +neurons (activations of hidden layers); the number of weights (and hence to +some extent the file size and speed) shrinks like the square of that fraction. +You can choose '224', '192', '160', or '128' for the input image size, +with smaller sizes giving faster speeds. The speed and size advantages come at a loss to accuracy of course, but for many purposes this isn't critical. They can also be somewhat offset with improved training data. For example, training with distortions allows me to get above 80% -accuracy on the flower data set even with the 0.25/128/quantized graph above. +accuracy on the flower data set even with the 0.25/128 graph above. 
If you're going to be using the Mobilenet models in label_image or your own programs, you'll need to feed in an image of the specified size converted to a @@ -395,3 +396,9 @@ python tensorflow/examples/label_image/label_image.py \ --input_mean=128 --input_std=128 \ --image=$HOME/flower_photos/daisy/21652746_cc379e0eea_m.jpg ``` + +For more information on deploying the retrained model to a mobile device, see +the [codelab version](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets/#0) +of this tutorial, especially [part 2](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2-tflite/#0), which describes +[TensorFlow Lite](/mobile/tflite/) and the additional optimizations it offers +(including quantization of model weights). -- GitLab From 6399c574c12fc58054dbd5989efde2e2d665e3d6 Mon Sep 17 00:00:00 2001 From: Dan Ringwalt Date: Wed, 28 Feb 2018 07:22:02 -0800 Subject: [PATCH 229/884] Replace deprecated _control_inputs with remove/add to avoid warnings. PiperOrigin-RevId: 187321605 --- tensorflow/contrib/graph_editor/reroute.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/graph_editor/reroute.py b/tensorflow/contrib/graph_editor/reroute.py index 7ffdbb7139..95c02a64d4 100644 --- a/tensorflow/contrib/graph_editor/reroute.py +++ b/tensorflow/contrib/graph_editor/reroute.py @@ -471,9 +471,10 @@ def remove_control_inputs(op, cops): if cop not in op.control_inputs: raise ValueError("{} is not a control_input of {}".format(op.name, cop.name)) + control_inputs = [cop for cop in op.control_inputs if cop not in cops] # pylint: disable=protected-access - op._control_inputs = [cop for cop in op._control_inputs if cop not in cops] - op._recompute_node_def() + op._remove_all_control_inputs() + op._add_control_inputs(control_inputs) # pylint: enable=protected-access @@ -496,9 +497,6 @@ def add_control_inputs(op, cops): if cop in op.control_inputs: raise ValueError("{} is already a control_input 
of {}".format(cop.name, op.name)) - # pylint: disable=protected-access - op._control_inputs += cops - op._recompute_node_def() - # pylint: enable=protected-access + op._add_control_inputs(cops) # pylint: disable=protected-access remove_undocumented(__name__, _allowed_symbols) -- GitLab From f48d3644d433a00733cbe44be67ef4e8ab2988e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 07:40:08 -0800 Subject: [PATCH 230/884] Pass 'import_scope' when calling from_control_flow_context_def. PiperOrigin-RevId: 187323218 --- tensorflow/python/ops/control_flow_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index fb9e2188d7..215c6940df 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1790,7 +1790,7 @@ class CondContext(ControlFlowContext): ret.Enter() for nested_def in context_def.nested_contexts: - from_control_flow_context_def(nested_def) + from_control_flow_context_def(nested_def, import_scope=import_scope) ret.Exit() return ret -- GitLab From e5ab5347d695fe3f7f495864329c05a2ff8b512a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 09:54:19 -0800 Subject: [PATCH 231/884] Move Roadmap to a more prominent place. 
PiperOrigin-RevId: 187338696 --- tensorflow/docs_src/about/index.md | 1 - tensorflow/docs_src/about/leftnav_files | 1 - tensorflow/docs_src/community/index.md | 1 + tensorflow/docs_src/community/leftnav_files | 1 + tensorflow/docs_src/{about => community}/roadmap.md | 0 5 files changed, 2 insertions(+), 2 deletions(-) rename tensorflow/docs_src/{about => community}/roadmap.md (100%) diff --git a/tensorflow/docs_src/about/index.md b/tensorflow/docs_src/about/index.md index 5326b1e110..dc1e9af876 100644 --- a/tensorflow/docs_src/about/index.md +++ b/tensorflow/docs_src/about/index.md @@ -3,7 +3,6 @@ This section provides a few documents about TensorFlow itself, including the following: - * @{$roadmap$Roadmap}, which summarizes upcoming additions to TensorFlow. * @{$uses$TensorFlow in Use}, which provides a link to our model zoo and lists some popular ways that TensorFlow is being used. * @{$bib$TensorFlow White Papers}, which provides abstracts of white papers diff --git a/tensorflow/docs_src/about/leftnav_files b/tensorflow/docs_src/about/leftnav_files index 28f039e9b5..63763b9d9c 100644 --- a/tensorflow/docs_src/about/leftnav_files +++ b/tensorflow/docs_src/about/leftnav_files @@ -1,5 +1,4 @@ index.md -roadmap.md uses.md bib.md attribution.md diff --git a/tensorflow/docs_src/community/index.md b/tensorflow/docs_src/community/index.md index 8e67022648..b706d9b204 100644 --- a/tensorflow/docs_src/community/index.md +++ b/tensorflow/docs_src/community/index.md @@ -5,6 +5,7 @@ This section contains the following documents: * @{$welcome$Welcome to the TensorFlow Community}, which explains how you can get involved, where to report issues, and where to join like-minded TensorFlow enthusiasts online. + * @{$roadmap$Roadmap}, which summarizes upcoming additions to TensorFlow. * @{$documentation$Writing TensorFlow Documentation}, which explains TensorFlow's documentation conventions. If you are modifying TensorFlow source code or documentation, please read this guide. 
diff --git a/tensorflow/docs_src/community/leftnav_files b/tensorflow/docs_src/community/leftnav_files index c1595d3c95..fab35024ad 100644 --- a/tensorflow/docs_src/community/leftnav_files +++ b/tensorflow/docs_src/community/leftnav_files @@ -1,5 +1,6 @@ index.md welcome.md +roadmap.md documentation.md style_guide.md benchmarks.md diff --git a/tensorflow/docs_src/about/roadmap.md b/tensorflow/docs_src/community/roadmap.md similarity index 100% rename from tensorflow/docs_src/about/roadmap.md rename to tensorflow/docs_src/community/roadmap.md -- GitLab From 120fdaa4a2869a9bde183ec42398df527bbcc6e0 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Wed, 28 Feb 2018 09:59:49 -0800 Subject: [PATCH 232/884] BUILD file visibility change. END_PUBLIC RELNOTES: n/a BEGIN_PUBLIC Automated g4 rollback of changelist 187222292 PiperOrigin-RevId: 187339609 --- tensorflow/compiler/jit/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index af259e0564..c7c9e9bd7a 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -205,6 +205,7 @@ cc_library( name = "graph_to_functiondef", srcs = ["graph_to_functiondef.cc"], hdrs = ["graph_to_functiondef.h"], + visibility = [":friends"], deps = [ "//tensorflow/core:core_cpu", "//tensorflow/core:framework", -- GitLab From 3c9cd2576cb9b88b641b5e38248ca7e49aa5c50a Mon Sep 17 00:00:00 2001 From: MandarJKulkarni <33712629+MandarJKulkarni@users.noreply.github.com> Date: Thu, 1 Mar 2018 00:05:09 +0530 Subject: [PATCH 233/884] Fix typos in profiler.h (#16938) --- tensorflow/cc/profiler/profiler.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/cc/profiler/profiler.h b/tensorflow/cc/profiler/profiler.h index 6077c45c58..64edbb5766 100644 --- a/tensorflow/cc/profiler/profiler.h +++ b/tensorflow/cc/profiler/profiler.h @@ -61,18 +61,18 @@ class Profiler { /// Adds tracing information `run_meta` to profiler. 
A `run_meta` is /// generated by a TensorFlow session run call. `step` is the key /// to the `run_meta`. When calling ProfileXXX methods, caller can specify - /// `step` in `options` to seletively profile the corresponding `run_meta`. + /// `step` in `options` to selectively profile the corresponding `run_meta`. /// Multiple different `run_meta` can be keyed by the same `step` in order /// to group them together. void AddStep(int64 step, const RunMetadata& run_meta); /// Profiles the model by organizing nodes in graph structure. - /// Each node is an op and the nodes are contected by the op inputs/outputs. + /// Each node is an op and the nodes are connected by the op inputs/outputs. GraphNodeProto ProfileGraph(const Options& options); /// Profiles the model by organizing nodes in name scope structure. /// Each node is an op, and nodes are organized by the ops' name - /// scope, similar to a filesystem tree. + /// scope, similar to a file system tree. /// E.g. /foo is the root of operation /foo/matmul_1 and foo/conv_2. GraphNodeProto ProfileNameScope(const Options& options); -- GitLab From 12d8142dc1bb914fa3ff0a9029e9b6b71e36b9f5 Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Wed, 28 Feb 2018 10:43:36 -0800 Subject: [PATCH 234/884] [eager] Typo correction, there is no method `tf.data.Dataset.make_iterator`. 
PiperOrigin-RevId: 187347001 --- tensorflow/contrib/eager/python/datasets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index d177bfeab2..36b7d6d009 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -71,7 +71,7 @@ class Iterator(object): if not context.in_eager_mode(): raise RuntimeError( "{} objects can only be used when eager execution is enabled, use " - "tf.data.Dataset.make_iterator or " + "tf.data.Dataset.make_initializable_iterator or " "tf.data.Dataset.make_one_shot_iterator for graph construction". format(type(self))) with ops.device("/device:CPU:0"): -- GitLab From d2e24b6039433bd83478da8c8c2d6c58034be607 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Wed, 28 Feb 2018 10:52:31 -0800 Subject: [PATCH 235/884] Don't assign device for the keras part of _saved_first_checkpoint. Fix #14504. (#17231) PiperOrigin-RevId: 186526175 --- .../python/keras/_impl/keras/estimator.py | 24 ++++++++--------- .../keras/_impl/keras/estimator_test.py | 27 ++++++++++++++++++- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 624e92a04b..495d8829b6 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -221,18 +221,18 @@ def _save_first_checkpoint(keras_model, estimator, custom_objects, Returns: The model_fn for a keras Estimator. 
""" - with ops.Graph().as_default() as g, g.device(estimator._device_fn): - random_seed.set_random_seed(estimator.config.tf_random_seed) - training_util.create_global_step() - model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model, - custom_objects) - - if isinstance(model, models.Sequential): - model = model.model - # Load weights and save to checkpoint if there is no checkpoint - latest_path = saver_lib.latest_checkpoint(estimator.model_dir) - if not latest_path: - with session.Session() as sess: + # Load weights and save to checkpoint if there is no checkpoint + latest_path = saver_lib.latest_checkpoint(estimator.model_dir) + if not latest_path: + with ops.Graph().as_default(): + random_seed.set_random_seed(estimator.config.tf_random_seed) + training_util.create_global_step() + model = _clone_and_build_model(model_fn_lib.ModeKeys.TRAIN, keras_model, + custom_objects) + if isinstance(model, models.Sequential): + model = model.model + # save to checkpoint + with session.Session(config=estimator._session_config) as sess: model.set_weights(keras_weights) # Make update ops and initialize all variables. 
if not model.train_function: diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py index 9fc48b4117..88dd14b856 100644 --- a/tensorflow/python/keras/_impl/keras/estimator_test.py +++ b/tensorflow/python/keras/_impl/keras/estimator_test.py @@ -17,6 +17,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import json from math import log10 import os import tempfile @@ -62,7 +63,7 @@ def simple_functional_model(): return model -def get_resource_for_simple_model(is_sequential, is_evaluate): +def get_resource_for_simple_model(is_sequential=True, is_evaluate=False): model = simple_sequential_model( ) if is_sequential else simple_functional_model() if is_sequential: @@ -352,6 +353,30 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): model_dir=tempfile.mkdtemp(dir=self._base_dir), custom_objects=custom_objects) + def test_tf_config(self): + keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() + keras_model.compile( + loss='categorical_crossentropy', + optimizer='rmsprop', + metrics=['mse', keras.metrics.categorical_accuracy]) + + tf_config = json.dumps({ + 'cluster': { + run_config_lib.TaskType.PS: ['localhost:1234'], + run_config_lib.TaskType.WORKER: ['localhost:1236'], + run_config_lib.TaskType.MASTER: ['localhost:1238'] + }, + 'task': { + 'type': run_config_lib.TaskType.MASTER, + 'index': 0 + } + }) + with test.mock.patch.dict('os.environ', {'TF_CONFIG': tf_config}): + with self.test_session(): + keras.estimator.model_to_estimator( + keras_model=keras_model, + model_dir=tempfile.mkdtemp(dir=self._base_dir)) + if __name__ == '__main__': test.main() -- GitLab From 39a43c4f1d73b0210795d2003b127d3ffa284e98 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 28 Feb 2018 11:07:10 -0800 Subject: [PATCH 236/884] Introduce a ShapeUtil::ForEachIndexWithStatus, change index type to ArraySlice This is not used yet, but I need it 
in a later CL. I don't specifically need the argument to be an ArraySlice, but it seemed cleaner than taking a const ref to a vector. No functional change intended. PiperOrigin-RevId: 187352376 --- tensorflow/compiler/xla/literal_util.cc | 2 +- tensorflow/compiler/xla/literal_util.h | 2 +- tensorflow/compiler/xla/literal_util_test.cc | 30 +++++++-------- .../compiler/xla/service/hlo_evaluator.cc | 6 +-- tensorflow/compiler/xla/shape_util.h | 38 ++++++++++++++----- tensorflow/compiler/xla/shape_util_test.cc | 32 ++++++++++++++-- 6 files changed, 77 insertions(+), 33 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 823da43b5a..3962a9b316 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -223,7 +223,7 @@ Status Literal::CopySliceFromInternal( Literal::StrideConfig stride_config(src_literal.shape(), shape(), copy_size); - auto copy_proc = [&](const std::vector& indexes) { + auto copy_proc = [&](tensorflow::gtl::ArraySlice indexes) { // Map from multi-dimensional index, to source index. 
std::transform(indexes.begin(), indexes.end(), src_base.begin(), src_indexes.begin(), std::plus()); diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index d5ae3fd723..1d58f0cbc7 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -1269,7 +1269,7 @@ Status Literal::Populate(const FnType& generator) { int64 minor_dimension_size = ShapeUtil::GetDimension(this_shape, stride_config.minor_dimension); - auto init_function = [&](const std::vector& indexes) { + auto init_function = [&](tensorflow::gtl::ArraySlice indexes) { const int64 index = IndexUtil::MultidimensionalIndexToLinearIndex(shape(), indexes); std::copy(indexes.begin(), indexes.end(), minor_scan_indexes.begin()); diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index ee2f4fe874..9ff0771110 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -30,6 +30,7 @@ limitations under the License. 
namespace xla { namespace { +using tensorflow::gtl::ArraySlice; using ::testing::ElementsAre; using ::testing::HasSubstr; @@ -214,11 +215,11 @@ TEST_F(LiteralUtilTest, CreateSparse) { std::vector expected_values = {8, 9, 7, 10}; EXPECT_EQ(literal->sparse_indices()->data(), - tensorflow::gtl::ArraySlice( - expected_indices.data(), expected_indices.num_elements())); - EXPECT_EQ(tensorflow::gtl::ArraySlice(literal->data().data(), - expected_values.size()), - tensorflow::gtl::ArraySlice(expected_values)); + ArraySlice(expected_indices.data(), + expected_indices.num_elements())); + EXPECT_EQ( + ArraySlice(literal->data().data(), expected_values.size()), + ArraySlice(expected_values)); } TEST_F(LiteralUtilTest, LiteralR4F32ProjectedStringifies) { @@ -290,7 +291,7 @@ TEST_F(LiteralUtilTest, EachCellR2F32) { // clang-format on std::vector> seen; literal->EachCellAsString( - [&seen](tensorflow::gtl::ArraySlice indices, const string& value) { + [&seen](ArraySlice indices, const string& value) { seen.emplace_back(indices[0], indices[1], value); }); @@ -622,11 +623,10 @@ TEST_F(LiteralUtilTest, TransposeR4) { // clang-format on auto reshape = original->Transpose(/*permutation=*/{2, 3, 0, 1}); - reshape->EachCell( - [&](tensorflow::gtl::ArraySlice indices, float value) { - EXPECT_EQ(value, original->Get( - {indices[2], indices[3], indices[0], indices[1]})); - }); + reshape->EachCell([&](ArraySlice indices, float value) { + EXPECT_EQ(value, original->Get( + {indices[2], indices[3], indices[0], indices[1]})); + }); } TEST_F(LiteralUtilTest, TestR4RelayoutEquivalence) { @@ -863,7 +863,7 @@ TEST_F(LiteralUtilTest, CopySliceFrom) { const int64 zero_base[] = {0, 0, 0, 0}; const int64 step[] = {1, 1, 1, 1}; uint32 seqnr = 0; - auto init_proc = [&](const std::vector& indexes) { + auto init_proc = [&](ArraySlice indexes) { source->Set(indexes, ++seqnr); return true; }; @@ -879,7 +879,7 @@ TEST_F(LiteralUtilTest, CopySliceFrom) { std::vector source_indexes(TF_ARRAYSIZE(dimensions), 0); 
std::vector blank_indexes(TF_ARRAYSIZE(dimensions), 0); bool matched = true; - auto check_proc = [&](const std::vector& indexes) { + auto check_proc = [&](ArraySlice indexes) { std::copy(indexes.begin(), indexes.end(), source_indexes.begin()); std::transform(source_indexes.begin(), source_indexes.end(), src_base, source_indexes.begin(), std::plus()); @@ -1067,7 +1067,7 @@ TEST_F(LiteralUtilTest, Populate) { primitive_util::NativeToPrimitiveType(), data.dimensions, data.layout); auto literal = Literal::CreateFromShape(shape); - auto generator = [&](tensorflow::gtl::ArraySlice indexes) -> uint32 { + auto generator = [&](ArraySlice indexes) -> uint32 { // Offsets from linear index just to avoid R0 literals to be initialized // with zero. return IndexUtil::MultidimensionalIndexToLinearIndex(literal->shape(), @@ -1079,7 +1079,7 @@ TEST_F(LiteralUtilTest, Populate) { std::vector zero_base(data.dimensions.size(), 0); std::vector step(data.dimensions.size(), 1); bool matched = true; - auto check_function = [&](const std::vector& indexes) { + auto check_function = [&](ArraySlice indexes) { auto value = literal->Get(indexes); matched = matched && (value == generator(indexes)); return matched; diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index c3a3251b7d..edb1ad2360 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1222,7 +1222,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { // corresponding index of the resulting padded literal. 
const PaddingConfig& pad_config = pad->padding_config(); - auto func = [&](const std::vector& input_index) { + auto func = [&](ArraySlice input_index) { for (auto i = 0; i < input_index.size(); ++i) { // Interior padding occurs logically before edge padding, so in the case // of negative edge padding elements are removed from the @@ -1518,7 +1518,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { base[result_to_arg_index[i]] = multi_index[i]; } - auto func = [&](const std::vector& input_index) { + auto func = [&](ArraySlice input_index) { auto curr_val = arg_literal.Get(input_index); // Evaluate computation with specified literal operands. @@ -1954,7 +1954,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { auto result = operand_literal.CloneToUnique(); std::vector result_index(ShapeUtil::Rank(result->shape()), 0); - auto func = [&](const std::vector& update_index) { + auto func = [&](ArraySlice update_index) { std::transform(update_index.begin(), update_index.end(), start.begin(), result_index.begin(), std::plus()); diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 8ee263fe5e..923315e001 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/compiler/xla/layout_util.h" #include "tensorflow/compiler/xla/primitive_util.h" +#include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" @@ -564,16 +565,16 @@ class ShapeUtil { // The visitor_function visitor function should return true if it wants to // continue, or false otherwise. // - // visitor_function must be a callable of type bool(const std::vector&) - // or compatible. + // visitor_function must be a callable of type + // StatusOr(ArraySlice) or compatible. 
template - static void ForEachIndex(const Shape& shape, - tensorflow::gtl::ArraySlice base, - tensorflow::gtl::ArraySlice count, - tensorflow::gtl::ArraySlice incr, - const FnType& visitor_function) { + static Status ForEachIndexWithStatus(const Shape& shape, + tensorflow::gtl::ArraySlice base, + tensorflow::gtl::ArraySlice count, + tensorflow::gtl::ArraySlice incr, + const FnType& visitor_function) { if (ShapeUtil::HasZeroElements(shape)) { - return; + return Status::OK(); } CHECK_EQ(Rank(shape), base.size()); CHECK_EQ(incr.size(), base.size()); @@ -583,7 +584,11 @@ class ShapeUtil { // once with the proper empty indexes. int64 n = -1; std::vector indexes(base.begin(), base.end()); - while (n < rank && visitor_function(indexes)) { + while (n < rank) { + TF_ASSIGN_OR_RETURN(bool should_continue, visitor_function(indexes)); + if (!should_continue) { + break; + } // Increments dimensions in minor to major order. for (n = 0; n < rank; ++n) { int64 dim = LayoutUtil::Minor(shape.layout(), n); @@ -594,6 +599,21 @@ class ShapeUtil { indexes[dim] = base[dim]; } } + + return Status::OK(); + } + + template + static void ForEachIndex(const Shape& shape, + tensorflow::gtl::ArraySlice base, + tensorflow::gtl::ArraySlice count, + tensorflow::gtl::ArraySlice incr, + const FnType& visitor_function) { + ForEachIndexWithStatus(shape, base, count, incr, + [&](tensorflow::gtl::ArraySlice indices) { + return StatusOr(visitor_function(indices)); + }) + .IgnoreError(); } private: diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index 4db97d45b2..a357415698 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -573,10 +573,11 @@ TEST(ShapeUtilTest, ForEachIndex) { Shape shape = ShapeUtil::MakeShape(F32, data.dimensions); // Increments at every invocation. 
int invocations = 0; - auto increment_func = [&invocations](const std::vector& indexes) { - invocations++; - return true; - }; + auto increment_func = + [&invocations](tensorflow::gtl::ArraySlice indexes) { + invocations++; + return true; + }; std::vector zero_base(data.dimensions.size(), 0); std::vector step(data.dimensions.size(), 1); @@ -588,6 +589,29 @@ TEST(ShapeUtilTest, ForEachIndex) { } } +TEST(ShapeUtilTest, ForEachIndexWithStatus) { + Shape shape = ShapeUtil::MakeShape(F32, {10, 10}); + // Increments at every invocation. + int invocations = 0; + auto increment_func = + [&invocations]( + tensorflow::gtl::ArraySlice indexes) -> StatusOr { + if (++invocations == 5) { + return Unimplemented("Cannot increment beyond 5."); + } + return true; + }; + + Status error_status = ShapeUtil::ForEachIndexWithStatus( + shape, /*base=*/{0, 0}, /*count=*/{10, 10}, /*incr=*/{0, 1}, + increment_func); + + EXPECT_FALSE(error_status.ok()); + EXPECT_THAT(error_status.error_message(), + ::testing::HasSubstr("Cannot increment beyond 5.")); + EXPECT_EQ(invocations, 5); +} + TEST(ShapeUtilTest, DimensionsUnmodifiedByReshape_1x1x1x1_to_1x1x1) { // All output dimensions should be unmodified. One of the input dimensions is // modified because the input rank is larger by one. -- GitLab From 09d9715460bf4d0d0d2229816fe45eb81676a9ca Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 28 Feb 2018 11:50:17 -0800 Subject: [PATCH 237/884] Disable GRPC io utils test. 
PiperOrigin-RevId: 187360410 --- tensorflow/core/debug/BUILD | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index 40cb8353cd..f6fe9edb02 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -298,6 +298,9 @@ tf_cc_test( size = "small", srcs = ["debug_grpc_io_utils_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), + tags = [ + "no_oss", # b/73962011 + ], deps = [ ":debug_graph_utils", ":debug_grpc_testlib", -- GitLab From 31421c3fa3a0585c01198458fa123c3493c21b62 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 28 Feb 2018 12:13:22 -0800 Subject: [PATCH 238/884] [XLA] Fix BF16 normalization to avoid the pass adding new unsupported mixed precision. Resolve unsupported input/output first, then resolve unsupported mixed precision. PiperOrigin-RevId: 187363969 --- .../xla/service/bfloat16_normalization.cc | 115 ++++++++++-------- .../service/bfloat16_normalization_test.cc | 33 ++++- 2 files changed, 95 insertions(+), 53 deletions(-) diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc index b032c040e8..6176f5d209 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -221,41 +221,37 @@ Status BFloat16NormalizationVisitor::HandleCrossReplicaSum( } Status BFloat16NormalizationVisitor::HandleInstruction(HloInstruction* hlo) { - std::vector bf16_operands; - std::vector f32_operands; - bool has_f32 = false; - bool has_bf16 = false; + int f32_count = 0; + int bf16_count = 1; for (int64 i = 0; i < hlo->operand_count(); ++i) { if (hlo->operand(i)->shape().element_type() == F32) { - f32_operands.push_back(i); - has_f32 = true; + f32_count += 1; } else if (hlo->operand(i)->shape().element_type() == BF16) { - bf16_operands.push_back(i); - has_bf16 = true; + bf16_count += 1; } } if (hlo->shape().element_type() == F32) { - 
has_f32 = true; + f32_count += 1; } else if (hlo->shape().element_type() == BF16) { - has_bf16 = true; + bf16_count += 1; } std::vector bf16_called_comps; for (auto* comp : hlo->called_computations()) { bool comp_has_bf16 = false; if (comp->root_instruction()->shape().element_type() == F32) { - has_f32 = true; + f32_count += 1; } else if (comp->root_instruction()->shape().element_type() == BF16) { - has_bf16 = true; + bf16_count += 1; comp_has_bf16 = true; } for (auto* param : comp->parameter_instructions()) { if (param->shape().element_type() == F32) { - has_f32 = true; + f32_count += 1; } else if (param->shape().element_type() == BF16) { - has_bf16 = true; + bf16_count += 1; comp_has_bf16 = true; } } @@ -264,54 +260,69 @@ Status BFloat16NormalizationVisitor::HandleInstruction(HloInstruction* hlo) { } } - if (!bfloat16_support_->SupportsMixedPrecisions(*hlo) && has_bf16 && - has_f32) { - // Resolve unsupported mixed precision. - // - // See if we can change everything to BF16. - if (hlo->called_computations().empty() && - hlo->shape().element_type() == BF16) { - bool can_use_bf16 = true; - for (int i : f32_operands) { - if (bfloat16_support_->EffectiveOperandPrecisionIsOutputPrecision(*hlo, - i) && - bfloat16_support_->SupportsBF16Operand(*hlo, i)) { - continue; - } - can_use_bf16 = false; - break; - } - if (can_use_bf16) { - for (int i : f32_operands) { - TF_RETURN_IF_ERROR( - InsertConvertBeforeOperand(hlo, i, BF16, computation_)); - } - return Status::OK(); - } - } - if (hlo->shape().element_type() == BF16) { - TF_RETURN_IF_ERROR( - ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); - } - for (int i : bf16_operands) { - TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); - } - return ConvertCalledComputations(hlo, bf16_called_comps); - } - - for (int i : bf16_operands) { - if (!bfloat16_support_->SupportsBF16Operand(*hlo, i)) { + // Resolve unsupported BF16 operands. 
+ for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == BF16 && + !bfloat16_support_->SupportsBF16Operand(*hlo, i)) { TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); + bf16_count -= 1; + f32_count += 1; } } + // Resolve unsupported BF16 output. if (hlo->shape().element_type() == BF16 && !bfloat16_support_->SupportsBF16Output(*hlo)) { TF_RETURN_IF_ERROR( ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); + bf16_count -= 1; + f32_count += 1; } - return Status::OK(); + // Resolve unsupported mixed precision after resolving unsupported BF16 + // operands and output, because the numbers of BF16 operands/output and F32 + // operands/output may have changed. + if (bfloat16_support_->SupportsMixedPrecisions(*hlo) || bf16_count == 0 || + f32_count == 0) { + return Status::OK(); + } + // See if we can change everything to BF16. + if (hlo->called_computations().empty() && + hlo->shape().element_type() == BF16) { + bool can_use_bf16 = true; + for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == BF16) { + continue; + } + if ((bfloat16_support_->EffectiveOperandPrecisionIsBF16(*hlo, i) || + bfloat16_support_->EffectiveOperandPrecisionIsOutputPrecision(*hlo, + i)) && + bfloat16_support_->SupportsBF16Operand(*hlo, i)) { + continue; + } + can_use_bf16 = false; + break; + } + if (can_use_bf16) { + for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == F32) { + TF_RETURN_IF_ERROR( + InsertConvertBeforeOperand(hlo, i, BF16, computation_)); + } + } + return Status::OK(); + } + } + if (hlo->shape().element_type() == BF16) { + TF_RETURN_IF_ERROR( + ChangeOutputTypeThenInsertConvertBack(hlo, F32, computation_)); + } + for (int i = 0; i < hlo->operand_count(); ++i) { + if (hlo->operand(i)->shape().element_type() == BF16) { + TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(hlo, i, F32, computation_)); + } + } + return 
ConvertCalledComputations(hlo, bf16_called_comps); } Status BFloat16NormalizationVisitor::DefaultAction(HloInstruction* hlo) { diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index 66c3085842..fc0f6f1948 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -41,13 +41,17 @@ class TestBFloat16Support : public BFloat16Support { hlo.opcode() == HloOpcode::kGetTupleElement) { return true; } + if (hlo.opcode() == HloOpcode::kDot) { + // Test that only the first operand of kDot supports BF16. + return operand_index == 0; + } return false; } bool SupportsBF16Output(const HloInstruction& hlo) const override { if (hlo.opcode() == HloOpcode::kAdd || hlo.opcode() == HloOpcode::kReduce || hlo.opcode() == HloOpcode::kSubtract || - hlo.opcode() == HloOpcode::kTuple || + hlo.opcode() == HloOpcode::kDot || hlo.opcode() == HloOpcode::kTuple || hlo.opcode() == HloOpcode::kGetTupleElement) { return true; } @@ -245,4 +249,31 @@ TEST_F(BFloat16NormalizationTest, ResolveMixedPrecisionTupleCrossReplicaSum) { EXPECT_EQ(ShapeUtil::GetSubshape(crs->shape(), {1}).element_type(), F32); } +// Tests that the normalization should not cause unsupported mixed precision due +// to resolving unsupported BF16 operand. 
+TEST_F(BFloat16NormalizationTest, DoNotAddUnsupportedMixedPrecision) { + auto builder = HloComputation::Builder(TestName()); + Shape bf16_shape = ShapeUtil::MakeShape(BF16, {4, 4}); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateParameter(0, bf16_shape, "a")); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateParameter(1, bf16_shape, "b")); + + HloInstruction* dot = builder.AddInstruction( + HloInstruction::CreateBinary(bf16_shape, HloOpcode::kDot, a, b)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(Normalize(module.get())); + + EXPECT_EQ(computation->root_instruction()->opcode(), HloOpcode::kConvert); + EXPECT_EQ(dot->shape().element_type(), F32); + EXPECT_EQ(dot->operand(0)->shape().element_type(), F32); + EXPECT_EQ(dot->operand(0)->opcode(), HloOpcode::kConvert); + EXPECT_EQ(dot->operand(1)->shape().element_type(), F32); + EXPECT_EQ(dot->operand(1)->opcode(), HloOpcode::kConvert); +} + } // namespace xla -- GitLab From 3dbbf740441cdd41b2dc998e09980d72d2e9d440 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Wed, 28 Feb 2018 12:14:03 -0800 Subject: [PATCH 239/884] In Grappler item builder, support inferring fetch nodes from signature defs. PiperOrigin-RevId: 187364078 --- .../core/grappler/grappler_item_builder.cc | 76 ++++++++++++++++--- .../grappler/grappler_item_builder_test.cc | 53 +++++++++++++ 2 files changed, 117 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 606807b9e9..33ad426bbf 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -168,12 +168,6 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( // Fill in feed nodes from config, if any provided. 
for (const auto& feed_node : cfg.feed_nodes) { const string feed_name = NodeName(feed_node); - if (feed_name.empty()) { - LOG(ERROR) << "Invalid feed node name " << feed_node - << ", skipping this input."; - return nullptr; - } - VLOG(1) << "Will use feed node " << feed_name; new_item->feed.emplace_back(feed_name, Tensor()); } @@ -182,17 +176,75 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( const CollectionDef& nodes = meta_graph.collection_def().at("train_op"); if (nodes.has_node_list()) { for (const auto& node : nodes.node_list().value()) { - const string name = NodeName(node); - if (name.empty()) { - LOG(ERROR) << "Invalid fetch node name " << node - << ", skipping this input"; + new_item->fetch.push_back(NodeName(node)); + } + } + } + + // Detect feed and fetch nodes from signature defs. + for (const auto& name_and_signature : meta_graph.signature_def()) { + for (const auto& name_and_input : name_and_signature.second.inputs()) { + const TensorInfo& input = name_and_input.second; + if (input.has_coo_sparse()) { + // Define the shapes following the comment of CooSparse. 
+ PartialTensorShape partial_shape_1d({-1}); + PartialTensorShape partial_shape_2d({-1, -1}); + TensorShape shape_1d; + TensorShape shape_2d; + if (!partial_shape_1d.AsTensorShape(&shape_1d) || + !partial_shape_2d.AsTensorShape(&shape_2d)) { + LOG(ERROR) << "Internal error when constructing tensor shapes."; return nullptr; } - VLOG(1) << "Will use fetch node " << name; - new_item->fetch.push_back(name); + + new_item->feed.emplace_back( + NodeName(input.coo_sparse().values_tensor_name()), + Tensor(input.dtype(), shape_1d)); + new_item->feed.emplace_back( + NodeName(input.coo_sparse().indices_tensor_name()), + Tensor(DT_INT64, shape_2d)); + new_item->feed.emplace_back( + NodeName(input.coo_sparse().dense_shape_tensor_name()), + Tensor(DT_INT64, shape_1d)); + } else { + new_item->feed.emplace_back( + NodeName(input.name()), + Tensor(input.dtype(), input.tensor_shape())); } } + for (const auto& name_and_output : name_and_signature.second.outputs()) { + const TensorInfo& output = name_and_output.second; + if (output.has_coo_sparse()) { + new_item->fetch.push_back( + NodeName(output.coo_sparse().values_tensor_name())); + new_item->fetch.push_back( + NodeName(output.coo_sparse().indices_tensor_name())); + new_item->fetch.push_back( + NodeName(output.coo_sparse().dense_shape_tensor_name())); + } else { + new_item->fetch.push_back(NodeName(output.name())); + } + } + } + + for (const auto& feed : new_item->feed) { + if (feed.first.empty()) { + LOG(ERROR) << "Invalid feed node name skipping this input"; + return nullptr; + } else { + VLOG(1) << "Will use feed node " << feed.first; + } + } + + for (const auto& fetch : new_item->fetch) { + if (fetch.empty()) { + LOG(ERROR) << "Invalid fetch node name skipping this input"; + return nullptr; + } else { + VLOG(1) << "Will use fetch node " << fetch; + } } + if (new_item->fetch.empty()) { LOG(ERROR) << "Failed to detect the fetch node(s), skipping this input"; return nullptr; diff --git 
a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc index ef95992af7..78cbff6c90 100644 --- a/tensorflow/core/grappler/grappler_item_builder_test.cc +++ b/tensorflow/core/grappler/grappler_item_builder_test.cc @@ -280,6 +280,59 @@ TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) { ASSERT_TRUE(item != nullptr); } +TEST_F(GrapplerItemBuilderTest, FromGraphWithSignatureDef) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto x = ops::Const(s.WithOpName("x"), 0); + auto y = ops::Const(s.WithOpName("y"), 1); + auto z = ops::Add(s.WithOpName("z"), x, y); + + MetaGraphDef meta_graph; + TF_CHECK_OK(s.ToGraphDef(meta_graph.mutable_graph_def())); + + TensorInfo input, output; + input.set_name("x"); + input.set_dtype(DT_FLOAT); + output.set_name("z"); + SignatureDef serving_signature; + (*serving_signature.mutable_inputs())["input"] = input; + (*serving_signature.mutable_outputs())["output"] = output; + (*meta_graph.mutable_signature_def())["serving"] = serving_signature; + + std::unique_ptr item = + GrapplerItemFromMetaGraphDef("0", meta_graph, ItemConfig()); + ASSERT_TRUE(item != nullptr); + + EXPECT_EQ(item->feed[0].first, "x"); + EXPECT_EQ(item->fetch[0], "z"); +} + +TEST_F(GrapplerItemBuilderTest, FromGraphWithIncompleteSignatureDef) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + auto x = ops::Const(s.WithOpName("x"), 0); + auto y = ops::Const(s.WithOpName("y"), 1); + + MetaGraphDef meta_graph; + TF_CHECK_OK(s.ToGraphDef(meta_graph.mutable_graph_def())); + + CollectionDef train_op; + train_op.mutable_node_list()->add_value("y"); + (*meta_graph.mutable_collection_def())["train_op"] = train_op; + + TensorInfo input, output; + input.set_name("x"); + input.set_dtype(DT_FLOAT); + // Its coo_sparse proto is incomplete. 
+ output.mutable_coo_sparse()->set_values_tensor_name("z"); + SignatureDef serving_signature; + (*serving_signature.mutable_inputs())["input"] = input; + (*serving_signature.mutable_outputs())["output"] = output; + (*meta_graph.mutable_signature_def())["serving"] = serving_signature; + + std::unique_ptr item = + GrapplerItemFromMetaGraphDef("0", meta_graph, ItemConfig()); + ASSERT_TRUE(item == nullptr); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From b07680459a88224fce83daa7b3b70bcc62b9c896 Mon Sep 17 00:00:00 2001 From: Loo Rong Jie Date: Thu, 1 Mar 2018 04:38:30 +0800 Subject: [PATCH 240/884] [Windows] Copy NominalCPUFrequency from Abseil (#16905) * [Windows] Copy NominalCPUFrequency from Abseil * Add #include --- tensorflow/core/platform/windows/port.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/platform/windows/port.cc b/tensorflow/core/platform/windows/port.cc index 582b232054..f3b27ea394 100644 --- a/tensorflow/core/platform/windows/port.cc +++ b/tensorflow/core/platform/windows/port.cc @@ -25,6 +25,7 @@ limitations under the License. #endif #include +#include #include "tensorflow/core/platform/cpu_info.h" #include "tensorflow/core/platform/demangle.h" @@ -149,11 +150,16 @@ bool Snappy_Uncompress(const char* input, size_t length, char* output) { string Demangle(const char* mangled) { return mangled; } double NominalCPUFrequency() { -#ifdef TENSORFLOW_USE_ABSL - return absl::base_internal::NominalCPUFrequency(); -#else + DWORD data; + DWORD data_size = sizeof(data); + #pragma comment(lib, "shlwapi.lib") // For SHGetValue(). + if (SUCCEEDED( + SHGetValueA(HKEY_LOCAL_MACHINE, + "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", + "~MHz", nullptr, &data, &data_size))) { + return data * 1e6; // Value is MHz. 
+ } return 1.0; -#endif } int64 AvailableRam() { -- GitLab From 8a31fec675f3f1ade28a9a8f38cc8f72d9573256 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Wed, 28 Feb 2018 12:55:34 -0800 Subject: [PATCH 241/884] [XLA] FP16 Dot support for the CPU and GPU backends. Extend the stream interface ThenBlasGemmWithAlgorithm to support F16 matrix multiplication with computation type FP32. Extend the stream executor interface DoBlasGemmWithAlgorithm to support F16 GEMM with computation type FP32. Extend the CPU IR emitter to handle F16 Dot instruction, and add F16 matrix multiplication implementation to the CPU runtime. Extend the GPU backend to handle FP16 GEMM Thunk. Replicate the existing matrix multiplication test cases in matrix_ops_simple_test and dot_operation_test for FP16. RELNOTES: PiperOrigin-RevId: 187369731 --- tensorflow/compiler/xla/array.h | 45 +- tensorflow/compiler/xla/array2d.h | 17 +- tensorflow/compiler/xla/array3d.h | 9 +- tensorflow/compiler/xla/array4d.h | 9 +- tensorflow/compiler/xla/reference_util.cc | 56 +- tensorflow/compiler/xla/reference_util.h | 16 +- tensorflow/compiler/xla/service/cpu/BUILD | 1 - .../compiler/xla/service/cpu/cpu_runtime.cc | 4 + .../compiler/xla/service/cpu/cpu_runtime.h | 2 + .../xla/service/cpu/dot_op_emitter.cc | 9 +- .../compiler/xla/service/cpu/ir_emitter.cc | 2 +- .../xla/service/cpu/runtime_matmul.cc | 39 +- .../compiler/xla/service/cpu/runtime_matmul.h | 6 + .../xla/service/cpu/runtime_matvec.cc | 110 --- .../compiler/xla/service/cpu/runtime_matvec.h | 94 ++- .../cpu/runtime_single_threaded_matmul.cc | 36 +- .../cpu/runtime_single_threaded_matmul.h | 6 + .../xla/service/cpu/simple_orc_jit.cc | 2 + .../compiler/xla/service/gpu/gemm_thunk.cc | 25 +- .../xla/service/gpu/ir_emission_utils.cc | 6 +- tensorflow/compiler/xla/shape_util.h | 9 + tensorflow/compiler/xla/tests/BUILD | 1 + .../compiler/xla/tests/convolution_test.cc | 52 +- .../compiler/xla/tests/dot_operation_test.cc | 673 +++++++++--------- 
.../xla/tests/matrix_ops_simple_test.cc | 375 +++++----- tensorflow/stream_executor/blas.cc | 6 + tensorflow/stream_executor/blas.h | 2 + tensorflow/stream_executor/cuda/cuda_blas.cc | 47 +- 28 files changed, 868 insertions(+), 791 deletions(-) delete mode 100644 tensorflow/compiler/xla/service/cpu/runtime_matvec.cc diff --git a/tensorflow/compiler/xla/array.h b/tensorflow/compiler/xla/array.h index 46ee4e64c9..24b58bec11 100644 --- a/tensorflow/compiler/xla/array.h +++ b/tensorflow/compiler/xla/array.h @@ -121,10 +121,31 @@ class Array { CHECK(idx == num_elements()); } - // Creates a 2D array of Eigen::half from the given nested initializer list of - // float values. + // Creates a 1D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && + std::is_same::value>::type> + Array(std::initializer_list values) + : Array(ToInt64Vector({values.size()})) { + int64 idx = 0; + for (const auto& it1 : values) { + values_[idx] = static_cast(it1); + ++idx; + } + CHECK(idx == num_elements()); + } + + // Creates a 2D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. + template ::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array(std::initializer_list> values) : Array(ToInt64Vector({values.size(), values.begin()->size()})) { @@ -155,10 +176,13 @@ class Array { CHECK(idx == num_elements()); } - // Creates a 3D array of Eigen::half from the given nested initializer list of - // float values. + // Creates a 3D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. 
template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array(std::initializer_list>> values) @@ -196,10 +220,13 @@ class Array { CHECK(idx == num_elements()); } - // Creates a 4D array of Eigen::half from the given nested initializer list of - // float values. + // Creates a 4D array of a floating-point type (half, bfloat16, float, + // or double) from an initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array(std::initializer_list< std::initializer_list>>> diff --git a/tensorflow/compiler/xla/array2d.h b/tensorflow/compiler/xla/array2d.h index d30e78ecde..a17e81f448 100644 --- a/tensorflow/compiler/xla/array2d.h +++ b/tensorflow/compiler/xla/array2d.h @@ -53,10 +53,13 @@ class Array2D : public Array { Array2D(std::initializer_list> values) : Array(values) {} - // Creates an array of Eigen::half from the given nested initializer list of - // float values. + // Creates an array of a floating-point type (half, bfloat16, float, + // or double) from the given nested initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array2D(std::initializer_list> values) : Array(values) {} @@ -100,14 +103,16 @@ std::unique_ptr> MakeLinspaceArray2D(double from, double to, int64 n1, int64 n2) { auto array = MakeUnique>(n1, n2); int64 count = n1 * n2; - NativeT step = (count > 1) ? (to - from) / (count - 1) : 0.0f; + NativeT step = + static_cast((count > 1) ? 
(to - from) / (count - 1) : 0); auto set = [&array, n1, n2](int64 index, NativeT value) { (*array)(index / n2, index % n2) = value; }; for (int64 i = 0; i < count - 1; ++i) { - set(i, static_cast(from + i * step)); + set(i, (static_cast(from) + + static_cast(i) * static_cast(step))); } - set(count - 1, to); + set(count - 1, static_cast(to)); return array; } } // namespace xla diff --git a/tensorflow/compiler/xla/array3d.h b/tensorflow/compiler/xla/array3d.h index e5eb235d45..0e9a0722ae 100644 --- a/tensorflow/compiler/xla/array3d.h +++ b/tensorflow/compiler/xla/array3d.h @@ -57,10 +57,13 @@ class Array3D : public Array { values) : Array(values) {} - // Creates an array of Eigen::half from the given nested initializer list of - // float values. + // Creates an array of a floating-point type (half, bfloat16, float, + // or double) from the given nested initializer list of float values. template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array3D( std::initializer_list>> diff --git a/tensorflow/compiler/xla/array4d.h b/tensorflow/compiler/xla/array4d.h index cff70e54ba..a75fffc605 100644 --- a/tensorflow/compiler/xla/array4d.h +++ b/tensorflow/compiler/xla/array4d.h @@ -82,10 +82,13 @@ class Array4D : public Array { values) : Array(values) {} - // Creates an array of Eigen::half from the given nested initializer list of - // float values. + // Creates an array of a floating-point type (half, bfloat16, float, + // or double) from the given nested initializer list of float values. 
template ::value && + (std::is_same::value || + std::is_same::value || + std::is_same::value || + std::is_same::value) && std::is_same::value>::type> Array4D(std::initializer_list>>> diff --git a/tensorflow/compiler/xla/reference_util.cc b/tensorflow/compiler/xla/reference_util.cc index a9acdae380..8711b8aa2e 100644 --- a/tensorflow/compiler/xla/reference_util.cc +++ b/tensorflow/compiler/xla/reference_util.cc @@ -30,29 +30,23 @@ limitations under the License. namespace xla { -/* static */ std::unique_ptr> ReferenceUtil::TransposeArray2D( - const Array2D& operand) { - auto result = MakeUnique>(operand.width(), operand.height()); - for (int64 w = 0; w < operand.width(); ++w) { - for (int64 h = 0; h < operand.height(); ++h) { - (*result)(w, h) = operand(h, w); - } - } - - return result; -} - -/* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( - const Array2D& lhs, const Array2D& rhs) { +namespace { + +template +std::unique_ptr> MatmulArray2DImpl( + const Array2D& lhs, const Array2D& rhs, + const std::function& impl_fn) { CHECK_EQ(lhs.width(), rhs.height()); int m = lhs.height(); int n = rhs.width(); int k = lhs.width(); - auto result = MakeUnique>(m, n); + auto result = MakeUnique>(m, n); // Because Eigen is a header-oriented library, make sure that the Eigen code // is the same as the code used by the CPU backend (otherwise the linker will // randomly pick *some* definition). 
- __xla_cpu_runtime_EigenSingleThreadedMatMulF32( + impl_fn( /*run_options_ptr=*/nullptr, result->data(), rhs.data(), lhs.data(), n, m, k, /*transpose_lhs=*/0, @@ -60,22 +54,24 @@ namespace xla { return result; } +} // namespace + +/* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( + const Array2D& lhs, const Array2D& rhs) { + return MatmulArray2DImpl( + lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF16); +} + +/* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( + const Array2D& lhs, const Array2D& rhs) { + return MatmulArray2DImpl( + lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF32); +} + /* static */ std::unique_ptr> ReferenceUtil::MatmulArray2D( const Array2D& lhs, const Array2D& rhs) { - CHECK_EQ(lhs.width(), rhs.height()); - int m = lhs.height(); - int n = rhs.width(); - int k = lhs.width(); - auto result = MakeUnique>(m, n); - // Because Eigen is a header-oriented library, make sure that the Eigen code - // is the same as the code used by the CPU backend (otherwise the linker will - // randomly pick *some* definition). - __xla_cpu_runtime_EigenSingleThreadedMatMulF64( - /*run_options_ptr=*/nullptr, result->data(), rhs.data(), lhs.data(), n, m, - k, - /*transpose_lhs=*/0, - /*transpose_rhs=*/0); - return result; + return MatmulArray2DImpl( + lhs, rhs, __xla_cpu_runtime_EigenSingleThreadedMatMulF64); } /* static */ std::unique_ptr> ReferenceUtil::Array2DF32ToF64( diff --git a/tensorflow/compiler/xla/reference_util.h b/tensorflow/compiler/xla/reference_util.h index 3ec96f2f38..57b0218882 100644 --- a/tensorflow/compiler/xla/reference_util.h +++ b/tensorflow/compiler/xla/reference_util.h @@ -39,10 +39,22 @@ namespace xla { class ReferenceUtil { public: // Returns the result of a transpose operation on the input matrix. 
- static std::unique_ptr> TransposeArray2D( - const Array2D& operand); + template + static std::unique_ptr> TransposeArray2D( + const Array2D& operand) { + auto result = MakeUnique>(operand.width(), operand.height()); + for (int64 w = 0; w < operand.width(); ++w) { + for (int64 h = 0; h < operand.height(); ++h) { + (*result)(w, h) = operand(h, w); + } + } + + return result; + } // Returns the result of a matrix multiply `lhs x rhs`. + static std::unique_ptr> MatmulArray2D( + const Array2D& lhs, const Array2D& rhs); static std::unique_ptr> MatmulArray2D( const Array2D& lhs, const Array2D& rhs); static std::unique_ptr> MatmulArray2D( diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 32be0b0c96..4170e31527 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -514,7 +514,6 @@ cc_library( cc_library( name = "runtime_matvec", - srcs = ["runtime_matvec.cc"], hdrs = ["runtime_matvec.h"], copts = runtime_copts(), deps = [ diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc index 40ace96327..9a3bd68c80 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.cc @@ -31,6 +31,8 @@ XfeedManager* GetXfeedManager() { return manager; } +extern const char* const kEigenMatMulF16SymbolName = + "__xla_cpu_runtime_EigenMatMulF16"; extern const char* const kEigenMatMulF32SymbolName = "__xla_cpu_runtime_EigenMatMulF32"; extern const char* const kEigenMatMulF64SymbolName = @@ -40,6 +42,8 @@ extern const char* const kEigenConvF16SymbolName = extern const char* const kEigenConvF32SymbolName = "__xla_cpu_runtime_EigenConvF32"; extern const char* const kEigenFftSymbolName = "__xla_cpu_runtime_EigenFft"; +extern const char* const kEigenSingleThreadedMatMulF16SymbolName = + "__xla_cpu_runtime_EigenSingleThreadedMatMulF16"; extern const char* const 
kEigenSingleThreadedMatMulF32SymbolName = "__xla_cpu_runtime_EigenSingleThreadedMatMulF32"; extern const char* const kEigenSingleThreadedMatMulF64SymbolName = diff --git a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h index 2141dfe1ce..e61d6ea28b 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_runtime.h +++ b/tensorflow/compiler/xla/service/cpu/cpu_runtime.h @@ -41,11 +41,13 @@ namespace runtime { // the actual symbol. // 2. When using ahead-of-time compilation, the linker can resolve the name // because it is a symbol in the cpu_runtime library. +extern const char* const kEigenMatMulF16SymbolName; extern const char* const kEigenMatMulF32SymbolName; extern const char* const kEigenMatMulF64SymbolName; extern const char* const kEigenConvF16SymbolName; extern const char* const kEigenConvF32SymbolName; extern const char* const kEigenFftSymbolName; +extern const char* const kEigenSingleThreadedMatMulF16SymbolName; extern const char* const kEigenSingleThreadedMatMulF32SymbolName; extern const char* const kEigenSingleThreadedMatMulF64SymbolName; extern const char* const kEigenSingleThreadedConvF16SymbolName; diff --git a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc index cfe7c9c3af..6f06256e08 100644 --- a/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/dot_op_emitter.cc @@ -919,6 +919,12 @@ tensorflow::Status DotOpEmitter::EmitCallToRuntime() { llvm::Type* float_type; const char* fn_name; switch (type) { + case F16: + fn_name = multi_threaded_eigen + ? runtime::kEigenMatMulF16SymbolName + : runtime::kEigenSingleThreadedMatMulF16SymbolName; + float_type = ir_builder_->getHalfTy(); + break; case F32: fn_name = multi_threaded_eigen ? 
runtime::kEigenMatMulF32SymbolName @@ -1051,7 +1057,8 @@ static bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, // The inputs and the output must // 1) be matrices with no padding, and // 2) have an allowed element type. - return output_shape.element_type() == F32 && + PrimitiveType output_primitive_type = output_shape.element_type(); + return (output_primitive_type == F32 || output_primitive_type == F16) && IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && IsRank2WithNoPadding(output_shape); } diff --git a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc index 4dffaee87f..3b8056d505 100644 --- a/tensorflow/compiler/xla/service/cpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/cpu/ir_emitter.cc @@ -2074,7 +2074,7 @@ Status IrEmitter::HandleFusion(HloInstruction* fusion) { TF_RETURN_IF_ERROR(ElementTypesSameAndSupported( /*instruction=*/*root, /*operands=*/{lhs, rhs}, - /*supported_types=*/{F32})); + /*supported_types=*/{F16, F32})); llvm_ir::IrArray lhs_array(GetIrArrayFor(lhs)); llvm_ir::IrArray rhs_array(GetIrArrayFor(rhs)); diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc index bff57d33ae..39b13183ff 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul.cc @@ -63,30 +63,41 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, C.device(*run_options->intra_op_thread_pool()) = A.contract(B, dims); } +template +void MatMulImpl(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, + int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { + if (m == 1 || n == 1) { + // Despite being single threaded, this version of matrix * vector is faster. 
+ xla::EigenMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + } else { + MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); + } +} + } // namespace +void __xla_cpu_runtime_EigenMatMulF16(const void* run_options_ptr, + Eigen::half* out, Eigen::half* lhs, + Eigen::half* rhs, int64 m, int64 n, + int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + MatMulImpl(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); +} + void __xla_cpu_runtime_EigenMatMulF32(const void* run_options_ptr, float* out, float* lhs, float* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - // Despite being single threaded, this version of matrix * vector is faster. - xla::EigenMatVecF32(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + MatMulImpl(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); } void __xla_cpu_runtime_EigenMatMulF64(const void* run_options_ptr, double* out, double* lhs, double* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - // Despite being single threaded, this version of matrix * vector is faster. - xla::EigenMatVecF64(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + MatMulImpl(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matmul.h b/tensorflow/compiler/xla/service/cpu/runtime_matmul.h index fdb644651d..b5156434f6 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matmul.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_matmul.h @@ -25,6 +25,12 @@ extern "C" { // order. 'out' is a pointer to a buffer sufficiently large to hold the result // of the operation. 
Following standard nomenclature: lhs is m x k, // rhs is k x n, and out is m x n. +extern void __xla_cpu_runtime_EigenMatMulF16( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, + Eigen::half* out, Eigen::half* lhs, Eigen::half* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); + extern void __xla_cpu_runtime_EigenMatMulF32( const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matvec.cc b/tensorflow/compiler/xla/service/cpu/runtime_matvec.cc deleted file mode 100644 index 435820cdd3..0000000000 --- a/tensorflow/compiler/xla/service/cpu/runtime_matvec.cc +++ /dev/null @@ -1,110 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include - -#include "third_party/eigen3/Eigen/Core" -#include "tensorflow/compiler/xla/service/cpu/runtime_matvec.h" - -using tensorflow::int32; -using tensorflow::int64; - -namespace { - -// Does mat * x or mat^T * x. -template -void MatVec(T* out_buf, T* mat_buf, T* x_buf, int64 rows, int64 cols, - int32 transpose) { - // Use an Eigen Matrix instead of a Tensor, as the GEMV from Matrix seems to - // be faster (b/30223679). 
See also: the matmul op kernel in TensorFlow, - // which implements the same optimization. - using Matrix = Eigen::Matrix; - using MatrixMap = Eigen::Map; - - using Vector = Eigen::Matrix; - using VectorMap = Eigen::Map; - - auto x = VectorMap(x_buf, cols); - auto out = VectorMap(out_buf, rows); - - int64 mat_rows = rows; - int64 mat_cols = cols; - - if (transpose) { - std::swap(mat_rows, mat_cols); - } - - auto mat = MatrixMap(mat_buf, mat_rows, mat_cols); - - if (transpose) { - out = mat.transpose() * x; - } else { - out = mat * x; - } -} - -// Converts matmul-style args to matvec. -template -void DispatchMatVec(T* out, T* lhs, T* rhs, int64 m, int64 n, int64 k, - int32 transpose_lhs, int32 transpose_rhs) { - // If the input is in the form x * A, where x is the vector, then bring A back - // over to the left hand side. We make use of the identity - // - // (x * A)^T = A^T * x^T - // - // We do not need to take the transpose of x or of the result since taking - // the transpose of a vector does not change the memory layout. 
- const int64 cols = k; - - T* mat; - T* vec; - int64 rows; - bool transpose_mat; - - bool is_mat_vec = (n == 1); - - if (is_mat_vec) { - mat = lhs; - vec = rhs; - rows = m; - transpose_mat = transpose_lhs; - } else { - mat = rhs; - vec = lhs; - rows = n; - transpose_mat = !transpose_rhs; - } - - MatVec(out, mat, vec, rows, cols, transpose_mat); -} - -} // namespace - -namespace xla { - -void EigenMatVecF32(float* out, float* lhs, float* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { - assert((m == 1 || n == 1) && "not a matrix-vector multiply"); - DispatchMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); -} - -void EigenMatVecF64(double* out, double* lhs, double* rhs, int64 m, int64 n, - int64 k, int32 transpose_lhs, int32 transpose_rhs) { - assert((m == 1 || n == 1) && "not a matrix-vector multiply"); - DispatchMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); -} - -} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matvec.h b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h index 1bd8dfb377..70eb98c541 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_matvec.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h @@ -16,10 +16,86 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_ #define TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_ +#include "third_party/eigen3/Eigen/Core" + #include "tensorflow/core/platform/types.h" namespace xla { +namespace detail { + +using tensorflow::int32; +using tensorflow::int64; + +// Does mat * x or mat^T * x. +template +void MatVec(T* out_buf, T* mat_buf, T* x_buf, int64 rows, int64 cols, + int32 transpose) { + // Use an Eigen Matrix instead of a Tensor, as the GEMV from Matrix seems to + // be faster (b/30223679). See also: the matmul op kernel in TensorFlow, + // which implements the same optimization. 
+ using Matrix = Eigen::Matrix; + using MatrixMap = Eigen::Map; + + using Vector = Eigen::Matrix; + using VectorMap = Eigen::Map; + + auto x = VectorMap(x_buf, cols); + auto out = VectorMap(out_buf, rows); + + int64 mat_rows = rows; + int64 mat_cols = cols; + + if (transpose) { + std::swap(mat_rows, mat_cols); + } + + auto mat = MatrixMap(mat_buf, mat_rows, mat_cols); + + if (transpose) { + out = mat.transpose() * x; + } else { + out = mat * x; + } +} + +// Converts matmul-style args to matvec. +template +void DispatchMatVec(T* out, T* lhs, T* rhs, int64 m, int64 n, int64 k, + int32 transpose_lhs, int32 transpose_rhs) { + // If the input is in the form x * A, where x is the vector, then bring A back + // over to the left hand side. We make use of the identity + // + // (x * A)^T = A^T * x^T + // + // We do not need to take the transpose of x or of the result since taking + // the transpose of a vector does not change the memory layout. + const int64 cols = k; + + T* mat; + T* vec; + int64 rows; + bool transpose_mat; + + bool is_mat_vec = (n == 1); + + if (is_mat_vec) { + mat = lhs; + vec = rhs; + rows = m; + transpose_mat = transpose_lhs; + } else { + mat = rhs; + vec = lhs; + rows = n; + transpose_mat = !transpose_rhs; + } + + MatVec(out, mat, vec, rows, cols, transpose_mat); +} + +} // namespace detail + // Performs a matrix-vector multiplication using Eigen. 'lhs' and 'rhs' are // pointers to buffers containing input matrices in column-major order. 'out' is // a pointer to a buffer sufficiently large to hold the result of the @@ -30,15 +106,15 @@ namespace xla { // // TODO(b/64684907): Compare runtime performance of these functions with dot // simplification. 
-void EigenMatVecF32(float* out, float* lhs, float* rhs, tensorflow::int64 m, - tensorflow::int64 n, tensorflow::int64 k, - tensorflow::int32 transpose_lhs, - tensorflow::int32 transpose_rhs); - -void EigenMatVecF64(double* out, double* lhs, double* rhs, tensorflow::int64 m, - tensorflow::int64 n, tensorflow::int64 k, - tensorflow::int32 transpose_lhs, - tensorflow::int32 transpose_rhs); +template +void EigenMatVec(T* out, T* lhs, T* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs) { + assert((m == 1 || n == 1) && "not a matrix-vector multiply"); + detail::DispatchMatVec(out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); +} } // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc index ee8eb08155..17303e2f0d 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.cc @@ -57,26 +57,38 @@ void MatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, int64 m, C = A.contract(B, dims); } +template +void SingleThreadedMatMul(const void* run_options_ptr, T* out, T* lhs, T* rhs, + int64 m, int64 n, int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + if (m == 1 || n == 1) { + xla::EigenMatVec(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); + } else { + MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, + transpose_rhs); + } +} + } // namespace +void __xla_cpu_runtime_EigenSingleThreadedMatMulF16( + const void* run_options_ptr, Eigen::half* out, Eigen::half* lhs, + Eigen::half* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, + int32 transpose_rhs) { + SingleThreadedMatMul(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); +} + void __xla_cpu_runtime_EigenSingleThreadedMatMulF32( const void* run_options_ptr, float* 
out, float* lhs, float* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - xla::EigenMatVecF32(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + SingleThreadedMatMul(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); } void __xla_cpu_runtime_EigenSingleThreadedMatMulF64( const void* run_options_ptr, double* out, double* lhs, double* rhs, int64 m, int64 n, int64 k, int32 transpose_lhs, int32 transpose_rhs) { - if (m == 1 || n == 1) { - xla::EigenMatVecF64(out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs); - } else { - MatMul(run_options_ptr, out, lhs, rhs, m, n, k, transpose_lhs, - transpose_rhs); - } + SingleThreadedMatMul(run_options_ptr, out, lhs, rhs, m, n, k, + transpose_lhs, transpose_rhs); } diff --git a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h index 029eb95142..9371a62242 100644 --- a/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h +++ b/tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h @@ -25,6 +25,12 @@ extern "C" { // 'out' is a pointer to a buffer sufficiently large to hold the result of the // operation. Following standard nomenclature: lhs is m x k, rhs is k x n, and // out is m x n. 
+extern void __xla_cpu_runtime_EigenSingleThreadedMatMulF16( + const void* /* xla::ExecutableRunOptions* */ run_options_ptr, + Eigen::half* out, Eigen::half* lhs, Eigen::half* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); + extern void __xla_cpu_runtime_EigenSingleThreadedMatMulF32( const void* /* xla::ExecutableRunOptions* */ run_options_ptr, float* out, float* lhs, float* rhs, tensorflow::int64 m, tensorflow::int64 n, diff --git a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc index e8a375d637..80c24eaccf 100644 --- a/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc +++ b/tensorflow/compiler/xla/service/cpu/simple_orc_jit.cc @@ -181,10 +181,12 @@ bool RegisterKnownJITSymbols() { REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenFft); + REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32); + REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32); REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64); REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin); diff --git a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc index ba482793e7..ca54b2eed8 100644 --- a/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc +++ b/tensorflow/compiler/xla/service/gpu/gemm_thunk.cc @@ -108,11 +108,13 @@ bool DoGemmWithAlgorithm(MatrixDescriptor lhs_matrix, return stream ->ThenBlasGemmWithAlgorithm( lhs_transpose, rhs_transpose, output_matrix.num_rows, - output_matrix.num_cols, /*size of reduce dim=*/k, /*alpha=*/1.0, - lhs_data, /*leading dim of 
LHS=*/lhs_matrix.num_rows, rhs_data, - /*leading dim of RHS=*/rhs_matrix.num_rows, /*beta=*/0.0, - &output_data, /*leading dim of output=*/output_matrix.num_rows, - computation_type, algorithm, output_profile_result) + output_matrix.num_cols, /*size of reduce dim=*/k, + /*alpha=*/static_cast(1.0f), lhs_data, + /*leading dim of LHS=*/lhs_matrix.num_rows, rhs_data, + /*leading dim of RHS=*/rhs_matrix.num_rows, + /*beta=*/static_cast(0.0f), &output_data, + /*leading dim of output=*/output_matrix.num_rows, computation_type, + algorithm, output_profile_result) .ok(); } @@ -161,6 +163,8 @@ StatusOr DoGemmAutotune( // DoGemm/DoGemmWithAlgorithm/DoGemmAutotune. auto GetGemmFn(PrimitiveType type) -> decltype(&DoGemm) { switch (type) { + case F16: + return &DoGemm; case F32: return &DoGemm; case F64: @@ -172,6 +176,8 @@ auto GetGemmFn(PrimitiveType type) -> decltype(&DoGemm) { auto GetGemmWithAlgorithmFn(PrimitiveType type) -> decltype(&DoGemmWithAlgorithm) { switch (type) { + case F16: + return &DoGemmWithAlgorithm; case F32: return &DoGemmWithAlgorithm; case F64: @@ -182,6 +188,8 @@ auto GetGemmWithAlgorithmFn(PrimitiveType type) } auto GetGemmAutotuneFn(PrimitiveType type) -> decltype(&DoGemmAutotune) { switch (type) { + case F16: + return &DoGemmAutotune; case F32: return &DoGemmAutotune; case F64: @@ -196,6 +204,10 @@ auto GetGemmAutotuneFn(PrimitiveType type) -> decltype(&DoGemmAutotune) { // separately from the precision of the inputs and result. se::blas::ComputationType GetBlasComputationType(PrimitiveType type) { switch (type) { + case F16: + // Use F32 as computation type for F16 as we currently only implement the + // cuDNN pseudo half configuration for half precision. + return se::blas::ComputationType::kF32; case F32: return se::blas::ComputationType::kF32; case F64: @@ -315,6 +327,9 @@ tensorflow::Status GemmThunk::ExecuteOnStream( stream, /*output_profile_result=*/nullptr); } + + // Autotune will fail when CUDA 8 and GPU sm_50 or older are used. 
+ // Use the older Gemm API in this case. return GetGemmFn(element_type)(lhs_matrix, rhs_matrix, output_matrix, stream); }; diff --git a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc index 2f65edffea..1b89dfa7ae 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emission_utils.cc @@ -49,8 +49,10 @@ bool AreValidGemmShapes(const Shape& lhs_shape, const Shape& rhs_shape, // The inputs and the output must // 1) be matrices with no padding and a non-zero number of elements, // 2) have an allowed element type. - bool type_is_allowed = (output_shape.element_type() == F32 || - output_shape.element_type() == F64); + PrimitiveType output_primitive_type = output_shape.element_type(); + bool type_is_allowed = + (output_primitive_type == F16 || output_primitive_type == F32 || + output_primitive_type == F64); return type_is_allowed && IsRank2WithNoPadding(lhs_shape) && IsRank2WithNoPadding(rhs_shape) && IsRank2WithNoPadding(output_shape) && diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 923315e001..fb66f69709 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -321,6 +321,15 @@ class ShapeUtil { static Shape MakeShape(PrimitiveType element_type, tensorflow::gtl::ArraySlice dimensions); + // Creates a Shape with element type corresponding to T and the given + // dimensions + template + static Shape MakeShapeWithType( + tensorflow::gtl::ArraySlice dimensions) { + return ShapeUtil::MakeShape(primitive_util::NativeToPrimitiveType(), + dimensions); + } + // Constructs a new shape with the given minor_to_major order in its Layout. // Returns a value shape such that shape.has_layout(). 
static Shape MakeShapeWithLayout( diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index dc282f2440..63f4a4430f 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -1299,6 +1299,7 @@ xla_test( "//tensorflow/compiler/xla/client:local_client", "//tensorflow/compiler/xla/tests:client_library_test_base", "//tensorflow/compiler/xla/tests:literal_test_util", + "//tensorflow/compiler/xla/tests:test_utils", "//tensorflow/compiler/xla/tests:xla_internal_test_main", "//tensorflow/core:lib", "//tensorflow/core:test", diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index e2b5c91653..99640f5bb5 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -53,26 +53,12 @@ class ConvolutionTest : public ClientLibraryTestBase { #endif }; -#if (XLA_TEST_BACKEND_GPU || XLA_TEST_BACKEND_CPU) -using TestTypes = ::testing::Types; -#else +#ifdef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 using TestTypes = ::testing::Types; +#else +using TestTypes = ::testing::Types; #endif -template -Shape MakeShapeWrapper(tensorflow::gtl::ArraySlice dimensions); - -template <> -Shape MakeShapeWrapper(tensorflow::gtl::ArraySlice dimensions) { - return ShapeUtil::MakeShape(F32, dimensions); -} - -template <> -Shape MakeShapeWrapper( - tensorflow::gtl::ArraySlice dimensions) { - return ShapeUtil::MakeShape(F16, dimensions); -} - template class ForwardPassConvolution_3x3x256_256_OutputZ_Iota : public ConvolutionTest { public: @@ -121,8 +107,8 @@ class Convolve_1x1x1x2_1x1x1x2_Valid : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 1, 2}); - Shape filter_shape = MakeShapeWrapper({1, 1, 1, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 1, 2}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 
1, 1, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -152,8 +138,8 @@ class Convolve_1x1x4x4_1x1x2x2_Valid : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); - Shape filter_shape = MakeShapeWrapper({1, 1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kValid); @@ -186,8 +172,8 @@ class Convolve_1x1x4x4_1x1x2x2_Same : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); - Shape filter_shape = MakeShapeWrapper({1, 1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); @@ -222,8 +208,8 @@ class Convolve_1x1x4x4_1x1x3x3_Same : public ConvolutionTest { public: void RunTest() { ComputationBuilder builder(client_, TestName()); - Shape input_shape = MakeShapeWrapper({1, 1, 4, 4}); - Shape filter_shape = MakeShapeWrapper({1, 1, 3, 3}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 1, 4, 4}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 1, 3, 3}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); auto conv = builder.Conv(input, filter, {1, 1}, Padding::kSame); @@ -280,8 +266,8 @@ class 
Convolve1D_1x2x5_1x2x2_WithRHSDilation : public ConvolutionTest { void RunTest() { ComputationBuilder builder(client_, TestName()); { - Shape input_shape = MakeShapeWrapper({1, 2, 5}); - Shape filter_shape = MakeShapeWrapper({1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); // Convolution dimensions are bf0_oi0->bo0. @@ -381,8 +367,8 @@ class Convolve1D_1x2x5_1x2x2_WithPadding : public ConvolutionTest { void RunTest() { ComputationBuilder builder(client_, TestName()); { - Shape input_shape = MakeShapeWrapper({1, 2, 5}); - Shape filter_shape = MakeShapeWrapper({1, 2, 2}); + Shape input_shape = ShapeUtil::MakeShapeWithType({1, 2, 5}); + Shape filter_shape = ShapeUtil::MakeShapeWithType({1, 2, 2}); auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); // Convolution dimensions are bf0_oi0->bo0. 
@@ -486,8 +472,8 @@ class Convolve2D_1x3x3x5_3x3x5x5_Valid : public ConvolutionTest { ComputationBuilder builder(client_, TestName()); std::vector input_dims = {1, 3, 3, 5}; std::vector filter_dims = {3, 3, 5, 3}; - Shape input_shape = MakeShapeWrapper(input_dims); - Shape filter_shape = MakeShapeWrapper(filter_dims); + Shape input_shape = ShapeUtil::MakeShapeWithType(input_dims); + Shape filter_shape = ShapeUtil::MakeShapeWithType(filter_dims); { auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); @@ -611,8 +597,8 @@ class Convolve1D1WindowTestBase input_feature}; std::vector filter_dims = {window_size, input_feature, output_feature}; - Shape input_shape = MakeShapeWrapper(input_dims); - Shape filter_shape = MakeShapeWrapper(filter_dims); + Shape input_shape = ShapeUtil::MakeShapeWithType(input_dims); + Shape filter_shape = ShapeUtil::MakeShapeWithType(filter_dims); { auto input = builder.Parameter(0, input_shape, "input"); auto filter = builder.Parameter(1, filter_shape, "filter"); diff --git a/tensorflow/compiler/xla/tests/dot_operation_test.cc b/tensorflow/compiler/xla/tests/dot_operation_test.cc index 815962094a..09b1dd283e 100644 --- a/tensorflow/compiler/xla/tests/dot_operation_test.cc +++ b/tensorflow/compiler/xla/tests/dot_operation_test.cc @@ -34,169 +34,194 @@ limitations under the License. namespace xla { namespace { -// TODO(b/34468543): use GUnit typed tests when we can do all tests on all -// backends. 
class DotOperationTest : public ClientLibraryTestBase { public: ErrorSpec error_spec_{0.0001, 1e-5}; - - protected: - template - void TestOneElementVectorDot(); - template - void TestVectorDot(); - template - void TestSquareMatrixDot(bool lhs_row_major = false, - bool rhs_row_major = false); - template - void TestNonsquareMatrixDot(bool lhs_row_major = false, - bool rhs_row_major = false); }; -XLA_TEST_F(DotOperationTest, ZeroElementVectorDotF32) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR1({}); - auto rhs = builder.ConstantR1({}); +#if defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16) && \ + defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT64) +using TypesF16F32 = ::testing::Types; +using TypesF16F32F64 = ::testing::Types; +using TypesF16F32F64CF64 = ::testing::Types; +#elif !defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16) && \ + !defined(XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT64) +using TypesF16F32 = ::testing::Types; +using TypesF16F32F64 = ::testing::Types; +using TypesF16F32F64CF64 = + ::testing::Types; +#else +#error "Situation not handled yet" +#endif + +template +class DotOperationTest_F16F32F64CF64 : public DotOperationTest {}; +TYPED_TEST_CASE(DotOperationTest_F16F32F64CF64, TypesF16F32F64CF64); + +XLA_TYPED_TEST(DotOperationTest_F16F32F64CF64, ZeroElementVectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + + auto lhs = builder.ConstantR1({}); + auto rhs = builder.ConstantR1({}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR0(&builder, 0.0, {}, error_spec_); + this->template ComputeAndCompareR0(&builder, static_cast(0.0), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, TrivialMatrixVectorDotF32) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2({{3.0, 4.0}}); - auto rhs = builder.ConstantR1({3.0, 4.0}); - auto result = builder.Dot(lhs, rhs); +template +class DotOperationTest_F16F32F64 : public DotOperationTest {}; 
+TYPED_TEST_CASE(DotOperationTest_F16F32F64, TypesF16F32F64); - ComputeAndCompareR1(&builder, {25.0}, {}, error_spec_); -} - -template -void DotOperationTest::TestOneElementVectorDot() { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR1({2.0}); - auto rhs = builder.ConstantR1({3.0}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, TrivialMatrixVectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D({{3.0f, 4.0f}}); + auto rhs = builder.ConstantFromArray({3.0f, 4.0f}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR0(&builder, 6.0, {}, error_spec_); + this->template ComputeAndCompareR1(&builder, {static_cast(25.0f)}, {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, OneElementVectorDotF32) { - TestOneElementVectorDot(); -} +XLA_TYPED_TEST(DotOperationTest_F16F32F64, OneElementVectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR1({static_cast(2.0f)}); + auto rhs = builder.ConstantR1({static_cast(3.0f)}); + auto result = builder.Dot(lhs, rhs); -XLA_TEST_F(DotOperationTest, OneElementVectorDotF64) { - TestOneElementVectorDot(); + this->template ComputeAndCompareR0(&builder, static_cast(6.0f), {}, + this->error_spec_); } -template -void DotOperationTest::TestVectorDot() { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR1({1.0, 2.5, 42.0}); - auto rhs = builder.ConstantR1({11.0, -1.0, 0.5}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, VectorDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantFromArray({1.0f, 2.5f, 42.0f}); + auto rhs = builder.ConstantFromArray({11.0f, -1.0f, 0.5f}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR0(&builder, 29.5, {}, error_spec_); + this->template ComputeAndCompareR0(&builder, static_cast(29.5f), {}, + 
this->error_spec_); } -XLA_TEST_F(DotOperationTest, VectorDotF32) { TestVectorDot(); } - -XLA_TEST_F(DotOperationTest, VectorDotF64) { TestVectorDot(); } - -namespace { - std::vector MinorToMajorForIsRowMajor(bool row_major) { return {row_major ? 1 : 0, row_major ? 0 : 1}; } -} // namespace - -XLA_TEST_F(DotOperationTest, Dot_0x2_2x0) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); - auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_0x2_2x0) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); + auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(0, 0), {}, error_spec_); + this->template ComputeAndCompareR2(&builder, Array2D(0, 0), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, Dot_0x2_2x3) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); - auto rhs = builder.ConstantR2({{7.0, 8.0, 9.0}, {42.0, 77.0, 101.0}}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_0x2_2x3) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); + auto rhs = builder.ConstantR2FromArray2D( + {{7.0f, 8.0f, 9.0f}, {42.0f, 77.0f, 101.0f}}); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(0, 3), {}, error_spec_); + this->template ComputeAndCompareR2(&builder, Array2D(0, 3), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, Dot_3x2_2x0) { - ComputationBuilder builder(client_, TestName()); - auto lhs = - builder.ConstantR2({{7.0, 8.0}, {9.0, 42.0}, {77.0, 101.0}}); - auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_3x2_2x0) { + using T = TypeParam; + 
ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D( + {{7.0f, 8.0f}, {9.0f, 42.0f}, {77.0f, 101.0f}}); + auto rhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(3, 0), {}, error_spec_); + this->template ComputeAndCompareR2(&builder, Array2D(3, 0), {}, + this->error_spec_); } -XLA_TEST_F(DotOperationTest, Dot_2x0_0x2) { - ComputationBuilder builder(client_, TestName()); - auto lhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); - auto rhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, Dot_2x0_0x2) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto lhs = builder.ConstantR2FromArray2D(Array2D(2, 0)); + auto rhs = builder.ConstantR2FromArray2D(Array2D(0, 2)); auto result = builder.Dot(lhs, rhs); - ComputeAndCompareR2(&builder, Array2D(2, 2, 0.0f), {}, - error_spec_); + this->template ComputeAndCompareR2( + &builder, Array2D(2, 2, static_cast(0.0f)), {}, this->error_spec_); } -XLA_TEST_F(DotOperationTest, FusedDot) { - ComputationBuilder builder(client_, TestName()); - auto param0 = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 4}), "arg0"); - auto param1 = builder.Parameter(1, ShapeUtil::MakeShape(F32, {4, 1}), "arg1"); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, FusedDot) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto param0 = + builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 4}), "arg0"); + auto param1 = + builder.Parameter(1, ShapeUtil::MakeShapeWithType({4, 1}), "arg1"); auto exp0 = builder.Exp(param0); auto result = builder.Dot(exp0, param1); - auto lhs_handle = client_ - ->TransferToServer(*Literal::CreateR2( - {{1.0, 2.0, 3.0, 4.0}, {-1.0, -2.0, -3.0, -4.0}})) - .ConsumeValueOrDie(); - auto rhs_handle = client_ - ->TransferToServer(*Literal::CreateR2( - {{1.0}, {2.0}, {3.0}, {4.0}})) - 
.ConsumeValueOrDie(); - - ComputeAndCompareR2( - &builder, Array2D({{296.14560492846033}, {0.8611737683031964}}), - {lhs_handle.get(), rhs_handle.get()}, error_spec_); -} - -template -void DotOperationTest::TestSquareMatrixDot(bool lhs_row_major, - bool rhs_row_major) { auto lhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 2.0}, {3.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) - .ConsumeValueOrDie(); - auto rhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 6.0}, {7.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) + this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2D( + {{1.0f, 2.0f, 3.0f, 4.0f}, {-1.0f, -2.0f, -3.0f, -4.0f}})) .ConsumeValueOrDie(); + auto rhs_handle = this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2D( + {{1.0f}, {2.0f}, {3.0f}, {4.0f}})) + .ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); - auto prim_type = primitive_util::NativeToPrimitiveType(); - auto result = builder.Dot( - builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), "lhs"), - builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 2}), "rhs")); + if (std::is_same::value) { + this->error_spec_ = ErrorSpec{0.0001, 1e-3}; + } - Array2D expected({{15.0, -2.0}, {-25.0, 34.0}}); - ComputeAndCompareR2( - &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); + this->template ComputeAndCompareR2( + &builder, Array2D({{296.14560492846033f}, {0.8611737683031964f}}), + {lhs_handle.get(), rhs_handle.get()}, this->error_spec_); } +template +class SquareMatrixDot : public DotOperationTest { + public: + void TestImpl(bool lhs_row_major, bool rhs_row_major) { + auto lhs_handle = + client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 2.0f}, {3.0f, -4.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(lhs_row_major)))) + .ConsumeValueOrDie(); + auto rhs_handle = + 
client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 6.0f}, {7.0f, -4.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(rhs_row_major)))) + .ConsumeValueOrDie(); + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); + auto result = builder.Dot( + builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 2}), "rhs")); + + Array2D expected({{15.0f, -2.0f}, {-25.0f, 34.0f}}); + ComputeAndCompareR2(&builder, expected, + {lhs_handle.get(), rhs_handle.get()}, error_spec_); + } +}; + +TYPED_TEST_CASE(SquareMatrixDot, TypesF16F32F64CF64); +XLA_TYPED_TEST(SquareMatrixDot, TypesFF) { this->TestImpl(false, false); } +XLA_TYPED_TEST(SquareMatrixDot, TypesFT) { this->TestImpl(false, true); } +XLA_TYPED_TEST(SquareMatrixDot, TypesTF) { this->TestImpl(true, false); } +XLA_TYPED_TEST(SquareMatrixDot, TypesTT) { this->TestImpl(true, true); } + struct DotTestParam { int m; int k; @@ -302,14 +327,13 @@ void ParametricDotTest::TestImpl() { if (param.has_addend) { args.push_back(addend_handle.get()); } - - ComputeAndCompareR2(&builder, *expected, args, ErrorSpec(0.3, 3e-3)); + ErrorSpec error_spec(0.3, 3e-3); + if (std::is_same::value) { + error_spec = ErrorSpec(0.3, 5e-3); + } + ComputeAndCompareR2(&builder, *expected, args, error_spec); } -XLA_TEST_P(ParametricDotTest, TestF32) { TestImpl(); } - -XLA_TEST_P(ParametricDotTest, TestF64) { TestImpl(); } - std::vector CreateDotTestParameters() { std::vector params; @@ -331,6 +355,12 @@ std::vector CreateDotTestParameters() { return params; } +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +XLA_TEST_P(ParametricDotTest, TestF16) { TestImpl(); } +#endif +XLA_TEST_P(ParametricDotTest, TestF32) { TestImpl(); } +XLA_TEST_P(ParametricDotTest, TestF64) { TestImpl(); } + INSTANTIATE_TEST_CASE_P(DotTests, ParametricDotTest, ::testing::ValuesIn(CreateDotTestParameters()), PrintDotTestParam); 
@@ -343,14 +373,6 @@ class ParametricDotTestWithoutLayoutAssignment : public ParametricDotTest { } }; -XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF32) { - TestImpl(); -} - -XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF64) { - TestImpl(); -} - std::vector CreateNoLayoutAssignmentDotTestParameters() { std::vector params; @@ -407,110 +429,60 @@ std::vector CreateNoLayoutAssignmentDotTestParameters() { return params; } -INSTANTIATE_TEST_CASE_P( - DotTests, ParametricDotTestWithoutLayoutAssignment, - ::testing::ValuesIn(CreateNoLayoutAssignmentDotTestParameters()), - PrintDotTestParam); - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFF) { - TestSquareMatrixDot(false, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorFT) { - TestSquareMatrixDot(false, true); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTF) { - TestSquareMatrixDot(true, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF32MinorToMajorTT) { - TestSquareMatrixDot(true, true); +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF16) { + TestImpl(); } - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFF) { - TestSquareMatrixDot(false, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorFT) { - TestSquareMatrixDot(false, true); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTF) { - TestSquareMatrixDot(true, false); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotC64MinorToMajorTT) { - TestSquareMatrixDot(true, true); -} - -XLA_TEST_F(DotOperationTest, SquareMatrixDotF64) { - TestSquareMatrixDot(); -} - -template -void DotOperationTest::TestNonsquareMatrixDot(bool lhs_row_major, - bool rhs_row_major) { - auto lhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(lhs_row_major)))) - .ConsumeValueOrDie(); - 
auto rhs_handle = - client_ - ->TransferToServer(*Literal::CreateR2WithLayout( - {{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}}, - LayoutUtil::MakeLayout(MinorToMajorForIsRowMajor(rhs_row_major)))) - .ConsumeValueOrDie(); - - ComputationBuilder builder(client_, TestName()); - auto prim_type = primitive_util::NativeToPrimitiveType(); - auto result = builder.Dot( - builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 3}), "lhs"), - builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {3, 2}), "rhs")); - - Array2D expected({{26.0, 0.0}, {-12.0, 10.0}}); - - ComputeAndCompareR2( - &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFF) { - TestNonsquareMatrixDot(false, false); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorFT) { - TestNonsquareMatrixDot(false, true); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTF) { - TestNonsquareMatrixDot(true, false); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF32MajorToMinorTT) { - TestNonsquareMatrixDot(true, true); -} - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotF64) { - TestNonsquareMatrixDot(); +#endif +XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF32) { + TestImpl(); } - -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorFF) { - TestNonsquareMatrixDot(false, false); +XLA_TEST_P(ParametricDotTestWithoutLayoutAssignment, TestF64) { + TestImpl(); } -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorFT) { - TestNonsquareMatrixDot(false, true); -} +INSTANTIATE_TEST_CASE_P( + DotTests, ParametricDotTestWithoutLayoutAssignment, + ::testing::ValuesIn(CreateNoLayoutAssignmentDotTestParameters()), + PrintDotTestParam); -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorTF) { - TestNonsquareMatrixDot(true, false); -} +template +class NonsquareMatrixDot : public DotOperationTest { + public: + void TestImpl(bool lhs_row_major, bool rhs_row_major) { + 
auto lhs_handle = + client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 2.0f, 3.0f}, {3.0f, -4.0f, -1.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(lhs_row_major)))) + .ConsumeValueOrDie(); + auto rhs_handle = + client_ + ->TransferToServer(*Literal::CreateFromArrayWithLayout( + {{1.0f, 6.0f}, {2.0f, 3.0f}, {7.0f, -4.0f}}, + LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(rhs_row_major)))) + .ConsumeValueOrDie(); + + ComputationBuilder builder(client_, TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); + auto result = builder.Dot( + builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 3}), "lhs"), + builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {3, 2}), "rhs")); + + Array2D expected({{26.0f, 0.0f}, {-12.0f, 10.0f}}); + + ComputeAndCompareR2(&builder, expected, + {lhs_handle.get(), rhs_handle.get()}, error_spec_); + } +}; -XLA_TEST_F(DotOperationTest, NonsquareMatrixDotC64MajorToMinorTT) { - TestNonsquareMatrixDot(true, true); -} +TYPED_TEST_CASE(NonsquareMatrixDot, TypesF16F32F64CF64); +XLA_TYPED_TEST(NonsquareMatrixDot, TestFF) { this->TestImpl(false, false); } +XLA_TYPED_TEST(NonsquareMatrixDot, TestFT) { this->TestImpl(false, true); } +XLA_TYPED_TEST(NonsquareMatrixDot, TestTF) { this->TestImpl(true, false); } +XLA_TYPED_TEST(NonsquareMatrixDot, TestTT) { this->TestImpl(true, true); } XLA_TEST_F(DotOperationTest, MatrixVectorC64) { auto lhs_handle = @@ -537,25 +509,35 @@ XLA_TEST_F(DotOperationTest, MatrixVectorC64) { &builder, expected, {lhs_handle.get(), rhs_handle.get()}, error_spec_); } -XLA_TEST_F(DotOperationTest, ConcurrentMatMul) { - ComputationBuilder builder(client_, TestName()); - auto matrix1 = builder.ConstantR2({{1.0, 2.0}, {3.0, 4.0}}); - auto matrix2 = builder.ConstantR2({{5.0, 6.0}, {7.0, 8.0}}); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, ConcurrentMatMult) { + using T = TypeParam; + + ComputationBuilder builder(this->client_, this->TestName()); + auto matrix1 = 
builder.ConstantR2FromArray2D({{1.0f, 2.0f}, {3.0f, 4.0f}}); + auto matrix2 = builder.ConstantR2FromArray2D({{5.0f, 6.0f}, {7.0f, 8.0f}}); auto matrix12 = builder.Dot(matrix1, matrix2); auto matrix21 = builder.Dot(matrix2, matrix1); builder.Add(matrix12, matrix21); - Array2D expected({{42.0, 56.0}, {74.0, 96.0}}); - ComputeAndCompareR2(&builder, expected, {}, error_spec_); + Array2D expected({{42.0f, 56.0f}, {74.0f, 96.0f}}); + this->template ComputeAndCompareR2(&builder, expected, {}, + this->error_spec_); } +template +class DotOperationTestForBatchMatMul : public DotOperationTest {}; +TYPED_TEST_CASE(DotOperationTestForBatchMatMul, TypesF16F32F64); + // Regression test for b/32055648. The root of the graph is a kFusion of 4 // bitcasts. Although bitcasts don't map to thunks, the root should still be // sync-dependent on bitcasts' operands. -XLA_TEST_F(DotOperationTest, BatchMatMul) { - ComputationBuilder builder(client_, TestName()); - auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2, 2, 2}), "x"); - auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {2, 2, 2, 2}), "y"); +XLA_TYPED_TEST(DotOperationTestForBatchMatMul, Types) { + using T = TypeParam; + ComputationBuilder builder(this->client_, this->TestName()); + auto x = + builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 2, 2, 2}), "x"); + auto y = + builder.Parameter(1, ShapeUtil::MakeShapeWithType({2, 2, 2, 2}), "y"); auto x_flat = builder.Reshape(x, {0, 1, 2, 3}, {4, 2, 2}); auto y_flat = builder.Reshape(y, {0, 1, 2, 3}, {4, 2, 2}); @@ -576,29 +558,42 @@ XLA_TEST_F(DotOperationTest, BatchMatMul) { auto out_flat = builder.ConcatInDim(out_slices, 0); builder.Reshape(out_flat, {0, 1, 2}, {2, 2, 2, 2}); - auto x_data = client_ - ->TransferToServer(*Literal::CreateR4( - {{{{1000, 100}, {10, 1}}, {{2000, 200}, {20, 2}}}, - {{{3000, 300}, {30, 3}}, {{4000, 400}, {40, 4}}}})) - .ConsumeValueOrDie(); - auto y_data = client_ - ->TransferToServer(*Literal::CreateR4( - {{{{1, 2}, {3, 4}}, {{5, 6}, 
{7, 8}}}, - {{{11, 22}, {33, 44}}, {{55, 66}, {77, 88}}}})) + auto x_data = this->client_ + ->TransferToServer(*Literal::CreateR4FromArray4D( + {{{{1000.0f, 100.0f}, {10.0f, 1.0f}}, + {{2000.0f, 200.0f}, {20.0f, 2.0f}}}, + {{{3000.0f, 300.0f}, {30.0f, 3.0f}}, + {{4000.0f, 400.0f}, {40.0f, 4.0f}}}})) .ConsumeValueOrDie(); + auto y_data = + this->client_ + ->TransferToServer(*Literal::CreateR4FromArray4D( + {{{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}, + {{{11.0f, 22.0f}, {33.0f, 44.0f}}, + {{55.0f, 66.0f}, {77.0f, 88.0f}}}})) + .ConsumeValueOrDie(); - ComputeAndCompareR4( + if (std::is_same::value) { + this->error_spec_ = ErrorSpec{0.0001, 1e-3}; + } + this->template ComputeAndCompareR4( &builder, /*expected=*/ - {{{{1300, 2400}, {13, 24}}, {{11400, 13600}, {114, 136}}}, - {{{42900, 79200}, {429, 792}}, {{250800, 299200}, {2508, 2992}}}}, - {x_data.get(), y_data.get()}, error_spec_); + {{{{1300.0f, 2400.0f}, {13.0f, 24.0f}}, + {{11400.0f, 13600.0f}, {114.0f, 136.0f}}}, + {{{42900.0f, 79200.0f}, {429.0f, 792.0f}}, + {{250800.0f, 299200.0f}, {2508.0f, 2992.0f}}}}, + {x_data.get(), y_data.get()}, this->error_spec_); } -XLA_TEST_F(DotOperationTest, GeneralMatMul) { - ComputationBuilder builder(client_, TestName()); - auto x = builder.Parameter(0, ShapeUtil::MakeShape(F32, {2, 2, 2}), "x"); - auto y = builder.Parameter(1, ShapeUtil::MakeShape(F32, {2, 2, 2}), "y"); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, GeneralMatMul) { + using T = TypeParam; + + ComputationBuilder builder(this->client_, this->TestName()); + auto x = + builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 2, 2}), "x"); + auto y = + builder.Parameter(1, ShapeUtil::MakeShapeWithType({2, 2, 2}), "y"); DotDimensionNumbers dnums; dnums.add_lhs_contracting_dimensions(2); @@ -608,31 +603,34 @@ XLA_TEST_F(DotOperationTest, GeneralMatMul) { auto out = builder.DotGeneral(x, y, dnums); - auto x_data = client_ - ->TransferToServer(*Literal::CreateR3( - {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, 
{7.0, 8.0}}})) - .ConsumeValueOrDie(); + auto x_data = + this->client_ + ->TransferToServer(*Literal::CreateR3FromArray3D( + {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}})) + .ConsumeValueOrDie(); - auto y_data = client_ - ->TransferToServer(*Literal::CreateR3( - {{{1.0, 0.0}, {0.0, 1.0}}, {{1.0, 0.0}, {0.0, 1.0}}})) - .ConsumeValueOrDie(); + auto y_data = + this->client_ + ->TransferToServer(*Literal::CreateR3FromArray3D( + {{{1.0f, 0.0f}, {0.0f, 1.0f}}, {{1.0f, 0.0f}, {0.0f, 1.0f}}})) + .ConsumeValueOrDie(); - ComputeAndCompareR3( + this->template ComputeAndCompareR3( &builder, /*expected=*/ - {{{1.0, 2.0}, {3.0, 4.0}}, {{5.0, 6.0}, {7.0, 8.0}}}, - {x_data.get(), y_data.get()}, error_spec_); + {{{1.0f, 2.0f}, {3.0f, 4.0f}}, {{5.0f, 6.0f}, {7.0f, 8.0f}}}, + {x_data.get(), y_data.get()}, this->error_spec_); } -TEST_F(DotOperationTest, TransposeFolding) { +XLA_TYPED_TEST(DotOperationTest_F16F32F64, TransposeFolding) { + using T = TypeParam; for (bool transpose_lhs : {false, true}) { for (bool transpose_rhs : {false, true}) { for (bool row_major : {false, true}) { - std::unique_ptr> lhs( - new Array2D({{1.0, 2.0, 3.0}, {3.0, -4.0, -1.0}})); - std::unique_ptr> rhs( - new Array2D({{1.0, 6.0}, {2.0, 3.0}, {7.0, -4.0}})); + std::unique_ptr> lhs( + new Array2D({{1.0f, 2.0f, 3.0f}, {3.0f, -4.0f, -1.0f}})); + std::unique_ptr> rhs( + new Array2D({{1.0f, 6.0f}, {2.0f, 3.0f}, {7.0f, -4.0f}})); if (transpose_lhs) { lhs = ReferenceUtil::TransposeArray2D(*lhs); @@ -641,22 +639,20 @@ TEST_F(DotOperationTest, TransposeFolding) { rhs = ReferenceUtil::TransposeArray2D(*rhs); } auto lhs_handle = - client_ - ->TransferToServer( - *Literal::CreateR2FromArray2DWithLayout( - *lhs, LayoutUtil::MakeLayout( - MinorToMajorForIsRowMajor(row_major)))) + this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + *lhs, LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(row_major)))) .ConsumeValueOrDie(); auto rhs_handle = - client_ - ->TransferToServer( - 
*Literal::CreateR2FromArray2DWithLayout( - *rhs, LayoutUtil::MakeLayout( - MinorToMajorForIsRowMajor(row_major)))) + this->client_ + ->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + *rhs, LayoutUtil::MakeLayout( + MinorToMajorForIsRowMajor(row_major)))) .ConsumeValueOrDie(); - ComputationBuilder builder(client_, TestName()); - auto prim_type = primitive_util::NativeToPrimitiveType(); + ComputationBuilder builder(this->client_, this->TestName()); + auto prim_type = primitive_util::NativeToPrimitiveType(); auto lhs_arg = builder.Parameter( 0, ShapeUtil::MakeShape(prim_type, {lhs->height(), lhs->width()}), "lhs"); @@ -671,24 +667,27 @@ TEST_F(DotOperationTest, TransposeFolding) { } auto result = builder.Dot(lhs_arg, rhs_arg); - Array2D expected({{26.0, 0.0}, {-12.0, 10.0}}); + Array2D expected({{26.0f, 0.0f}, {-12.0f, 10.0f}}); VLOG(1) << "TestTransposeFolding " << transpose_lhs << " " << transpose_rhs << " " << row_major; - ComputeAndCompareR2(&builder, expected, - {lhs_handle.get(), rhs_handle.get()}, - error_spec_); + this->template ComputeAndCompareR2( + &builder, expected, {lhs_handle.get(), rhs_handle.get()}, + this->error_spec_); } } } } -TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstLHS) { - auto prim_type = primitive_util::NativeToPrimitiveType(); +XLA_TYPED_TEST(DotOperationTest_F16F32F64, + DotOfConcatOptimizationWithConstLHS) { + using T = TypeParam; + auto prim_type = primitive_util::NativeToPrimitiveType(); - std::unique_ptr> constant_lhs_array(new Array2D( - {{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, {6.0, 5.0, 4.0, 3.0, 2.0, 1.0}})); + std::unique_ptr> constant_lhs_array( + new Array2D({{1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}, + {6.0f, 5.0f, 4.0f, 3.0f, 2.0f, 1.0f}})); - ComputationBuilder builder(client_, TestName()); + ComputationBuilder builder(this->client_, this->TestName()); auto lhs_constant = builder.ConstantR2FromArray2D(*constant_lhs_array); auto rhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), 
"rhs_arg_0"); @@ -699,78 +698,80 @@ TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstLHS) { auto result = builder.Dot( lhs_constant, builder.ConcatInDim({rhs_arg_0, rhs_arg_1, rhs_arg_2}, 0)); - std::unique_ptr> arg_0_value_array( - new Array2D({{1.0, 2.0}, {3.0, 4.0}})); - std::unique_ptr> arg_1_value_array( - new Array2D({{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}})); - std::unique_ptr> arg_2_value_array( - new Array2D({{1.0, 2.0}})); + std::unique_ptr> arg_0_value_array( + new Array2D({{1.0f, 2.0f}, {3.0f, 4.0f}})); + std::unique_ptr> arg_1_value_array( + new Array2D({{1.0f, 2.0f}, {3.0f, 4.0f}, {5.0f, 6.0f}})); + std::unique_ptr> arg_2_value_array(new Array2D({{1.0f, 2.0f}})); TF_ASSERT_OK_AND_ASSIGN( auto arg_0_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_0_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_0_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto arg_1_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_1_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_1_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto arg_2_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_2_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_2_value_array))); - Array2D expected({{53.0, 74.0}, {45.0, 66.0}}); - ComputeAndCompareR2( + Array2D expected({{53.0f, 74.0f}, {45.0f, 66.0f}}); + this->template ComputeAndCompareR2( &builder, expected, - {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); -} - -TEST_F(DotOperationTest, DotOfConcatOptimizationWithConstRHS) { - auto prim_type = primitive_util::NativeToPrimitiveType(); - - std::unique_ptr> constant_rhs_array( - new Array2D({{1.0, 2.0}, - {3.0, 4.0}, - {5.0, 6.0}, - {6.0, 5.0}, - {4.0, 3.0}, - {2.0, 1.0}})); - - ComputationBuilder builder(client_, TestName()); + {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, + 
this->error_spec_); +} + +XLA_TYPED_TEST(DotOperationTest_F16F32F64, + DotOfConcatOptimizationWithConstRHS) { + using T = TypeParam; + std::unique_ptr> constant_rhs_array( + new Array2D({{1.0f, 2.0f}, + {3.0f, 4.0f}, + {5.0f, 6.0f}, + {6.0f, 5.0f}, + {4.0f, 3.0f}, + {2.0f, 1.0f}})); + + ComputationBuilder builder(this->client_, this->TestName()); auto rhs_constant = builder.ConstantR2FromArray2D(*constant_rhs_array); - auto lhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShape(prim_type, {2, 2}), + auto lhs_arg_0 = builder.Parameter(0, ShapeUtil::MakeShapeWithType({2, 2}), "lhs_arg_0"); - auto lhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShape(prim_type, {2, 3}), + auto lhs_arg_1 = builder.Parameter(1, ShapeUtil::MakeShapeWithType({2, 3}), "lhs_arg_1"); - auto lhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShape(prim_type, {2, 1}), + auto lhs_arg_2 = builder.Parameter(2, ShapeUtil::MakeShapeWithType({2, 1}), "lhs_arg_2"); auto result = builder.Dot( builder.ConcatInDim({lhs_arg_0, lhs_arg_1, lhs_arg_2}, 1), rhs_constant); - std::unique_ptr> arg_0_value_array( - new Array2D({{1.0, 2.0}, {3.0, 4.0}})); - std::unique_ptr> arg_1_value_array( - new Array2D({{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}})); - std::unique_ptr> arg_2_value_array( - new Array2D({{1.0}, {2.0}})); + std::unique_ptr> arg_0_value_array( + new Array2D({{1.0f, 2.0f}, {3.0f, 4.0f}})); + std::unique_ptr> arg_1_value_array( + new Array2D({{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}})); + std::unique_ptr> arg_2_value_array( + new Array2D({{1.0f}, {2.0f}})); TF_ASSERT_OK_AND_ASSIGN( auto arg_0_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_0_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_0_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto arg_1_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_1_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_1_value_array))); TF_ASSERT_OK_AND_ASSIGN( auto 
arg_2_value, - client_->TransferToServer( - *Literal::CreateR2FromArray2D(*arg_2_value_array))); + this->client_->TransferToServer( + *Literal::CreateR2FromArray2D(*arg_2_value_array))); - Array2D expected({{38.0, 36.0}, {93.0, 91.0}}); - ComputeAndCompareR2( + Array2D expected({{38.0f, 36.0f}, {93.0f, 91.0f}}); + this->template ComputeAndCompareR2( &builder, expected, - {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, error_spec_); + {arg_0_value.get(), arg_1_value.get(), arg_2_value.get()}, + this->error_spec_); } + } // namespace } // namespace xla diff --git a/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc index 6c86dd5b9e..c42f71388b 100644 --- a/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc +++ b/tensorflow/compiler/xla/tests/matrix_ops_simple_test.cc @@ -29,6 +29,8 @@ limitations under the License. #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/client_library_test_base.h" #include "tensorflow/compiler/xla/tests/literal_test_util.h" +#include "tensorflow/compiler/xla/tests/test_macros.h" +#include "tensorflow/compiler/xla/tests/test_utils.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/logging.h" @@ -38,258 +40,223 @@ limitations under the License. 
namespace xla { namespace { -class MatOpsSimpleTest : public ClientLibraryTestBase { - protected: - Computation BuildSum() { - // sum(x, y) = x + y - ComputationBuilder builder(client_, "sum"); - auto x_value = - builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x_value"); - auto y_value = - builder.Parameter(1, ShapeUtil::MakeShape(F32, {}), "y_value"); - builder.Add(x_value, y_value); - auto computation_status = builder.Build(); - TF_CHECK_OK(computation_status.status()); - return computation_status.ConsumeValueOrDie(); - } - - void TestLinspaceMax(int64 rows, int64 cols) { - float from = -128.0, to = 256.0; - std::unique_ptr> alhs = - MakeLinspaceArray2D(from, to, rows, cols); - auto arhs = MakeUnique>(rows, cols, 1.0); - - ComputationBuilder builder( - client_, - tensorflow::strings::Printf("max_%lldx%lld_linspace", rows, cols)); - auto lhs = builder.ConstantR2FromArray2D(*alhs); - auto rhs = builder.ConstantR2FromArray2D(*arhs); - auto max = builder.Max(lhs, rhs); - - Array2D aexpected(rows, cols); - for (int row = 0; row < rows; ++row) { - for (int col = 0; col < cols; ++col) { - aexpected(row, col) = std::max((*alhs)(row, col), (*arhs)(row, col)); - } - } - - ComputeAndCompareR2(&builder, aexpected, {}, ErrorSpec(1e-6)); - } -}; - -TEST_F(MatOpsSimpleTest, ExpTwoByTwoValues) { - ComputationBuilder builder(client_, "exp_2x2"); - auto data = builder.ConstantR2({ - {1.0, 0.0}, // row 0 - {-1.0, 0.5}, // row 1 +#ifdef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +using TypesF16F32 = ::testing::Types; +#else +using TypesF16F32 = ::testing::Types; +#endif + +class MatOpsSimpleTest : public ClientLibraryTestBase {}; + +template +class MatOpsSimpleTest_F16F32 : public MatOpsSimpleTest {}; + +// TODO(bixia): This test for F16 failed on GPU 02-25-2018. 
+#ifdef XLA_TEST_BACKEND_GPU +TYPED_TEST_CASE(MatOpsSimpleTest_F16F32, ::testing::Types); +#else +TYPED_TEST_CASE(MatOpsSimpleTest_F16F32, TypesF16F32); +#endif + +XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, ExpTwoByTwoValues) { + using T = TypeParam; + ComputationBuilder builder(this->client_, "exp_2x2"); + auto data = builder.ConstantR2FromArray2D({ + {1.0f, 0.0f}, // row 0 + {-1.0f, 0.5f}, // row 1 }); builder.Exp(data); std::unique_ptr expected = - Literal::CreateR2({{2.71828, 1.00000}, // row 0 - {0.36788, 1.64872}}); // row 1 + Literal::CreateR2FromArray2D({{2.71828f, 1.00000f}, // row 0 + {0.36788f, 1.64872f}}); // row 1 - ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); + this->template ComputeAndCompareLiteral(&builder, *expected, {}, + ErrorSpec(1e-5)); } -TEST_F(MatOpsSimpleTest, MapTwoByTwo) { +XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MapTwoByTwo) { + using T = TypeParam; Computation add_half; { // add_half(x) = x + 0.5 - ComputationBuilder builder(client_, "add_half"); + ComputationBuilder builder(this->client_, "add_half"); auto x_value = - builder.Parameter(0, ShapeUtil::MakeShape(F32, {}), "x_value"); - auto half = builder.ConstantR0(0.5); + builder.Parameter(0, ShapeUtil::MakeShapeWithType({}), "x_value"); + auto half = builder.ConstantR0(static_cast(0.5)); builder.Add(x_value, half); auto computation_status = builder.Build(); ASSERT_IS_OK(computation_status.status()); add_half = computation_status.ConsumeValueOrDie(); } - ComputationBuilder builder(client_, "map_2x2"); - auto data = builder.ConstantR2({ - {1.0, 0.0}, // row 0 - {-1.0, 0.5}, // row 1 + ComputationBuilder builder(this->client_, "map_2x2"); + auto data = builder.ConstantR2FromArray2D({ + {1.0f, 0.0f}, // row 0 + {-1.0f, 0.5f}, // row 1 }); auto map = builder.Map({data}, add_half, {0, 1}); std::unique_ptr expected = - Literal::CreateR2({{1.5, 0.5}, // row 0 - {-0.5, 1.0}}); // row 1 - ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-5)); + 
Literal::CreateR2FromArray2D({{1.5f, 0.5f}, // row 0 + {-0.5f, 1.0f}}); // row 1 + this->template ComputeAndCompareLiteral(&builder, *expected, {}, + ErrorSpec(1e-5)); } -TEST_F(MatOpsSimpleTest, MaxTwoByTwoValues) { - ComputationBuilder builder(client_, "max_2x2"); - auto lhs = builder.ConstantR2({ - {7.0, 2.0}, // row 0 - {3.0, -4.0}, // row 1 +XLA_TYPED_TEST(MatOpsSimpleTest_F16F32, MaxTwoByTwoValues) { + using T = TypeParam; + ComputationBuilder builder(this->client_, "max_2x2"); + auto lhs = builder.ConstantR2FromArray2D({ + {7.0f, 2.0f}, // row 0 + {3.0f, -4.0f}, // row 1 }); - auto rhs = builder.ConstantR2({ - {5.0, 6.0}, // row 0 - {1.0, -8.0}, // row 1 + auto rhs = builder.ConstantR2FromArray2D({ + {5.0f, 6.0f}, // row 0 + {1.0f, -8.0f}, // row 1 }); auto max = builder.Max(lhs, rhs); std::unique_ptr expected = - Literal::CreateR2({{7.0, 6.0}, // row 0 - {3.0, -4.0}}); // row 1 - ComputeAndCompareLiteral(&builder, *expected, {}, ErrorSpec(1e-6)); + Literal::CreateR2FromArray2D({{7.0f, 6.0f}, // row 0 + {3.0f, -4.0f}}); // row 1 + this->template ComputeAndCompareLiteral(&builder, *expected, {}, + ErrorSpec(1e-6)); } -TEST_F(MatOpsSimpleTest, Max1x1Linspace) { TestLinspaceMax(1, 1); } - -TEST_F(MatOpsSimpleTest, Max2x2Linspace) { TestLinspaceMax(2, 2); } - -TEST_F(MatOpsSimpleTest, Max3x3Linspace) { TestLinspaceMax(3, 3); } - -TEST_F(MatOpsSimpleTest, Max4x4Linspace) { TestLinspaceMax(4, 4); } - -TEST_F(MatOpsSimpleTest, Max6x6Linspace) { TestLinspaceMax(6, 6); } - -TEST_F(MatOpsSimpleTest, Max8x8Linspace) { TestLinspaceMax(8, 8); } - -TEST_F(MatOpsSimpleTest, Max12x12Linspace) { TestLinspaceMax(12, 12); } - -TEST_F(MatOpsSimpleTest, Max16x16Linspace) { TestLinspaceMax(16, 16); } +struct TestLinspaceMaxParam { + int64 rows; + int64 cols; +}; -TEST_F(MatOpsSimpleTest, Max32x8Linspace) { TestLinspaceMax(32, 8); } +class TestLinspaceMaxParametric + : public MatOpsSimpleTest, + public ::testing::WithParamInterface { + public: + template + void TestImpl() { + 
TestLinspaceMaxParam param = GetParam(); + int64 rows = param.rows; + int64 cols = param.cols; + float from = -128.0, to = 256.0; + std::unique_ptr> alhs = + MakeLinspaceArray2D(from, to, rows, cols); + auto arhs = MakeUnique>(rows, cols, static_cast(1.0f)); -TEST_F(MatOpsSimpleTest, Max64x8Linspace) { TestLinspaceMax(64, 8); } + ComputationBuilder builder( + client_, + tensorflow::strings::Printf("max_%lldx%lld_linspace", rows, cols)); + auto lhs = builder.ConstantR2FromArray2D(*alhs); + auto rhs = builder.ConstantR2FromArray2D(*arhs); + auto max = builder.Max(lhs, rhs); -class MatOpsDotAddTest - : public ClientLibraryTestBase, - public ::testing::WithParamInterface> {}; - -TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2) { - bool row_major = std::get<0>(GetParam()); - bool add_lhs = std::get<1>(GetParam()); - bool transpose = std::get<2>(GetParam()); - Array2D lhs({{1.0, 2.0}, {3.0, 4.0}}); - Array2D rhs({{10.0, 11.0}, {12.0, 13.0}}); - - auto minor_to_major = [](bool row_major) -> std::vector { - return {row_major ? 1 : 0, row_major ? 
0 : 1}; - }; - - auto prim_type = primitive_util::NativeToPrimitiveType(); - Shape lhs_shape = - ShapeUtil::MakeShape(prim_type, {lhs.height(), lhs.width()}); - Shape rhs_shape = - ShapeUtil::MakeShape(prim_type, {rhs.height(), rhs.width()}); - - TF_ASSERT_OK_AND_ASSIGN( - auto lhs_handle, - client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( - lhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - TF_ASSERT_OK_AND_ASSIGN( - auto rhs_handle, - client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( - rhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - - ComputationBuilder builder(client_, TestName()); - auto lhs_arg = builder.Parameter(0, lhs_shape, "lhs"); - auto lhs_mat_arg = lhs_arg; - if (transpose) { - lhs_mat_arg = builder.Transpose(lhs_mat_arg, {1, 0}); - } - auto rhs_arg = builder.Parameter(1, rhs_shape, "rhs"); - auto result = builder.Dot(lhs_mat_arg, rhs_arg); - Array2D expected; - if (add_lhs) { - result = builder.Add(result, lhs_arg); - if (transpose) { - expected = Array2D({{47, 52}, {71, 78}}); - } else { - expected = Array2D({{35, 39}, {81, 89}}); + Array2D expected(rows, cols); + for (int row = 0; row < rows; ++row) { + for (int col = 0; col < cols; ++col) { + expected(row, col) = std::max((*alhs)(row, col), (*arhs)(row, col)); + } } - } else { - result = builder.Add(result, rhs_arg); - if (transpose) { - expected = Array2D({{56, 61}, {80, 87}}); - } else { - expected = Array2D({{44, 48}, {90, 98}}); + ErrorSpec error_spec(1e-6); + if (std::is_same::value) { + error_spec = ErrorSpec(1e-6, 2e-4); } + ComputeAndCompareR2(&builder, expected, {}, error_spec); } +}; - ComputeAndCompareR2(&builder, expected, - {lhs_handle.get(), rhs_handle.get()}, - ErrorSpec(1e-6)); +string PrintTestLinspaceMaxParam( + const ::testing::TestParamInfo& test_param) { + const TestLinspaceMaxParam& param = test_param.param; + return tensorflow::strings::StrCat(param.rows, "r", param.cols, "c"); } 
-INSTANTIATE_TEST_CASE_P(MatOpsDotAddTestInstances, MatOpsDotAddTest, - ::testing::Combine(::testing::Bool(), ::testing::Bool(), - ::testing::Bool())); +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +// TODO(bixia): This test failed on GPU 02-25-2018 +#ifdef XLA_TEST_BACKEND_CPU +XLA_TEST_P(TestLinspaceMaxParametric, TestF16) { TestImpl(); } +#endif +#endif +XLA_TEST_P(TestLinspaceMaxParametric, TestF32) { TestImpl(); } + +INSTANTIATE_TEST_CASE_P( + TestLinspaceMax, TestLinspaceMaxParametric, + ::testing::Values(TestLinspaceMaxParam{1, 1}, TestLinspaceMaxParam{2, 2}, + TestLinspaceMaxParam{3, 3}, TestLinspaceMaxParam{4, 4}, + TestLinspaceMaxParam{6, 6}, TestLinspaceMaxParam{8, 8}, + TestLinspaceMaxParam{12, 12}, + TestLinspaceMaxParam{16, 16}, TestLinspaceMaxParam{32, 8}, + TestLinspaceMaxParam{64, 8}), + PrintTestLinspaceMaxParam); -class MatOpsDotAddTest_bf16 +class MatOpsDotAddTest : public ClientLibraryTestBase, - public ::testing::WithParamInterface> {}; - -TEST_P(MatOpsDotAddTest_bf16, Dot_Add_2x2_2x2) { - bool row_major = std::get<0>(GetParam()); - bool add_lhs = std::get<1>(GetParam()); - bool transpose = std::get<2>(GetParam()); - Array2D lhs( - {{bfloat16(1.0f), bfloat16(2.0f)}, {bfloat16(3.0), bfloat16(4.0)}}); - Array2D rhs( - {{bfloat16(10.0f), bfloat16(11.0f)}, {bfloat16(12.0f), bfloat16(13.0f)}}); - - auto minor_to_major = [](bool row_major) -> std::vector { - return {row_major ? 1 : 0, row_major ? 
0 : 1}; - }; - - auto prim_type = primitive_util::NativeToPrimitiveType(); - Shape lhs_shape = - ShapeUtil::MakeShape(prim_type, {lhs.height(), lhs.width()}); - Shape rhs_shape = - ShapeUtil::MakeShape(prim_type, {rhs.height(), rhs.width()}); - - TF_ASSERT_OK_AND_ASSIGN( - auto lhs_handle, - client_->TransferToServer( - *Literal::CreateR2FromArray2DWithLayout( - lhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - TF_ASSERT_OK_AND_ASSIGN( - auto rhs_handle, - client_->TransferToServer( - *Literal::CreateR2FromArray2DWithLayout( - rhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); - - ComputationBuilder builder(client_, TestName()); - auto lhs_arg = builder.Parameter(0, lhs_shape, "lhs"); - auto lhs_mat_arg = lhs_arg; - if (transpose) { - lhs_mat_arg = builder.Transpose(lhs_mat_arg, {1, 0}); - } - auto rhs_arg = builder.Parameter(1, rhs_shape, "rhs"); - auto result = builder.Dot(lhs_mat_arg, rhs_arg); - Array2D expected; - if (add_lhs) { - result = builder.Add(result, lhs_arg); + public ::testing::WithParamInterface> { + public: + template + void TestImpl() { + bool row_major = std::get<0>(GetParam()); + bool add_lhs = std::get<1>(GetParam()); + bool transpose = std::get<2>(GetParam()); + Array2D lhs({{1.0f, 2.0f}, {3.0f, 4.0f}}); + Array2D rhs({{10.0f, 11.0f}, {12.0f, 13.0f}}); + + auto minor_to_major = [](bool row_major) -> std::vector { + return {row_major ? 1 : 0, row_major ? 
0 : 1}; + }; + + auto prim_type = primitive_util::NativeToPrimitiveType(); + Shape lhs_shape = + ShapeUtil::MakeShape(prim_type, {lhs.height(), lhs.width()}); + Shape rhs_shape = + ShapeUtil::MakeShape(prim_type, {rhs.height(), rhs.width()}); + + TF_ASSERT_OK_AND_ASSIGN( + auto lhs_handle, + client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + lhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); + TF_ASSERT_OK_AND_ASSIGN( + auto rhs_handle, + client_->TransferToServer(*Literal::CreateR2FromArray2DWithLayout( + rhs, LayoutUtil::MakeLayout(minor_to_major(row_major))))); + + ComputationBuilder builder(client_, TestName()); + auto lhs_arg = builder.Parameter(0, lhs_shape, "lhs"); + auto lhs_mat_arg = lhs_arg; if (transpose) { - expected = Array2D( - {{bfloat16(47), bfloat16(52)}, {bfloat16(71), bfloat16(78)}}); - } else { - expected = Array2D( - {{bfloat16(35), bfloat16(39)}, {bfloat16(81), bfloat16(89)}}); + lhs_mat_arg = builder.Transpose(lhs_mat_arg, {1, 0}); } - } else { - result = builder.Add(result, rhs_arg); - if (transpose) { - expected = Array2D( - {{bfloat16(56), bfloat16(61)}, {bfloat16(80), bfloat16(87)}}); + auto rhs_arg = builder.Parameter(1, rhs_shape, "rhs"); + auto result = builder.Dot(lhs_mat_arg, rhs_arg); + Array2D expected; + if (add_lhs) { + result = builder.Add(result, lhs_arg); + if (transpose) { + expected = Array2D({{47.0f, 52.0f}, {71.0f, 78.0f}}); + } else { + expected = Array2D({{35.0f, 39.0f}, {81.0f, 89.0f}}); + } } else { - expected = Array2D( - {{bfloat16(44), bfloat16(48)}, {bfloat16(90), bfloat16(98)}}); + result = builder.Add(result, rhs_arg); + if (transpose) { + expected = Array2D({{56.0f, 61.0f}, {80.0f, 87.0f}}); + } else { + expected = Array2D({{44.0f, 48.0f}, {90.0f, 98.0f}}); + } } + + ComputeAndCompareR2(&builder, expected, + {lhs_handle.get(), rhs_handle.get()}, + ErrorSpec(1e-6)); } +}; - ComputeAndCompareR2(&builder, expected, - {lhs_handle.get(), rhs_handle.get()}, - ErrorSpec(1e-6)); -} 
+XLA_TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2BF16) { TestImpl(); } +#ifndef XLA_BACKEND_DOES_NOT_SUPPORT_FLOAT16 +XLA_TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2F16) { TestImpl(); } +#endif +XLA_TEST_P(MatOpsDotAddTest, Dot_Add_2x2_2x2F32) { TestImpl(); } -INSTANTIATE_TEST_CASE_P(MatOpsDotAddTestInstances, MatOpsDotAddTest_bf16, +INSTANTIATE_TEST_CASE_P(MatOpsDotAddTestInstances, MatOpsDotAddTest, ::testing::Combine(::testing::Bool(), ::testing::Bool(), ::testing::Bool())); diff --git a/tensorflow/stream_executor/blas.cc b/tensorflow/stream_executor/blas.cc index da09d84921..31724cf6c9 100644 --- a/tensorflow/stream_executor/blas.cc +++ b/tensorflow/stream_executor/blas.cc @@ -79,6 +79,8 @@ string ComputationTypeString(ComputationType ty) { return "f32"; case ComputationType::kF64: return "f64"; + case ComputationType::kI32: + return "i32"; case ComputationType::kComplexF32: return "complex f32"; case ComputationType::kComplexF64: @@ -88,6 +90,10 @@ string ComputationTypeString(ComputationType ty) { } } +std::ostream& operator<<(std::ostream& os, ComputationType ty) { + return os << ComputationTypeString(ty); +} + } // namespace blas } // namespace gputools } // namespace perftools diff --git a/tensorflow/stream_executor/blas.h b/tensorflow/stream_executor/blas.h index 072f085546..c5f778a5c7 100644 --- a/tensorflow/stream_executor/blas.h +++ b/tensorflow/stream_executor/blas.h @@ -104,6 +104,8 @@ enum class ComputationType { // Converts a ComputationType to a string. string ComputationTypeString(ComputationType ty); +std::ostream &operator<<(std::ostream &os, ComputationType ty); + // Opaque identifier for an "algorithm" used by a blas routine. This functions // as a hint to the blas library. 
typedef int64 AlgorithmType; diff --git a/tensorflow/stream_executor/cuda/cuda_blas.cc b/tensorflow/stream_executor/cuda/cuda_blas.cc index 44a3a745ad..c563f8f931 100644 --- a/tensorflow/stream_executor/cuda/cuda_blas.cc +++ b/tensorflow/stream_executor/cuda/cuda_blas.cc @@ -13,17 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -// Include cuBLAS headers early, and then set EIGEN_HAS_CUDA_FP16 -// if we have new enough CUDA (which we will only know after including -// cuda.h). This ensures that Eigen's Half.h does not attempt to make its own -// __half typedef if CUDA has already defined one (and conversely, that we do -// not include after Half.h has made its typedef). -#include "cuda/include/cuda.h" #include "cuda/include/cublas_v2.h" - -#if CUDA_VERSION >= 7050 -#define EIGEN_HAS_CUDA_FP16 -#endif +#include "cuda/include/cuda.h" #if CUDA_VERSION >= 8000 #define SE_CUDA_DATA_HALF CUDA_R_16F @@ -33,6 +24,34 @@ limitations under the License. #include "tensorflow/stream_executor/cuda/cuda_blas.h" +// Both Eigen Half.h and CUDA cuda_fp16.h provide similar typedef for __half. As +// such, there are two ways to get the typedef for __half: +// +// (1) Includes cuda_fp16.h and defines EIGEN_HAS_CUDA_FP16. +// (2) Neither includes cuda_fp16.h nor defines EIGEN_HAS_CUDA_FP16. +// +// Due to issue b/73793421, when the first approach is used and NVCC is used to +// compile this file, NVCC will complain duplicated definition for +// EIGEN_HAS_CUDA_FP16. On the other hand, when the second approach is used and +// clang is used to compile this file, clang will not understand __half +// due to missing the definition and macro EIGEN_HAS_CUDA_FP16. +// +// Because this file may be compiled with CLANG but will never be compiled with +// NVCC, we choose the first approach for CUDA < 9.0. 
For CUDA >= 9.0, we have +// to use the second approach because the data member in the __half defined +// by CUDA >= 9.0 is `__x` while Eigen expects it to be `x`. +// +// TODO(b/73793421): Remove the following code block to switch to the second +// approach when the issue is fixed. +#if CUDA_VERSION < 9000 +#include "cuda/include/cuda_fp16.h" +#if CUDA_VERSION >= 7050 +#define EIGEN_HAS_CUDA_FP16 +#endif +#endif + +#include "third_party/eigen3/Eigen/Core" + #include #include @@ -2256,6 +2275,14 @@ bool CUDABlas::DoBlasGemmWithAlgorithm( DeviceMemory *c, int ldc, blas::ComputationType computation_type, blas::AlgorithmType algorithm, blas::ProfileResult *output_profile_result) { + if (computation_type == blas::ComputationType::kF32) { + return DoBlasGemmWithAlgorithmImpl( + stream, transa, transb, m, n, k, static_cast(alpha), a, lda, b, + ldb, static_cast(beta), c, ldc, computation_type, algorithm, + output_profile_result); + } + + CHECK_EQ(computation_type, blas::ComputationType::kF16); return DoBlasGemmWithAlgorithmImpl( stream, transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc, computation_type, algorithm, output_profile_result); -- GitLab From 757a71e886fb9328b19b0ba15658e49cfa7cc323 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Wed, 28 Feb 2018 13:00:30 -0800 Subject: [PATCH 242/884] Lift ops to the global graph if all graphs are building functions This change ensures that, when all graphs are building functions, `init_scope` lifts ops into the global graph. 
PiperOrigin-RevId: 187370367 --- tensorflow/python/framework/ops.py | 60 +++++++++++++++---------- tensorflow/python/framework/ops_test.py | 31 +++++++++---- 2 files changed, 59 insertions(+), 32 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index b0d2704c07..735ba316d0 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5103,38 +5103,50 @@ def init_scope(): """ # pylint: enable=g-doc-return-or-yield,line-too-long - in_graph_mode = context.in_graph_mode() - # Retrieve the active name scope: entering an `init_scope` preserves - # the name scope of the current context. - if in_graph_mode: + if context.in_eager_mode(): + # Fastpath. + with tape.stop_recording(): + yield + else: + # Retrieve the active name scope: entering an `init_scope` preserves + # the name scope of the current context. default_graph = get_default_graph() scope = default_graph.get_name_scope() - else: - scope = context.context().scope_name - if scope and scope[-1] != '/': - # Names that end with trailing slashes are treated by `name_scope` as - # absolute. - scope = scope + '/' - - outer_context = None - if in_graph_mode and not _default_graph_stack.stack: - outer_context = default_graph.as_default - else: - for stack_entry in reversed(context.context_stack.stack): - if not stack_entry.is_building_function: - outer_context = stack_entry.enter_context_fn - break + if scope and scope[-1] != '/': + # Names that end with trailing slashes are treated by `name_scope` as + # absolute. + scope = scope + '/' + + outer_context = None + if not _default_graph_stack.stack: + # If the default graph stack is empty, then we cannot be building a + # function. Install the global graph (which, in this case, is also the + # default graph) as the outer context. 
+ if default_graph.building_function: + raise RuntimeError("The global graph is building a function.") + outer_context = default_graph.as_default + else: + # Find a context that is not building a function. + for stack_entry in reversed(context.context_stack.stack): + if not stack_entry.is_building_function: + outer_context = stack_entry.enter_context_fn + break - if outer_context is None: - raise AssertionError("All graphs are building functions, and no " + if outer_context is None: + # As a last resort, obtain the global default graph; this graph doesn't + # necessarily live on the graph stack (and hence it doesn't necessarily + # live on the context stack), but it is stored in the graph stack's + # encapsulating object. + outer_context = _default_graph_stack._GetGlobalDefaultGraph().as_default # pylint: disable=protected-access + + if outer_context is None: + # Sanity check; this shouldn't be triggered. + raise RuntimeError("All graphs are building functions, and no " "eager context was previously active.") - try: with outer_context(), name_scope(scope), control_dependencies( None), tape.stop_recording(): yield - finally: - pass def enable_eager_execution(config=None, device_policy=None): diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index a141fe6340..1f2dfb8d43 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2156,14 +2156,6 @@ class InitScopeTest(test_util.TensorFlowTestCase): self.assertIs(g, ops.get_default_graph()) self.assertTrue(context.in_graph_mode()) - def testAllGraphsBuildingFunctionsRaisesError(self): - g = ops.Graph() - g._building_function = True # pylint: disable=protected-access - with g.as_default(): - with self.assertRaises(AssertionError): - with ops.init_scope(): - pass - def testStaysInEagerWhenOnlyEagerContextActive(self): with context.eager_mode(): with ops.init_scope(): @@ -2241,6 +2233,29 @@ class 
InitScopeTest(test_util.TensorFlowTestCase): self.assertEqual(4, int(compiled_outer(inner=compiled_inner))) self.assertEqual(7, int(compiled_outer(inner=compiled_inner))) + def testFallsBackToGlobalGraphWhenAllGraphsAreBuildingFunctions(self): + with context.graph_mode(): + ops.reset_default_graph() + # This doesn't push anything onto the graph stack, but it does + # set the stack's global graph. + global_graph = ops.get_default_graph() + fn_graph = ops.Graph() + + # pylint: disable=protected-access + fn_graph._building_function = True + self.assertEqual(len(ops._default_graph_stack.stack), 0) + with fn_graph.as_default(): + self.assertEqual(len(ops._default_graph_stack.stack), 1) + with ops.init_scope(): + self.assertGreater(len(ops._default_graph_stack.stack), 1) + dummy = constant_op.constant(1.0) + self.assertEqual(len(ops._default_graph_stack.stack), 1) + # Note that the global graph is _not_ on the graph stack. + self.assertEqual(len(ops._default_graph_stack.stack), 0) + # Ensure that `dummy` was added to the global graph. + self.assertEqual(global_graph, dummy.graph) + # pylint: enable=protected-access + def testInstallsDefaultGraphWhenGraphStackIsEmptyInGraphMode(self): with context.graph_mode(): # pylint: disable=protected-access -- GitLab From 69f674b473470b44c6a1ca1bbb3bcc6a8c53074b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 13:02:07 -0800 Subject: [PATCH 243/884] Factor out the LstmBatchStep for the various LSTM Ops. 
PiperOrigin-RevId: 187370622 --- .../kernels/bidirectional_sequence_lstm.cc | 183 ++---------------- .../lite/kernels/internal/kernel_utils.cc | 147 ++++++++++++++ .../lite/kernels/internal/kernel_utils.h | 36 ++++ tensorflow/contrib/lite/kernels/lstm.cc | 170 +++++----------- .../kernels/unidirectional_sequence_lstm.cc | 179 +++++------------ 5 files changed, 294 insertions(+), 421 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc index 8d70df5e21..a64ac42bc4 100644 --- a/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/bidirectional_sequence_lstm.cc @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h" #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" @@ -443,166 +444,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { return kTfLiteOk; } -// Performs an LSTM batch inference step for input specified by input_ptr_batch. -// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and -// biases (*_bias_ptr), and buffers (*_scratch), along with additional -// parameters: -// - params: various LSTM params including activation, clipping, etc., -// - use_cifg: use coupled input forget gates, -// - use_peephole: whether to use peephole connection or not, -// - n_batch: size of batch, -// - n_cell: number of cells (or units), -// - n_input: the input size, -// - n_output: the output size. -// -// The pointers to the hidden state and the output are updated as a result. 
-// -// The pointers with the suffix "_batch" point to data aligned in batch_major -// order, and each step processes batch_size many inputs from input_ptr_batch, -// and updates batch_size many outputs and hidden states. -void LstmBatchStep( - const float* input_ptr_batch, const float* input_to_input_weights_ptr, - const float* input_to_forget_weights_ptr, - const float* input_to_cell_weights_ptr, - const float* input_to_output_weights_ptr, - const float* recurrent_to_input_weights_ptr, - const float* recurrent_to_forget_weights_ptr, - const float* recurrent_to_cell_weights_ptr, - const float* recurrent_to_output_weights_ptr, - const float* cell_to_input_weights_ptr, - const float* cell_to_forget_weights_ptr, - const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, - const float* forget_gate_bias_ptr, const float* cell_bias_ptr, - const float* output_gate_bias_ptr, const float* projection_weights_ptr, - const float* projection_bias_ptr, const TfLiteLSTMParams* params, - bool use_cifg, bool use_peephole, int n_batch, int n_cell, int n_input, - int n_output, float* output_state_ptr, float* cell_state_ptr, - float* input_gate_scratch, float* forget_gate_scratch, float* cell_scratch, - float* output_gate_scratch, float* output_ptr_time) { - // Initialize scratch buffers with bias. - if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch, - input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, - forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch, - cell_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, - output_gate_scratch); - - // For each batch and cell: compute input_weight * input. 
- if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, - output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: compute recurrent_weight * output_state. - if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, input_gate_scratch, - /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, forget_gate_scratch, - /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr, - n_batch, output_gate_scratch, - /*result_stride=*/1); - - // For each batch and cell: update input gate. - if (!use_cifg) { - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch, - input_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, - input_gate_scratch); - } - - // For each batch and cell: update forget gate. 
- if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch, - forget_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, - forget_gate_scratch); - - // For each batch and cell: update the cell. - tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr, - n_batch * n_cell, cell_state_ptr); - tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, - params->activation, cell_scratch); - if (use_cifg) { - tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, - forget_gate_scratch); - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr); - } else { - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr); - } - if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, - params->cell_clip, cell_state_ptr); - } - - // For each batch and cell: update the output gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch, - output_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, - output_gate_scratch); - tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell, - params->activation, cell_scratch); - tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, - n_batch * n_cell, output_gate_scratch); - - // For each batch: update the projection and output_state. 
- const bool use_projection_weight = (projection_weights_ptr != nullptr); - const bool use_projection_bias = (projection_bias_ptr != nullptr); - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, - n_batch, output_ptr_time); - } else { - tensor_utils::ZeroVector(output_ptr_time, n_batch * n_output); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch, - output_ptr_time, /*result_stride=*/1); - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_ptr_time, n_batch * n_output, - params->proj_clip, output_ptr_time); - } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output_ptr_time); - } - tensor_utils::CopyVector(output_ptr_time, n_batch * n_output, - output_state_ptr); -} - // The LSTM Op engine. TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { auto* params = reinterpret_cast(node->builtin_data); @@ -756,7 +597,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const float* input_ptr_batch = input->data.f + t * n_batch * n_input; float* output_ptr_time = fw_output->data.f + t * n_batch * n_fw_output; - LstmBatchStep( + kernel_utils::LstmStep( input_ptr_batch, fw_input_to_input_weights_ptr, fw_input_to_forget_weights->data.f, fw_input_to_cell_weights->data.f, fw_input_to_output_weights->data.f, fw_recurrent_to_input_weights_ptr, @@ -766,11 +607,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { fw_cell_to_forget_weights_ptr, fw_cell_to_output_weights_ptr, fw_input_gate_bias_ptr, fw_forget_gate_bias->data.f, fw_cell_bias->data.f, fw_output_gate_bias->data.f, - fw_projection_weights_ptr, fw_projection_bias_ptr, params, fw_use_cifg, - fw_use_peephole, n_batch, n_fw_cell, n_input, n_fw_output, - fw_output_state->data.f, fw_cell_state->data.f, fw_input_gate_scratch, - fw_forget_gate_scratch, fw_cell_scratch, 
fw_output_gate_scratch, - output_ptr_time); + fw_projection_weights_ptr, fw_projection_bias_ptr, params, n_batch, + n_fw_cell, n_input, n_fw_output, fw_output_state->data.f, + fw_cell_state->data.f, fw_input_gate_scratch, fw_forget_gate_scratch, + fw_cell_scratch, fw_output_gate_scratch, output_ptr_time); } // n_cell and n_output will be the same size when there is no projection. @@ -828,7 +668,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { const float* input_ptr_batch = input->data.f + t * n_batch * n_input; float* output_ptr_time = bw_output->data.f + t * n_batch * n_bw_output; - LstmBatchStep( + kernel_utils::LstmStep( input_ptr_batch, bw_input_to_input_weights_ptr, bw_input_to_forget_weights->data.f, bw_input_to_cell_weights->data.f, bw_input_to_output_weights->data.f, bw_recurrent_to_input_weights_ptr, @@ -838,11 +678,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { bw_cell_to_forget_weights_ptr, bw_cell_to_output_weights_ptr, bw_input_gate_bias_ptr, bw_forget_gate_bias->data.f, bw_cell_bias->data.f, bw_output_gate_bias->data.f, - bw_projection_weights_ptr, bw_projection_bias_ptr, params, bw_use_cifg, - bw_use_peephole, n_batch, n_bw_cell, n_input, n_bw_output, - bw_output_state->data.f, bw_cell_state->data.f, bw_input_gate_scratch, - bw_forget_gate_scratch, bw_cell_scratch, bw_output_gate_scratch, - output_ptr_time); + bw_projection_weights_ptr, bw_projection_bias_ptr, params, n_batch, + n_bw_cell, n_input, n_bw_output, bw_output_state->data.f, + bw_cell_state->data.f, bw_input_gate_scratch, bw_forget_gate_scratch, + bw_cell_scratch, bw_output_gate_scratch, output_ptr_time); } // Backward step. 
diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc index 510395126c..f142374269 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.cc @@ -40,5 +40,152 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, hidden_state_ptr_batch); } +void LstmStep( + const float* input_ptr_batch, const float* input_to_input_weights_ptr, + const float* input_to_forget_weights_ptr, + const float* input_to_cell_weights_ptr, + const float* input_to_output_weights_ptr, + const float* recurrent_to_input_weights_ptr, + const float* recurrent_to_forget_weights_ptr, + const float* recurrent_to_cell_weights_ptr, + const float* recurrent_to_output_weights_ptr, + const float* cell_to_input_weights_ptr, + const float* cell_to_forget_weights_ptr, + const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const float* projection_weights_ptr, + const float* projection_bias_ptr, const TfLiteLSTMParams* params, + int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr, + float* cell_state_ptr, float* input_gate_scratch, + float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, + float* output_ptr_batch) { + // Since we have already checked that weights are all there or none, we can + // check the existence of only one to get the condition. + const bool use_cifg = (input_to_input_weights_ptr == nullptr); + const bool use_peephole = (cell_to_output_weights_ptr != nullptr); + // Initialize scratch buffers with bias. 
+ if (!use_cifg) { + tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell, n_batch, + input_gate_scratch); + } + tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch, + forget_gate_scratch); + tensor_utils::VectorBatchVectorAssign(cell_bias_ptr, n_cell, n_batch, + cell_scratch); + tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch, + output_gate_scratch); + + // For each batch and cell: compute input_weight * input. + if (!use_cifg) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_input_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + input_gate_scratch, /*result_stride=*/1); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_forget_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + forget_gate_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_cell_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + cell_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + input_to_output_weights_ptr, n_cell, n_input, input_ptr_batch, n_batch, + output_gate_scratch, /*result_stride=*/1); + + // For each batch and cell: compute recurrent_weight * output_state. 
+ if (!use_cifg) { + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_input_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, input_gate_scratch, + /*result_stride=*/1); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_forget_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, forget_gate_scratch, + /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_cell_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, cell_scratch, /*result_stride=*/1); + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + recurrent_to_output_weights_ptr, n_cell, n_output, output_state_ptr, + n_batch, output_gate_scratch, + /*result_stride=*/1); + + // For each batch and cell: update input gate. + if (!use_cifg) { + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_input_weights_ptr, n_cell, cell_state_ptr, n_batch, + input_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, + input_gate_scratch); + } + + // For each batch and cell: update forget gate. + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_forget_weights_ptr, n_cell, cell_state_ptr, n_batch, + forget_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, + forget_gate_scratch); + + // For each batch and cell: update the cell. 
+ tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, cell_state_ptr, + n_batch * n_cell, cell_state_ptr); + tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, + params->activation, cell_scratch); + if (use_cifg) { + tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, + forget_gate_scratch); + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, forget_gate_scratch, n_batch * n_cell, cell_state_ptr); + } else { + tensor_utils::VectorVectorCwiseProductAccumulate( + cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state_ptr); + } + if (params->cell_clip > 0.0) { + tensor_utils::ClipVector(cell_state_ptr, n_batch * n_cell, + params->cell_clip, cell_state_ptr); + } + + // For each batch and cell: update the output gate. + if (use_peephole) { + tensor_utils::VectorBatchVectorCwiseProductAccumulate( + cell_to_output_weights_ptr, n_cell, cell_state_ptr, n_batch, + output_gate_scratch); + } + tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, + output_gate_scratch); + tensor_utils::ApplyActivationToVector(cell_state_ptr, n_batch * n_cell, + params->activation, cell_scratch); + tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, + n_batch * n_cell, output_gate_scratch); + + // For each batch: update the projection and output_state. 
+ const bool use_projection_weight = (projection_weights_ptr != nullptr); + const bool use_projection_bias = (projection_bias_ptr != nullptr); + if (use_projection_weight) { + if (use_projection_bias) { + tensor_utils::VectorBatchVectorAssign(projection_bias_ptr, n_output, + n_batch, output_ptr_batch); + } else { + tensor_utils::ZeroVector(output_ptr_batch, n_batch * n_output); + } + tensor_utils::MatrixBatchVectorMultiplyAccumulate( + projection_weights_ptr, n_output, n_cell, output_gate_scratch, n_batch, + output_ptr_batch, /*result_stride=*/1); + if (params->proj_clip > 0.0) { + tensor_utils::ClipVector(output_ptr_batch, n_batch * n_output, + params->proj_clip, output_ptr_batch); + } + } else { + tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, + output_ptr_batch); + } + tensor_utils::CopyVector(output_ptr_batch, n_batch * n_output, + output_state_ptr); +} + } // namespace kernel_utils } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h index 9872d4500b..3ec60ee57a 100644 --- a/tensorflow/contrib/lite/kernels/internal/kernel_utils.h +++ b/tensorflow/contrib/lite/kernels/internal/kernel_utils.h @@ -35,6 +35,42 @@ void RnnBatchStep(const float* input_ptr_batch, const float* input_weights_ptr, TfLiteFusedActivation activation, float* hidden_state_ptr_batch, float* output_ptr_batch); +// Performs an LSTM batch inference step for input specified by input_ptr_batch. +// The LSTM cell is specified by the pointers to its weights (*_weights_ptr) and +// biases (*_bias_ptr), and buffers (*_scratch), along with additional +// parameters: +// - params: various LSTM params including activation, clipping, etc., +// - n_batch: size of batch, +// - n_cell: number of cells (or units), +// - n_input: the input size, +// - n_output: the output size. +// +// The pointers to the cell and output state and the output are updated. 
Unless +// projection is specified output and output state contain the same data. +// +// The pointers with the suffix "_batch" point to data aligned in batch_major +// order, and each step processes batch_size many inputs from input_ptr_batch, +// and updates batch_size many cell and output states. +void LstmStep( + const float* input_ptr_batch, const float* input_to_input_weights_ptr, + const float* input_to_forget_weights_ptr, + const float* input_to_cell_weights_ptr, + const float* input_to_output_weights_ptr, + const float* recurrent_to_input_weights_ptr, + const float* recurrent_to_forget_weights_ptr, + const float* recurrent_to_cell_weights_ptr, + const float* recurrent_to_output_weights_ptr, + const float* cell_to_input_weights_ptr, + const float* cell_to_forget_weights_ptr, + const float* cell_to_output_weights_ptr, const float* input_gate_bias_ptr, + const float* forget_gate_bias_ptr, const float* cell_bias_ptr, + const float* output_gate_bias_ptr, const float* projection_weights_ptr, + const float* projection_bias_ptr, const TfLiteLSTMParams* params, + int n_batch, int n_cell, int n_input, int n_output, float* output_state_ptr, + float* cell_state_ptr, float* input_gate_scratch, + float* forget_gate_scratch, float* cell_scratch, float* output_gate_scratch, + float* output_ptr_batch); + } // namespace kernel_utils } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_KERNEL_UTILS_H_ diff --git a/tensorflow/contrib/lite/kernels/lstm.cc b/tensorflow/contrib/lite/kernels/lstm.cc index 6c06264d84..b9255b23a5 100644 --- a/tensorflow/contrib/lite/kernels/lstm.cc +++ b/tensorflow/contrib/lite/kernels/lstm.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h" #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" @@ -377,127 +378,54 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch; } - // Initialize scratch buffers with bias. - if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias->data.f, n_cell, - n_batch, input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias->data.f, n_cell, - n_batch, forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_bias->data.f, n_cell, n_batch, - cell_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias->data.f, n_cell, - n_batch, output_gate_scratch); - - // For each batch and cell: compute input_weight * input. - if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_input_weights->data.f, n_cell, n_input, input->data.f, n_batch, - input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_forget_weights->data.f, n_cell, n_input, input->data.f, n_batch, - forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_cell_weights->data.f, n_cell, n_input, input->data.f, n_batch, - cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_output_weights->data.f, n_cell, n_input, input->data.f, n_batch, - output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: compute recurrent_weight * output_state. 
- if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_input_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_forget_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_cell_weights->data.f, n_cell, n_output, output_state->data.f, - n_batch, cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_output_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: update input gate. - if (!use_cifg) { - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_input_weights->data.f, n_cell, cell_state->data.f, n_batch, - input_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, - input_gate_scratch); - } - - // For each batch and cell: update forget gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_forget_weights->data.f, n_cell, cell_state->data.f, n_batch, - forget_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, - forget_gate_scratch); - - // For each batch and cell: update the cell. 
- tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, - cell_state->data.f, n_batch * n_cell, - cell_state->data.f); - tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, - params->activation, cell_scratch); - if (use_cifg) { - tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, - forget_gate_scratch); - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, forget_gate_scratch, n_batch * n_cell, - cell_state->data.f); - } else { - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, input_gate_scratch, n_batch * n_cell, cell_state->data.f); - } - if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state->data.f, n_batch * n_cell, - params->cell_clip, cell_state->data.f); - } - - // For each batch and cell: update the output gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_output_weights->data.f, n_cell, cell_state->data.f, n_batch, - output_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, - output_gate_scratch); - tensor_utils::ApplyActivationToVector(cell_state->data.f, n_batch * n_cell, - params->activation, cell_scratch); - tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, - n_batch * n_cell, output_gate_scratch); - - // For each batch: update the projection and output_state. 
- const bool use_projection_weight = (projection_weights != nullptr); - const bool use_projection_bias = (projection_bias != nullptr); - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias->data.f, n_output, - n_batch, output->data.f); - } else { - tensor_utils::ZeroVector(output->data.f, n_batch * n_output); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights->data.f, n_output, n_cell, output_gate_scratch, - n_batch, output->data.f, /*result_stride=*/1); - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output->data.f, n_batch * n_output, - params->proj_clip, output->data.f); - } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output->data.f); - } - tensor_utils::CopyVector(output->data.f, n_batch * n_output, - output_state->data.f); + // Check optional tensors, the respective pointers can be null. + const float* input_to_input_weights_ptr = + (use_cifg) ? nullptr : input_to_input_weights->data.f; + const float* recurrent_to_input_weights_ptr = + (use_cifg) ? nullptr : recurrent_to_input_weights->data.f; + const float* input_gate_bias_ptr = + (use_cifg) ? nullptr : input_gate_bias->data.f; + const float* cell_to_input_weights_ptr = + (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr; + const float* cell_to_forget_weights_ptr = + (use_peephole) ? cell_to_forget_weights->data.f : nullptr; + const float* cell_to_output_weights_ptr = + (use_peephole) ? cell_to_output_weights->data.f : nullptr; + const float* projection_weights_ptr = + (projection_weights == nullptr) ? nullptr : projection_weights->data.f; + const float* projection_bias_ptr = + (projection_bias == nullptr) ? nullptr : projection_bias->data.f; + + // Required tensors, pointers are non-null. 
+ const float* input_ptr_batch = input->data.f; + const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f; + const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f; + const float* input_to_output_weights_ptr = input_to_output_weights->data.f; + const float* recurrent_to_forget_weights_ptr = + recurrent_to_forget_weights->data.f; + const float* recurrent_to_cell_weights_ptr = + recurrent_to_cell_weights->data.f; + const float* recurrent_to_output_weights_ptr = + recurrent_to_output_weights->data.f; + const float* forget_gate_bias_ptr = forget_gate_bias->data.f; + const float* cell_bias_ptr = cell_bias->data.f; + const float* output_gate_bias_ptr = output_gate_bias->data.f; + + float* output_state_ptr = output_state->data.f; + float* cell_state_ptr = cell_state->data.f; + float* output_ptr_batch = output->data.f; + + kernel_utils::LstmStep( + input_ptr_batch, input_to_input_weights_ptr, input_to_forget_weights_ptr, + input_to_cell_weights_ptr, input_to_output_weights_ptr, + recurrent_to_input_weights_ptr, recurrent_to_forget_weights_ptr, + recurrent_to_cell_weights_ptr, recurrent_to_output_weights_ptr, + cell_to_input_weights_ptr, cell_to_forget_weights_ptr, + cell_to_output_weights_ptr, input_gate_bias_ptr, forget_gate_bias_ptr, + cell_bias_ptr, output_gate_bias_ptr, projection_weights_ptr, + projection_bias_ptr, params, n_batch, n_cell, n_input, n_output, + output_state_ptr, cell_state_ptr, input_gate_scratch, forget_gate_scratch, + cell_scratch, output_gate_scratch, output_ptr_batch); return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc index 9cdb58714e..508a570e2e 100644 --- a/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc +++ b/tensorflow/contrib/lite/kernels/unidirectional_sequence_lstm.cc @@ -24,6 +24,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" +#include "tensorflow/contrib/lite/kernels/internal/kernel_utils.h" #include "tensorflow/contrib/lite/kernels/internal/tensor_utils.h" #include "tensorflow/contrib/lite/kernels/kernel_util.h" #include "tensorflow/contrib/lite/kernels/op_macros.h" @@ -380,135 +381,57 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { output_gate_scratch = scratch_buffer->data.f + 3 * n_cell * n_batch; } + // Check optional tensors, the respective pointers can be null. + const float* input_to_input_weights_ptr = + (use_cifg) ? nullptr : input_to_input_weights->data.f; + const float* recurrent_to_input_weights_ptr = + (use_cifg) ? nullptr : recurrent_to_input_weights->data.f; + const float* input_gate_bias_ptr = + (use_cifg) ? nullptr : input_gate_bias->data.f; + const float* cell_to_input_weights_ptr = + (use_peephole && !use_cifg) ? cell_to_input_weights->data.f : nullptr; + const float* cell_to_forget_weights_ptr = + (use_peephole) ? cell_to_forget_weights->data.f : nullptr; + const float* cell_to_output_weights_ptr = + (use_peephole) ? cell_to_output_weights->data.f : nullptr; + const float* projection_weights_ptr = + (projection_weights == nullptr) ? nullptr : projection_weights->data.f; + const float* projection_bias_ptr = + (projection_bias == nullptr) ? nullptr : projection_bias->data.f; + + // Required tensors, pointers are non-null. 
+ const float* input_to_forget_weights_ptr = input_to_forget_weights->data.f; + const float* input_to_cell_weights_ptr = input_to_cell_weights->data.f; + const float* input_to_output_weights_ptr = input_to_output_weights->data.f; + const float* recurrent_to_forget_weights_ptr = + recurrent_to_forget_weights->data.f; + const float* recurrent_to_cell_weights_ptr = + recurrent_to_cell_weights->data.f; + const float* recurrent_to_output_weights_ptr = + recurrent_to_output_weights->data.f; + const float* forget_gate_bias_ptr = forget_gate_bias->data.f; + const float* cell_bias_ptr = cell_bias->data.f; + const float* output_gate_bias_ptr = output_gate_bias->data.f; + + float* output_state_ptr = output_state->data.f; + float* cell_state_ptr = cell_state->data.f; + for (int t = 0; t < max_time; t++) { - const float* input_ptr_time = input->data.f + t * n_batch * n_input; - // Initialize scratch buffers with bias. - if (!use_cifg) { - tensor_utils::VectorBatchVectorAssign(input_gate_bias->data.f, n_cell, - n_batch, input_gate_scratch); - } - tensor_utils::VectorBatchVectorAssign(forget_gate_bias->data.f, n_cell, - n_batch, forget_gate_scratch); - tensor_utils::VectorBatchVectorAssign(cell_bias->data.f, n_cell, n_batch, - cell_scratch); - tensor_utils::VectorBatchVectorAssign(output_gate_bias->data.f, n_cell, - n_batch, output_gate_scratch); - - // For each batch and cell: compute input_weight * input. 
- if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_input_weights->data.f, n_cell, n_input, input_ptr_time, - n_batch, input_gate_scratch, /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_forget_weights->data.f, n_cell, n_input, input_ptr_time, - n_batch, forget_gate_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_cell_weights->data.f, n_cell, n_input, input_ptr_time, n_batch, - cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - input_to_output_weights->data.f, n_cell, n_input, input_ptr_time, - n_batch, output_gate_scratch, /*result_stride=*/1); - - // For each batch and cell: compute recurrent_weight * output_state. - if (!use_cifg) { - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_input_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, input_gate_scratch, - /*result_stride=*/1); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_forget_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, forget_gate_scratch, - /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_cell_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, cell_scratch, /*result_stride=*/1); - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - recurrent_to_output_weights->data.f, n_cell, n_output, - output_state->data.f, n_batch, output_gate_scratch, - /*result_stride=*/1); - - // For each batch and cell: update input gate. - if (!use_cifg) { - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_input_weights->data.f, n_cell, cell_state->data.f, n_batch, - input_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(input_gate_scratch, n_cell * n_batch, - input_gate_scratch); - } - - // For each batch and cell: update forget gate. 
- if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_forget_weights->data.f, n_cell, cell_state->data.f, n_batch, - forget_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(forget_gate_scratch, n_cell * n_batch, - forget_gate_scratch); - - // For each batch and cell: update the cell. - tensor_utils::VectorVectorCwiseProduct(forget_gate_scratch, - cell_state->data.f, n_batch * n_cell, - cell_state->data.f); - tensor_utils::ApplyActivationToVector(cell_scratch, n_batch * n_cell, - params->activation, cell_scratch); - if (use_cifg) { - tensor_utils::Sub1Vector(forget_gate_scratch, n_batch * n_cell, - forget_gate_scratch); - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, forget_gate_scratch, n_batch * n_cell, - cell_state->data.f); - } else { - tensor_utils::VectorVectorCwiseProductAccumulate( - cell_scratch, input_gate_scratch, n_batch * n_cell, - cell_state->data.f); - } - if (params->cell_clip > 0.0) { - tensor_utils::ClipVector(cell_state->data.f, n_batch * n_cell, - params->cell_clip, cell_state->data.f); - } - - // For each batch and cell: update the output gate. - if (use_peephole) { - tensor_utils::VectorBatchVectorCwiseProductAccumulate( - cell_to_output_weights->data.f, n_cell, cell_state->data.f, n_batch, - output_gate_scratch); - } - tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell, - output_gate_scratch); - tensor_utils::ApplyActivationToVector(cell_state->data.f, n_batch * n_cell, - params->activation, cell_scratch); - tensor_utils::VectorVectorCwiseProduct(output_gate_scratch, cell_scratch, - n_batch * n_cell, - output_gate_scratch); - - // For each batch: update the projection and output_state. 
- const bool use_projection_weight = (projection_weights != nullptr); - const bool use_projection_bias = (projection_bias != nullptr); - float* output_ptr_time = output->data.f + t * n_batch * n_output; - if (use_projection_weight) { - if (use_projection_bias) { - tensor_utils::VectorBatchVectorAssign(projection_bias->data.f, n_output, - n_batch, output_ptr_time); - } else { - tensor_utils::ZeroVector(output_ptr_time, n_batch * n_output); - } - tensor_utils::MatrixBatchVectorMultiplyAccumulate( - projection_weights->data.f, n_output, n_cell, output_gate_scratch, - n_batch, output_ptr_time, /*result_stride=*/1); - if (params->proj_clip > 0.0) { - tensor_utils::ClipVector(output_ptr_time, n_batch * n_output, - params->proj_clip, output_ptr_time); - } - } else { - tensor_utils::CopyVector(output_gate_scratch, n_batch * n_output, - output_ptr_time); - } - tensor_utils::CopyVector(output_ptr_time, n_batch * n_output, - output_state->data.f); + const float* input_ptr_batch = input->data.f + t * n_batch * n_input; + float* output_ptr_batch = output->data.f + t * n_batch * n_output; + + kernel_utils::LstmStep( + input_ptr_batch, input_to_input_weights_ptr, + input_to_forget_weights_ptr, input_to_cell_weights_ptr, + input_to_output_weights_ptr, recurrent_to_input_weights_ptr, + recurrent_to_forget_weights_ptr, recurrent_to_cell_weights_ptr, + recurrent_to_output_weights_ptr, cell_to_input_weights_ptr, + cell_to_forget_weights_ptr, cell_to_output_weights_ptr, + input_gate_bias_ptr, forget_gate_bias_ptr, cell_bias_ptr, + output_gate_bias_ptr, projection_weights_ptr, projection_bias_ptr, + params, n_batch, n_cell, n_input, n_output, output_state_ptr, + cell_state_ptr, input_gate_scratch, forget_gate_scratch, cell_scratch, + output_gate_scratch, output_ptr_batch); } return kTfLiteOk; } -- GitLab From c1777a2633bd5615a1d654e50f82d0cf75fd60f0 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Wed, 28 Feb 2018 13:17:06 -0800 Subject: [PATCH 244/884] [XLA] Fix up some error 
messages to conform to XLA's error message style. PiperOrigin-RevId: 187372860 --- tensorflow/compiler/xla/literal_util.cc | 18 +++++++++++------- tensorflow/compiler/xla/literal_util_test.cc | 10 +++++----- .../compiler/xla/service/allocation_tracker.cc | 2 +- .../compiler/xla/service/hlo_instruction.cc | 6 ++++-- .../xla/tests/deconstruct_tuple_test.cc | 2 +- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 3962a9b316..c3eb8caa57 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -343,7 +343,7 @@ Status Literal::Piece::CopyFrom(const Literal::Piece& src) { #undef COPY_ELEMENTS default: return Unimplemented( - "Unhandled primitive type %s", + "Copying a Literal object with element type %s is not implemented.", PrimitiveType_Name(subshape().element_type()).c_str()); } } @@ -491,7 +491,10 @@ Status Literal::CopySliceFrom(const Literal& src_literal, default: break; } - return Unimplemented("Unhandled primitive type %d", shape().element_type()); + return Unimplemented( + "Copying a slice from a Literal object with element type %d is not " + "implemented.", + shape().element_type()); } /* static */ Literal Literal::Zero(PrimitiveType primitive_type) { @@ -1394,8 +1397,8 @@ StatusOr> ConvertIfDestTypeMatches( return ConvertToC64(src_literal); // Other types are not yet supported. default: - return InvalidArgument( - "Unimplemented: Convert from type %s to type %s", + return Unimplemented( + "Converting from type %s to type %s is not implemented.", PrimitiveType_Name(src_literal.shape().element_type()).c_str(), PrimitiveType_Name(primitive_dest_type).c_str()); } @@ -1424,9 +1427,10 @@ StatusOr> Literal::Convert( #undef CONVERT_IF_DEST_TYPE_MATCHES // Other types are not yet supported. 
default: - return InvalidArgument("Unimplemented: Convert from type %s to type %s", - PrimitiveType_Name(shape().element_type()).c_str(), - PrimitiveType_Name(primitive_dest_type).c_str()); + return Unimplemented( + "Converting from type %s to type %s is not implemented.", + PrimitiveType_Name(shape().element_type()).c_str(), + PrimitiveType_Name(primitive_dest_type).c_str()); } } diff --git a/tensorflow/compiler/xla/literal_util_test.cc b/tensorflow/compiler/xla/literal_util_test.cc index 9ff0771110..04e45f0049 100644 --- a/tensorflow/compiler/xla/literal_util_test.cc +++ b/tensorflow/compiler/xla/literal_util_test.cc @@ -1232,15 +1232,15 @@ TEST_F(LiteralUtilTest, ConvertIfTypesMatch) { EXPECT_EQ(*conv, *c64); EXPECT_EQ(s32->Convert(TUPLE).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(s32->Convert(S16).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(s32->Convert(U16).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(c64->Convert(F32).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); EXPECT_EQ(c64->Convert(S32).status().code(), - tensorflow::error::INVALID_ARGUMENT); + tensorflow::error::UNIMPLEMENTED); } TEST_F(LiteralUtilTest, CopyFromProto_Bool) { diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc index 4e80679c11..7a75c02531 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.cc +++ b/tensorflow/compiler/xla/service/allocation_tracker.cc @@ -109,7 +109,7 @@ StatusOr> AllocationTracker::DeconstructTuple( TF_RET_CHECK(ShapeUtil::IsTuple(shaped_buffer->on_device_shape())); if (ShapeUtil::IsNestedTuple(shaped_buffer->on_device_shape())) { - return Unimplemented("deconstructing nested tuples not yet supported"); + return Unimplemented("Deconstructing nested tuples is not 
implemented."); } std::vector element_handles; diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index a534d8ff06..af9d772b00 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -2682,8 +2682,10 @@ Status HloInstruction::Visit(DfsHloVisitorBase* visitor) { case HloOpcode::kTrace: break; } - return Unimplemented("unhandled HloOpcode for DfsHloVisitor: %s", - HloOpcodeString(opcode_).c_str()); + return InternalError( + "Unhandled HloOpcode for DfsHloVisitor: %s. This should not happen - " + "please file a bug for XLA.", + HloOpcodeString(opcode_).c_str()); } // Explicit instantiations. diff --git a/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc b/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc index 032c06cd3c..3ab0ea4ad4 100644 --- a/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc +++ b/tensorflow/compiler/xla/tests/deconstruct_tuple_test.cc @@ -195,7 +195,7 @@ XLA_TEST_F(DeconstructTupleTest, DeconstructNestedTuple) { auto result_status = client_->DeconstructTuple(*global_data); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("deconstructing nested tuples not yet supported")); + HasSubstr("Deconstructing nested tuples is not implemented")); } } // namespace -- GitLab From c661f2c3de75e3ad58bce52b39b8cc2e7ee07c0e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 28 Feb 2018 13:19:01 -0800 Subject: [PATCH 245/884] [TF:XLA] Bump open source llvm revision to r326313 PiperOrigin-RevId: 187373178 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index fa3671b4c9..ea8f42ab8d 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - 
"https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/832f2bf0d8908aea8160bab128708d521764fe8d.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", ], - sha256 = "e6bb793bbdce37ee5643789a27d174f1cdd8e7323a69d5f331376eb34755ee0d", - strip_prefix = "llvm-832f2bf0d8908aea8160bab128708d521764fe8d", + sha256 = "7990b4d446de971e0acc481942920452a182d2f87a8164bdc117fd9b9ace591d", + strip_prefix = "llvm-9a6e78e4adc959d2825f7af35b4ed0e09394d840", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 0f3105c39b079d8e7741e48e3b098c47c81a453a Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Wed, 28 Feb 2018 13:43:42 -0800 Subject: [PATCH 246/884] [XLA] Add a HLO simplifier pass to fold Conditional(constant_predicate, true_computation, false_computation) to Call(predicated_computation) and finally inlined computation. 
PiperOrigin-RevId: 187376657 --- tensorflow/compiler/xla/service/BUILD | 35 ++++ .../xla/service/conditional_simplifier.cc | 106 ++++++++++++ .../xla/service/conditional_simplifier.h | 38 +++++ .../service/conditional_simplifier_test.cc | 153 ++++++++++++++++++ tensorflow/compiler/xla/service/cpu/BUILD | 1 + .../compiler/xla/service/cpu/cpu_compiler.cc | 2 + tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../compiler/xla/service/gpu/gpu_compiler.cc | 2 + 8 files changed, 338 insertions(+) create mode 100644 tensorflow/compiler/xla/service/conditional_simplifier.cc create mode 100644 tensorflow/compiler/xla/service/conditional_simplifier.h create mode 100644 tensorflow/compiler/xla/service/conditional_simplifier_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index e6a6e54927..e4ae812532 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1213,6 +1213,41 @@ tf_cc_test( ], ) +cc_library( + name = "conditional_simplifier", + srcs = ["conditional_simplifier.cc"], + hdrs = ["conditional_simplifier.h"], + deps = [ + ":call_inliner", + ":hlo", + ":hlo_pass", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + ], +) + +tf_cc_test( + name = "conditional_simplifier_test", + srcs = ["conditional_simplifier_test.cc"], + deps = [ + ":conditional_simplifier", + ":hlo", + ":hlo_matchers", + "//tensorflow/compiler/xla:literal_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/tests:hlo_verified_test_base", + "//tensorflow/core:lib", + "//tensorflow/core:test", + ], +) + cc_library( name = "while_loop_simplifier", srcs = ["while_loop_simplifier.cc"], diff --git 
a/tensorflow/compiler/xla/service/conditional_simplifier.cc b/tensorflow/compiler/xla/service/conditional_simplifier.cc new file mode 100644 index 0000000000..f35de08085 --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_simplifier.cc @@ -0,0 +1,106 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" + +#include +#include +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" + +namespace xla { + +// Tries to replace a conditional with a call operation of the corresponding +// computation. If the given conditional has a constant predicate, tries to +// replace it with a call to its true/false computation as appropriate and then +// inline that computation. +// +// Returns true if it made a change to the graph. 
+static StatusOr TryRemoveConditional(HloInstruction* conditional) { + CHECK_EQ(conditional->opcode(), HloOpcode::kConditional); + // Do not remove conditionals that contain side-effecting instructions or + // have control predecessors/successors in either true/false computation. + if (!conditional->parent()->IsRemovable(conditional) || + conditional->HasSideEffect()) { + VLOG(2) << "Not attempting to remove conditional as it is not removable or " + "has side effect: " + << conditional->ToShortString(); + return false; + } + + if (conditional->operand(0)->opcode() != HloOpcode::kConstant) { + VLOG(2) << "Not attempting to remove conditional as its predicate is not a " + "compile-time constant: " + << conditional->ToShortString(); + return false; + } + + auto computation = conditional->parent(); + HloInstruction* call_op; + if (conditional->operand(0)->literal().Get({})) { + call_op = computation->AddInstruction(HloInstruction::CreateCall( + conditional->shape(), {conditional->mutable_operand(1)}, + conditional->true_computation())); + } else { + call_op = computation->AddInstruction(HloInstruction::CreateCall( + conditional->shape(), {conditional->mutable_operand(2)}, + conditional->false_computation())); + } + + TF_RETURN_IF_ERROR(computation->ReplaceInstruction(conditional, call_op)); + TF_RETURN_IF_ERROR(CallInliner::Inline(call_op).status()); + + return true; +} + +StatusOr ConditionalSimplifier::Run(HloModule* module) { + XLA_VLOG_LINES( + 3, "ConditionalSimplifier::Run(), before:\n" + module->ToString()); + bool changed = false; + + // Gather all the conditional ops in our module. We do this ahead of time so + // we don't have to worry about mutating the lists of computations or + // instructions as we iterate. 
+ std::vector conditional_ops; + for (auto* comp : module->computations()) { + for (auto* instr : comp->instructions()) { + if (instr->opcode() == HloOpcode::kConditional) { + conditional_ops.push_back(instr); + } + } + } + + for (HloInstruction* conditional_op : conditional_ops) { + TF_ASSIGN_OR_RETURN(bool result, TryRemoveConditional(conditional_op)); + changed |= result; + } + + XLA_VLOG_LINES(3, + "ConditionalSimplifier::Run(), after:\n" + module->ToString()); + return changed; +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/conditional_simplifier.h b/tensorflow/compiler/xla/service/conditional_simplifier.h new file mode 100644 index 0000000000..063261e26d --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_simplifier.h @@ -0,0 +1,38 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_SIMPLIFIER_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_SIMPLIFIER_H_ + +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/service/hlo_pass_interface.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/stringpiece.h" + +namespace xla { + +// HLO pass that removes kConditional with a constant predicate, replacing them +// with their true or false computation as appropriate. 
+class ConditionalSimplifier : public HloPassInterface { + public: + tensorflow::StringPiece name() const override { + return "simplify-conditional"; + } + StatusOr Run(HloModule* module) override; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_CONDITIONAL_SIMPLIFIER_H_ diff --git a/tensorflow/compiler/xla/service/conditional_simplifier_test.cc b/tensorflow/compiler/xla/service/conditional_simplifier_test.cc new file mode 100644 index 0000000000..868348547d --- /dev/null +++ b/tensorflow/compiler/xla/service/conditional_simplifier_test.cc @@ -0,0 +1,153 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" + +#include +#include + +#include "tensorflow/compiler/xla/literal_util.h" +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/xla_data.pb.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { +namespace { + +namespace op = xla::testing::opcode_matchers; + +class ConditionalSimplifierTest : public HloVerifiedTestBase { + public: + // Makes a computation that contains a conditional with constant predicate. + HloComputation* MakeConditional(HloModule* module); +}; + +HloComputation* ConditionalSimplifierTest::MakeConditional(HloModule* module) { + HloComputation::Builder builder(TestName()); + + // true_computation returns param+1. + HloComputation* true_computation; + { + HloComputation::Builder true_computation_builder(TestName() + + ".true_computation"); + auto param = + true_computation_builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(S32, {}), "param")); + auto one = true_computation_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1))); + + true_computation_builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(S32, {}), HloOpcode::kAdd, param, one)); + + true_computation = + module->AddEmbeddedComputation(true_computation_builder.Build()); + } + + // false_computation returns param+42. 
+ HloComputation* false_computation; + { + HloComputation::Builder false_computation_builder(TestName() + + ".false_computation"); + auto param = false_computation_builder.AddInstruction( + HloInstruction::CreateParameter(0, ShapeUtil::MakeShape(S32, {}), + "param")); + auto forty_two = false_computation_builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(42))); + + false_computation_builder.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(S32, {}), HloOpcode::kAdd, param, forty_two)); + false_computation = + module->AddEmbeddedComputation(false_computation_builder.Build()); + } + + auto false_instrn = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(false))); + auto false_param = builder.AddInstruction(HloInstruction::CreateParameter( + 0, ShapeUtil::MakeShape(S32, {}), "false_param")); + auto one = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(1))); + + builder.AddInstruction(HloInstruction::CreateConditional( + ShapeUtil::MakeShape(S32, {}), false_instrn, one, true_computation, + false_param, false_computation)); + + return module->AddEntryComputation(builder.Build()); +} + +TEST_F(ConditionalSimplifierTest, ConditionalGetsInlined) { + HloComputation* computation = MakeConditional(&module()); + ASSERT_TRUE(ConditionalSimplifier().Run(&module()).ValueOrDie()); + EXPECT_THAT(computation->root_instruction(), + op::Add(op::Parameter(), op::Constant())); +} + +TEST_F(ConditionalSimplifierTest, ConditionalWithControlDependency) { + HloComputation* computation = MakeConditional(&module()); + + auto* true_op = computation->AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(true))); + TF_ASSERT_OK( + true_op->AddControlDependencyTo(computation->root_instruction())); + + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +TEST_F(ConditionalSimplifierTest, NotRemovedIfContainsSend) { + HloComputation* computation = 
MakeConditional(&module()); + auto* conditional = computation->root_instruction(); + ASSERT_EQ(conditional->opcode(), HloOpcode::kConditional); + + auto* true_computation = conditional->true_computation(); + auto* send = true_computation->AddInstruction(HloInstruction::CreateSend( + true_computation->AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR0(true))), + /*channel_id=*/0)); + true_computation->AddInstruction(HloInstruction::CreateSendDone(send)); + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +TEST_F(ConditionalSimplifierTest, NotRemovedIfContainsRecv) { + HloComputation* computation = MakeConditional(&module()); + auto* conditional = computation->root_instruction(); + ASSERT_EQ(conditional->opcode(), HloOpcode::kConditional); + + auto* true_computation = conditional->true_computation(); + auto* recv = true_computation->AddInstruction(HloInstruction::CreateRecv( + ShapeUtil::MakeShape(F32, {1}), /*channel_id=*/0)); + true_computation->AddInstruction(HloInstruction::CreateRecvDone(recv)); + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +TEST_F(ConditionalSimplifierTest, NotRemovedIfContainsNonRemovableInstruction) { + HloComputation* computation = MakeConditional(&module()); + auto* conditional = computation->root_instruction(); + ASSERT_EQ(conditional->opcode(), HloOpcode::kConditional); + auto* false_computation = conditional->false_computation(); + false_computation->AddInstruction( + HloInstruction::CreateInfeed(ShapeUtil::MakeShape(F32, {1}), "config")); + EXPECT_FALSE(ConditionalSimplifier().Run(&module()).ValueOrDie()); +} + +} // namespace +} // namespace xla diff --git a/tensorflow/compiler/xla/service/cpu/BUILD b/tensorflow/compiler/xla/service/cpu/BUILD index 4170e31527..38a54fcb64 100644 --- a/tensorflow/compiler/xla/service/cpu/BUILD +++ b/tensorflow/compiler/xla/service/cpu/BUILD @@ -105,6 +105,7 @@ cc_library( "//tensorflow/compiler/xla/service:buffer_assignment", 
"//tensorflow/compiler/xla/service:buffer_liveness", "//tensorflow/compiler/xla/service:call_inliner", + "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:dot_decomposer", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:flatten_call_graph", diff --git a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc index 387806e24a..0d15be5a23 100644 --- a/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc +++ b/tensorflow/compiler/xla/service/cpu/cpu_compiler.cc @@ -47,6 +47,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/cpu/compiler_functor.h" #include "tensorflow/compiler/xla/service/cpu/conv_canonicalization.h" #include "tensorflow/compiler/xla/service/cpu/cpu_copy_insertion.h" @@ -275,6 +276,7 @@ Status CpuCompiler::RunHloPasses(HloModule* module, bool is_aot_compile) { pass.AddPass(); pass.AddPass(); pass.AddPass(); + pass.AddPass(); } pipeline.AddPass( [](const HloInstruction& dot, diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 9da4fb97fa..334efff1e6 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -510,6 +510,7 @@ cc_library( "//tensorflow/compiler/xla/service:buffer_assignment", "//tensorflow/compiler/xla/service:buffer_liveness", "//tensorflow/compiler/xla/service:call_inliner", + "//tensorflow/compiler/xla/service:conditional_simplifier", "//tensorflow/compiler/xla/service:dot_decomposer", "//tensorflow/compiler/xla/service:executable", "//tensorflow/compiler/xla/service:flatten_call_graph", diff --git 
a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 28ebd034ee..9e37acdf31 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -33,6 +33,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/buffer_assignment.h" #include "tensorflow/compiler/xla/service/buffer_liveness.h" #include "tensorflow/compiler/xla/service/call_inliner.h" +#include "tensorflow/compiler/xla/service/conditional_simplifier.h" #include "tensorflow/compiler/xla/service/dot_decomposer.h" #include "tensorflow/compiler/xla/service/flatten_call_graph.h" #include "tensorflow/compiler/xla/service/gpu/cudnn_batchnorm_rewriter.h" @@ -176,6 +177,7 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module, pass.AddPass(); pass.AddPass(); pass.AddPass(); + pass.AddPass(); } pipeline.AddPass( -- GitLab From 9d6c5a06638262f6815717c682fab29ba3524282 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 28 Feb 2018 13:48:38 -0800 Subject: [PATCH 247/884] Bypasses warnings in eager mode for converting indexed slices to tensors. PiperOrigin-RevId: 187377370 --- tensorflow/python/ops/gradients_impl.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 1418c0b10f..227316a01e 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -86,17 +86,19 @@ def _IndexedSlicesToTensor(value, dtype=None, name=None, as_ref=False): % str(value)) # TODO(mrry): Consider adding static shape information to # IndexedSlices, to avoid using numpy here. 
- dense_shape_value = tensor_util.constant_value(value.dense_shape) - if dense_shape_value is not None: - num_elements = np.prod(dense_shape_value) - if num_elements >= _LARGE_SPARSE_NUM_ELEMENTS: + if context.in_graph_mode(): + dense_shape_value = tensor_util.constant_value(value.dense_shape) + if dense_shape_value is not None: + num_elements = np.prod(dense_shape_value) + if num_elements >= _LARGE_SPARSE_NUM_ELEMENTS: + warnings.warn( + "Converting sparse IndexedSlices to a dense Tensor with %d " + "elements. This may consume a large amount of memory." % + num_elements) + else: warnings.warn( - "Converting sparse IndexedSlices to a dense Tensor with %d elements. " - "This may consume a large amount of memory." % num_elements) - else: - warnings.warn( - "Converting sparse IndexedSlices to a dense Tensor of unknown shape. " - "This may consume a large amount of memory.") + "Converting sparse IndexedSlices to a dense Tensor of unknown shape. " + "This may consume a large amount of memory.") return math_ops.unsorted_segment_sum( value.values, value.indices, value.dense_shape[0], name=name) -- GitLab From a72ece230eb46c1afcb96c52dc5ae6ceabdeaf25 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 28 Feb 2018 13:55:35 -0800 Subject: [PATCH 248/884] Checkpointable: Handle Optimizer non-slot variables Overrides the Checkpointable dependency-gathering logic to key Optimizer dependencies to the current graph. Moves my Checkpointable Adam prototype out of contrib. Right now there is no check that loading all happens in the same graph. This would be easy enough to do (save a Graph ID with the _Checkpoint object), but it's not clear to me that it's useful; doing deferred restoration in whichever graph the variable is created in seems reasonable. 
(Let me know if you disagree) PiperOrigin-RevId: 187378372 --- .../eager/python/checkpointable_utils_test.py | 115 +++++++++++------- tensorflow/python/ops/variables.py | 3 + tensorflow/python/training/checkpointable.py | 96 ++++++++++++--- tensorflow/python/training/optimizer.py | 48 +++++++- ...tensorflow.train.-adadelta-optimizer.pbtxt | 1 - ...sorflow.train.-adagrad-d-a-optimizer.pbtxt | 1 - .../tensorflow.train.-adagrad-optimizer.pbtxt | 1 - .../tensorflow.train.-adam-optimizer.pbtxt | 1 - .../tensorflow.train.-ftrl-optimizer.pbtxt | 1 - ...ow.train.-gradient-descent-optimizer.pbtxt | 1 - ...tensorflow.train.-momentum-optimizer.pbtxt | 1 - .../golden/tensorflow.train.-optimizer.pbtxt | 1 - ...ow.train.-proximal-adagrad-optimizer.pbtxt | 1 - ...-proximal-gradient-descent-optimizer.pbtxt | 1 - ...nsorflow.train.-r-m-s-prop-optimizer.pbtxt | 1 - ...rflow.train.-sync-replicas-optimizer.pbtxt | 1 - 16 files changed, 196 insertions(+), 78 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 68f0d93632..7367f1b71c 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -23,6 +23,7 @@ import six from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.eager.python import network as network_lib +from tensorflow.python.client import session as session_lib from tensorflow.python.eager import context from tensorflow.python.eager import test from tensorflow.python.framework import constant_op @@ -56,40 +57,6 @@ class CheckpointableNetwork(network_lib.Network, checkpointable.Checkpointable): return super(CheckpointableNetwork, self).track_layer(layer) -class CheckpointableAdam(adam.AdamOptimizer, checkpointable.Checkpointable): - - # NOTE: Copied from Optimizer with modifications to use add_variable - # for non-slot variables. 
These contortions are necessary to maintain - # checkpoint compatibility with variable.name based saving. - # TODO(allenl): Make this cleaner. - def _create_non_slot_variable(self, initial_value, name, colocate_with): - """Add an extra variable, not associated with a slot.""" - if context.in_graph_mode(): - graph = colocate_with.graph - else: - graph = None - - key = (name, graph) - v = self._non_slot_dict.get(key, None) - if v is None: - with ops.colocate_with(colocate_with): - def _variable_getter(name, shape, dtype, initializer): - del shape, dtype # not used, but there for compatibility - return variable_scope.variable( - name=name, initial_value=initializer, trainable=False) - - initial_value = ops.convert_to_tensor(initial_value) - v = self._add_variable_with_custom_getter( - name=name, - shape=initial_value.get_shape(), - initializer=initial_value, - getter=_variable_getter) - - self._non_slot_dict[key] = v - - return v - - class NonLayerCheckpointable(checkpointable.Checkpointable): def __init__(self): @@ -208,7 +175,7 @@ class CheckpointingTests(test.TestCase): # A nuisance Network using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. 
other_network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) @@ -314,7 +281,7 @@ class CheckpointingTests(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testSaveRestore(self): network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network) input_value = constant_op.constant([[3.]]) @@ -346,7 +313,7 @@ class CheckpointingTests(test.TestCase): if context.in_graph_mode(): return # Restore-on-create is only supported when executing eagerly on_create_network = MyNetwork() - on_create_optimizer = CheckpointableAdam(0.001) + on_create_optimizer = adam.AdamOptimizer(0.001) on_create_root = checkpointable_utils.Checkpoint( optimizer=on_create_optimizer, network=on_create_network) # Deferred restoration @@ -378,7 +345,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=training_util.get_or_create_global_step()) @@ -402,7 +369,7 @@ class CheckpointingTests(test.TestCase): for training_continuation in range(3): with ops.Graph().as_default(): network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) @@ -439,7 +406,7 @@ class CheckpointingTests(test.TestCase): with ops.Graph().as_default(), self.test_session( graph=ops.get_default_graph()): network = 
MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, global_step=training_util.get_or_create_global_step()) @@ -573,7 +540,7 @@ class CheckpointingTests(test.TestCase): root = checkpointable.Checkpointable() root.var = checkpointable_utils.add_variable( root, name="var", initializer=0.) - optimizer = CheckpointableAdam(0.1) + optimizer = adam.AdamOptimizer(0.1) if context.in_graph_mode(): train_op = optimizer.minimize(root.var) # Note that `optimizer` has not been added as a dependency of @@ -607,7 +574,7 @@ class CheckpointingTests(test.TestCase): no_slot_status.assert_consumed() no_slot_status.run_restore_ops() self.assertEqual(12., self.evaluate(new_root.var)) - new_root.optimizer = CheckpointableAdam(0.1) + new_root.optimizer = adam.AdamOptimizer(0.1) with self.assertRaisesRegexp(AssertionError, "beta1_power"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) @@ -819,7 +786,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = checkpointable.Checkpointable() obj.var = variable_scope.get_variable(name="v", initializer=0.) - obj.opt = CheckpointableAdam(0.1) + obj.opt = adam.AdamOptimizer(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) saver = checkpointable_utils.CheckpointableSaver(obj) @@ -837,7 +804,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") obj = checkpointable.Checkpointable() obj.var = variable_scope.get_variable(name="v", initializer=0.) 
- obj.opt = CheckpointableAdam(0.1) + obj.opt = adam.AdamOptimizer(0.1) obj.opt.minimize(obj.var.read_value()) self.evaluate(checkpointable_utils.gather_initializers(obj)) saver = checkpointable_utils.CheckpointableSaver(obj) @@ -847,13 +814,71 @@ class CheckpointingTests(test.TestCase): saver.restore(save_path) self.assertEqual(before_ops, graph.get_operations()) + def testMultipleGraphsNonSlotVariables(self): + with context.graph_mode(): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + optimizer = adam.AdamOptimizer(0.001) + # Construct a model in one graph + first_graph = ops.Graph() + first_session = session_lib.Session(graph=first_graph) + with first_graph.as_default(), first_session.as_default(): + first_variable = resource_variable_ops.ResourceVariable([1.]) + first_root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, variable=first_variable) + train_op = optimizer.minimize(first_variable.read_value) + self.evaluate(checkpointable_utils.gather_initializers( + first_root_checkpointable)) + self.evaluate(train_op) + self.evaluate(first_variable.assign([1.])) + self.evaluate(optimizer.get_slot( + var=first_variable, name="m").assign([2.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.evaluate(beta1_power.assign(3.)) + + # Save and load in a second graph + second_graph = ops.Graph() + with second_graph.as_default(), session_lib.Session(graph=second_graph): + second_variable = resource_variable_ops.ResourceVariable([1.]) + second_root_checkpointable = checkpointable_utils.Checkpoint( + optimizer=optimizer, variable=second_variable) + train_op = optimizer.minimize(second_variable.read_value) + second_root_checkpointable.restore(None).initialize_or_restore() + self.evaluate(train_op) + self.evaluate(second_variable.assign([4.])) + self.evaluate(optimizer.get_slot( + var=second_variable, name="m").assign([5.])) + beta1_power, _ = optimizer._get_beta_accumulators() 
+ self.evaluate(beta1_power.assign(6.)) + save_path = second_root_checkpointable.save(checkpoint_prefix) + self.evaluate(second_variable.assign([7.])) + self.evaluate(optimizer.get_slot( + var=second_variable, name="m").assign([8.])) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(6., self.evaluate(beta1_power)) + status = second_root_checkpointable.restore(save_path) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([4.], self.evaluate(second_variable)) + self.assertAllEqual([5.], self.evaluate(optimizer.get_slot( + var=second_variable, name="m"))) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(6., self.evaluate(beta1_power)) + + # Check that the first graph is unmolested + with first_graph.as_default(), first_session.as_default(): + self.assertAllEqual([1.], self.evaluate(first_variable)) + self.assertAllEqual([2.], self.evaluate(optimizer.get_slot( + var=first_variable, name="m"))) + beta1_power, _ = optimizer._get_beta_accumulators() + self.assertAllEqual(3., self.evaluate(beta1_power)) + class CheckpointCompatibilityTests(test.TestCase): def _initialized_model(self): input_value = constant_op.constant([[3.]]) network = MyNetwork() - optimizer = CheckpointableAdam(0.001) + optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, network=network, optimizer_step=optimizer_step) diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index d382683858..643a3b7edc 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -307,6 +307,9 @@ class Variable(checkpointable.CheckpointableBase): if constraint is not None and not callable(constraint): raise ValueError("The `constraint` argument must be a callable.") + # Store the graph key so optimizers know how to only retrieve variables from + # this graph. 
+ self._graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access if isinstance(initial_value, checkpointable.CheckpointInitialValue): self._maybe_initialize_checkpointable() self._update_uid = initial_value.checkpoint_position.restore_uid diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index c5e7f3cdac..02c3aebda8 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -31,8 +31,8 @@ from tensorflow.python.util import nest # creation (avoiding double assignment when executing eagerly). VARIABLE_VALUE_KEY = "VARIABLE_VALUE" -_CheckpointableReference = collections.namedtuple( - "_CheckpointableReference", +CheckpointableReference = collections.namedtuple( + "CheckpointableReference", [ # The local name for this dependency. "name", @@ -301,14 +301,17 @@ class CheckpointableBase(object): Not __init__, since most objects will forget to call it. """ - if hasattr(self, "_checkpoint_dependencies"): + if hasattr(self, "_unconditional_checkpoint_dependencies"): # __init__ already called. This check means that we don't need # Checkpointable.__init__() in the constructor of every TensorFlow object. return - # A list of _CheckpointableReference objects. - self._checkpoint_dependencies = [] + # A list of CheckpointableReference objects. Some classes implementing + # `Checkpointable`, notably `Optimizer`s, may override the + # _checkpoint_dependencies property with conditional dependencies + # (e.g. based on the current graph when saving). + self._unconditional_checkpoint_dependencies = [] # Maps names -> Checkpointable objects - self._dependency_names = {} + self._unconditional_dependency_names = {} # Restorations for other Checkpointable objects on which this object may # eventually depend. 
self._deferred_dependencies = {} # local name -> _CheckpointPosition list @@ -320,6 +323,32 @@ class CheckpointableBase(object): "initialization code was run.") self._update_uid = -1 + @property + def _checkpoint_dependencies(self): + """All dependencies of this object. + + May be overridden to include conditional dependencies. + + Returns: + A list of `CheckpointableReference` objects indicating named + `Checkpointable` dependencies which should be saved along with this + object. + """ + return self._unconditional_checkpoint_dependencies + + def _lookup_dependency(self, name): + """Look up a dependency by name. + + May be overridden to include conditional dependencies. + + Args: + name: The local name of the dependency. + Returns: + A `Checkpointable` object, or `None` if no dependency by this name was + found. + """ + return self._unconditional_dependency_names.get(name, None) + def _add_variable_with_custom_getter( self, name, shape=None, dtype=dtypes.float32, initializer=None, getter=None, overwrite=False, @@ -349,7 +378,7 @@ class CheckpointableBase(object): ValueError: If the variable name is not unique. 
""" self._maybe_initialize_checkpointable() - if not overwrite and name in self._dependency_names: + if not overwrite and self._lookup_dependency(name) is not None: raise ValueError( ("A variable named '%s' already exists in this Checkpointable, but " "Checkpointable._add_variable called to create another with " @@ -461,9 +490,10 @@ class CheckpointableBase(object): raise TypeError( ("Checkpointable._track_checkpointable() passed type %s, not a " "Checkpointable.") % (type(checkpointable),)) - new_reference = _CheckpointableReference(name=name, ref=checkpointable) - if (name in self._dependency_names - and self._dependency_names[name] is not checkpointable): + new_reference = CheckpointableReference(name=name, ref=checkpointable) + current_object = self._lookup_dependency(name) + if (current_object is not None + and current_object is not checkpointable): if not overwrite: raise ValueError( ("Called Checkpointable._track_checkpointable() with name='%s', " @@ -471,19 +501,47 @@ class CheckpointableBase(object): "dependency. Names must be unique (or overwrite=True).") % (name,)) # This is a weird thing to do, but we're not going to stop people from # using __setattr__. 
- for index, (old_name, _) in enumerate(self._checkpoint_dependencies): + for index, (old_name, _) in enumerate( + self._unconditional_checkpoint_dependencies): if name == old_name: - self._checkpoint_dependencies[index] = new_reference + self._unconditional_checkpoint_dependencies[index] = new_reference else: - self._checkpoint_dependencies.append(new_reference) + self._unconditional_checkpoint_dependencies.append(new_reference) - self._dependency_names[name] = checkpointable - deferred_dependency_list = self._deferred_dependencies.pop(name, None) - if deferred_dependency_list is not None: - for checkpoint_position in deferred_dependency_list: - checkpoint_position.restore(checkpointable=checkpointable) + self._unconditional_dependency_names[name] = checkpointable + self._handle_deferred_dependencies(name=name, checkpointable=checkpointable) return checkpointable + def _handle_deferred_dependencies(self, name, checkpointable): + """Pop and load any deferred checkpoint restores into `checkpointable`. + + This method does not add a new dependency on `checkpointable`, but it does + check if any outstanding/deferred dependencies have been queued waiting for + this dependency to be added (matched based on `name`). If so, + `checkpointable` and its dependencies are restored. The restorations are + considered fulfilled and so are deleted. + + `_track_checkpointable` is more appropriate for adding a + normal/unconditional dependency, and includes handling for deferred + restorations. This method allows objects such as `Optimizer` to use the same + restoration logic while managing conditional dependencies themselves, by + overriding `_checkpoint_dependencies` and `_lookup_dependency` to change the + object's dependencies based on the context it is saved/restored in (a single + optimizer instance can have state associated with multiple graphs). 
+ + Args: + name: The name of the dependency within this object (`self`), used to + match `checkpointable` with values saved in a checkpoint. + checkpointable: The Checkpointable object to restore (inheriting from + `CheckpointableBase`). + """ + deferred_dependencies_list = self._deferred_dependencies.pop(name, ()) + for checkpoint_position in sorted( + deferred_dependencies_list, + key=lambda restore: restore.checkpoint.restore_uid, + reverse=True): + checkpoint_position.restore(checkpointable) + def _restore_from_checkpoint_position(self, checkpoint_position): """Restore this object and its dependencies (may be deferred).""" # Attempt a breadth-first traversal, since presumably the user has more @@ -519,7 +577,7 @@ class CheckpointableBase(object): child_position = _CheckpointPosition( checkpoint=checkpoint, proto_id=child.node_id) - local_object = self._dependency_names.get(child.local_name, None) + local_object = self._lookup_dependency(child.local_name) if local_object is None: # We don't yet have a dependency registered with this name. Save it # in case we do. diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 454cc3add5..ba7e087c5a 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -216,7 +216,11 @@ def _get_processor(v): @tf_export("train.Optimizer") -class Optimizer(checkpointable.Checkpointable): +class Optimizer( + # Optimizers inherit from CheckpointableBase rather than Checkpointable + # since they do most of their dependency management themselves (slot + # variables are special-cased, and non-slot variables are keyed to graphs). + checkpointable.CheckpointableBase): """Base class for optimizers. This class defines the API to add Ops to train a model. 
You never use this @@ -645,7 +649,8 @@ class Optimizer(checkpointable.Checkpointable): def _create_non_slot_variable(self, initial_value, name, colocate_with): """Add an extra variable, not associated with a slot.""" - if context.in_graph_mode(): + in_graph_mode = context.in_graph_mode() + if in_graph_mode: graph = colocate_with.graph else: graph = None @@ -653,12 +658,51 @@ class Optimizer(checkpointable.Checkpointable): key = (name, graph) v = self._non_slot_dict.get(key, None) if v is None: + self._maybe_initialize_checkpointable() with ops.colocate_with(colocate_with): + if not in_graph_mode: + restored_initial_value = self._preload_simple_restoration( + name=name, shape=None) + if restored_initial_value is not None: + initial_value = restored_initial_value v = variable_scope.variable(initial_value, name=name, trainable=False) + # Restore this variable by name if necessary, but don't add a + # Checkpointable dependency. Optimizers return the current graph's + # non-slot variables from _checkpoint_dependencies explicitly rather + # than unconditionally adding dependencies (since there may be multiple + # non-slot variables with the same name in different graphs, trying to + # save all of them would result in errors). + self._handle_deferred_dependencies(name=name, checkpointable=v) self._non_slot_dict[key] = v return v + @property + def _checkpoint_dependencies(self): + """From Checkpointable. 
Gather graph-specific non-slot variables to save.""" + current_graph_non_slot_variables = [] + current_graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access + for (name, _), variable_object in sorted(self._non_slot_dict.items(), + # Avoid comparing graphs + key=lambda item: item[0][0]): + if variable_object._graph_key == current_graph_key: # pylint: disable=protected-access + current_graph_non_slot_variables.append( + checkpointable.CheckpointableReference( + name=name, ref=variable_object)) + return (super(Optimizer, self)._checkpoint_dependencies + + current_graph_non_slot_variables) + + def _lookup_dependency(self, name): + """From Checkpointable. Find a non-slot variable in the current graph.""" + unconditional = super(Optimizer, self)._lookup_dependency(name) + if unconditional is not None: + return unconditional + if context.in_graph_mode(): + graph = ops.get_default_graph() + else: + graph = None + return self._get_non_slot_variable(name, graph=graph) + def _get_non_slot_variable(self, name, graph=None): return self._non_slot_dict.get((name, graph), None) diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt index c02e54adfb..16bfbf20d5 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adadelta-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.AdadeltaOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt index 2b619908fc..61cde9181c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-d-a-optimizer.pbtxt @@ -2,7 +2,6 @@ path: 
"tensorflow.train.AdagradDAOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt index 2005cf4677..0a998c1afe 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adagrad-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.AdagradOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt index 0a2bae1d90..cc59541525 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-adam-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.AdamOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt index 847f9ad759..1add3a9021 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-ftrl-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.FtrlOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt index 13a58e0608..ef5bbd6ace 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-gradient-descent-optimizer.pbtxt 
@@ -2,7 +2,6 @@ path: "tensorflow.train.GradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt index bfbc2357a3..3d6e87f5eb 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-momentum-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.MomentumOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt index 437efa0a2b..e73861ff7c 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-optimizer.pbtxt @@ -1,7 +1,6 @@ path: "tensorflow.train.Optimizer" tf_class { is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt index 72f224605f..301b35b199 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-proximal-adagrad-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.ProximalAdagradOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt index 316275b1fb..8815befa93 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt +++ 
b/tensorflow/tools/api/golden/tensorflow.train.-proximal-gradient-descent-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.ProximalGradientDescentOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt index af50a19861..e9819683ba 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-r-m-s-prop-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.RMSPropOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { diff --git a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt index 6edc516c93..3db96aff87 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.-sync-replicas-optimizer.pbtxt @@ -2,7 +2,6 @@ path: "tensorflow.train.SyncReplicasOptimizer" tf_class { is_instance: "" is_instance: "" - is_instance: "" is_instance: "" is_instance: "" member { -- GitLab From 8cd02f550634ea7ae5f75531a49986e099ddf957 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 13:58:18 -0800 Subject: [PATCH 249/884] Fix Markdown syntax of bulleted list. 
PiperOrigin-RevId: 187378753 --- tensorflow/python/ops/distributions/uniform.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py index 3580af18f2..e0c554442f 100644 --- a/tensorflow/python/ops/distributions/uniform.py +++ b/tensorflow/python/ops/distributions/uniform.py @@ -45,11 +45,12 @@ class Uniform(distribution.Distribution): Z = b - a ``` - where: - * `low = a`, - * `high = b`, - * `Z` is the normalizing constant, and, - * `I[predicate]` is the [indicator function]( + where + + - `low = a`, + - `high = b`, + - `Z` is the normalizing constant, and + - `I[predicate]` is the [indicator function]( https://en.wikipedia.org/wiki/Indicator_function) for `predicate`. The parameters `low` and `high` must be shaped in a way that supports -- GitLab From 9f95084b53303af50d0a13fd9bb40a183af9104a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 14:22:07 -0800 Subject: [PATCH 250/884] Make fuse_op handle loops in the graph The current implementation of fuse_op does not work when there are loops in the tensorflow graph. 
PiperOrigin-RevId: 187382720 --- .../contrib/framework/python/framework/graph_util.py | 7 ++++++- .../contrib/framework/python/framework/graph_util_test.py | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/framework/python/framework/graph_util.py b/tensorflow/contrib/framework/python/framework/graph_util.py index 49eec3a3f1..2703224b1b 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util.py +++ b/tensorflow/contrib/framework/python/framework/graph_util.py @@ -85,14 +85,19 @@ def fuse_op(graph_def, input_nodes, output_nodes, output_dtypes, if n not in reachable_by_input and n not in output_nodes_set: # n is between input and output, i.e., part of the fused op next_to_visit = [n] + visited = set() while next_to_visit: cur_node = next_to_visit[0] + visited.add(cur_node) del next_to_visit[0] if cur_node in reachable_by_input and cur_node not in input_nodes_set: raise TypeError("Node %s uses input %s not in input_nodes." % (n, cur_node)) if cur_node not in input_nodes_set: - next_to_visit += name_to_input_name[cur_node] + next_to_visit += [ + input_node for input_node in name_to_input_name[cur_node] + if input_node not in visited + ] elif n not in reachable_by_input: nodes_post_output.append(n) diff --git a/tensorflow/contrib/framework/python/framework/graph_util_test.py b/tensorflow/contrib/framework/python/framework/graph_util_test.py index b8a6d109e1..812c5fbd8c 100644 --- a/tensorflow/contrib/framework/python/framework/graph_util_test.py +++ b/tensorflow/contrib/framework/python/framework/graph_util_test.py @@ -42,7 +42,8 @@ class GraphUtilTest(test.TestCase): graph_def = graph_pb2.GraphDef() node_a = GetNewNode('A', 'Placeholder', []) node_b = GetNewNode('B', 'Op1', ['A']) - node_c = GetNewNode('C', 'Op1', ['B']) + # A loop in the part that will be fused. 
+ node_c = GetNewNode('C', 'Op1', ['B', 'C']) node_d = GetNewNode('D', 'Op1', ['C']) node_e = GetNewNode('E', 'Op1', ['D']) graph_def.node.extend([node_a, node_b, node_c, node_d, node_e]) -- GitLab From b21969b1305b211cd08f8d628b6a5a0e7a9e16f8 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 28 Feb 2018 14:36:09 -0800 Subject: [PATCH 251/884] [tf.data] Add `num_parallel_reads` argument to `tf.data.TFRecordDataset`. This provides a convenient way to use the `tf.contrib.data.parallel_interleave()` idiom for reading multiple TFRecord files in parallel. In addition, the `filenames` argument to the initializer can now be a `tf.data.Dataset` of strings, which makes it easier to use `TFRecordDataset` with `Dataset.list_files()`. PiperOrigin-RevId: 187384812 --- tensorflow/contrib/data/python/ops/BUILD | 1 + .../contrib/data/python/ops/interleave_ops.py | 97 +--------- .../kernel_tests/reader_dataset_ops_test.py | 36 +++- tensorflow/python/data/ops/BUILD | 1 + tensorflow/python/data/ops/readers.py | 166 +++++++++++++++++- .../tensorflow.data.-t-f-record-dataset.pbtxt | 2 +- 6 files changed, 200 insertions(+), 103 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 789cb9c99a..16fe31675f 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -126,6 +126,7 @@ py_library( "//tensorflow/python:tensor_util", "//tensorflow/python:util", "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/ops:readers", "//tensorflow/python/data/util:convert", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", diff --git a/tensorflow/contrib/data/python/ops/interleave_ops.py b/tensorflow/contrib/data/python/ops/interleave_ops.py index 3124ca1d15..91f19da02d 100644 --- a/tensorflow/contrib/data/python/ops/interleave_ops.py +++ b/tensorflow/contrib/data/python/ops/interleave_ops.py @@ -17,101 +17,10 @@ from __future__ import 
absolute_import from __future__ import division from __future__ import print_function -from tensorflow.python.data.ops import dataset_ops -from tensorflow.python.data.util import convert -from tensorflow.python.data.util import nest -from tensorflow.python.data.util import sparse -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import function -from tensorflow.python.framework import ops -from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.data.ops import readers from tensorflow.python.util import deprecation -class ParallelInterleaveDataset(dataset_ops.Dataset): - """A `Dataset` that maps a function over its input and flattens the result.""" - - def __init__(self, input_dataset, map_func, cycle_length, block_length, - sloppy, buffer_output_elements, prefetch_input_elements): - """See `tf.contrib.data.parallel_interleave()` for details.""" - super(ParallelInterleaveDataset, self).__init__() - self._input_dataset = input_dataset - - @function.Defun(*nest.flatten( - sparse.as_dense_types(input_dataset.output_types, - input_dataset.output_classes))) - def tf_map_func(*args): - """A wrapper for Defun that facilitates shape inference.""" - # Pass in shape information from the input_dataset. 
- dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, - input_dataset.output_classes) - for arg, shape in zip(args, nest.flatten(dense_shapes)): - arg.set_shape(shape) - - nested_args = nest.pack_sequence_as(input_dataset.output_types, args) - nested_args = sparse.deserialize_sparse_tensors( - nested_args, input_dataset.output_types, input_dataset.output_shapes, - input_dataset.output_classes) - if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access - dataset = map_func(*nested_args) - else: - dataset = map_func(nested_args) - - if not isinstance(dataset, dataset_ops.Dataset): - raise TypeError("`map_func` must return a `Dataset` object.") - - self._output_classes = dataset.output_classes - self._output_types = dataset.output_types - self._output_shapes = dataset.output_shapes - - return dataset._as_variant_tensor() # pylint: disable=protected-access - - self._map_func = tf_map_func - self._map_func.add_to_graph(ops.get_default_graph()) - - self._cycle_length = ops.convert_to_tensor( - cycle_length, dtype=dtypes.int64, name="cycle_length") - self._block_length = ops.convert_to_tensor( - block_length, dtype=dtypes.int64, name="block_length") - self._sloppy = ops.convert_to_tensor( - sloppy, dtype=dtypes.bool, name="sloppy") - self._buffer_output_elements = convert.optional_param_to_tensor( - "buffer_output_elements", - buffer_output_elements, - argument_default=2 * block_length) - self._prefetch_input_elements = convert.optional_param_to_tensor( - "prefetch_input_elements", - prefetch_input_elements, - argument_default=2 * cycle_length) - - def _as_variant_tensor(self): - return gen_dataset_ops.parallel_interleave_dataset( - self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access - self._map_func.captured_inputs, - self._cycle_length, - self._block_length, - self._sloppy, - self._buffer_output_elements, - self._prefetch_input_elements, - f=self._map_func, - output_types=nest.flatten( - 
sparse.as_dense_types(self.output_types, self.output_classes)), - output_shapes=nest.flatten( - sparse.as_dense_shapes(self.output_shapes, self.output_classes))) - - @property - def output_classes(self): - return self._output_classes - - @property - def output_shapes(self): - return self._output_shapes - - @property - def output_types(self): - return self._output_types - - def parallel_interleave(map_func, cycle_length, block_length=1, @@ -162,7 +71,7 @@ def parallel_interleave(map_func, @{tf.data.Dataset.apply}. """ def _apply_fn(dataset): - return ParallelInterleaveDataset( + return readers.ParallelInterleaveDataset( dataset, map_func, cycle_length, block_length, sloppy, buffer_output_elements, prefetch_input_elements) @@ -221,7 +130,7 @@ def sloppy_interleave(map_func, cycle_length, block_length=1): @{tf.data.Dataset.apply}. """ def _apply_fn(dataset): - return ParallelInterleaveDataset( + return readers.ParallelInterleaveDataset( dataset, map_func, cycle_length, diff --git a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py index d7140088c3..1ddedfda4e 100644 --- a/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/python/data/kernel_tests/reader_dataset_ops_test.py @@ -21,6 +21,7 @@ import gzip import os import zlib +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.ops import iterator_ops from tensorflow.python.data.ops import readers from tensorflow.python.framework import constant_op @@ -736,12 +737,43 @@ class TFRecordDatasetTest(test.TestCase): one_mebibyte = 2**20 d = readers.TFRecordDataset(self.test_filenames, buffer_size=one_mebibyte) iterator = d.make_one_shot_iterator() + next_element = iterator.get_next() with self.test_session() as sess: for j in range(self._num_files): for i in range(self._num_records): - self.assertAllEqual(self._record(j, i), sess.run(iterator.get_next())) + self.assertAllEqual(self._record(j, 
i), sess.run(next_element)) with self.assertRaises(errors.OutOfRangeError): - sess.run(iterator.get_next()) + sess.run(next_element) + + def testReadFromDatasetOfFiles(self): + files = dataset_ops.Dataset.from_tensor_slices(self.test_filenames) + d = readers.TFRecordDataset(files) + iterator = d.make_one_shot_iterator() + next_element = iterator.get_next() + with self.test_session() as sess: + for j in range(self._num_files): + for i in range(self._num_records): + self.assertAllEqual(self._record(j, i), sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + + def testReadTenEpochsFromDatasetOfFilesInParallel(self): + files = dataset_ops.Dataset.from_tensor_slices( + self.test_filenames).repeat(10) + d = readers.TFRecordDataset(files, num_parallel_reads=4) + iterator = d.make_one_shot_iterator() + next_element = iterator.get_next() + expected = [] + actual = [] + with self.test_session() as sess: + for _ in range(10): + for j in range(self._num_files): + for i in range(self._num_records): + expected.append(self._record(j, i)) + actual.append(sess.run(next_element)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(next_element) + self.assertEqual(sorted(expected), sorted(actual)) if __name__ == "__main__": diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index dc293562ab..a8f2154db8 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -35,6 +35,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":dataset_ops", + "//tensorflow/python:array_ops", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", diff --git a/tensorflow/python/data/ops/readers.py b/tensorflow/python/data/ops/readers.py index fa7601741b..6c493d8163 100644 --- a/tensorflow/python/data/ops/readers.py +++ b/tensorflow/python/data/ops/readers.py @@ -17,11 +17,15 @@ from __future__ import absolute_import from __future__ import 
division from __future__ import print_function -from tensorflow.python.data.ops.dataset_ops import Dataset +from tensorflow.python.data.ops import dataset_ops from tensorflow.python.data.util import convert +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse from tensorflow.python.framework import dtypes +from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.util.tf_export import tf_export @@ -31,7 +35,7 @@ _DEFAULT_READER_BUFFER_SIZE_BYTES = 256 * 1024 # 256 KB @tf_export("data.TextLineDataset") -class TextLineDataset(Dataset): +class TextLineDataset(dataset_ops.Dataset): """A `Dataset` comprising lines from one or more text files.""" def __init__(self, filenames, compression_type=None, buffer_size=None): @@ -73,8 +77,7 @@ class TextLineDataset(Dataset): return dtypes.string -@tf_export("data.TFRecordDataset") -class TFRecordDataset(Dataset): +class _TFRecordDataset(dataset_ops.Dataset): """A `Dataset` comprising records from one or more TFRecord files.""" def __init__(self, filenames, compression_type=None, buffer_size=None): @@ -87,7 +90,7 @@ class TFRecordDataset(Dataset): buffer_size: (Optional.) A `tf.int64` scalar representing the number of bytes in the read buffer. 0 means no buffering. """ - super(TFRecordDataset, self).__init__() + super(_TFRecordDataset, self).__init__() # Force the type to string even if filenames is an empty list. 
self._filenames = ops.convert_to_tensor( filenames, dtypes.string, name="filenames") @@ -118,8 +121,159 @@ class TFRecordDataset(Dataset): return dtypes.string +class ParallelInterleaveDataset(dataset_ops.Dataset): + """A `Dataset` that maps a function over its input and flattens the result.""" + + def __init__(self, input_dataset, map_func, cycle_length, block_length, + sloppy, buffer_output_elements, prefetch_input_elements): + """See `tf.contrib.data.parallel_interleave()` for details.""" + super(ParallelInterleaveDataset, self).__init__() + self._input_dataset = input_dataset + + @function.Defun(*nest.flatten( + sparse.as_dense_types(input_dataset.output_types, + input_dataset.output_classes))) + def tf_map_func(*args): + """A wrapper for Defun that facilitates shape inference.""" + # Pass in shape information from the input_dataset. + dense_shapes = sparse.as_dense_shapes(input_dataset.output_shapes, + input_dataset.output_classes) + for arg, shape in zip(args, nest.flatten(dense_shapes)): + arg.set_shape(shape) + + nested_args = nest.pack_sequence_as(input_dataset.output_types, args) + nested_args = sparse.deserialize_sparse_tensors( + nested_args, input_dataset.output_types, input_dataset.output_shapes, + input_dataset.output_classes) + if dataset_ops._should_unpack_args(nested_args): # pylint: disable=protected-access + dataset = map_func(*nested_args) + else: + dataset = map_func(nested_args) + + if not isinstance(dataset, dataset_ops.Dataset): + raise TypeError("`map_func` must return a `Dataset` object.") + + self._output_classes = dataset.output_classes + self._output_types = dataset.output_types + self._output_shapes = dataset.output_shapes + + return dataset._as_variant_tensor() # pylint: disable=protected-access + + self._map_func = tf_map_func + self._map_func.add_to_graph(ops.get_default_graph()) + + self._cycle_length = ops.convert_to_tensor( + cycle_length, dtype=dtypes.int64, name="cycle_length") + self._block_length = ops.convert_to_tensor( + 
block_length, dtype=dtypes.int64, name="block_length") + self._sloppy = ops.convert_to_tensor( + sloppy, dtype=dtypes.bool, name="sloppy") + self._buffer_output_elements = convert.optional_param_to_tensor( + "buffer_output_elements", + buffer_output_elements, + argument_default=2 * block_length) + self._prefetch_input_elements = convert.optional_param_to_tensor( + "prefetch_input_elements", + prefetch_input_elements, + argument_default=2 * cycle_length) + + def _as_variant_tensor(self): + return gen_dataset_ops.parallel_interleave_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + self._map_func.captured_inputs, + self._cycle_length, + self._block_length, + self._sloppy, + self._buffer_output_elements, + self._prefetch_input_elements, + f=self._map_func, + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes)), + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes))) + + @property + def output_classes(self): + return self._output_classes + + @property + def output_shapes(self): + return self._output_shapes + + @property + def output_types(self): + return self._output_types + + +@tf_export("data.TFRecordDataset") +class TFRecordDataset(dataset_ops.Dataset): + """A `Dataset` comprising records from one or more TFRecord files.""" + + def __init__(self, filenames, compression_type=None, buffer_size=None, + num_parallel_reads=None): + """Creates a `TFRecordDataset` to read for one or more TFRecord files. + + NOTE: The `num_parallel_reads` argument can be used to improve performance + when reading from a remote filesystem. + + Args: + filenames: A `tf.string` tensor or `tf.data.Dataset` containing one or + more filenames. + compression_type: (Optional.) A `tf.string` scalar evaluating to one of + `""` (no compression), `"ZLIB"`, or `"GZIP"`. + buffer_size: (Optional.) A `tf.int64` scalar representing the number of + bytes in the read buffer. 
0 means no buffering. + num_parallel_reads: (Optional.) A `tf.int64` scalar representing the + number of files to read in parallel. Defaults to reading files + sequentially. + + Raises: + TypeError: If any argument does not have the expected type. + ValueError: If any argument does not have the expected shape. + """ + super(TFRecordDataset, self).__init__() + if isinstance(filenames, dataset_ops.Dataset): + if filenames.output_types != dtypes.string: + raise TypeError( + "`filenames` must be a `tf.data.Dataset` of `tf.string` elements.") + if not filenames.output_shapes.is_compatible_with(tensor_shape.scalar()): + raise ValueError( + "`filenames` must be a `tf.data.Dataset` of scalar `tf.string` " + "elements.") + else: + filenames = ops.convert_to_tensor(filenames, dtype=dtypes.string) + filenames = array_ops.reshape(filenames, [-1], name="flat_filenames") + filenames = dataset_ops.Dataset.from_tensor_slices(filenames) + + def read_one_file(filename): + return _TFRecordDataset(filename, compression_type, buffer_size) + + if num_parallel_reads is None: + self._impl = filenames.flat_map(read_one_file) + else: + self._impl = ParallelInterleaveDataset( + filenames, read_one_file, cycle_length=num_parallel_reads, + block_length=1, sloppy=False, buffer_output_elements=None, + prefetch_input_elements=None) + + def _as_variant_tensor(self): + return self._impl._as_variant_tensor() # pylint: disable=protected-access + + @property + def output_classes(self): + return self._impl.output_classes + + @property + def output_shapes(self): + return self._impl.output_shapes + + @property + def output_types(self): + return self._impl.output_types + + @tf_export("data.FixedLengthRecordDataset") -class FixedLengthRecordDataset(Dataset): +class FixedLengthRecordDataset(dataset_ops.Dataset): """A `Dataset` of fixed-length records from one or more binary files.""" def __init__(self, diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt 
b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt index 9770389e5e..709ec127ce 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt @@ -17,7 +17,7 @@ tf_class { } member_method { name: "__init__" - argspec: "args=[\'self\', \'filenames\', \'compression_type\', \'buffer_size\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + argspec: "args=[\'self\', \'filenames\', \'compression_type\', \'buffer_size\', \'num_parallel_reads\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\'], " } member_method { name: "apply" -- GitLab From 281677dffc41665343d434752df6464fe2b52319 Mon Sep 17 00:00:00 2001 From: Giuseppe Date: Wed, 28 Feb 2018 23:32:19 +0100 Subject: [PATCH 252/884] Fix markdown error in documentation. Newline in the middle of links was preventing their rendering. --- tensorflow/docs_src/install/install_sources.md | 3 +-- tensorflow/docs_src/install/install_windows.md | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 8d83e9f119..acf0af0d9d 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -393,8 +393,7 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index dedf485f93..f0a30ee394 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -153,8 +153,7 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). -- GitLab From f28e4d6faf94c08464f430f9cd01ef32dde6ad46 Mon Sep 17 00:00:00 2001 From: Richard Wei Date: Wed, 28 Feb 2018 14:43:39 -0800 Subject: [PATCH 253/884] Package c_api_experimental.h in binary release distributions. PiperOrigin-RevId: 187385913 --- tensorflow/c/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/BUILD b/tensorflow/c/BUILD index 5dfb743681..29ed957c9a 100644 --- a/tensorflow/c/BUILD +++ b/tensorflow/c/BUILD @@ -17,7 +17,10 @@ load( filegroup( name = "headers", - srcs = ["c_api.h"], + srcs = [ + "c_api.h", + "c_api_experimental.h", + ], visibility = ["//tensorflow:__subpackages__"], ) -- GitLab From 91d49c7d98114da4e4647c62d9f9b69119296b69 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 28 Feb 2018 14:50:02 -0800 Subject: [PATCH 254/884] Removing underscore prefixes from hidden generated Python functions. 
PiperOrigin-RevId: 187386941 --- tensorflow/compiler/tests/binary_ops_test.py | 50 +++++------ tensorflow/compiler/tests/concat_ops_test.py | 2 +- tensorflow/compiler/tests/image_ops_test.py | 2 +- tensorflow/compiler/tests/lrn_ops_test.py | 8 +- .../compiler/tests/pooling_ops_3d_test.py | 18 ++-- tensorflow/compiler/tests/pooling_ops_test.py | 10 +-- .../compiler/tests/spacetobatch_op_test.py | 4 +- tensorflow/compiler/tests/stack_ops_test.py | 46 +++++----- .../compiler/tests/tensor_array_ops_test.py | 2 +- tensorflow/contrib/lookup/lookup_ops.py | 38 +++----- tensorflow/python/__init__.py | 4 - .../python/debug/lib/debug_gradients.py | 9 +- tensorflow/python/eager/benchmarks_test.py | 3 +- tensorflow/python/eager/ops_test.py | 8 +- .../python/eager/python_eager_op_gen.cc | 25 ++++-- tensorflow/python/framework/function_test.py | 4 +- .../python/framework/graph_util_test.py | 14 +-- tensorflow/python/framework/ops_test.py | 2 +- tensorflow/python/framework/python_op_gen.cc | 56 ++++++++++-- .../python/framework/python_op_gen_internal.h | 3 + .../python/framework/tensor_util_test.py | 2 +- .../python/grappler/layout_optimizer_test.py | 10 +-- .../python/kernel_tests/array_ops_test.py | 2 +- .../kernel_tests/batchtospace_op_test.py | 2 +- .../python/kernel_tests/bcast_ops_test.py | 4 +- .../kernel_tests/checkpoint_ops_test.py | 34 ++++---- .../python/kernel_tests/concat_op_test.py | 22 ++--- .../kernel_tests/control_flow_ops_py_test.py | 10 +-- .../python/kernel_tests/cwise_ops_test.py | 10 +-- .../kernel_tests/determinant_op_test.py | 2 +- .../fractional_avg_pool_op_test.py | 10 +-- .../fractional_max_pool_op_test.py | 28 +++--- .../matrix_exponential_op_test.py | 12 +-- .../kernel_tests/matrix_logarithm_op_test.py | 14 +-- .../python/kernel_tests/pooling_ops_test.py | 86 +++++++++---------- .../kernel_tests/save_restore_ops_test.py | 7 +- tensorflow/python/kernel_tests/scalar_test.py | 4 +- .../kernel_tests/spacetobatch_op_test.py | 4 +- 
.../kernel_tests/sparse_xent_op_test.py | 13 +-- .../python/kernel_tests/stack_ops_test.py | 82 +++++++++--------- .../kernel_tests/tensor_array_ops_test.py | 2 +- .../python/kernel_tests/unique_op_test.py | 6 +- .../python/kernel_tests/variable_ops_test.py | 24 +++--- .../python/kernel_tests/variables_test.py | 2 +- .../python/kernel_tests/xent_op_test.py | 12 +-- .../python/ops/accumulate_n_benchmark.py | 7 +- tensorflow/python/ops/array_grad.py | 16 +--- tensorflow/python/ops/array_ops.py | 77 ++++++++--------- tensorflow/python/ops/batch_norm_benchmark.py | 5 +- .../python/ops/candidate_sampling_ops.py | 12 +-- tensorflow/python/ops/control_flow_grad.py | 1 - tensorflow/python/ops/control_flow_ops.py | 46 ++++------ tensorflow/python/ops/ctc_ops.py | 6 +- tensorflow/python/ops/data_flow_ops.py | 42 ++++----- tensorflow/python/ops/functional_ops.py | 2 +- tensorflow/python/ops/gradients_impl.py | 2 +- tensorflow/python/ops/histogram_ops.py | 4 +- tensorflow/python/ops/image_grad.py | 12 +-- tensorflow/python/ops/image_ops_impl.py | 12 +-- tensorflow/python/ops/io_ops.py | 75 ++++++++-------- tensorflow/python/ops/linalg/linalg_impl.py | 8 +- tensorflow/python/ops/linalg_ops.py | 15 ++-- tensorflow/python/ops/logging_ops.py | 15 ++-- tensorflow/python/ops/lookup_ops.py | 20 ++--- tensorflow/python/ops/math_grad.py | 59 +++++-------- tensorflow/python/ops/math_ops.py | 80 +++++++++-------- tensorflow/python/ops/nn_batchnorm_test.py | 3 +- tensorflow/python/ops/nn_grad.py | 64 +++++++------- tensorflow/python/ops/nn_impl.py | 6 +- tensorflow/python/ops/nn_ops.py | 39 ++++----- tensorflow/python/ops/parsing_ops.py | 23 ++--- tensorflow/python/ops/random_ops.py | 16 ++-- tensorflow/python/ops/script_ops.py | 8 +- tensorflow/python/ops/session_ops.py | 13 ++- tensorflow/python/ops/sparse_grad.py | 11 +-- tensorflow/python/ops/sparse_ops.py | 34 ++++---- tensorflow/python/ops/standard_ops.py | 1 - tensorflow/python/ops/state_ops.py | 15 ++-- 
tensorflow/python/ops/string_ops.py | 4 +- tensorflow/python/ops/summary_ops.py | 3 +- tensorflow/python/ops/tensor_array_ops.py | 20 ++--- tensorflow/python/summary/summary.py | 9 +- tensorflow/python/training/checkpoint_ops.py | 6 +- .../training/learning_rate_decay_test.py | 20 ++--- .../python/training/moving_averages_test.py | 2 +- tensorflow/python/training/saver.py | 6 +- .../python/training/saver_test_utils.py | 12 +-- tensorflow/python/user_ops/user_ops.py | 2 +- 88 files changed, 742 insertions(+), 803 deletions(-) diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 0e4efaed86..6bcfed7b69 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -71,7 +71,7 @@ class BinaryOpsTest(XLATestCase): expected=np.array([[[[False, True], [True, False]]]], dtype=dtype)) self._testBinary( - gen_math_ops._real_div, + gen_math_ops.real_div, np.array([3, 3, -1.5, -8, 44], dtype=dtype), np.array([2, -2, 7, -4, 0], dtype=dtype), expected=np.array( @@ -108,57 +108,57 @@ class BinaryOpsTest(XLATestCase): [0, np.pi / 4, np.pi / 2, np.pi * 3 / 4, np.pi], dtype=dtype)) self._testBinary( - gen_math_ops._reciprocal_grad, + gen_math_ops.reciprocal_grad, np.array([4, -3, -2, 1], dtype=dtype), np.array([5, -6, 7, -8], dtype=dtype), expected=np.array([-80, 54, -28, 8], dtype=dtype)) self._testBinary( - gen_math_ops._sigmoid_grad, + gen_math_ops.sigmoid_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([-60, -36, -14, 0], dtype=dtype)) self._testBinary( - gen_math_ops._rsqrt_grad, + gen_math_ops.rsqrt_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([-160, -81, -28, -4], dtype=dtype)) self._testBinary( - gen_math_ops._sqrt_grad, + gen_math_ops.sqrt_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([0.625, 1, 1.75, 4], 
dtype=dtype)) self._testBinary( - gen_nn_ops._softplus_grad, + gen_nn_ops.softplus_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array( [3.97322869, 2.99258232, 1.99817801, 0.99966466], dtype=dtype)) self._testBinary( - gen_nn_ops._softsign_grad, + gen_nn_ops.softsign_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array( [0.11111111, 0.06122449, 0.03125, 0.01234568], dtype=dtype)) self._testBinary( - gen_math_ops._tanh_grad, + gen_math_ops.tanh_grad, np.array([4, 3, 2, 1], dtype=dtype), np.array([5, 6, 7, 8], dtype=dtype), expected=np.array([-75, -48, -21, 0], dtype=dtype)) self._testBinary( - gen_nn_ops._elu_grad, + gen_nn_ops.elu_grad, np.array([1, 2, 3, 4, 5, 6], dtype=dtype), np.array([-.6, -.4, -.2, 0, .2, .4], dtype=dtype), expected=np.array([0.4, 1.2, 2.4, 4, 5, 6], dtype=dtype)) self._testBinary( - gen_nn_ops._selu_grad, + gen_nn_ops.selu_grad, np.array([1, 2, 3, 4, 5, 6], dtype=dtype), np.array([-.6, -.4, -.2, .2, .4, .6], dtype=dtype), expected=np.array( @@ -166,20 +166,20 @@ class BinaryOpsTest(XLATestCase): 4.202803949422, 5.2535049367774, 6.30420592413], dtype=dtype)) self._testBinary( - gen_nn_ops._relu_grad, + gen_nn_ops.relu_grad, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dtype), np.array([0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10], dtype=dtype)) self._testBinary( - gen_nn_ops._relu6_grad, + gen_nn_ops.relu6_grad, np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12], dtype=dtype), np.array( [0, 0, 0, 0, 0, 0.1, 0.3, 0.5, 0.7, 0.9, 6.1, 10.0], dtype=dtype), expected=np.array([0, 0, 0, 0, 0, 6, 7, 8, 9, 10, 0, 0], dtype=dtype)) self._testBinary( - gen_nn_ops._softmax_cross_entropy_with_logits, + gen_nn_ops.softmax_cross_entropy_with_logits, np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=dtype), np.array([[0.1, 0.2, 0.3, 0.4], [0.4, 0.3, 0.2, 0.1]], dtype=dtype), expected=[ @@ -191,7 +191,7 @@ class 
BinaryOpsTest(XLATestCase): equality_test=self.ListsAreClose) self._testBinary( - gen_nn_ops._sparse_softmax_cross_entropy_with_logits, + gen_nn_ops.sparse_softmax_cross_entropy_with_logits, np.array([[0.1, 0.2, 0.3, 0.4], [0.5, 0.6, 0.7, 0.8], [0.9, 1.0, 1.1, 1.2]], dtype=dtype), np.array([2, 1, 7], dtype=np.int32), @@ -207,7 +207,7 @@ class BinaryOpsTest(XLATestCase): def testIntOps(self): for dtype in self.int_types: self._testBinary( - gen_math_ops._truncate_div, + gen_math_ops.truncate_div, np.array([3, 3, -1, -9, -8], dtype=dtype), np.array([2, -2, 7, 2, -4], dtype=dtype), expected=np.array([1, -1, 0, -4, 2], dtype=dtype)) @@ -369,7 +369,7 @@ class BinaryOpsTest(XLATestCase): expected=np.array([[[[False, True], [True, False]]]], dtype=dtype)) self._testBinary( - gen_math_ops._real_div, + gen_math_ops.real_div, np.array([3, 3j, -1.5j, -8, 2 + 3j, 2 + 4j], dtype=dtype), np.array([2, -2, 7j, -4j, 4 - 6j, 1 + 2j], dtype=dtype), expected=np.array( @@ -378,7 +378,7 @@ class BinaryOpsTest(XLATestCase): # Test inf/nan scenarios. 
self._testBinary( - gen_math_ops._real_div, + gen_math_ops.real_div, np.array([4 + 3j, 4, 3j, -4, -4j, 2 - 3j], dtype=dtype), np.array([0, 0, 0, 0, 0, 0], dtype=dtype), expected=np.array( @@ -418,19 +418,19 @@ class BinaryOpsTest(XLATestCase): lhs = np.array([4 + 2j, -3 - 1j, 2j, 1], dtype=dtype) rhs = np.array([5, -6j, 7 - 3j, -8j], dtype=dtype) self._testBinary( - gen_math_ops._reciprocal_grad, lhs, rhs, expected=-rhs * lhs * lhs) + gen_math_ops.reciprocal_grad, lhs, rhs, expected=-rhs * lhs * lhs) self._testBinary( - gen_math_ops._sigmoid_grad, lhs, rhs, expected=rhs * lhs * (1 - lhs)) + gen_math_ops.sigmoid_grad, lhs, rhs, expected=rhs * lhs * (1 - lhs)) self._testBinary( - gen_math_ops._rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2) + gen_math_ops.rsqrt_grad, lhs, rhs, expected=lhs**3 * rhs / -2) self._testBinary( - gen_math_ops._sqrt_grad, lhs, rhs, expected=rhs / (2 * lhs)) + gen_math_ops.sqrt_grad, lhs, rhs, expected=rhs / (2 * lhs)) self._testBinary( - gen_math_ops._tanh_grad, lhs, rhs, expected=rhs * (1 - lhs * lhs)) + gen_math_ops.tanh_grad, lhs, rhs, expected=rhs * (1 - lhs * lhs)) def testComplexMath(self): for dtype in self.complex_types: @@ -538,7 +538,7 @@ class BinaryOpsTest(XLATestCase): if dtype not in self.complex_types: # floordiv unsupported for complex. 
self._testBinary( - gen_math_ops._floor_div, + gen_math_ops.floor_div, np.array([3, 3, -1, -9, -8], dtype=dtype), np.array([2, -2, 7, 2, -4], dtype=dtype), expected=np.array([1, -2, -1, -5, 2], dtype=dtype)) @@ -554,12 +554,12 @@ class BinaryOpsTest(XLATestCase): def _testRemainder(self, dtype): """Test cases for remainder operators.""" self._testBinary( - gen_math_ops._floor_mod, + gen_math_ops.floor_mod, np.array([3, 3, -1, -8], dtype=dtype), np.array([2, -2, 7, -4], dtype=dtype), expected=np.array([1, -1, 6, 0], dtype=dtype)) self._testBinary( - gen_math_ops._truncate_mod, + gen_math_ops.truncate_mod, np.array([3, 3, -1, -8], dtype=dtype), np.array([2, -2, 7, -4], dtype=dtype), expected=np.array([1, 1, -1, 0], dtype=dtype)) diff --git a/tensorflow/compiler/tests/concat_ops_test.py b/tensorflow/compiler/tests/concat_ops_test.py index 81734082d9..f10973e19f 100644 --- a/tensorflow/compiler/tests/concat_ops_test.py +++ b/tensorflow/compiler/tests/concat_ops_test.py @@ -301,7 +301,7 @@ class ConcatOffsetTest(XLATestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) s2 = constant_op.constant([2, 20, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]]) diff --git a/tensorflow/compiler/tests/image_ops_test.py b/tensorflow/compiler/tests/image_ops_test.py index 538fa8e8e5..3bc41b7cfd 100644 --- a/tensorflow/compiler/tests/image_ops_test.py +++ b/tensorflow/compiler/tests/image_ops_test.py @@ -426,7 +426,7 @@ class ResizeBilinearTest(XLATestCase): with self.test_session() as sess, self.test_scope(): dtype = dtype or np.float32 grads = array_ops.placeholder(np.float32) - resized = gen_image_ops._resize_bilinear_grad( + resized = gen_image_ops.resize_bilinear_grad( grads, np.zeros([1, input_shape[0], input_shape[1], 1], dtype=dtype), align_corners=True) diff 
--git a/tensorflow/compiler/tests/lrn_ops_test.py b/tensorflow/compiler/tests/lrn_ops_test.py index 5d8d89224d..69bd8f7230 100644 --- a/tensorflow/compiler/tests/lrn_ops_test.py +++ b/tensorflow/compiler/tests/lrn_ops_test.py @@ -115,11 +115,11 @@ class LRNTest(XLATestCase): out_image = constant_op.constant(out_image_vals, shape=shape) out_grads = constant_op.constant(out_grads_vals, shape=shape) with ops.device(CPU_DEVICE): - expected = gen_nn_ops._lrn_grad(out_grads, in_image, out_image, - depth_radius, bias, alpha, beta) + expected = gen_nn_ops.lrn_grad(out_grads, in_image, out_image, + depth_radius, bias, alpha, beta) with self.test_scope(): - actual = gen_nn_ops._lrn_grad(out_grads, in_image, out_image, - depth_radius, bias, alpha, beta) + actual = gen_nn_ops.lrn_grad(out_grads, in_image, out_image, + depth_radius, bias, alpha, beta) expected_val = expected.eval() actual_val = actual.eval() self.assertAllClose(actual_val, expected_val, rtol=1e-3) diff --git a/tensorflow/compiler/tests/pooling_ops_3d_test.py b/tensorflow/compiler/tests/pooling_ops_3d_test.py index eb48fe555a..4eed903963 100644 --- a/tensorflow/compiler/tests/pooling_ops_3d_test.py +++ b/tensorflow/compiler/tests/pooling_ops_3d_test.py @@ -33,7 +33,7 @@ from tensorflow.python.platform import test # MaxPoolGrad. def _AvgPoolGrad(inputs, outputs, output_gradients, ksize, strides, padding): del outputs # Unused by average-pooling gradients. 
- return gen_nn_ops._avg_pool3d_grad( + return gen_nn_ops.avg_pool3d_grad( inputs.get_shape().as_list(), output_gradients, ksize=ksize, @@ -263,7 +263,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding1_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[1, 3, 3, 3, 1], ksize=[1, 1, 1], strides=[1, 1, 1], @@ -272,7 +272,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding2_1_6_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 3, 6, 3], ksize=[2, 2, 2], strides=[1, 1, 1], @@ -281,7 +281,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding2_1_7_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 5, 7, 3], ksize=[2, 2, 2], strides=[1, 1, 1], @@ -290,7 +290,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradValidPadding2_2_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 2, 2, 2, 3], ksize=[2, 2, 2], strides=[2, 2, 2], @@ -299,7 +299,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding1_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 2, 4, 1], ksize=[1, 1, 1], strides=[1, 1, 1], @@ -308,7 +308,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding2_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 3, 2, 4, 1], ksize=[2, 2, 2], strides=[1, 1, 1], @@ -317,7 +317,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding2_2_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[2, 5, 2, 4, 3], ksize=[2, 2, 2], strides=[2, 2, 2], 
@@ -326,7 +326,7 @@ class Pooling3DTest(XLATestCase): def testMaxPoolGradSamePadding3_1_3d(self): self._VerifyGradient( nn_ops.max_pool3d, - gen_nn_ops._max_pool3d_grad, + gen_nn_ops.max_pool3d_grad, input_sizes=[1, 3, 3, 7, 1], ksize=[3, 3, 3], strides=[1, 1, 1], diff --git a/tensorflow/compiler/tests/pooling_ops_test.py b/tensorflow/compiler/tests/pooling_ops_test.py index 7c19a99c4e..e0e85295fe 100644 --- a/tensorflow/compiler/tests/pooling_ops_test.py +++ b/tensorflow/compiler/tests/pooling_ops_test.py @@ -459,7 +459,7 @@ class PoolGradTest(XLATestCase): padding="SAME") def testMaxPool(self): - self._TestPooling(nn_ops.max_pool, gen_nn_ops._max_pool_grad) + self._TestPooling(nn_ops.max_pool, gen_nn_ops.max_pool_grad) def testAvgPool(self): # Wrapper around AvgPoolGrad that ignores extra arguments needed by @@ -467,7 +467,7 @@ class PoolGradTest(XLATestCase): def AvgPoolGrad(inputs, outputs, output_gradients, ksize, strides, padding, data_format): del outputs # Unused by average-pooling gradients. 
- return gen_nn_ops._avg_pool_grad( + return gen_nn_ops.avg_pool_grad( inputs.get_shape().as_list(), output_gradients, ksize=ksize, @@ -483,7 +483,7 @@ class PoolGradTest(XLATestCase): def testMaxPoolKernelSmallerThanStrideValid(self): self._VerifyValues( nn_ops.max_pool, - gen_nn_ops._max_pool_grad, + gen_nn_ops.max_pool_grad, input_sizes=[1, 7, 7, 1], ksize=[1, 2, 2, 1], strides=[1, 3, 3, 1], @@ -492,7 +492,7 @@ class PoolGradTest(XLATestCase): def testMaxPoolKernelSmallerThanStrideSame(self): self._VerifyValues( nn_ops.max_pool, - gen_nn_ops._max_pool_grad, + gen_nn_ops.max_pool_grad, input_sizes=[1, 3, 3, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], @@ -500,7 +500,7 @@ class PoolGradTest(XLATestCase): self._VerifyValues( nn_ops.max_pool, - gen_nn_ops._max_pool_grad, + gen_nn_ops.max_pool_grad, input_sizes=[1, 4, 4, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], diff --git a/tensorflow/compiler/tests/spacetobatch_op_test.py b/tensorflow/compiler/tests/spacetobatch_op_test.py index c013f4b50a..92518aadc4 100644 --- a/tensorflow/compiler/tests/spacetobatch_op_test.py +++ b/tensorflow/compiler/tests/spacetobatch_op_test.py @@ -75,11 +75,11 @@ class SpaceToBatchTest(XLATestCase): for dtype in self.float_types: # outputs = space_to_batch(inputs) placeholder = array_ops.placeholder(dtype) - x_tf = gen_array_ops._space_to_batch( + x_tf = gen_array_ops.space_to_batch( placeholder, paddings, block_size=block_size) self.assertAllEqual(sess.run(x_tf, {placeholder: inputs}), outputs) # inputs = batch_to_space(outputs) - x_tf = gen_array_ops._batch_to_space( + x_tf = gen_array_ops.batch_to_space( placeholder, paddings, block_size=block_size) self.assertAllEqual(sess.run(x_tf, {placeholder: outputs}), inputs) diff --git a/tensorflow/compiler/tests/stack_ops_test.py b/tensorflow/compiler/tests/stack_ops_test.py index 2b9c227973..94342f9567 100644 --- a/tensorflow/compiler/tests/stack_ops_test.py +++ b/tensorflow/compiler/tests/stack_ops_test.py @@ -34,33 +34,33 @@ class 
StackOpTest(XLATestCase): with self.test_session(), self.test_scope(): size = array_ops.placeholder(dtypes.int32) v = array_ops.placeholder(dtypes.float32) - h = gen_data_flow_ops._stack_v2(size, dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, v) + h = gen_data_flow_ops.stack_v2(size, dtypes.float32, stack_name="foo") + c = gen_data_flow_ops.stack_push_v2(h, v) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose([[4.0, 5.0]], c1.eval({size: 5, v: [[4.0, 5.0]]})) def testStackPushPopSwap(self): with self.test_session(), self.test_scope(): a = np.arange(2000) x = array_ops.placeholder(dtypes.float32) - h = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, x, swap_memory=True) + h = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + c = gen_data_flow_ops.stack_push_v2(h, x, swap_memory=True) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose(a, c1.eval({x: a})) def testMultiStack(self): with self.test_session(), self.test_scope(): v = array_ops.placeholder(dtypes.float32) - h1 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, v) + h1 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + c1 = gen_data_flow_ops.stack_push_v2(h1, v) with ops.control_dependencies([c1]): - c1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - h2 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="bar") - c2 = gen_data_flow_ops._stack_push_v2(h2, 5.0) + c1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + h2 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="bar") + c2 = gen_data_flow_ops.stack_push_v2(h2, 5.0) with ops.control_dependencies([c2]): - c2 = 
gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + c2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) r = c1 + c2 self.assertAllClose(9.0, r.eval({v: 4.0})) @@ -69,15 +69,15 @@ class StackOpTest(XLATestCase): with self.test_session() as sess, self.test_scope(): v1 = array_ops.placeholder(dtypes.float32) v2 = array_ops.placeholder(dtypes.float32) - h1 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - h2 = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") + h1 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + h2 = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, v1) + c1 = gen_data_flow_ops.stack_push_v2(h1, v1) with ops.control_dependencies([c1]): - c2 = gen_data_flow_ops._stack_push_v2(h2, v2) + c2 = gen_data_flow_ops.stack_push_v2(h2, v2) with ops.control_dependencies([c2]): - pop1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - pop2 = gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + pop1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + pop2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) out1, out2 = sess.run([pop1, pop2], {v1: 4.0, v2: 5.0}) self.assertAllClose(out1, 4.0) @@ -86,17 +86,17 @@ class StackOpTest(XLATestCase): def testCloseStack(self): with self.test_session() as sess, self.test_scope(): size = array_ops.placeholder(dtypes.int32) - h = gen_data_flow_ops._stack_v2(size, dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_close_v2(h) + h = gen_data_flow_ops.stack_v2(size, dtypes.float32, stack_name="foo") + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1, {size: 5}) def testPushCloseStack(self): with self.test_session() as sess, self.test_scope(): v = array_ops.placeholder(dtypes.float32) - h = gen_data_flow_ops._stack_v2(5, dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, v) + h = gen_data_flow_ops.stack_v2(5, dtypes.float32, stack_name="foo") + c = 
gen_data_flow_ops.stack_push_v2(h, v) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_close_v2(h) + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1, {v: [[4.0, 5.0]]}) diff --git a/tensorflow/compiler/tests/tensor_array_ops_test.py b/tensorflow/compiler/tests/tensor_array_ops_test.py index a62925a181..7624d6e4b2 100644 --- a/tensorflow/compiler/tests/tensor_array_ops_test.py +++ b/tensorflow/compiler/tests/tensor_array_ops_test.py @@ -338,7 +338,7 @@ class TensorArrayTest(xla_test.XLATestCase): w0 = ta.write(0, [[4.0, 5.0]]) # Test reading wrong datatype. - r0_bad = gen_data_flow_ops._tensor_array_read_v3( + r0_bad = gen_data_flow_ops.tensor_array_read_v3( handle=w0.handle, index=0, dtype=dtype2, flow_in=w0.flow) with self.assertRaisesOpError("TensorArray dtype is "): r0_bad.eval() diff --git a/tensorflow/contrib/lookup/lookup_ops.py b/tensorflow/contrib/lookup/lookup_ops.py index a430dac4ec..62f1c810fc 100644 --- a/tensorflow/contrib/lookup/lookup_ops.py +++ b/tensorflow/contrib/lookup/lookup_ops.py @@ -341,23 +341,21 @@ class MutableHashTable(LookupInterface): # training to work correctly. Use the node name if no shared_name has been # explicitly specified. 
use_node_name_sharing = checkpoint and shared_name is None - # pylint: disable=protected-access if self._default_value.get_shape().ndims == 0: - self._table_ref = gen_lookup_ops._mutable_hash_table_v2( + self._table_ref = gen_lookup_ops.mutable_hash_table_v2( shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=key_dtype, value_dtype=value_dtype, name=name) else: - self._table_ref = gen_lookup_ops._mutable_hash_table_of_tensors_v2( + self._table_ref = gen_lookup_ops.mutable_hash_table_of_tensors_v2( shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, key_dtype=key_dtype, value_dtype=value_dtype, value_shape=self._default_value.get_shape(), name=name) - # pylint: enable=protected-access super(MutableHashTable, self).__init__(key_dtype, value_dtype, self._table_ref.op.name.split( "/")[-1]) @@ -378,9 +376,7 @@ class MutableHashTable(LookupInterface): with ops.name_scope(name, "%s_Size" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - - # pylint: disable=protected-access - return gen_lookup_ops._lookup_table_size_v2(self._table_ref, name=name) + return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name) def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. 
@@ -406,8 +402,7 @@ class MutableHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_find" % self._name, (self._table_ref, keys, self._default_value)) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - values = gen_lookup_ops._lookup_table_find_v2( + values = gen_lookup_ops.lookup_table_find_v2( self._table_ref, keys, self._default_value, name=name) values.set_shape(keys.get_shape().concatenate(self._value_shape)) @@ -437,7 +432,7 @@ class MutableHashTable(LookupInterface): [self._table_ref, keys, values]) as name: with ops.colocate_with(self._table_ref): # pylint: disable=protected-access - op = gen_lookup_ops._lookup_table_insert_v2( + op = gen_lookup_ops.lookup_table_insert_v2( self._table_ref, keys, values, name=name) return op @@ -454,8 +449,7 @@ class MutableHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_export_values" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - exported_keys, exported_values = gen_lookup_ops._lookup_table_export_v2( + exported_keys, exported_values = gen_lookup_ops.lookup_table_export_v2( self._table_ref, self._key_dtype, self._value_dtype, name=name) exported_values.set_shape(exported_keys.get_shape().concatenate( @@ -477,7 +471,7 @@ class MutableHashTable(LookupInterface): def restore(self, restored_tensors, unused_restored_shapes): # pylint: disable=protected-access with ops.colocate_with(self.op._table_ref): - return gen_lookup_ops._lookup_table_import_v2( + return gen_lookup_ops.lookup_table_import_v2( self.op._table_ref, restored_tensors[0], restored_tensors[1]) @@ -551,8 +545,7 @@ class MutableDenseHashTable(LookupInterface): # explicitly specified. 
use_node_name_sharing = checkpoint and shared_name is None empty_key = ops.convert_to_tensor(empty_key, dtype=key_dtype) - # pylint: disable=protected-access - self._table_ref = gen_lookup_ops._mutable_dense_hash_table_v2( + self._table_ref = gen_lookup_ops.mutable_dense_hash_table_v2( empty_key=empty_key, shared_name=shared_name, use_node_name_sharing=use_node_name_sharing, @@ -560,7 +553,6 @@ class MutableDenseHashTable(LookupInterface): value_shape=self._value_shape, initial_num_buckets=initial_num_buckets, name=name) - # pylint: enable=protected-access super(MutableDenseHashTable, self).__init__( key_dtype, value_dtype, self._table_ref.op.name.split("/")[-1]) @@ -580,8 +572,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_Size" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - return gen_lookup_ops._lookup_table_size_v2(self._table_ref, name=name) + return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=name) def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. 
@@ -607,8 +598,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_find" % self._name, [self._table_ref, keys]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - values = gen_lookup_ops._lookup_table_find_v2( + values = gen_lookup_ops.lookup_table_find_v2( self._table_ref, keys, self._default_value, name=name) if keys.get_shape().ndims is not None and keys.get_shape().ndims > 0: @@ -640,8 +630,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_insert" % self._name, [self._table_ref, keys, values]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - op = gen_lookup_ops._lookup_table_insert_v2( + op = gen_lookup_ops.lookup_table_insert_v2( self._table_ref, keys, values, name=name) return op @@ -658,8 +647,7 @@ class MutableDenseHashTable(LookupInterface): with ops.name_scope(name, "%s_lookup_table_export_values" % self._name, [self._table_ref]) as name: with ops.colocate_with(self._table_ref): - # pylint: disable=protected-access - exported_keys, exported_values = gen_lookup_ops._lookup_table_export_v2( + exported_keys, exported_values = gen_lookup_ops.lookup_table_export_v2( self._table_ref, self._key_dtype, self._value_dtype, name=name) exported_values.set_shape(exported_keys.get_shape().concatenate( @@ -681,5 +669,5 @@ class MutableDenseHashTable(LookupInterface): def restore(self, restored_tensors, unused_restored_shapes): # pylint: disable=protected-access with ops.colocate_with(self.op._table_ref): - return gen_lookup_ops._lookup_table_import_v2( + return gen_lookup_ops.lookup_table_import_v2( self.op._table_ref, restored_tensors[0], restored_tensors[1]) diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index 02ed5517ca..d6715fa522 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -198,13 +198,9 @@ tf_export('TensorInfo')(TensorInfo) 
_allowed_symbols.extend([ 'arg_max', 'arg_min', - 'mul', # use tf.multiply instead. - 'neg', # use tf.negative instead. - 'sub', # use tf.subtract instead. 'create_partitioned_variables', 'deserialize_many_sparse', 'lin_space', - 'list_diff', # Use tf.listdiff instead. 'listdiff', # Use tf.listdiff instead. 'parse_single_sequence_example', 'serialize_many_sparse', diff --git a/tensorflow/python/debug/lib/debug_gradients.py b/tensorflow/python/debug/lib/debug_gradients.py index 16f51a4b32..589a13db7f 100644 --- a/tensorflow/python/debug/lib/debug_gradients.py +++ b/tensorflow/python/debug/lib/debug_gradients.py @@ -156,11 +156,12 @@ class GradientsDebugger(object): # TODO(cais): Implement value_stack. grad_debug_op_name = _tensor_to_grad_debug_op_name(input_tensor, self._uuid) # pylint: disable=protected-access - identity_op = (gen_array_ops._debug_gradient_ref_identity - if input_tensor.dtype._is_ref_dtype - else gen_array_ops._debug_gradient_identity) - debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name) + identity_op = ( + gen_array_ops.debug_gradient_ref_identity + if input_tensor.dtype._is_ref_dtype else + gen_array_ops.debug_gradient_identity) # pylint: enable=protected-access + debug_grad_identity = identity_op(input_tensor, name=grad_debug_op_name) assert debug_grad_identity.dtype == input_tensor.dtype if debug_grad_identity.op.name != grad_debug_op_name: raise ValueError( diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index b56cbe80a7..228ff62b20 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -243,7 +243,8 @@ class MicroBenchmarks(test.Benchmark): def _benchmark_gen_math_ops_matmul(self, m, transpose_b, num_iters): def func(): - gen_math_ops._mat_mul(m, m, transpose_b=transpose_b) + gen_math_ops.mat_mul(m, m, transpose_b=transpose_b) + self._run(func, num_iters) def _benchmark_tfe_py_fastpath_execute_matmul(self, m, 
transpose_b, diff --git a/tensorflow/python/eager/ops_test.py b/tensorflow/python/eager/ops_test.py index 553571d267..f70c7544d6 100644 --- a/tensorflow/python/eager/ops_test.py +++ b/tensorflow/python/eager/ops_test.py @@ -131,8 +131,12 @@ class OpsTest(test_util.TensorFlowTestCase): dtype=dtypes.int64) values = constant_op.constant([2, 3, 5, 7, 11]) shape = constant_op.constant([2, 7], dtype=dtypes.int64) - result = sparse_ops.gen_sparse_ops._sparse_split( # pylint: disable=protected-access - split_dim, indices, values, shape, num_split=2) + result = sparse_ops.gen_sparse_ops.sparse_split( + split_dim, + indices, + values, + shape, + num_split=2) output_indices, output_values, output_shape = result self.assertEqual(2, len(output_indices)) self.assertEqual(2, len(output_values)) diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index 554e29c7e0..3de7445a50 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -955,10 +955,10 @@ from tensorflow.python.util.tf_export import tf_export if (api_def->visibility() == ApiDef::SKIP) { continue; } - // An op is hidden if either its ApiDef visibility is HIDDEN // or it is in the hidden_ops list. bool is_hidden = api_def->visibility() == ApiDef::HIDDEN; + bool hidden_by_api_def = is_hidden; if (!is_hidden) { for (const string& hidden : hidden_ops) { if (op_def.name() == hidden) { @@ -971,13 +971,22 @@ from tensorflow.python.util.tf_export import tf_export string function_name; python_op_gen_internal::GenerateLowerCaseOpName(op_def.name(), &function_name); - if (is_hidden) function_name = strings::StrCat("_", function_name); - - // When users create custom python wrappers, they may link in the - // default op registry by accident, and because they can't - // enumerate all 'hidden' symbols, this guard is to prevent - // instantiating a python reserved word in their wrapper. 
- if (python_op_gen_internal::IsPythonReserved(function_name)) { + bool is_reserved = python_op_gen_internal::IsPythonReserved(function_name); + + // Prefix an op with underscore if the op is listed in hidden_ops or name + // is reserved or it is one of the exceptions in IsOpWithUnderscorePrefix. + // Do not add underscores to ops set to HIDDEN in ApiDef otherwise. + // TODO(annarev): don't prefix with underscores even if op is in hidden_ops. + if (is_hidden) { + if (!hidden_by_api_def || is_reserved || + python_op_gen_internal::IsOpWithUnderscorePrefix(function_name)) { + function_name = strings::StrCat("_", function_name); + } + } else if (is_reserved) { + // When users create custom python wrappers, they may link in the + // default op registry by accident, and because they can't + // enumerate all 'hidden' symbols, this guard is to prevent + // instantiating a python reserved word in their wrapper. continue; } diff --git a/tensorflow/python/framework/function_test.py b/tensorflow/python/framework/function_test.py index 52052ba77d..65ca801cbe 100644 --- a/tensorflow/python/framework/function_test.py +++ b/tensorflow/python/framework/function_test.py @@ -193,7 +193,7 @@ class FunctionTest(test.TestCase): @function.Defun(dtypes.float32, dtypes.float32) def XSquarePlusOneGrad(x, dy): - dx = functional_ops._symbolic_gradient( + dx = functional_ops.symbolic_gradient( input=[x, dy], Tout=[dtypes.float32], f="XSquarePlusOneFn", name="dx") return dx @@ -295,7 +295,7 @@ class FunctionTest(test.TestCase): # gradient function is (x, y, dz) -> (dx, dy). dx's shape # should be the same as x's; and dy's shape should be the same # as y's. 
- dx, dy = functional_ops._symbolic_gradient( + dx, dy = functional_ops.symbolic_gradient( input=[x, y, dz], Tout=[dtypes.float32] * 2, f="Foo") self.assertEqual(x.get_shape(), dx.get_shape()) self.assertEqual(y.get_shape(), dy.get_shape()) diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index 0421837d49..1cdd738198 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -47,46 +47,46 @@ class DeviceFunctionsTest(test.TestCase): def testTwoDeviceFunctions(self): with ops.Graph().as_default() as g: - var_0 = gen_state_ops._variable( + var_0 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_0", container="", shared_name="") with g.device(test_device_func_pin_variable_to_cpu): - var_1 = gen_state_ops._variable( + var_1 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_1", container="", shared_name="") - var_2 = gen_state_ops._variable( + var_2 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_2", container="", shared_name="") - var_3 = gen_state_ops._variable( + var_3 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_3", container="", shared_name="") with g.device(test_device_func_pin_variable_to_cpu): - var_4 = gen_state_ops._variable( + var_4 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_4", container="", shared_name="") with g.device("/device:GPU:0"): - var_5 = gen_state_ops._variable( + var_5 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_5", container="", shared_name="") - var_6 = gen_state_ops._variable( + var_6 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="var_6", diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 1f2dfb8d43..55576f0e88 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -2892,7 +2892,7 @@ 
class OutputTypesTest(test_util.TensorFlowTestCase): with g.as_default(): x = constant_op.constant([1, 1, 2, 4, 4, 4, 7, 8, 8], dtype=dtypes.double) - y, _ = gen_array_ops._unique(x) + y, _ = gen_array_ops.unique(x) self.assertEqual([types_pb2.DT_DOUBLE, types_pb2.DT_INT32], y.op._output_types) # pylint: disable=protected-access diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index c95149d177..4813458f07 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -75,6 +75,38 @@ bool IsPythonReserved(const string& s) { return kPythonReserved->count(s) > 0; } +bool IsOpWithUnderscorePrefix(const string& s) { + static const std::set* const kUnderscoreOps = new std::set( + {// Lowercase built-in functions and types in Python, from: + // [x for x in dir(__builtins__) if x[0].islower()] + // These need to be excluded so they don't conflict with actual built-in + // functions since we use '*' imports. + "abs", "all", "any", "apply", "bin", "bool", "buffer", "bytearray", + "bytes", "callable", "chr", "classmethod", "cmp", "coerce", "compile", + "complex", "copyright", "credits", "delattr", "dict", "dir", "divmod", + "enumerate", "eval", "execfile", "exit", "file", "filter", "float", + "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", + "hex", "id", "input", "int", "intern", "isinstance", "issubclass", + "iter", "len", "license", "list", "locals", "long", "map", "max", + "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", + "print", "property", "quit", "range", "raw_input", "reduce", "reload", + "repr", "reversed", "round", "set", "setattr", "slice", "sorted", + "staticmethod", "str", "sum", "super", "tuple", "type", "unichr", + "unicode", "vars", "xrange", "zip", + // These have the same name as ops defined in Python and might be used + // incorrectly depending on order of '*' imports. 
+ // TODO(annarev): reduce usage of '*' imports and remove these from the + // list. + "fused_batch_norm", "histogram_fixed_width", "stack", + "batch_norm_with_global_normalization", + // TODO(annarev): replace these ops in the next change. + "add_sparse_to_tensors_map", "add_many_sparse_to_tensors_map", + "broadcast_gradient_args", "concat", "enter", "histogram_summary", + "ref_enter", "ref_identity", "scalar_summary", + "take_many_sparse_from_tensors_map"}); + return kUnderscoreOps->count(s) > 0; +} + string AvoidPythonReserved(const string& s) { if (IsPythonReserved(s)) return strings::StrCat(s, "_"); return s; @@ -816,6 +848,7 @@ from tensorflow.python.util.tf_export import tf_export // An op is hidden if either its ApiDef visibility is HIDDEN // or it is in the hidden_ops list. bool is_hidden = api_def->visibility() == ApiDef::HIDDEN; + bool hidden_by_api_def = is_hidden; if (!is_hidden) { for (const string& hidden : hidden_ops) { if (op_def.name() == hidden) { @@ -828,13 +861,22 @@ from tensorflow.python.util.tf_export import tf_export string function_name; python_op_gen_internal::GenerateLowerCaseOpName(op_def.name(), &function_name); - if (is_hidden) function_name = strings::StrCat("_", function_name); - - // When users create custom python wrappers, they may link in the - // default op registry by accident, and because they can't - // enumerate all 'hidden' symbols, this guard is to prevent - // instantiating a python reserved word in their wrapper. - if (python_op_gen_internal::IsPythonReserved(function_name)) { + bool is_reserved = python_op_gen_internal::IsPythonReserved(function_name); + + // Prefix an op with underscore if the op is listed in hidden_ops or + // name is reserved or it is of the exceptions in IsOpWithUnderscorePrefix. + // Do not add underscores to ops set to HIDDEN in ApiDef otherwise. + // TODO(annarev): don't prefix with underscores even if op is in hidden_ops. 
+ if (is_hidden) { + if (!hidden_by_api_def || is_reserved || + python_op_gen_internal::IsOpWithUnderscorePrefix(function_name)) { + function_name = strings::StrCat("_", function_name); + } + } else if (is_reserved) { + // When users create custom python wrappers, they may link in the + // default op registry by accident, and because they can't + // enumerate all 'hidden' symbols, this guard is to prevent + // instantiating a python reserved word in their wrapper. continue; } diff --git a/tensorflow/python/framework/python_op_gen_internal.h b/tensorflow/python/framework/python_op_gen_internal.h index 4319e5a782..e0cfb05f4b 100644 --- a/tensorflow/python/framework/python_op_gen_internal.h +++ b/tensorflow/python/framework/python_op_gen_internal.h @@ -29,6 +29,9 @@ namespace python_op_gen_internal { // Returns true if s is a Python keyword or built-in. bool IsPythonReserved(const string& s); +// Whether the op should be prefixed with underscore. +bool IsOpWithUnderscorePrefix(const string& s); + // Add a _ to the end of s if necessary to avoid a Python keyword or built-in. 
string AvoidPythonReserved(const string& s); diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index bea0ee34fd..6b1b3dd40c 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -768,7 +768,7 @@ class ConstantValueTest(test.TestCase): self.assertAllClose(np_val, tensor_util.constant_value(tf_val)) def testUnknown(self): - tf_val = gen_state_ops._variable( + tf_val = gen_state_ops.variable( shape=[3, 4, 7], dtype=dtypes.float32, name="tf_val", diff --git a/tensorflow/python/grappler/layout_optimizer_test.py b/tensorflow/python/grappler/layout_optimizer_test.py index 0f51501740..5a84b16a23 100644 --- a/tensorflow/python/grappler/layout_optimizer_test.py +++ b/tensorflow/python/grappler/layout_optimizer_test.py @@ -321,7 +321,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) dim = array_ops.placeholder(dtype='int32') sizes = constant_op.constant([50, 10, 4], shape=[3]) - split = gen_array_ops._split_v( + split = gen_array_ops.split_v( value=conv, size_splits=sizes, axis=dim, num_split=3) output = math_ops.reduce_sum(split[0]) @@ -896,7 +896,7 @@ class LayoutOptimizerTest(test.TestCase): add = math_ops.add(conv, conv) mean = math_ops.reduce_mean(conv) condition = math_ops.less(conv, mean) - select = gen_math_ops._select(condition, conv, add) + select = gen_math_ops.select(condition, conv, add) output = array_ops.identity(select) with session.Session(config=_get_config(False)) as sess: @@ -926,7 +926,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) add = math_ops.add(conv, conv) condition = array_ops.placeholder(dtype='bool') - select = gen_math_ops._select(condition, conv, add) + select = gen_math_ops.select(condition, conv, add) output = array_ops.identity(select) condition_val = np.zeros((1, 7, 7, 64)) @@ -957,7 +957,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) add = 
math_ops.add(conv, conv) condition = constant_op.constant(True) - select = gen_math_ops._select(condition, conv, add) + select = gen_math_ops.select(condition, conv, add) output = array_ops.identity(select) with session.Session(config=_get_config(False)) as sess: @@ -1023,7 +1023,7 @@ class LayoutOptimizerTest(test.TestCase): conv = _two_layer_model(x) ksize = constant_op.constant([1, 2, 3, 1], shape=[4]) strides = array_ops.placeholder(dtype='int32', shape=[4]) - max_pool = gen_nn_ops._max_pool_v2(conv, ksize, strides, 'VALID') + max_pool = gen_nn_ops.max_pool_v2(conv, ksize, strides, 'VALID') output = array_ops.identity(max_pool) strides_val = [1, 3, 2, 1] diff --git a/tensorflow/python/kernel_tests/array_ops_test.py b/tensorflow/python/kernel_tests/array_ops_test.py index 365cf72108..d35f62b186 100644 --- a/tensorflow/python/kernel_tests/array_ops_test.py +++ b/tensorflow/python/kernel_tests/array_ops_test.py @@ -1223,7 +1223,7 @@ class SnapshotOpTest(test_util.TensorFlowTestCase): for dtype in [dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64]: with self.test_session(use_gpu=True): x = constant_op.constant([0, 1, 2, 3], dtype=dtype) - y = gen_array_ops._snapshot(x) + y = gen_array_ops.snapshot(x) self.assertAllEqual(y.eval(), [0, 1, 2, 3]) diff --git a/tensorflow/python/kernel_tests/batchtospace_op_test.py b/tensorflow/python/kernel_tests/batchtospace_op_test.py index 0c802476a0..6143cd3baa 100644 --- a/tensorflow/python/kernel_tests/batchtospace_op_test.py +++ b/tensorflow/python/kernel_tests/batchtospace_op_test.py @@ -44,7 +44,7 @@ class CppOpImpl(object): @staticmethod def batch_to_space(*args, **kwargs): - return gen_array_ops._batch_to_space(*args, **kwargs) + return gen_array_ops.batch_to_space(*args, **kwargs) class BatchToSpaceDepthToSpace(test.TestCase, PythonOpImpl): diff --git a/tensorflow/python/kernel_tests/bcast_ops_test.py b/tensorflow/python/kernel_tests/bcast_ops_test.py index 9e51234605..cb46fcb007 100644 --- 
a/tensorflow/python/kernel_tests/bcast_ops_test.py +++ b/tensorflow/python/kernel_tests/bcast_ops_test.py @@ -20,8 +20,8 @@ from __future__ import print_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.ops.gen_array_ops import _broadcast_args from tensorflow.python.ops.gen_array_ops import _broadcast_gradient_args +from tensorflow.python.ops.gen_array_ops import broadcast_args from tensorflow.python.platform import test @@ -29,7 +29,7 @@ class BcastOpsTest(test.TestCase): def _GetBroadcastShape(self, xs, ys): with self.test_session() as sess: - return sess.run(_broadcast_args(xs, ys)) + return sess.run(broadcast_args(xs, ys)) def _GetGradientArgs(self, xs, ys): with self.test_session() as sess: diff --git a/tensorflow/python/kernel_tests/checkpoint_ops_test.py b/tensorflow/python/kernel_tests/checkpoint_ops_test.py index a786d0a47e..7f147ba53a 100644 --- a/tensorflow/python/kernel_tests/checkpoint_ops_test.py +++ b/tensorflow/python/kernel_tests/checkpoint_ops_test.py @@ -50,7 +50,7 @@ class GenerateVocabRemappingTest(test.TestCase): def test_generate_remapping_with_no_vocab_changes(self): """Tests where vocab does not change at all.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.old_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=3, @@ -63,7 +63,7 @@ class GenerateVocabRemappingTest(test.TestCase): def test_generate_remapping_with_shifted_vocab(self): """Tests where vocab is the same, but shifted / ordered differently.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.new_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=3, @@ -76,7 +76,7 @@ class GenerateVocabRemappingTest(test.TestCase): def 
test_generate_remapping_with_offset(self): """Tests offset and num_new_vocab logic.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.new_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=1, @@ -89,7 +89,7 @@ class GenerateVocabRemappingTest(test.TestCase): def test_generate_remapping_with_old_vocab_size(self): """Tests where old_vocab_size is specified.""" - remapping, num_present = gen_checkpoint_ops._generate_vocab_remapping( + remapping, num_present = gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=self.new_vocab_file, old_vocab_file=self.old_vocab_file, num_new_vocab=3, @@ -132,7 +132,7 @@ class LoadAndRemapMatrixTest(test.TestCase): # No column remapping, new weight matrix has second row, then first row. row_remapping = [1, 0] - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=row_remapping, @@ -147,7 +147,7 @@ class LoadAndRemapMatrixTest(test.TestCase): # No row remapping, new weight matrix has third col, then first col. row_remapping = list(range(self.old_num_rows)) col_remapping = [2, 0] - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=row_remapping, @@ -162,7 +162,7 @@ class LoadAndRemapMatrixTest(test.TestCase): # Both row and column remappings. 
row_remapping = [1, 0, 4] col_remapping = [1, 15] - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=row_remapping, @@ -177,7 +177,7 @@ class LoadAndRemapMatrixTest(test.TestCase): def test_load_and_remap_with_init(self): """Tests the op's load and remap where there are missing entries.""" init_val = 42 - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[2, -1, 0], @@ -196,7 +196,7 @@ class LoadAndRemapMatrixTest(test.TestCase): """Tests when all the rows are missing and need to be initialized.""" num_rows = 7 initializing_values = [42] * num_rows * self.old_num_cols - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[-1] * num_rows, @@ -214,7 +214,7 @@ class LoadAndRemapMatrixTest(test.TestCase): num_rows = 7 num_cols = 4 initializing_values = [42] * num_rows * num_cols - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[-1] * num_rows, @@ -235,7 +235,7 @@ class LoadAndRemapMatrixTest(test.TestCase): invalid_remapping = [1, 0, 0, 0, 1, 2] # Invalid row remapping. - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=invalid_remapping, @@ -247,7 +247,7 @@ class LoadAndRemapMatrixTest(test.TestCase): remapped_matrix.eval() # Invalid column remapping. 
- remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=list(range(self.old_num_rows)), @@ -260,7 +260,7 @@ class LoadAndRemapMatrixTest(test.TestCase): def test_load_and_remap_incorrect_initializing_values(self): """Tests that errors are raised with incorrect number of init values.""" - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[2, -1, 0], @@ -275,7 +275,7 @@ class LoadAndRemapMatrixTest(test.TestCase): with self.test_session(), self.assertRaises(errors.InvalidArgumentError): remapped_matrix.eval() - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=[self.bundle_file], old_tensor_name=self.old_tensor_name, row_remapping=[2, -1, 0], @@ -314,7 +314,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase): num_rows, num_cols = np_value.shape # Tests loading the entire tensor (except reversed). - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, # Simply reverses the rows of the matrix. @@ -332,7 +332,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase): self.assertGreater(num_rows, 2) prefix_rows = 2 suffix_rows = 3 - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, # Reverses the rows of the matrix, then prepends and appends @@ -353,7 +353,7 @@ class LoadAndRemapMatrixWithMaxRowsTest(test.TestCase): # Tests when everything is taken from initializing_values. 
new_rows = 7 initializing_values = [42] * new_rows * num_cols - remapped_matrix = gen_checkpoint_ops._load_and_remap_matrix( + remapped_matrix = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, # Nothing is loaded from the old tensor. diff --git a/tensorflow/python/kernel_tests/concat_op_test.py b/tensorflow/python/kernel_tests/concat_op_test.py index 127bc6bb20..81c6a4aa6e 100644 --- a/tensorflow/python/kernel_tests/concat_op_test.py +++ b/tensorflow/python/kernel_tests/concat_op_test.py @@ -526,7 +526,7 @@ class ConcatOpTest(test.TestCase): with self.test_session(use_gpu=True): t1 = [] t2 = [] - output = gen_array_ops._concat_v2([t1, t2], 0).eval() + output = gen_array_ops.concat_v2([t1, t2], 0).eval() self.assertFalse(output) # Checks that output is empty def testConcatInvalidAxis(self): @@ -534,20 +534,20 @@ class ConcatOpTest(test.TestCase): with self.test_session(use_gpu=True): t1 = [1] t2 = [2] - gen_array_ops._concat_v2([t1, t2], 1).eval() + gen_array_ops.concat_v2([t1, t2], 1).eval() def testConcatNegativeAxis(self): with self.test_session(use_gpu=True): t1 = [[1, 2, 3], [4, 5, 6]] t2 = [[7, 8, 9], [10, 11, 12]] - c = gen_array_ops._concat_v2([t1, t2], -2) + c = gen_array_ops.concat_v2([t1, t2], -2) self.assertEqual([4, 3], c.get_shape().as_list()) output = c.eval() self.assertAllEqual([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]], output) - c = gen_array_ops._concat_v2([t1, t2], -1) + c = gen_array_ops.concat_v2([t1, t2], -1) self.assertEqual([2, 6], c.get_shape().as_list()) output = c.eval() self.assertAllEqual([[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]], output) @@ -615,7 +615,7 @@ class ConcatOffsetTest(test.TestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) s2 = constant_op.constant([2, 20, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) 
self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]]) @@ -624,7 +624,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([[2, 3, 5]], dtypes.int32) s1 = constant_op.constant([[2, 7, 5]], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, r"should be a vector"): sess.run(off) @@ -634,7 +634,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(4, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, r"Concat dim is out of range: 4 vs. 3"): sess.run(off) @@ -644,7 +644,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5, 10], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp(errors_impl.InvalidArgumentError, r"should contain 3 elem"): sess.run(off) @@ -654,7 +654,7 @@ class ConcatOffsetTest(test.TestCase): cdim = constant_op.constant(1, dtypes.int32) s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 10], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1]) + off = gen_array_ops.concat_offset(cdim, [s0, s1]) with self.assertRaisesRegexp( errors_impl.InvalidArgumentError, r"All dimensions except 1 must match. 
Input 1 has shape \[2 7 10\] " @@ -667,7 +667,7 @@ class ConcatOffsetTest(test.TestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([2, 7, 5], dtypes.int32) s2 = constant_op.constant([2, 20, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) self.assertAllEqual(ans, [[0, 0, 0], [0, 3, 0], [0, 10, 0]]) @@ -675,7 +675,7 @@ class ConcatOffsetTest(test.TestCase): s0 = constant_op.constant([2, 3, 5], dtypes.int32) s1 = constant_op.constant([1, 3, 5], dtypes.int32) s2 = constant_op.constant([3, 3, 5], dtypes.int32) - off = gen_array_ops._concat_offset(cdim, [s0, s1, s2]) + off = gen_array_ops.concat_offset(cdim, [s0, s1, s2]) ans = sess.run(off) self.assertAllEqual(ans, [[0, 0, 0], [2, 0, 0], [3, 0, 0]]) diff --git a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py index 58f38650eb..b429fa5c42 100644 --- a/tensorflow/python/kernel_tests/control_flow_ops_py_test.py +++ b/tensorflow/python/kernel_tests/control_flow_ops_py_test.py @@ -552,7 +552,7 @@ class ControlFlowTest(test.TestCase): def testCondRef(self): with self.test_session(): - x = gen_state_ops._variable( + x = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="x", @@ -580,7 +580,7 @@ class ControlFlowTest(test.TestCase): def testUninitializedRefIdentity(self): with self.test_session() as sess: - v = gen_state_ops._variable( + v = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="v", @@ -1620,7 +1620,7 @@ class ControlFlowTest(test.TestCase): def testWhileStack_1(self): with self.test_session(): - s = gen_data_flow_ops._stack_v2(-1, dtypes.int32, stack_name="foo") + s = gen_data_flow_ops.stack_v2(-1, dtypes.int32, stack_name="foo") i = constant_op.constant(0) def c(i): @@ -1629,7 +1629,7 @@ class ControlFlowTest(test.TestCase): def b(i): ni = math_ops.add(i, 1) ni = 
control_flow_ops.with_dependencies( - [gen_data_flow_ops._stack_push_v2(s, i)], ni) + [gen_data_flow_ops.stack_push_v2(s, i)], ni) return ni r = control_flow_ops.while_loop(c, b, [i], parallel_iterations=1) @@ -1641,7 +1641,7 @@ class ControlFlowTest(test.TestCase): def b1(i, x): ni = math_ops.subtract(i, 1) - nx = x + gen_data_flow_ops._stack_pop_v2(s, dtypes.int32) + nx = x + gen_data_flow_ops.stack_pop_v2(s, dtypes.int32) return [ni, nx] _, rx = control_flow_ops.while_loop( diff --git a/tensorflow/python/kernel_tests/cwise_ops_test.py b/tensorflow/python/kernel_tests/cwise_ops_test.py index 0d9b46c30d..8db0bb6f0d 100644 --- a/tensorflow/python/kernel_tests/cwise_ops_test.py +++ b/tensorflow/python/kernel_tests/cwise_ops_test.py @@ -495,11 +495,11 @@ class UnaryOpTest(test.TestCase): dtype_tols = [(np.float32, 5e-4), (np.float64, 1e-6), (np.complex64, 5e-4), (np.complex128, 1e-6)] op_range = [ - (gen_math_ops._reciprocal_grad, [-2, 2]), - (gen_math_ops._rsqrt_grad, [0.1, 3]), - (gen_math_ops._sigmoid_grad, [-2, 2]), - (gen_math_ops._sqrt_grad, [0.1, 3]), - (gen_math_ops._tanh_grad, [-2, 2]), + (gen_math_ops.reciprocal_grad, [-2, 2]), + (gen_math_ops.rsqrt_grad, [0.1, 3]), + (gen_math_ops.sigmoid_grad, [-2, 2]), + (gen_math_ops.sqrt_grad, [0.1, 3]), + (gen_math_ops.tanh_grad, [-2, 2]), ] def rand(dtype): diff --git a/tensorflow/python/kernel_tests/determinant_op_test.py b/tensorflow/python/kernel_tests/determinant_op_test.py index 222038b22e..a52b2c0dc3 100644 --- a/tensorflow/python/kernel_tests/determinant_op_test.py +++ b/tensorflow/python/kernel_tests/determinant_op_test.py @@ -65,7 +65,7 @@ class DeterminantOpTest(test.TestCase): self._compareDeterminantBase(matrix_x, linalg_ops.matrix_determinant(matrix_x)) self._compareLogDeterminantBase( - matrix_x, gen_linalg_ops._log_matrix_determinant(matrix_x)) + matrix_x, gen_linalg_ops.log_matrix_determinant(matrix_x)) def testBasic(self): # 2x2 matrices diff --git 
a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py index feec9934e4..faac7d8365 100644 --- a/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py +++ b/tensorflow/python/kernel_tests/fractional_avg_pool_op_test.py @@ -347,7 +347,7 @@ class FractionalAvgPoolGradTest(test.TestCase): Two types of tests for FractionalAvgPoolGrad. 1) Test fractional_avg_pool_grad() directly. - This type of test relies on gen_nn_ops._avg_pool_grad() returns the + This type of test relies on gen_nn_ops.avg_pool_grad() returns the correct result. For example: * input_tensor_shape = (1, 10, 10, 1) * window_size = (1, 2, 2, 1) @@ -404,13 +404,13 @@ class FractionalAvgPoolGradTest(test.TestCase): num_elements *= dim_size output_backprop = (self._PRNG.rand(num_elements) * 1000).reshape(output_data.shape) - input_backprop_tensor = gen_nn_ops._avg_pool_grad( + input_backprop_tensor = gen_nn_ops.avg_pool_grad( input_tensor.get_shape(), output_backprop, window_size, stride_size, padding) input_backprop = input_backprop_tensor.eval() row_seq = list(range(0, num_rows + 1, row_window_size)) col_seq = list(range(0, num_cols + 1, col_window_size)) - fap_input_backprop_tensor = gen_nn_ops._fractional_avg_pool_grad( + fap_input_backprop_tensor = gen_nn_ops.fractional_avg_pool_grad( input_tensor.get_shape(), output_backprop, row_seq, @@ -443,7 +443,7 @@ class FractionalAvgPoolGradTest(test.TestCase): num_elements *= dim_size output_backprop = (self._PRNG.rand(num_elements) * 1000).reshape(output_data.shape) - input_backprop_tensor = gen_nn_ops._avg_pool_grad( + input_backprop_tensor = gen_nn_ops.avg_pool_grad( input_tensor.get_shape(), output_backprop, window_size, stride_size, padding) input_backprop = input_backprop_tensor.eval() @@ -451,7 +451,7 @@ class FractionalAvgPoolGradTest(test.TestCase): col_seq = list(range(0, num_cols, col_window_size - 1)) row_seq[-1] += 1 col_seq[-1] += 1 - fap_input_backprop_tensor = 
gen_nn_ops._fractional_avg_pool_grad( + fap_input_backprop_tensor = gen_nn_ops.fractional_avg_pool_grad( input_tensor.get_shape(), output_backprop, row_seq, diff --git a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py index 5983ae7759..6477c9ebc4 100644 --- a/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py +++ b/tensorflow/python/kernel_tests/fractional_max_pool_op_test.py @@ -318,7 +318,7 @@ class FractionalMaxPoolGradTest(test.TestCase): Two types of tests for FractionalMaxPoolGrad. 1) Test fractional_max_pool_grad() directly. - This type of test relies on gen_nn_ops._max_pool_grad() returns the correct + This type of test relies on gen_nn_ops.max_pool_grad() returns the correct result. For example: * input_tensor_shape = (1, 10, 10, 1) * window_size = (1, 2, 2, 1) @@ -384,16 +384,13 @@ class FractionalMaxPoolGradTest(test.TestCase): stride_size, padding) output_data = output_tensor.eval() output_backprop = self._PRNG.randint(100, size=output_data.shape) - input_backprop_tensor = gen_nn_ops._max_pool_grad(input_tensor, - output_tensor, - output_backprop, - window_size, - stride_size, - padding) + input_backprop_tensor = gen_nn_ops.max_pool_grad( + input_tensor, output_tensor, output_backprop, window_size, + stride_size, padding) input_backprop = input_backprop_tensor.eval() row_seq = list(range(0, num_rows + 1, row_window_size)) col_seq = list(range(0, num_cols + 1, col_window_size)) - fmp_input_backprop_tensor = gen_nn_ops._fractional_max_pool_grad( + fmp_input_backprop_tensor = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, output_backprop, @@ -422,18 +419,15 @@ class FractionalMaxPoolGradTest(test.TestCase): stride_size, padding) output_data = output_tensor.eval() output_backprop = self._PRNG.randint(100, size=output_data.shape) - input_backprop_tensor = gen_nn_ops._max_pool_grad(input_tensor, - output_tensor, - output_backprop, - window_size, - 
stride_size, - padding) + input_backprop_tensor = gen_nn_ops.max_pool_grad( + input_tensor, output_tensor, output_backprop, window_size, + stride_size, padding) input_backprop = input_backprop_tensor.eval() row_seq = list(range(0, num_rows, row_window_size - 1)) col_seq = list(range(0, num_cols, col_window_size - 1)) row_seq[-1] += 1 col_seq[-1] += 1 - fmp_input_backprop_tensor = gen_nn_ops._fractional_max_pool_grad( + fmp_input_backprop_tensor = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, output_backprop, @@ -591,7 +585,7 @@ class FractionalMaxPoolGradTest(test.TestCase): output_tensor = constant_op.constant( output_data_not_overlapping, shape=output_size) grad = constant_op.constant(output_backprop, shape=output_size) - r = gen_nn_ops._fractional_max_pool_grad( + r = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, grad, @@ -606,7 +600,7 @@ class FractionalMaxPoolGradTest(test.TestCase): # Test when overlapping is True output_tensor = constant_op.constant( output_data_overlapping, shape=output_size) - r = gen_nn_ops._fractional_max_pool_grad( + r = gen_nn_ops.fractional_max_pool_grad( input_tensor, output_tensor, grad, row_seq, col_seq, overlapping=True) input_backprop_overlapping = r.eval() self.assertShapeEqual( diff --git a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py index 6203a412d7..a0c66c77d8 100644 --- a/tensorflow/python/kernel_tests/matrix_exponential_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_exponential_op_test.py @@ -48,7 +48,7 @@ class ExponentialOpTest(test.TestCase): def _verifyExponential(self, x, np_type): inp = x.astype(np_type) with self.test_session(use_gpu=True): - tf_ans = gen_linalg_ops._matrix_exponential(inp) + tf_ans = gen_linalg_ops.matrix_exponential(inp) if x.size == 0: np_ans = np.empty(x.shape, dtype=np_type) else: @@ -116,13 +116,13 @@ class ExponentialOpTest(test.TestCase): # When the exponential of a 
non-square matrix is attempted we should return # an error with self.assertRaises(ValueError): - gen_linalg_ops._matrix_exponential(np.array([[1., 2., 3.], [3., 4., 5.]])) + gen_linalg_ops.matrix_exponential(np.array([[1., 2., 3.], [3., 4., 5.]])) def testWrongDimensions(self): # The input to the exponential should be at least a 2-dimensional tensor. tensor3 = constant_op.constant([1., 2.]) with self.assertRaises(ValueError): - gen_linalg_ops._matrix_exponential(tensor3) + gen_linalg_ops.matrix_exponential(tensor3) def testEmpty(self): self._verifyExponentialReal(np.empty([0, 2, 2])) @@ -143,8 +143,8 @@ class ExponentialOpTest(test.TestCase): with self.test_session(use_gpu=True) as sess: matrix1 = random_ops.random_normal([5, 5], seed=42) matrix2 = random_ops.random_normal([5, 5], seed=42) - expm1 = gen_linalg_ops._matrix_exponential(matrix1) - expm2 = gen_linalg_ops._matrix_exponential(matrix2) + expm1 = gen_linalg_ops.matrix_exponential(matrix1) + expm2 = gen_linalg_ops.matrix_exponential(matrix2) expm = sess.run([expm1, expm2]) self.assertAllEqual(expm[0], expm[1]) @@ -180,7 +180,7 @@ class MatrixExponentialBenchmark(test.Benchmark): session.Session() as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) - expm = gen_linalg_ops._matrix_exponential(matrix) + expm = gen_linalg_ops.matrix_exponential(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, diff --git a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py index 18ed59828c..24edc4f59f 100644 --- a/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py +++ b/tensorflow/python/kernel_tests/matrix_logarithm_op_test.py @@ -39,8 +39,8 @@ class LogarithmOpTest(test.TestCase): inp = x.astype(np_type) with self.test_session(use_gpu=True): # Verify that expm(logm(A)) == A. 
- tf_ans = gen_linalg_ops._matrix_exponential( - gen_linalg_ops._matrix_logarithm(inp)) + tf_ans = gen_linalg_ops.matrix_exponential( + gen_linalg_ops.matrix_logarithm(inp)) out = tf_ans.eval() self.assertAllClose(inp, out, rtol=1e-4, atol=1e-3) @@ -85,14 +85,14 @@ class LogarithmOpTest(test.TestCase): # When the logarithm of a non-square matrix is attempted we should return # an error with self.assertRaises(ValueError): - gen_linalg_ops._matrix_logarithm( + gen_linalg_ops.matrix_logarithm( np.array([[1., 2., 3.], [3., 4., 5.]], dtype=np.complex64)) def testWrongDimensions(self): # The input to the logarithm should be at least a 2-dimensional tensor. tensor3 = constant_op.constant([1., 2.], dtype=dtypes.complex64) with self.assertRaises(ValueError): - gen_linalg_ops._matrix_logarithm(tensor3) + gen_linalg_ops.matrix_logarithm(tensor3) def testEmpty(self): self._verifyLogarithmComplex(np.empty([0, 2, 2], dtype=np.complex64)) @@ -115,8 +115,8 @@ class LogarithmOpTest(test.TestCase): random_ops.random_normal([5, 5], seed=42), dtypes.complex64) matrix2 = math_ops.cast( random_ops.random_normal([5, 5], seed=42), dtypes.complex64) - logm1 = gen_linalg_ops._matrix_logarithm(matrix1) - logm2 = gen_linalg_ops._matrix_logarithm(matrix2) + logm1 = gen_linalg_ops.matrix_logarithm(matrix1) + logm2 = gen_linalg_ops.matrix_logarithm(matrix2) logm = sess.run([logm1, logm2]) self.assertAllEqual(logm[0], logm[1]) @@ -152,7 +152,7 @@ class MatrixLogarithmBenchmark(test.Benchmark): session.Session() as sess, \ ops.device("/cpu:0"): matrix = self._GenerateMatrix(shape) - logm = gen_linalg_ops._matrix_logarithm(matrix) + logm = gen_linalg_ops.matrix_logarithm(matrix) variables.global_variables_initializer().run() self.run_op_benchmark( sess, diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index 4466beeec9..a0ac355b60 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ 
b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -405,7 +405,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 3, 3, 3], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], @@ -427,7 +427,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 2, 3, 3], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], @@ -456,7 +456,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 2, 2, 1], ksize=[1, 1, 2, 1], strides=[1, 1, 1, 1], @@ -485,7 +485,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 1], ksize=[1, 2, 2, 1], strides=[1, 1, 2, 1], @@ -494,7 +494,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu, v2=v2) self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 1], ksize=[1, 2, 2, 1], strides=[1, 2, 1, 1], @@ -519,7 +519,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 4], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], @@ -554,7 +554,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 8, 8, 8], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], @@ -565,7 +565,7 @@ class PoolingTest(test.TestCase): def _testMaxPoolEmptyInput(self, use_gpu): self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[0, 8, 8, 8], ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], @@ -600,7 +600,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 
1, 1, 10], ksize=[1, 1, 1, 2], strides=[1, 1, 1, 2], @@ -626,7 +626,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 2, 2, 6], ksize=[1, 1, 1, 3], strides=[1, 1, 1, 3], @@ -648,7 +648,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 7, 7, 1], ksize=[1, 2, 2, 1], strides=[1, 3, 3, 1], @@ -689,7 +689,7 @@ class PoolingTest(test.TestCase): for v2 in [True, False]: self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 3, 3, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], @@ -699,7 +699,7 @@ class PoolingTest(test.TestCase): v2=v2) self._VerifyValues( - gen_nn_ops._max_pool_v2, + gen_nn_ops.max_pool_v2, input_sizes=[1, 4, 4, 1], ksize=[1, 1, 1, 1], strides=[1, 2, 2, 1], @@ -764,8 +764,8 @@ class PoolingTest(test.TestCase): _, argmax_op = nn_ops.max_pool_with_argmax(t, ksize, strides, padding) argmax = argmax_op.eval() grad_in = constant_op.constant(tensor_output, shape=output_shape) - out_op = gen_nn_ops._max_pool_grad_with_argmax(t, grad_in, argmax, - ksize, strides, padding) + out_op = gen_nn_ops.max_pool_grad_with_argmax(t, grad_in, argmax, ksize, + strides, padding) gpu_val = out_op.eval() self.assertShapeEqual(gpu_val, out_op) with self.test_session(use_gpu=False): @@ -773,8 +773,8 @@ class PoolingTest(test.TestCase): out_op = nn_ops.max_pool(t, ksize, strides, padding) orig_out = out_op.eval() grad_in = constant_op.constant(tensor_output, shape=output_shape) - out_op = gen_nn_ops._max_pool_grad(t, orig_out, grad_in, ksize, strides, - padding) + out_op = gen_nn_ops.max_pool_grad(t, orig_out, grad_in, ksize, strides, + padding) cpu_val = out_op.eval() self.assertShapeEqual(cpu_val, out_op) # The CPU version accumulates its gradient on fp16, so it's less @@ -793,7 +793,7 @@ class PoolingTest(test.TestCase): _, argmax_op = 
nn_ops.max_pool_with_argmax(t, ksize, strides, padding) argmax = argmax_op.eval() grad_in = constant_op.constant(tensor_input, shape=input_shape) - out_op = gen_nn_ops._max_pool_grad_grad_with_argmax( + out_op = gen_nn_ops.max_pool_grad_grad_with_argmax( t, grad_in, argmax, ksize, strides, padding) gpu_val = out_op.eval() self.assertShapeEqual(gpu_val, out_op) @@ -802,8 +802,8 @@ class PoolingTest(test.TestCase): out_op = nn_ops.max_pool(t, ksize, strides, padding) orig_out = out_op.eval() grad_in = constant_op.constant(tensor_input, shape=input_shape) - out_op = gen_nn_ops._max_pool_grad_grad(t, orig_out, grad_in, ksize, - strides, padding) + out_op = gen_nn_ops.max_pool_grad_grad(t, orig_out, grad_in, ksize, + strides, padding) cpu_val = out_op.eval() self.assertShapeEqual(cpu_val, out_op) # The CPU version accumulates its gradient on fp16, so it's less @@ -842,7 +842,7 @@ class PoolingTest(test.TestCase): t = constant_op.constant(tensor_input, shape=[1, 2, 2, 1]) argmax = constant_op.constant( tensor_argmax, shape=[1, 2, 2, 1], dtype=dtypes.int64) - out_op = gen_nn_ops._max_pool_grad_with_argmax( + out_op = gen_nn_ops.max_pool_grad_with_argmax( orig_in, t, argmax, @@ -865,7 +865,7 @@ class PoolingTest(test.TestCase): t = constant_op.constant(tensor_input, shape=[1, 3, 3, 1]) argmax = constant_op.constant( tensor_argmax, shape=[1, 2, 2, 1], dtype=dtypes.int64) - out_op = gen_nn_ops._max_pool_grad_grad_with_argmax( + out_op = gen_nn_ops.max_pool_grad_grad_with_argmax( orig_in, t, argmax, @@ -1029,7 +1029,7 @@ class PoolingTest(test.TestCase): self.assertLess(err, err_tolerance) def _testMaxPoolGradValidPadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[1, 3, 3, 1], @@ -1043,7 +1043,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding2_1_6(self, data_format, 
use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 6, 6, 3], @@ -1057,7 +1057,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding2_1_7(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 7, 7, 3], @@ -1071,7 +1071,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding1_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[1, 3, 3, 1], @@ -1085,7 +1085,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradValidPadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 2, 3], @@ -1099,7 +1099,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1113,7 +1113,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding1_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1127,7 +1127,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding2_1(self, data_format, 
use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1141,7 +1141,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1155,7 +1155,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradSamePadding3_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestGradient( pool_func, input_sizes=[1, 7, 7, 1], @@ -1199,7 +1199,7 @@ class PoolingTest(test.TestCase): Returns: A Tensor. """ - pool_func = gen_nn_ops.max_pool_grad_v2 if v2 else gen_nn_ops._max_pool_grad + pool_func = gen_nn_ops.max_pool_grad_v2 if v2 else gen_nn_ops.max_pool_grad return pool_func(orig_input, orig_output, grad, [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) @@ -1208,7 +1208,7 @@ class PoolingTest(test.TestCase): expected_input_backprop, input_sizes, output_sizes, window_rows, window_cols, row_stride, col_stride, padding, use_gpu, v2): - pool_func = gen_nn_ops._max_pool_v2 if v2 else nn_ops.max_pool + pool_func = gen_nn_ops.max_pool_v2 if v2 else nn_ops.max_pool with self.test_session(use_gpu=use_gpu): input_tensor = constant_op.constant(input_data, shape=input_sizes) output_tensor = pool_func(input_tensor, [1, window_rows, window_cols, 1], @@ -1504,7 +1504,7 @@ class PoolingTest(test.TestCase): self._testMaxPoolGradDirectWithNans2_2() def _testMaxPoolGradGradValidPadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, 
nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[1, 3, 3, 1], @@ -1518,7 +1518,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradValidPadding2_1_6(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 6, 6, 3], @@ -1532,7 +1532,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradValidPadding2_1_7(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 7, 7, 3], @@ -1546,7 +1546,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradValidPadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 2, 3], @@ -1560,7 +1560,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding1_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1574,7 +1574,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding2_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1588,7 +1588,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding2_2(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, 
nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[2, 2, 4, 3], @@ -1602,7 +1602,7 @@ class PoolingTest(test.TestCase): use_gpu=use_gpu) def _testMaxPoolGradGradSamePadding3_1(self, data_format, use_gpu): - for pool_func in [gen_nn_ops._max_pool_v2, nn_ops.max_pool]: + for pool_func in [gen_nn_ops.max_pool_v2, nn_ops.max_pool]: self._ConstructAndTestSecondGradient( pool_func, input_sizes=[1, 7, 7, 1], @@ -1644,7 +1644,7 @@ class PoolingTest(test.TestCase): Returns: A Tensor. """ - return gen_nn_ops._max_pool_grad_grad( + return gen_nn_ops.max_pool_grad_grad( orig_input, orig_output, grad, [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) diff --git a/tensorflow/python/kernel_tests/save_restore_ops_test.py b/tensorflow/python/kernel_tests/save_restore_ops_test.py index 1bdfa9ebd8..cb9aa1e34d 100644 --- a/tensorflow/python/kernel_tests/save_restore_ops_test.py +++ b/tensorflow/python/kernel_tests/save_restore_ops_test.py @@ -31,11 +31,10 @@ class ShardedFileOpsTest(test.TestCase): with session.Session( target="", config=config_pb2.ConfigProto(device_count={"CPU": 2})): self.assertEqual( - gen_io_ops._sharded_filename("foo", 4, 100).eval(), + gen_io_ops.sharded_filename("foo", 4, 100).eval(), b"foo-00004-of-00100") self.assertEqual( - gen_io_ops._sharded_filespec("foo", 100).eval(), - b"foo-?????-of-00100") + gen_io_ops.sharded_filespec("foo", 100).eval(), b"foo-?????-of-00100") class ShapeInferenceTest(test.TestCase): @@ -53,7 +52,7 @@ class ShapeInferenceTest(test.TestCase): [dtypes.float32, dtypes.float32]) def testRestoreSlice(self): - op = gen_io_ops._restore_slice("model", "var", "3 4 0,1:-", dtypes.float32) + op = gen_io_ops.restore_slice("model", "var", "3 4 0,1:-", dtypes.float32) self.assertEqual([1, 4], op.get_shape()) diff --git a/tensorflow/python/kernel_tests/scalar_test.py b/tensorflow/python/kernel_tests/scalar_test.py index 
e65241981e..0d8fd23294 100644 --- a/tensorflow/python/kernel_tests/scalar_test.py +++ b/tensorflow/python/kernel_tests/scalar_test.py @@ -92,11 +92,11 @@ class ScalarTest(test.TestCase): self.check(array_ops.reshape, (7, 1), 'sizes input must be 1-D', [7]) def testShardedFilename(self): - self.check(gen_io_ops._sharded_filename, ('foo', 4, [100]), + self.check(gen_io_ops.sharded_filename, ('foo', 4, [100]), 'must be a scalar', b'foo-00004-of-00100') def testShardedFilespec(self): - self.check(gen_io_ops._sharded_filespec, ('foo', [100]), 'must be a scalar', + self.check(gen_io_ops.sharded_filespec, ('foo', [100]), 'must be a scalar', b'foo-?????-of-00100') def testUnsortedSegmentSum(self): diff --git a/tensorflow/python/kernel_tests/spacetobatch_op_test.py b/tensorflow/python/kernel_tests/spacetobatch_op_test.py index b943dfa4e5..2a9232b6ae 100644 --- a/tensorflow/python/kernel_tests/spacetobatch_op_test.py +++ b/tensorflow/python/kernel_tests/spacetobatch_op_test.py @@ -86,11 +86,11 @@ class CppOpImpl(object): @staticmethod def space_to_batch(*args, **kwargs): - return gen_array_ops._space_to_batch(*args, **kwargs) + return gen_array_ops.space_to_batch(*args, **kwargs) @staticmethod def batch_to_space(*args, **kwargs): - return gen_array_ops._batch_to_space(*args, **kwargs) + return gen_array_ops.batch_to_space(*args, **kwargs) class SpaceToBatchTest(test.TestCase, PythonOpImpl): diff --git a/tensorflow/python/kernel_tests/sparse_xent_op_test.py b/tensorflow/python/kernel_tests/sparse_xent_op_test.py index cd5b711a0e..a841fe83a7 100644 --- a/tensorflow/python/kernel_tests/sparse_xent_op_test.py +++ b/tensorflow/python/kernel_tests/sparse_xent_op_test.py @@ -64,7 +64,7 @@ class SparseXentTest(test.TestCase): def _testXent(self, np_features, np_labels): np_loss, np_backprop = self._npXent(np_features, np_labels) with self.test_session(use_gpu=True) as sess: - loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + loss, backprop = 
gen_nn_ops.sparse_softmax_cross_entropy_with_logits( np_features, np_labels) tf_loss, tf_backprop = sess.run([loss, backprop]) self.assertAllCloseAccordingToType(np_loss, tf_loss) @@ -73,7 +73,7 @@ class SparseXentTest(test.TestCase): def testSingleClass(self): for label_dtype in np.int32, np.int64: with self.test_session(use_gpu=True) as sess: - loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( np.array([[1.], [-1.], [0.]]).astype(np.float32), np.array([0, 0, 0]).astype(label_dtype)) tf_loss, tf_backprop = sess.run([loss, backprop]) @@ -87,8 +87,9 @@ class SparseXentTest(test.TestCase): if test.is_built_with_cuda() and test.is_gpu_available(): with self.test_session(use_gpu=True) as sess: - loss, backprop = (gen_nn_ops._sparse_softmax_cross_entropy_with_logits( - features, labels)) + loss, backprop = ( + gen_nn_ops.sparse_softmax_cross_entropy_with_logits( + features, labels)) tf_loss, tf_backprop = sess.run([loss, backprop]) self.assertAllClose( [[np.nan] * 4, [0.25, 0.25, 0.25, -0.75], @@ -100,8 +101,8 @@ class SparseXentTest(test.TestCase): [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3) with self.test_session(use_gpu=False) as sess: - loss, backprop = (gen_nn_ops._sparse_softmax_cross_entropy_with_logits( - features, labels)) + loss, backprop = ( + gen_nn_ops.sparse_softmax_cross_entropy_with_logits(features, labels)) with self.assertRaisesOpError("Received a label value of"): sess.run([loss, backprop]) diff --git a/tensorflow/python/kernel_tests/stack_ops_test.py b/tensorflow/python/kernel_tests/stack_ops_test.py index aa409336f5..afd2eaffab 100644 --- a/tensorflow/python/kernel_tests/stack_ops_test.py +++ b/tensorflow/python/kernel_tests/stack_ops_test.py @@ -34,11 +34,11 @@ class StackOpTest(test.TestCase): def _testStackPushPop(self, use_gpu): with self.test_session(use_gpu=use_gpu): - h = gen_data_flow_ops._stack_v2( + h = 
gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push_v2(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose([[4.0, 5.0]], c1.eval()) def testStackPushPop(self): @@ -49,11 +49,11 @@ class StackOpTest(test.TestCase): with self.test_session(use_gpu=use_gpu): a = np.arange(2000) x = constant_op.constant(a, dtype=dtypes.float32) - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, x, swap_memory=True) + c = gen_data_flow_ops.stack_push_v2(h, x, swap_memory=True) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) self.assertAllClose(a, c1.eval()) def testStackPushPopSwap(self): @@ -63,7 +63,7 @@ class StackOpTest(test.TestCase): def _testStackWhileSwap(self, use_gpu): with self.test_session(use_gpu=use_gpu): n = constant_op.constant(0) - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") def c(x): @@ -72,7 +72,7 @@ class StackOpTest(test.TestCase): def b(x): with ops.control_dependencies([x]): a = constant_op.constant(np.ones(2000), dtype=dtypes.float32) - v = gen_data_flow_ops._stack_push_v2(h, a, swap_memory=True) + v = gen_data_flow_ops.stack_push_v2(h, a, swap_memory=True) with ops.control_dependencies([v]): return math_ops.add(x, 1) @@ -86,7 +86,7 @@ class StackOpTest(test.TestCase): def b1(x, y): nx = math_ops.subtract(x, 1) - ny = y + gen_data_flow_ops._stack_pop_v2(h, dtypes.float32) + ny = y + gen_data_flow_ops.stack_pop_v2(h, dtypes.float32) return [nx, ny] _, ry = control_flow_ops.while_loop( @@ -99,16 +99,16 @@ class StackOpTest(test.TestCase): def 
_testMultiStack(self, use_gpu): with self.test_session(use_gpu=use_gpu): - h1 = gen_data_flow_ops._stack_v2( + h1 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, 4.0) + c1 = gen_data_flow_ops.stack_push_v2(h1, 4.0) with ops.control_dependencies([c1]): - c1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - h2 = gen_data_flow_ops._stack_v2( + c1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + h2 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="bar") - c2 = gen_data_flow_ops._stack_push_v2(h2, 5.0) + c2 = gen_data_flow_ops.stack_push_v2(h2, 5.0) with ops.control_dependencies([c2]): - c2 = gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + c2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) r = c1 + c2 self.assertAllClose(9.0, r.eval()) @@ -119,17 +119,17 @@ class StackOpTest(test.TestCase): def _testSameNameStacks(self, use_gpu): """Different stacks with the same name do not interfere.""" with self.test_session(use_gpu=use_gpu) as sess: - h1 = gen_data_flow_ops._stack_v2( + h1 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - h2 = gen_data_flow_ops._stack_v2( + h2 = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push_v2(h1, 4.0) + c1 = gen_data_flow_ops.stack_push_v2(h1, 4.0) with ops.control_dependencies([c1]): - c2 = gen_data_flow_ops._stack_push_v2(h2, 5.0) + c2 = gen_data_flow_ops.stack_push_v2(h2, 5.0) with ops.control_dependencies([c2]): - pop1 = gen_data_flow_ops._stack_pop_v2(h1, dtypes.float32) - pop2 = gen_data_flow_ops._stack_pop_v2(h2, dtypes.float32) + pop1 = gen_data_flow_ops.stack_pop_v2(h1, dtypes.float32) + pop2 = gen_data_flow_ops.stack_pop_v2(h2, dtypes.float32) out1, out2 = sess.run([pop1, pop2]) self.assertAllClose(out1, 4.0) @@ -141,9 +141,9 @@ class StackOpTest(test.TestCase): def _testCloseStack(self, use_gpu): with 
self.test_session(use_gpu=use_gpu) as sess: - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_close_v2(h) + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1) def testCloseStack(self): @@ -152,11 +152,11 @@ class StackOpTest(test.TestCase): def _testPushCloseStack(self, use_gpu): with self.test_session(use_gpu=use_gpu) as sess: - h = gen_data_flow_ops._stack_v2( + h = gen_data_flow_ops.stack_v2( -1, elem_type=dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push_v2(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push_v2(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_close_v2(h) + c1 = gen_data_flow_ops.stack_close_v2(h) sess.run(c1) def testPushCloseStack(self): @@ -170,9 +170,9 @@ class StackOpRefTest(test.TestCase): def _testStackPushPop(self, use_gpu): with self.test_session(use_gpu=use_gpu): h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop(h, dtypes.float32) self.assertAllClose([[4.0, 5.0]], c1.eval()) def testStackPushPop(self): @@ -184,9 +184,9 @@ class StackOpRefTest(test.TestCase): a = np.arange(2000) x = constant_op.constant(a, dtype=dtypes.float32) h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push(h, x, swap_memory=True) + c = gen_data_flow_ops.stack_push(h, x, swap_memory=True) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_pop(h, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop(h, dtypes.float32) self.assertAllClose(a, c1.eval()) def testStackPushPopSwap(self): @@ -196,13 +196,13 @@ class StackOpRefTest(test.TestCase): def _testMultiStack(self, use_gpu): with 
self.test_session(use_gpu=use_gpu): h1 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push(h1, 4.0) + c1 = gen_data_flow_ops.stack_push(h1, 4.0) with ops.control_dependencies([c1]): - c1 = gen_data_flow_ops._stack_pop(h1, dtypes.float32) + c1 = gen_data_flow_ops.stack_pop(h1, dtypes.float32) h2 = gen_data_flow_ops._stack(dtypes.float32, stack_name="bar") - c2 = gen_data_flow_ops._stack_push(h2, 5.0) + c2 = gen_data_flow_ops.stack_push(h2, 5.0) with ops.control_dependencies([c2]): - c2 = gen_data_flow_ops._stack_pop(h2, dtypes.float32) + c2 = gen_data_flow_ops.stack_pop(h2, dtypes.float32) r = c1 + c2 self.assertAllClose(9.0, r.eval()) @@ -217,7 +217,7 @@ class StackOpRefTest(test.TestCase): def b(x): with ops.control_dependencies([x]): a = constant_op.constant(np.ones(2000), dtype=dtypes.float32) - v = gen_data_flow_ops._stack_push(h, a, swap_memory=True) + v = gen_data_flow_ops.stack_push(h, a, swap_memory=True) with ops.control_dependencies([v]): return math_ops.add(x, 1) @@ -231,7 +231,7 @@ class StackOpRefTest(test.TestCase): def b1(x, y): nx = math_ops.subtract(x, 1) - ny = y + gen_data_flow_ops._stack_pop(h, dtypes.float32) + ny = y + gen_data_flow_ops.stack_pop(h, dtypes.float32) return [nx, ny] _, ry = control_flow_ops.while_loop( @@ -249,9 +249,9 @@ class StackOpRefTest(test.TestCase): def _testSameNameStacks(self, use_gpu): with self.test_session(use_gpu=use_gpu): h1 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c1 = gen_data_flow_ops._stack_push(h1, 4.0) + c1 = gen_data_flow_ops.stack_push(h1, 4.0) h2 = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c2 = gen_data_flow_ops._stack_push(h2, 5.0) + c2 = gen_data_flow_ops.stack_push(h2, 5.0) _ = c1 + c2 self.assertNotEqual(h1.eval()[1], h2.eval()[1]) @@ -262,7 +262,7 @@ class StackOpRefTest(test.TestCase): def _testCloseStack(self, use_gpu): with self.test_session(use_gpu=use_gpu) as sess: h = gen_data_flow_ops._stack(dtypes.float32, 
stack_name="foo") - c1 = gen_data_flow_ops._stack_close(h) + c1 = gen_data_flow_ops.stack_close(h) sess.run(c1) def testCloseStack(self): @@ -272,9 +272,9 @@ class StackOpRefTest(test.TestCase): def _testPushCloseStack(self, use_gpu): with self.test_session(use_gpu=use_gpu) as sess: h = gen_data_flow_ops._stack(dtypes.float32, stack_name="foo") - c = gen_data_flow_ops._stack_push(h, [[4.0, 5.0]]) + c = gen_data_flow_ops.stack_push(h, [[4.0, 5.0]]) with ops.control_dependencies([c]): - c1 = gen_data_flow_ops._stack_close(h) + c1 = gen_data_flow_ops.stack_close(h) sess.run(c1) def testPushCloseStack(self): diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index aad2443eea..8f09f3d78b 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -437,7 +437,7 @@ class TensorArrayTest(test.TestCase): # Test reading wrong datatype, which is only possible in graph mode if context.in_graph_mode(): - r0_bad = gen_data_flow_ops._tensor_array_read_v3( + r0_bad = gen_data_flow_ops.tensor_array_read_v3( handle=w0.handle, index=0, dtype=dtypes.float64, flow_in=w0.flow) with self.assertRaisesOpError( "TensorArray dtype is float but Op requested dtype double."): diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 6366d2e181..173d95b258 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -66,9 +66,9 @@ class UniqueTest(test.TestCase): for dtype in [np.int32, np.int64]: x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) with self.test_session() as sess: - y0, idx0 = gen_array_ops._unique_v2(x, axis=np.array([0], dtype)) + y0, idx0 = gen_array_ops.unique_v2(x, axis=np.array([0], dtype)) tf_y0, tf_idx0 = sess.run([y0, idx0]) - y1, idx1 = gen_array_ops._unique_v2(x, axis=np.array([1], dtype)) + y1, idx1 = 
gen_array_ops.unique_v2(x, axis=np.array([1], dtype)) tf_y1, tf_idx1 = sess.run([y1, idx1]) self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) self.assertAllEqual(tf_idx0, np.array([0, 0, 1])) @@ -80,7 +80,7 @@ class UniqueTest(test.TestCase): # by default, the axis will be wrapped to allow `axis=None`. x = np.random.randint(2, high=10, size=7000) with self.test_session() as sess: - y, idx = gen_array_ops._unique_v2(x, axis=np.array([], np.int32)) + y, idx = gen_array_ops.unique_v2(x, axis=np.array([], np.int32)) tf_y, tf_idx = sess.run([y, idx]) self.assertEqual(len(x), len(tf_idx)) diff --git a/tensorflow/python/kernel_tests/variable_ops_test.py b/tensorflow/python/kernel_tests/variable_ops_test.py index 79071029fd..cf369c0718 100644 --- a/tensorflow/python/kernel_tests/variable_ops_test.py +++ b/tensorflow/python/kernel_tests/variable_ops_test.py @@ -165,26 +165,26 @@ class VariableOpTest(test.TestCase): def testTemporaryVariable(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable( + var = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="foo") var = state_ops.assign(var, [[4.0, 5.0]]) var = state_ops.assign_add(var, [[6.0, 7.0]]) - final = gen_state_ops._destroy_temporary_variable(var, var_name="foo") + final = gen_state_ops.destroy_temporary_variable(var, var_name="foo") self.assertAllClose([[10.0, 12.0]], final.eval()) def testDestroyNonexistentTemporaryVariable(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable([1, 2], dtypes.float32) - final = gen_state_ops._destroy_temporary_variable(var, var_name="bad") + var = gen_state_ops.temporary_variable([1, 2], dtypes.float32) + final = gen_state_ops.destroy_temporary_variable(var, var_name="bad") with self.assertRaises(errors.NotFoundError): final.eval() def testDuplicateTemporaryVariable(self): with self.test_session(use_gpu=True): - var1 = gen_state_ops._temporary_variable( + var1 = 
gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="dup") var1 = state_ops.assign(var1, [[1.0, 2.0]]) - var2 = gen_state_ops._temporary_variable( + var2 = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="dup") var2 = state_ops.assign(var2, [[3.0, 4.0]]) final = var1 + var2 @@ -193,25 +193,25 @@ class VariableOpTest(test.TestCase): def testDestroyTemporaryVariableTwice(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable([1, 2], dtypes.float32) - val1 = gen_state_ops._destroy_temporary_variable(var, var_name="dup") - val2 = gen_state_ops._destroy_temporary_variable(var, var_name="dup") + var = gen_state_ops.temporary_variable([1, 2], dtypes.float32) + val1 = gen_state_ops.destroy_temporary_variable(var, var_name="dup") + val2 = gen_state_ops.destroy_temporary_variable(var, var_name="dup") final = val1 + val2 with self.assertRaises(errors.NotFoundError): final.eval() def testTemporaryVariableNoLeak(self): with self.test_session(use_gpu=True): - var = gen_state_ops._temporary_variable( + var = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="bar") final = array_ops.identity(var) final.eval() def testTwoTemporaryVariablesNoLeaks(self): with self.test_session(use_gpu=True): - var1 = gen_state_ops._temporary_variable( + var1 = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="var1") - var2 = gen_state_ops._temporary_variable( + var2 = gen_state_ops.temporary_variable( [1, 2], dtypes.float32, var_name="var2") final = var1 + var2 final.eval() diff --git a/tensorflow/python/kernel_tests/variables_test.py b/tensorflow/python/kernel_tests/variables_test.py index b16c8c002c..27599868b7 100644 --- a/tensorflow/python/kernel_tests/variables_test.py +++ b/tensorflow/python/kernel_tests/variables_test.py @@ -687,7 +687,7 @@ class VariableContainerTest(test.TestCase): v1 = variables.Variable([1]) with ops.container("l2"): v2 = variables.Variable([2]) - special_v = 
gen_state_ops._variable( + special_v = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="VariableInL3", diff --git a/tensorflow/python/kernel_tests/xent_op_test.py b/tensorflow/python/kernel_tests/xent_op_test.py index e152f02d8e..e3e120a4eb 100644 --- a/tensorflow/python/kernel_tests/xent_op_test.py +++ b/tensorflow/python/kernel_tests/xent_op_test.py @@ -48,7 +48,7 @@ class XentTest(test.TestCase): def _testXent(self, np_features, np_labels, use_gpu=False): np_loss, np_backprop = self._npXent(np_features, np_labels) with self.test_session(use_gpu=use_gpu) as sess: - loss, backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( np_features, np_labels) tf_loss, tf_backprop = sess.run([loss, backprop]) self.assertAllCloseAccordingToType(np_loss, tf_loss) @@ -71,7 +71,7 @@ class XentTest(test.TestCase): def _testSingleClass(self, use_gpu=False): for dtype in np.float16, np.float32: with self.test_session(use_gpu=use_gpu) as sess: - loss, backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + loss, backprop = gen_nn_ops.softmax_cross_entropy_with_logits( np.array([[1.], [-1.], [0.]]).astype(dtype), np.array([[-1.], [0.], [1.]]).astype(dtype)) tf_loss, tf_backprop = sess.run([loss, backprop]) @@ -89,7 +89,7 @@ class XentTest(test.TestCase): np_labels = np.array([[[0., 0., 0., 1.]], [[0., .5, .5, 0.]]]).astype(dtype) self.assertRaisesRegexp(ValueError, "must be rank 2", - gen_nn_ops._softmax_cross_entropy_with_logits, + gen_nn_ops.softmax_cross_entropy_with_logits, np_features, np_labels) def testNpXent(self): @@ -131,14 +131,14 @@ class XentTest(test.TestCase): def testShapeMismatch(self): with self.test_session(): with self.assertRaises(ValueError): - gen_nn_ops._softmax_cross_entropy_with_logits( + gen_nn_ops.softmax_cross_entropy_with_logits( [[0., 1.], [2., 3.]], [[0., 1., 0.], [1., 0., 0.]]) def testNotMatrix(self): with self.test_session(): with self.assertRaises(ValueError): - 
gen_nn_ops._softmax_cross_entropy_with_logits([0., 1., 2., 3.], - [0., 1., 0., 1.]) + gen_nn_ops.softmax_cross_entropy_with_logits([0., 1., 2., 3.], + [0., 1., 0., 1.]) def testHalf(self): self._testAll( diff --git a/tensorflow/python/ops/accumulate_n_benchmark.py b/tensorflow/python/ops/accumulate_n_benchmark.py index c58d36f397..a709066cae 100644 --- a/tensorflow/python/ops/accumulate_n_benchmark.py +++ b/tensorflow/python/ops/accumulate_n_benchmark.py @@ -39,7 +39,7 @@ from tensorflow.python.platform import test class AccumulateNBenchmark(test.Benchmark): def _AccumulateNTemplate(self, inputs, init, shape, validate_shape): - var = gen_state_ops._temporary_variable( + var = gen_state_ops.temporary_variable( shape=shape, dtype=inputs[0].dtype.base_dtype) ref = state_ops.assign(var, init, validate_shape=validate_shape) update_ops = [ @@ -47,8 +47,7 @@ class AccumulateNBenchmark(test.Benchmark): ref, tensor, use_locking=True).op for tensor in inputs ] with ops.control_dependencies(update_ops): - return gen_state_ops._destroy_temporary_variable( - ref, var_name=var.op.name) + return gen_state_ops.destroy_temporary_variable(ref, var_name=var.op.name) def _AccumulateNInitializedWithFirst(self, inputs): return self._AccumulateNTemplate( @@ -60,7 +59,7 @@ class AccumulateNBenchmark(test.Benchmark): def _AccumulateNInitializedWithMerge(self, inputs): return self._AccumulateNTemplate( inputs, - init=array_ops.zeros_like(gen_control_flow_ops._merge(inputs)[0]), + init=array_ops.zeros_like(gen_control_flow_ops.merge(inputs)[0]), shape=tensor_shape.vector(0), validate_shape=False) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 9745d38dc2..925cf8ef32 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -139,7 +139,6 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): # on CPUs and a Maxwell TitanX. 
A speedup was seen in a large majority of # cases when switching implementations at N=16, but it is possible that # there will be a small number of performance regressions. - # pylint: disable=protected-access if len(sizes) > 16: # extract the size of each input along the concat dimension sizes = array_ops.squeeze( @@ -148,10 +147,9 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): [1, -1])) out_grads = array_ops.split(grad, sizes, non_neg_concat_dim) else: - offset = gen_array_ops._concat_offset(non_neg_concat_dim, sizes) + offset = gen_array_ops.concat_offset(non_neg_concat_dim, sizes) for (begin, size) in zip(offset, sizes): out_grads.append(array_ops.slice(grad, begin, size)) - # pylint: enable=protected-access elif isinstance(grad, ops.IndexedSlices): # Using mod here for convenience since concat_dim is already verified # in concat implementation to be within the allowed [-rank, rank) range. @@ -627,9 +625,7 @@ def _ReverseSequenceGrad(op, grad): @ops.RegisterGradient("Reverse") def _ReverseGrad(op, grad): reverse_dims = op.inputs[1] - # pylint: disable=protected-access - return gen_array_ops._reverse(grad, reverse_dims), None - # pylint: enable=protected-access + return gen_array_ops.reverse(grad, reverse_dims), None @ops.RegisterGradient("ReverseV2") @@ -700,17 +696,13 @@ ops.NotDifferentiable("OneHot") @ops.RegisterGradient("MirrorPad") def _MirrorPadGrad(op, grad): mode = op.get_attr("mode") - # pylint: disable=protected-access - return [gen_array_ops._mirror_pad_grad(grad, op.inputs[1], mode=mode), None] - # pylint: enable=protected-access + return [gen_array_ops.mirror_pad_grad(grad, op.inputs[1], mode=mode), None] @ops.RegisterGradient("MirrorPadGrad") def _MirrorPadGradGrad(op, grad): mode = op.get_attr("mode") - # pylint: disable=protected-access - return [gen_array_ops._mirror_pad(grad, op.inputs[1], mode=mode), None] - # pylint: enable=protected-access + return [gen_array_ops.mirror_pad(grad, op.inputs[1], 
mode=mode), None] @ops.RegisterGradient("QuantizeAndDequantize") diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 3db3d84475..cc559695ed 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -198,7 +198,7 @@ def expand_dims(input, axis=None, name=None, dim=None): if axis is not None: raise ValueError("can't specify both 'dim' and 'axis'") axis = dim - return gen_array_ops._expand_dims(input, axis, name) + return gen_array_ops.expand_dims(input, axis, name) # pylint: enable=redefined-builtin,protected-access @@ -211,28 +211,25 @@ def expand_dims(input, axis=None, name=None, dim=None): "This op will be removed after the deprecation date. " "Please switch to tf.setdiff1d().") def listdiff(x, y, out_idx=None, name=None): - return gen_array_ops._list_diff(x, y, out_idx, name) + return gen_array_ops.list_diff(x, y, out_idx, name) -listdiff.__doc__ = gen_array_ops._list_diff.__doc__ + "\n" + listdiff.__doc__ +listdiff.__doc__ = gen_array_ops.list_diff.__doc__ + "\n" + listdiff.__doc__ # pylint: enable=protected-access -# pylint: disable=undefined-variable,protected-access +# pylint: disable=undefined-variable @tf_export("setdiff1d") def setdiff1d(x, y, index_dtype=dtypes.int32, name=None): - return gen_array_ops._list_diff(x, y, index_dtype, name) + return gen_array_ops.list_diff(x, y, index_dtype, name) -setdiff1d.__doc__ = gen_array_ops._list_diff.__doc__ - -# pylint: enable=protected-access +setdiff1d.__doc__ = gen_array_ops.list_diff.__doc__ @tf_export("broadcast_dynamic_shape") def broadcast_dynamic_shape(shape_x, shape_y): - # pylint: disable=protected-access """Returns the broadcasted dynamic shape between `shape_x` and `shape_y`. Args: @@ -242,8 +239,7 @@ def broadcast_dynamic_shape(shape_x, shape_y): Returns: A rank 1 integer `Tensor` representing the broadcasted shape. 
""" - return gen_array_ops._broadcast_args(shape_x, shape_y) - # pylint: enable=protected-access + return gen_array_ops.broadcast_args(shape_x, shape_y) @tf_export("broadcast_static_shape") @@ -399,7 +395,7 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32): with ops.name_scope(name, "Size", [input]) as name: if isinstance(input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): - return gen_math_ops._prod( + return gen_math_ops.prod( gen_math_ops.cast(input.dense_shape, out_type), 0, name=name) else: input_tensor = ops.convert_to_tensor(input) @@ -892,7 +888,7 @@ def parallel_stack(values, name="parallel_stack"): output_shape = tensor_shape.TensorShape([len(values)]) output_shape = output_shape.concatenate(value_shape) # expand_dims converts concat to stack. - return gen_array_ops._parallel_concat( + return gen_array_ops.parallel_concat( [expand_dims(value, 0) for value in values], shape=output_shape) @@ -950,7 +946,7 @@ def stack(values, axis=0, name="stack"): raise ValueError("axis = %d not in [%d, %d)" % (axis, -expanded_num_dims, expanded_num_dims)) - return gen_array_ops._pack(values, axis=axis, name=name) + return gen_array_ops.pack(values, axis=axis, name=name) # pylint: disable=invalid-name @@ -994,7 +990,7 @@ def _autopacking_helper(list_or_tuple, dtype, name): # convertible-to-tensor types, such as numpy arrays. 
elems_as_tensors.append( constant_op.constant(elem, dtype=dtype, name=str(i))) - return gen_array_ops._pack(elems_as_tensors, name=scope) + return gen_array_ops.pack(elems_as_tensors, name=scope) else: return converted_elems @@ -1089,7 +1085,7 @@ def unstack(value, num=None, axis=0, name="unstack"): num = value_shape[axis].value if num is None: raise ValueError("Cannot infer num from shape %s" % value_shape) - return gen_array_ops._unpack(value, num=num, axis=axis, name=name) + return gen_array_ops.unpack(value, num=num, axis=axis, name=name) @tf_export("concat") @@ -1186,7 +1182,7 @@ def concat(values, axis, name="concat"): dtype=dtypes.int32).get_shape().assert_is_compatible_with( tensor_shape.scalar()) return identity(values[0], name=scope) - return gen_array_ops._concat_v2(values=values, axis=axis, name=name) + return gen_array_ops.concat_v2(values=values, axis=axis, name=name) @tf_export("boolean_mask") @@ -1254,8 +1250,7 @@ def boolean_mask(tensor, mask, name="boolean_mask", axis=None): axis = 0 if axis is None else axis shape_tensor[axis:axis + ndims_mask].assert_is_compatible_with(shape_mask) - leading_size = gen_math_ops._prod( - shape(tensor)[axis:axis + ndims_mask], [0]) + leading_size = gen_math_ops.prod(shape(tensor)[axis:axis + ndims_mask], [0]) tensor = reshape(tensor, concat([ shape(tensor)[:axis], [leading_size], @@ -1319,10 +1314,10 @@ def unique(x, out_idx=dtypes.int32, name=None): # period (3 weeks) pass. # TODO(yongtang): The documentation should also # be updated when switch to v2. 
- return gen_array_ops._unique(x, out_idx, name) + return gen_array_ops.unique(x, out_idx, name) -unique.__doc__ = gen_array_ops._unique.__doc__ +unique.__doc__ = gen_array_ops.unique.__doc__ @tf_export("split") @@ -1376,7 +1371,7 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"): """ size_splits = ops.convert_to_tensor(num_or_size_splits) if size_splits._rank() == 0 and size_splits.dtype.is_integer: - return gen_array_ops._split( + return gen_array_ops.split( axis=axis, num_split=num_or_size_splits, value=value, name=name) if num is None: @@ -1386,12 +1381,8 @@ def split(value, num_or_size_splits, axis=0, num=None, name="split"): if num is None: raise ValueError("Cannot infer num from shape %s" % num_or_size_splits) - return gen_array_ops._split_v( - value=value, - size_splits=size_splits, - axis=axis, - num_split=num, - name=name) + return gen_array_ops.split_v( + value=value, size_splits=size_splits, axis=axis, num_split=num, name=name) @tf_export("transpose") @@ -1461,7 +1452,7 @@ def transpose(a, perm=None, name="transpose", conjugate=False): """ with ops.name_scope(name, "transpose", [a]) as name: transpose_fn = ( - gen_array_ops._conjugate_transpose + gen_array_ops.conjugate_transpose if (conjugate and a.dtype.is_complex) else gen_array_ops.transpose) if perm is None: rank = gen_array_ops.rank(a) @@ -1639,7 +1630,7 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): return zeros( shape_internal(tensor, optimize=optimize), dtype=dtype, name=name) with ops.device(tensor.device): - return gen_array_ops._zeros_like(tensor, name=name) + return gen_array_ops.zeros_like(tensor, name=name) # For now, variant types must be created via zeros_like; as we need to # pass the input variant object to the proper zeros callback. 
@@ -1654,7 +1645,7 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): return zeros( shape_internal(tensor, optimize=optimize), dtype=dtype, name=name) else: - return gen_array_ops._zeros_like(tensor, name=name) + return gen_array_ops.zeros_like(tensor, name=name) @tf_export("ones_like") @@ -1775,7 +1766,7 @@ def placeholder(dtype, shape=None, name=None): raise RuntimeError("tf.placeholder() is not compatible with " "eager execution.") - return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name) + return gen_array_ops.placeholder(dtype=dtype, shape=shape, name=name) # pylint: disable=redefined-outer-name @@ -1919,15 +1910,15 @@ def pad(tensor, paddings, mode="CONSTANT", name=None, constant_values=0): # pyl # TODO(rjryan): Once the forward compatibility period (3 weeks) have passed # remove the "Pad" fallback here. if constant_values != 0: - result = gen_array_ops._pad_v2( + result = gen_array_ops.pad_v2( tensor, paddings, constant_values, name=name) else: - result = gen_array_ops._pad(tensor, paddings, name=name) + result = gen_array_ops.pad(tensor, paddings, name=name) elif mode == "REFLECT": - result = gen_array_ops._mirror_pad( + result = gen_array_ops.mirror_pad( tensor, paddings, mode="REFLECT", name=name) elif mode == "SYMMETRIC": - result = gen_array_ops._mirror_pad( + result = gen_array_ops.mirror_pad( tensor, paddings, mode="SYMMETRIC", name=name) else: raise ValueError("Unknown padding mode: %s" % mode) @@ -2157,7 +2148,7 @@ def edit_distance(hypothesis, truth, normalize=True, name="edit_distance"): sparse_tensor.SparseTensorValue)): raise TypeError("Truth must be a SparseTensor.") - return gen_array_ops._edit_distance( + return gen_array_ops.edit_distance( hypothesis.indices, hypothesis.values, hypothesis.dense_shape, @@ -2294,7 +2285,7 @@ def space_to_batch(input, paddings, block_size, name=None): # pylint: disable=r return result -space_to_batch.__doc__ = gen_array_ops._space_to_batch.__doc__ +space_to_batch.__doc__ = 
gen_array_ops.space_to_batch.__doc__ @tf_export("space_to_depth") @@ -2324,7 +2315,7 @@ def batch_to_space(input, crops, block_size, name=None): # pylint: disable=rede return result -batch_to_space.__doc__ = gen_array_ops._batch_to_space.__doc__ +batch_to_space.__doc__ = gen_array_ops.batch_to_space.__doc__ @tf_export("one_hot") @@ -2468,8 +2459,8 @@ def one_hot(indices, raise TypeError("dtype {0} of on_value does not match " "dtype {1} of off_value".format(on_dtype, off_dtype)) - return gen_array_ops._one_hot(indices, depth, on_value, off_value, axis, - name) + return gen_array_ops.one_hot(indices, depth, on_value, off_value, axis, + name) def _all_dimensions(x): @@ -2597,7 +2588,7 @@ def squeeze(input, axis=None, name=None, squeeze_dims=None): axis = squeeze_dims if np.isscalar(axis): axis = [axis] - return gen_array_ops._squeeze(input, axis, name) + return gen_array_ops.squeeze(input, axis, name) @tf_export("where") @@ -2648,7 +2639,7 @@ def where(condition, x=None, y=None, name=None): condition, preferred_dtype=dtypes.bool, name="condition") return gen_array_ops.where(condition=condition, name=name) elif x is not None and y is not None: - return gen_math_ops._select(condition=condition, x=x, y=y, name=name) + return gen_math_ops.select(condition=condition, x=x, y=y, name=name) else: raise ValueError("x and y must both be non-None or both be None.") diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py index c2ee2b3832..4f65e3771c 100644 --- a/tensorflow/python/ops/batch_norm_benchmark.py +++ b/tensorflow/python/ops/batch_norm_benchmark.py @@ -41,9 +41,8 @@ def batch_norm_op(tensor, mean, variance, beta, gamma, scale): # _batch_norm_with_global_normalization is deprecated in v9 ops.get_default_graph().graph_def_versions.producer = 8 # pylint: disable=protected-access - return gen_nn_ops._batch_norm_with_global_normalization(tensor, mean, - variance, beta, gamma, - 0.001, scale) + return 
gen_nn_ops.batch_norm_with_global_normalization( + tensor, mean, variance, beta, gamma, 0.001, scale) # pylint: enable=protected-access diff --git a/tensorflow/python/ops/candidate_sampling_ops.py b/tensorflow/python/ops/candidate_sampling_ops.py index 220ef1754d..9ea1ea9c92 100644 --- a/tensorflow/python/ops/candidate_sampling_ops.py +++ b/tensorflow/python/ops/candidate_sampling_ops.py @@ -77,7 +77,7 @@ def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique, of each of `sampled_candidates`. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._uniform_candidate_sampler( + return gen_candidate_sampling_ops.uniform_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, seed=seed1, seed2=seed2, name=name) @@ -136,7 +136,7 @@ def log_uniform_candidate_sampler(true_classes, num_true, num_sampled, unique, of each of `sampled_candidates`. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._log_uniform_candidate_sampler( + return gen_candidate_sampling_ops.log_uniform_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, seed=seed1, seed2=seed2, name=name) @@ -193,7 +193,7 @@ def learned_unigram_candidate_sampler(true_classes, num_true, num_sampled, """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._learned_unigram_candidate_sampler( + return gen_candidate_sampling_ops.learned_unigram_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, seed=seed1, seed2=seed2, name=name) @@ -283,7 +283,7 @@ def fixed_unigram_candidate_sampler(true_classes, """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._fixed_unigram_candidate_sampler( + return gen_candidate_sampling_ops.fixed_unigram_candidate_sampler( true_classes, num_true, num_sampled, unique, range_max, vocab_file=vocab_file, distortion=distortion, num_reserved_ids=num_reserved_ids, num_shards=num_shards, shard=shard, @@ 
-321,7 +321,7 @@ def all_candidate_sampler(true_classes, num_true, num_sampled, unique, of each of `sampled_candidates`. All returned values are 1.0. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._all_candidate_sampler( + return gen_candidate_sampling_ops.all_candidate_sampler( true_classes, num_true, num_sampled, unique, seed=seed1, seed2=seed2, name=name) @@ -370,6 +370,6 @@ def compute_accidental_hits(true_classes, sampled_candidates, num_true, """ seed1, seed2 = random_seed.get_seed(seed) - return gen_candidate_sampling_ops._compute_accidental_hits( + return gen_candidate_sampling_ops.compute_accidental_hits( true_classes, sampled_candidates, num_true, seed=seed1, seed2=seed2, name=name) diff --git a/tensorflow/python/ops/control_flow_grad.py b/tensorflow/python/ops/control_flow_grad.py index 97b57177b2..21354b5ae8 100644 --- a/tensorflow/python/ops/control_flow_grad.py +++ b/tensorflow/python/ops/control_flow_grad.py @@ -28,7 +28,6 @@ from tensorflow.python.ops import math_ops # go/tf-wildcard-import # pylint: disable=wildcard-import,undefined-variable from tensorflow.python.ops.control_flow_ops import * -from tensorflow.python.ops.gen_control_flow_ops import * # pylint: enable=wildcard-import diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 215c6940df..689f7cdc8f 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -329,7 +329,7 @@ def exit(data, name=None): # pylint: disable=redefined-builtin data = ops.internal_convert_to_tensor_or_indexed_slices(data, as_ref=True) if isinstance(data, ops.Tensor): if data.dtype._is_ref_dtype: # pylint: disable=protected-access - return gen_control_flow_ops._ref_exit(data, name) + return gen_control_flow_ops.ref_exit(data, name) else: return gen_control_flow_ops._exit(data, name) else: @@ -371,17 +371,17 @@ def switch(data, pred, dtype=None, name=None): data, dtype=dtype, 
name="data", as_ref=True) pred = ops.convert_to_tensor(pred, name="pred") if isinstance(data, ops.Tensor): - return gen_control_flow_ops._switch(data, pred, name=name) + return gen_control_flow_ops.switch(data, pred, name=name) else: if not isinstance(data, (ops.IndexedSlices, sparse_tensor.SparseTensor)): raise TypeError("Type %s not supported" % type(data)) val, ind = data.values, data.indices - val_f, val_t = gen_control_flow_ops._switch(val, pred, name=name) - ind_f, ind_t = gen_control_flow_ops._switch(ind, pred, name="indices") + val_f, val_t = gen_control_flow_ops.switch(val, pred, name=name) + ind_f, ind_t = gen_control_flow_ops.switch(ind, pred, name="indices") if isinstance(data, ops.IndexedSlices): dense_shape = data.dense_shape if dense_shape is not None: - dense_shape_f, dense_shape_t = gen_control_flow_ops._switch( + dense_shape_f, dense_shape_t = gen_control_flow_ops.switch( dense_shape, pred, name="dense_shape") else: dense_shape_f, dense_shape_t = None, None @@ -389,7 +389,7 @@ def switch(data, pred, dtype=None, name=None): ops.IndexedSlices(val_t, ind_t, dense_shape_t)) else: dense_shape = data.dense_shape - dense_shape_f, dense_shape_t = gen_control_flow_ops._switch( + dense_shape_f, dense_shape_t = gen_control_flow_ops.switch( data.dense_shape, pred, name="dense_shape") return (sparse_tensor.SparseTensor(ind_f, val_f, dense_shape_f), sparse_tensor.SparseTensor(ind_t, val_t, dense_shape_t)) @@ -473,15 +473,15 @@ def merge(inputs, name=None): ] if all([isinstance(v, ops.Tensor) for v in inputs]): if all([v.dtype._is_ref_dtype for v in inputs]): # pylint: disable=protected-access - return gen_control_flow_ops._ref_merge(inputs, name) + return gen_control_flow_ops.ref_merge(inputs, name) else: - return gen_control_flow_ops._merge(inputs, name) + return gen_control_flow_ops.merge(inputs, name) elif all([isinstance(v, sparse_tensor.SparseTensor) for v in inputs]): # Only handle the case when all inputs are SparseTensor. 
values, _ = merge([inp.values for inp in inputs], name=name) - indices, chosen_index = gen_control_flow_ops._merge( + indices, chosen_index = gen_control_flow_ops.merge( [inp.indices for inp in inputs], name="indices") - dense_shape, _ = gen_control_flow_ops._merge( + dense_shape, _ = gen_control_flow_ops.merge( [inp.dense_shape for inp in inputs], name="dense_shape") return (sparse_tensor.SparseTensor(indices, values, dense_shape), chosen_index) @@ -489,13 +489,13 @@ def merge(inputs, name=None): # For now convert all the inputs as IndexedSlices. inputs = math_ops._as_indexed_slices_list(inputs, optimize=False) values, _ = merge([inp.values for inp in inputs], name=name) - indices, chosen_index = gen_control_flow_ops._merge( + indices, chosen_index = gen_control_flow_ops.merge( [inp.indices for inp in inputs], name="indices") if any(inp.dense_shape is not None for inp in inputs): if any(inp.dense_shape is None for inp in inputs): raise ValueError("Either all merged IndexedSlices must have a " "dense_shape, or none must have a dense_shape.") - dense_shape, _ = gen_control_flow_ops._merge( + dense_shape, _ = gen_control_flow_ops.merge( [inp.dense_shape for inp in inputs], name="dense_shape") else: dense_shape = None @@ -1015,10 +1015,8 @@ class GradLoopState(object): else: max_size = GetMaxSizeFromNestedMaximumIterations( value, self.forward_context) - # pylint: disable=protected-access - acc = gen_data_flow_ops._stack_v2( + acc = gen_data_flow_ops.stack_v2( max_size=max_size, elem_type=value.dtype.base_dtype, name="f_acc") - # pylint: enable=protected-access if curr_ctxt: curr_ctxt.Exit() @@ -1031,10 +1029,8 @@ class GradLoopState(object): if value_ctxt == self.forward_context: # value is not nested in the forward context. 
self.forward_context.Enter() - # pylint: disable=protected-access - push = gen_data_flow_ops._stack_push_v2( + push = gen_data_flow_ops.stack_push_v2( enter_acc, value, swap_memory=swap_enabled) - # pylint: enable=protected-access self.forward_context.Exit() # Protect stack push and order it before forward_index. self.forward_index.op._add_control_input(push.op) @@ -1046,18 +1042,14 @@ class GradLoopState(object): # The special case for creating a zero tensor for a dead # branch of a switch. See ControlFlowState.ZerosLike(). value_ctxt.outer_context.Enter() - # pylint: disable=protected-access - push = gen_data_flow_ops._stack_push_v2( + push = gen_data_flow_ops.stack_push_v2( enter_acc, value, swap_memory=swap_enabled) - # pylint: enable=protected-access value_ctxt.outer_context.Exit() push.op._set_control_flow_context(value_ctxt) else: value_ctxt.Enter() - # pylint: disable=protected-access - push = gen_data_flow_ops._stack_push_v2( + push = gen_data_flow_ops.stack_push_v2( enter_acc, value, swap_memory=swap_enabled) - # pylint: enable=protected-access value_ctxt.Exit() # Protect stack push and order it before forward_sync. 
self.forward_sync._add_control_input(push.op) @@ -1104,10 +1096,8 @@ class GradLoopState(object): pred = cond_ctxt.pred branch = (1 - cond_ctxt.branch) if dead_branch else cond_ctxt.branch history_value = _SwitchRefOrTensor(history_value, pred)[branch] - # pylint: disable=protected-access - pop = gen_data_flow_ops._stack_pop_v2(history_value, - value.dtype.base_dtype) - # pylint: enable=protected-access + pop = gen_data_flow_ops.stack_pop_v2(history_value, + value.dtype.base_dtype) pop.set_shape(value.get_shape()) self.grad_context.Exit() parallel_iterations = self.grad_context.parallel_iterations diff --git a/tensorflow/python/ops/ctc_ops.py b/tensorflow/python/ops/ctc_ops.py index 83da6739db..4b57e2de79 100644 --- a/tensorflow/python/ops/ctc_ops.py +++ b/tensorflow/python/ops/ctc_ops.py @@ -148,7 +148,7 @@ def ctc_loss(labels, inputs, sequence_length, if not time_major: inputs = array_ops.transpose(inputs, [1, 0, 2]) # (B,T,N) => (T,B,N) - loss, _ = gen_ctc_ops._ctc_loss( + loss, _ = gen_ctc_ops.ctc_loss( inputs, labels.indices, labels.values, @@ -224,7 +224,7 @@ def ctc_greedy_decoder(inputs, sequence_length, merge_repeated=True): sequence found, the negative of the sum of the greatest logit at each timeframe. 
""" - outputs = gen_ctc_ops._ctc_greedy_decoder( + outputs = gen_ctc_ops.ctc_greedy_decoder( inputs, sequence_length, merge_repeated=merge_repeated) (decoded_ix, decoded_val, decoded_shape, log_probabilities) = outputs return ([sparse_tensor.SparseTensor(decoded_ix, decoded_val, decoded_shape)], @@ -272,7 +272,7 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100, """ decoded_ixs, decoded_vals, decoded_shapes, log_probabilities = ( - gen_ctc_ops._ctc_beam_search_decoder( + gen_ctc_ops.ctc_beam_search_decoder( inputs, sequence_length, beam_width=beam_width, top_paths=top_paths, merge_repeated=merge_repeated)) diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 03ed537cfc..052caffd49 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -342,10 +342,10 @@ class QueueBase(object): val.get_shape().assert_is_compatible_with(shape) if self._queue_ref.dtype == _dtypes.resource: - return gen_data_flow_ops._queue_enqueue_v2( + return gen_data_flow_ops.queue_enqueue_v2( self._queue_ref, vals, name=scope) else: - return gen_data_flow_ops._queue_enqueue( + return gen_data_flow_ops.queue_enqueue( self._queue_ref, vals, name=scope) def enqueue_many(self, vals, name=None): @@ -387,7 +387,7 @@ class QueueBase(object): val.get_shape().with_rank_at_least(1)[0]) val.get_shape()[1:].assert_is_compatible_with(shape) - return gen_data_flow_ops._queue_enqueue_many_v2( + return gen_data_flow_ops.queue_enqueue_many_v2( self._queue_ref, vals, name=scope) def _dequeue_return_value(self, tensors): @@ -436,10 +436,10 @@ class QueueBase(object): if name is None: name = "%s_Dequeue" % self._name if self._queue_ref.dtype == _dtypes.resource: - ret = gen_data_flow_ops._queue_dequeue_v2( + ret = gen_data_flow_ops.queue_dequeue_v2( self._queue_ref, self._dtypes, name=name) else: - ret = gen_data_flow_ops._queue_dequeue( + ret = gen_data_flow_ops.queue_dequeue( self._queue_ref, 
self._dtypes, name=name) # NOTE(mrry): Not using a shape function because we need access to @@ -479,7 +479,7 @@ class QueueBase(object): if name is None: name = "%s_DequeueMany" % self._name - ret = gen_data_flow_ops._queue_dequeue_many_v2( + ret = gen_data_flow_ops.queue_dequeue_many_v2( self._queue_ref, n=n, component_types=self._dtypes, name=name) # NOTE(mrry): Not using a shape function because we need access to @@ -523,7 +523,7 @@ class QueueBase(object): if name is None: name = "%s_DequeueUpTo" % self._name - ret = gen_data_flow_ops._queue_dequeue_up_to_v2( + ret = gen_data_flow_ops.queue_dequeue_up_to_v2( self._queue_ref, n=n, component_types=self._dtypes, name=name) # NOTE(mrry): Not using a shape function because we need access to @@ -560,12 +560,12 @@ class QueueBase(object): if name is None: name = "%s_Close" % self._name if self._queue_ref.dtype == _dtypes.resource: - return gen_data_flow_ops._queue_close_v2( + return gen_data_flow_ops.queue_close_v2( self._queue_ref, cancel_pending_enqueues=cancel_pending_enqueues, name=name) else: - return gen_data_flow_ops._queue_close( + return gen_data_flow_ops.queue_close( self._queue_ref, cancel_pending_enqueues=cancel_pending_enqueues, name=name) @@ -601,9 +601,9 @@ class QueueBase(object): if name is None: name = "%s_Size" % self._name if self._queue_ref.dtype == _dtypes.resource: - return gen_data_flow_ops._queue_size_v2(self._queue_ref, name=name) + return gen_data_flow_ops.queue_size_v2(self._queue_ref, name=name) else: - return gen_data_flow_ops._queue_size(self._queue_ref, name=name) + return gen_data_flow_ops.queue_size(self._queue_ref, name=name) @tf_export("RandomShuffleQueue") @@ -683,7 +683,7 @@ class RandomShuffleQueue(QueueBase): # the id of the last op created.) 
string = (str(seed1) + shared_name).encode("utf-8") seed2 = int(hashlib.md5(string).hexdigest()[:8], 16) & 0x7FFFFFFF - queue_ref = gen_data_flow_ops._random_shuffle_queue_v2( + queue_ref = gen_data_flow_ops.random_shuffle_queue_v2( component_types=dtypes, shapes=shapes, capacity=capacity, @@ -748,7 +748,7 @@ class FIFOQueue(QueueBase): dtypes = _as_type_list(dtypes) shapes = _as_shape_list(shapes, dtypes) names = _as_name_list(names, dtypes) - queue_ref = gen_data_flow_ops._fifo_queue_v2( + queue_ref = gen_data_flow_ops.fifo_queue_v2( component_types=dtypes, shapes=shapes, capacity=capacity, @@ -827,7 +827,7 @@ class PaddingFIFOQueue(QueueBase): "but received %d dtypes and %d shapes." % (len(dtypes), len(shapes))) - queue_ref = gen_data_flow_ops._padding_fifo_queue_v2( + queue_ref = gen_data_flow_ops.padding_fifo_queue_v2( component_types=dtypes, shapes=shapes, capacity=capacity, @@ -895,7 +895,7 @@ class PriorityQueue(QueueBase): types = _as_type_list(types) shapes = _as_shape_list(shapes, types) - queue_ref = gen_data_flow_ops._priority_queue_v2( + queue_ref = gen_data_flow_ops.priority_queue_v2( component_types=types, shapes=shapes, capacity=capacity, @@ -985,7 +985,7 @@ class Barrier(object): else: self._shapes = [tensor_shape.unknown_shape() for _ in self._types] - self._barrier_ref = gen_data_flow_ops._barrier( + self._barrier_ref = gen_data_flow_ops.barrier( component_types=self._types, shapes=self._shapes, shared_name=shared_name, @@ -1026,7 +1026,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierInsertMany" % self._name - return gen_data_flow_ops._barrier_insert_many( + return gen_data_flow_ops.barrier_insert_many( self._barrier_ref, keys, values, component_index, name=name) def take_many(self, @@ -1073,7 +1073,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierTakeMany" % self._name - ret = gen_data_flow_ops._barrier_take_many( + ret = gen_data_flow_ops.barrier_take_many( self._barrier_ref, num_elements, self._types, @@ 
-1122,7 +1122,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierClose" % self._name - return gen_data_flow_ops._barrier_close( + return gen_data_flow_ops.barrier_close( self._barrier_ref, cancel_pending_enqueues=cancel_pending_enqueues, name=name) @@ -1139,7 +1139,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierReadySize" % self._name - return gen_data_flow_ops._barrier_ready_size(self._barrier_ref, name=name) + return gen_data_flow_ops.barrier_ready_size(self._barrier_ref, name=name) def incomplete_size(self, name=None): """Compute the number of incomplete elements in the given barrier. @@ -1153,7 +1153,7 @@ class Barrier(object): """ if name is None: name = "%s_BarrierIncompleteSize" % self._name - return gen_data_flow_ops._barrier_incomplete_size( + return gen_data_flow_ops.barrier_incomplete_size( self._barrier_ref, name=name) diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index ac03d30fcd..09a0e345f2 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -41,7 +41,7 @@ from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops.gen_functional_ops import * # pylint: enable=wildcard-import # pylint: disable=unused-import -from tensorflow.python.ops.gen_functional_ops import _symbolic_gradient +from tensorflow.python.ops.gen_functional_ops import symbolic_gradient # pylint: enable=unused-import from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index 227316a01e..be61014395 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -356,7 +356,7 @@ def _SymGrad(op, out_grads): for k in op.node_def.attr: f.attr[k].CopyFrom(op.node_def.attr[k]) # pylint: disable=protected-access - in_grads = functional_ops._symbolic_gradient(input=f_in, 
Tout=f_types, f=f) + in_grads = functional_ops.symbolic_gradient(input=f_in, Tout=f_types, f=f) # pylint: enable=protected-access return in_grads diff --git a/tensorflow/python/ops/histogram_ops.py b/tensorflow/python/ops/histogram_ops.py index 6a975160b0..4a1ef54fb5 100644 --- a/tensorflow/python/ops/histogram_ops.py +++ b/tensorflow/python/ops/histogram_ops.py @@ -141,5 +141,7 @@ def histogram_fixed_width(values, """ with ops.name_scope(name, 'histogram_fixed_width', [values, value_range, nbins]) as name: - return gen_math_ops._histogram_fixed_width( # pylint: disable=protected-access + # pylint: disable=protected-access + return gen_math_ops._histogram_fixed_width( values, value_range, nbins, dtype=dtype, name=name) + # pylint: enable=protected-access diff --git a/tensorflow/python/ops/image_grad.py b/tensorflow/python/ops/image_grad.py index 093843cd5b..9f43e3f146 100644 --- a/tensorflow/python/ops/image_grad.py +++ b/tensorflow/python/ops/image_grad.py @@ -41,12 +41,10 @@ def _ResizeNearestNeighborGrad(op, grad): else: image_shape = array_ops.shape(image)[1:3] - # pylint: disable=protected-access - grads = gen_image_ops._resize_nearest_neighbor_grad( + grads = gen_image_ops.resize_nearest_neighbor_grad( grad, image_shape, align_corners=op.get_attr("align_corners")) - # pylint: enable=protected-access return [grads, None] @@ -61,10 +59,8 @@ def _ResizeBilinearGrad(op, grad): Returns: The gradients w.r.t. the input. 
""" - # pylint: disable=protected-access - grad0 = gen_image_ops._resize_bilinear_grad( + grad0 = gen_image_ops.resize_bilinear_grad( grad, op.inputs[0], align_corners=op.get_attr("align_corners")) - # pylint: enable=protected-access return [grad0, None] @@ -82,10 +78,8 @@ def _ResizeBicubicGrad(op, grad): allowed_types = [dtypes.float32, dtypes.float64] grad0 = None if op.inputs[0].dtype in allowed_types: - # pylint: disable=protected-access - grad0 = gen_image_ops._resize_bicubic_grad( + grad0 = gen_image_ops.resize_bicubic_grad( grad, op.inputs[0], align_corners=op.get_attr("align_corners")) - # pylint: enable=protected-access return [grad0, None] diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index 53bd108c44..ca8806a095 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -1115,10 +1115,8 @@ def adjust_contrast(images, contrast_factor): orig_dtype = images.dtype flt_images = convert_image_dtype(images, dtypes.float32) - # pylint: disable=protected-access - adjusted = gen_image_ops._adjust_contrastv2( + adjusted = gen_image_ops.adjust_contrastv2( flt_images, contrast_factor=contrast_factor, name=name) - # pylint: enable=protected-access return convert_image_dtype(adjusted, orig_dtype, saturate=True) @@ -1732,7 +1730,7 @@ def sample_distorted_bounding_box(image_size, Provide as input to `tf.image.draw_bounding_boxes`. 
""" with ops.name_scope(name, 'sample_distorted_bounding_box'): - return gen_image_ops._sample_distorted_bounding_box_v2( # pylint: disable=protected-access + return gen_image_ops.sample_distorted_bounding_box_v2( image_size, bounding_boxes, seed=seed, @@ -1786,10 +1784,8 @@ def non_max_suppression(boxes, """ with ops.name_scope(name, 'non_max_suppression'): iou_threshold = ops.convert_to_tensor(iou_threshold, name='iou_threshold') - # pylint: disable=protected-access - return gen_image_ops._non_max_suppression_v2(boxes, scores, max_output_size, - iou_threshold) - # pylint: enable=protected-access + return gen_image_ops.non_max_suppression_v2(boxes, scores, max_output_size, + iou_threshold) _rgb_to_yiq_kernel = [[0.299, 0.59590059, diff --git a/tensorflow/python/ops/io_ops.py b/tensorflow/python/ops/io_ops.py index 5e70b3186f..7c782c12a5 100644 --- a/tensorflow/python/ops/io_ops.py +++ b/tensorflow/python/ops/io_ops.py @@ -111,10 +111,10 @@ def _save(filename, tensor_names, tensors, tensor_slices=None, name="save"): An Operation that saves the tensors. """ if tensor_slices is None: - return gen_io_ops._save(filename, tensor_names, tensors, name=name) + return gen_io_ops.save(filename, tensor_names, tensors, name=name) else: - return gen_io_ops._save_slices(filename, tensor_names, tensor_slices, - tensors, name=name) + return gen_io_ops.save_slices(filename, tensor_names, tensor_slices, + tensors, name=name) def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type, @@ -136,7 +136,7 @@ def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type, A tensor of type "tensor_type". 
""" base_type = dtypes.as_dtype(tensor_type).base_dtype - return gen_io_ops._restore_slice( + return gen_io_ops.restore_slice( file_pattern, tensor_name, shape_and_slice, base_type, preferred_shard, name=name) @@ -208,12 +208,12 @@ class ReaderBase(object): else: queue_ref = queue.queue_ref if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_read_v2(self._reader_ref, queue_ref, name=name) + return gen_io_ops.reader_read_v2(self._reader_ref, queue_ref, name=name) else: # For compatibility with pre-resource queues, create a ref(string) tensor # which can be looked up as the same queue by a resource manager. - old_queue_op = gen_data_flow_ops._fake_queue(queue_ref) - return gen_io_ops._reader_read(self._reader_ref, old_queue_op, name=name) + old_queue_op = gen_data_flow_ops.fake_queue(queue_ref) + return gen_io_ops.reader_read(self._reader_ref, old_queue_op, name=name) def read_up_to(self, queue, num_records, # pylint: disable=invalid-name name=None): @@ -240,18 +240,18 @@ class ReaderBase(object): else: queue_ref = queue.queue_ref if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_read_up_to_v2(self._reader_ref, - queue_ref, - num_records, - name=name) + return gen_io_ops.reader_read_up_to_v2(self._reader_ref, + queue_ref, + num_records, + name=name) else: # For compatibility with pre-resource queues, create a ref(string) tensor # which can be looked up as the same queue by a resource manager. - old_queue_op = gen_data_flow_ops._fake_queue(queue_ref) - return gen_io_ops._reader_read_up_to(self._reader_ref, - old_queue_op, - num_records, - name=name) + old_queue_op = gen_data_flow_ops.fake_queue(queue_ref) + return gen_io_ops.reader_read_up_to(self._reader_ref, + old_queue_op, + num_records, + name=name) def num_records_produced(self, name=None): """Returns the number of records this reader has produced. 
@@ -267,11 +267,11 @@ class ReaderBase(object): """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_num_records_produced_v2(self._reader_ref, - name=name) + return gen_io_ops.reader_num_records_produced_v2(self._reader_ref, + name=name) else: - return gen_io_ops._reader_num_records_produced(self._reader_ref, - name=name) + return gen_io_ops.reader_num_records_produced(self._reader_ref, + name=name) def num_work_units_completed(self, name=None): """Returns the number of work units this reader has finished processing. @@ -283,11 +283,11 @@ class ReaderBase(object): An int64 Tensor. """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_num_work_units_completed_v2(self._reader_ref, - name=name) + return gen_io_ops.reader_num_work_units_completed_v2(self._reader_ref, + name=name) else: - return gen_io_ops._reader_num_work_units_completed(self._reader_ref, - name=name) + return gen_io_ops.reader_num_work_units_completed(self._reader_ref, + name=name) def serialize_state(self, name=None): """Produce a string tensor that encodes the state of a reader. @@ -302,9 +302,9 @@ class ReaderBase(object): A string Tensor. """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_serialize_state_v2(self._reader_ref, name=name) + return gen_io_ops.reader_serialize_state_v2(self._reader_ref, name=name) else: - return gen_io_ops._reader_serialize_state(self._reader_ref, name=name) + return gen_io_ops.reader_serialize_state(self._reader_ref, name=name) def restore_state(self, state, name=None): """Restore a reader to a previously saved state. @@ -321,11 +321,10 @@ class ReaderBase(object): The created Operation. 
""" if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_restore_state_v2( + return gen_io_ops.reader_restore_state_v2( self._reader_ref, state, name=name) else: - return gen_io_ops._reader_restore_state( - self._reader_ref, state, name=name) + return gen_io_ops.reader_restore_state(self._reader_ref, state, name=name) @property def supports_serialize(self): @@ -342,9 +341,9 @@ class ReaderBase(object): The created Operation. """ if self._reader_ref.dtype == dtypes.resource: - return gen_io_ops._reader_reset_v2(self._reader_ref, name=name) + return gen_io_ops.reader_reset_v2(self._reader_ref, name=name) else: - return gen_io_ops._reader_reset(self._reader_ref, name=name) + return gen_io_ops.reader_reset(self._reader_ref, name=name) ops.NotDifferentiable("ReaderRead") @@ -377,7 +376,7 @@ class WholeFileReader(ReaderBase): Args: name: A name for the operation (optional). """ - rr = gen_io_ops._whole_file_reader_v2(name=name) + rr = gen_io_ops.whole_file_reader_v2(name=name) super(WholeFileReader, self).__init__(rr, supports_serialize=True) @@ -406,8 +405,8 @@ class TextLineReader(ReaderBase): to skip from the beginning of every file. name: A name for the operation (optional). """ - rr = gen_io_ops._text_line_reader_v2(skip_header_lines=skip_header_lines, - name=name) + rr = gen_io_ops.text_line_reader_v2(skip_header_lines=skip_header_lines, + name=name) super(TextLineReader, self).__init__(rr) @@ -444,7 +443,7 @@ class FixedLengthRecordReader(ReaderBase): name: A name for the operation (optional). encoding: The type of encoding for the file. Defaults to none. 
""" - rr = gen_io_ops._fixed_length_record_reader_v2( + rr = gen_io_ops.fixed_length_record_reader_v2( record_bytes=record_bytes, header_bytes=header_bytes, footer_bytes=footer_bytes, @@ -480,7 +479,7 @@ class TFRecordReader(ReaderBase): compression_type = python_io.TFRecordOptions.get_compression_type_string( options) - rr = gen_io_ops._tf_record_reader_v2( + rr = gen_io_ops.tf_record_reader_v2( name=name, compression_type=compression_type) super(TFRecordReader, self).__init__(rr) @@ -506,7 +505,7 @@ class LMDBReader(ReaderBase): name: A name for the operation (optional). options: A LMDBRecordOptions object (optional). """ - rr = gen_io_ops._lmdb_reader(name=name) + rr = gen_io_ops.lmdb_reader(name=name) super(LMDBReader, self).__init__(rr) @@ -534,7 +533,7 @@ class IdentityReader(ReaderBase): Args: name: A name for the operation (optional). """ - rr = gen_io_ops._identity_reader_v2(name=name) + rr = gen_io_ops.identity_reader_v2(name=name) super(IdentityReader, self).__init__(rr, supports_serialize=True) diff --git a/tensorflow/python/ops/linalg/linalg_impl.py b/tensorflow/python/ops/linalg/linalg_impl.py index d5bd916f80..2be2d5a3d4 100644 --- a/tensorflow/python/ops/linalg/linalg_impl.py +++ b/tensorflow/python/ops/linalg/linalg_impl.py @@ -31,18 +31,16 @@ band_part = array_ops.matrix_band_part cholesky = linalg_ops.cholesky cholesky_solve = linalg_ops.cholesky_solve det = linalg_ops.matrix_determinant -# pylint: disable=protected-access -slogdet = gen_linalg_ops._log_matrix_determinant -# pylint: disable=protected-access +slogdet = gen_linalg_ops.log_matrix_determinant diag = array_ops.matrix_diag diag_part = array_ops.matrix_diag_part eigh = linalg_ops.self_adjoint_eig eigvalsh = linalg_ops.self_adjoint_eigvals einsum = special_math_ops.einsum -expm = gen_linalg_ops._matrix_exponential +expm = gen_linalg_ops.matrix_exponential eye = linalg_ops.eye inv = linalg_ops.matrix_inverse -logm = gen_linalg_ops._matrix_logarithm +logm = gen_linalg_ops.matrix_logarithm 
lstsq = linalg_ops.matrix_solve_ls norm = linalg_ops.norm qr = linalg_ops.qr diff --git a/tensorflow/python/ops/linalg_ops.py b/tensorflow/python/ops/linalg_ops.py index 9803eed6ae..37470e00d7 100644 --- a/tensorflow/python/ops/linalg_ops.py +++ b/tensorflow/python/ops/linalg_ops.py @@ -248,7 +248,7 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None): and l2_regularizer != 0 due to poor accuracy. """ - # pylint: disable=protected-access,long-lambda + # pylint: disable=long-lambda def _use_composite_impl(fast, tensor_shape): """Determines whether to use the composite or specialized CPU kernel. @@ -323,9 +323,8 @@ def matrix_solve_ls(matrix, rhs, l2_regularizer=0.0, fast=True, name=None): if _use_composite_impl(fast, tensor_shape): return _composite_impl(matrix, rhs, l2_regularizer) else: - return gen_linalg_ops._matrix_solve_ls( + return gen_linalg_ops.matrix_solve_ls( matrix, rhs, l2_regularizer, fast=fast, name=name) - # pylint: enable=protected-access @tf_export('self_adjoint_eig', 'linalg.eigh') @@ -346,8 +345,7 @@ def self_adjoint_eig(tensor, name=None): v: Eigenvectors. Shape is `[..., N, N]`. The columns of the inner most matrices contain eigenvectors of the corresponding matrices in `tensor` """ - # pylint: disable=protected-access - e, v = gen_linalg_ops._self_adjoint_eig_v2(tensor, compute_v=True, name=name) + e, v = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=True, name=name) return e, v @@ -369,8 +367,7 @@ def self_adjoint_eigvals(tensor, name=None): e: Eigenvalues. Shape is `[..., N]`. The vector `e[..., :]` contains the `N` eigenvalues of `tensor[..., :, :]`. 
""" - # pylint: disable=protected-access - e, _ = gen_linalg_ops._self_adjoint_eig_v2(tensor, compute_v=False, name=name) + e, _ = gen_linalg_ops.self_adjoint_eig_v2(tensor, compute_v=False, name=name) return e @@ -435,10 +432,8 @@ def svd(tensor, full_matrices=False, compute_uv=True, name=None): ```` @end_compatibility """ - # pylint: disable=protected-access - s, u, v = gen_linalg_ops._svd( + s, u, v = gen_linalg_ops.svd( tensor, compute_uv=compute_uv, full_matrices=full_matrices, name=name) - # pylint: enable=protected-access if compute_uv: return math_ops.real(s), u, v else: diff --git a/tensorflow/python/ops/logging_ops.py b/tensorflow/python/ops/logging_ops.py index 3757109c95..a7ea7dc6e1 100644 --- a/tensorflow/python/ops/logging_ops.py +++ b/tensorflow/python/ops/logging_ops.py @@ -170,7 +170,7 @@ def image_summary(tag, tensor, max_images=3, collections=None, name=None): buffer. """ with ops.name_scope(name, "ImageSummary", [tag, tensor]) as scope: - val = gen_logging_ops._image_summary( + val = gen_logging_ops.image_summary( tag=tag, tensor=tensor, max_images=max_images, name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val @@ -226,11 +226,12 @@ def audio_summary(tag, with ops.name_scope(name, "AudioSummary", [tag, tensor]) as scope: sample_rate = ops.convert_to_tensor(sample_rate, dtype=dtypes.float32, name="sample_rate") - val = gen_logging_ops._audio_summary_v2(tag=tag, - tensor=tensor, - max_outputs=max_outputs, - sample_rate=sample_rate, - name=scope) + val = gen_logging_ops.audio_summary_v2( + tag=tag, + tensor=tensor, + max_outputs=max_outputs, + sample_rate=sample_rate, + name=scope) _Collect(val, collections, [ops.GraphKeys.SUMMARIES]) return val @@ -263,7 +264,7 @@ def merge_summary(inputs, collections=None, name=None): buffer resulting from the merging. 
""" with ops.name_scope(name, "MergeSummary", inputs): - val = gen_logging_ops._merge_summary(inputs=inputs, name=name) + val = gen_logging_ops.merge_summary(inputs=inputs, name=name) _Collect(val, collections, []) return val diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index f539a7bb68..baf7cc19fa 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -196,9 +196,7 @@ class InitializableLookupTableBase(LookupInterface): """ with ops.name_scope(name, "%s_Size" % self._name, [self._table_ref]) as scope: - # pylint: disable=protected-access - return gen_lookup_ops._lookup_table_size_v2(self._table_ref, name=scope) - # pylint: enable=protected-access + return gen_lookup_ops.lookup_table_size_v2(self._table_ref, name=scope) def lookup(self, keys, name=None): """Looks up `keys` in a table, outputs the corresponding values. @@ -227,10 +225,8 @@ class InitializableLookupTableBase(LookupInterface): with ops.name_scope(name, "%s_Lookup" % self._name, (self._table_ref, key_tensor, self._default_value)) as scope: - # pylint: disable=protected-access - values = gen_lookup_ops._lookup_table_find_v2( + values = gen_lookup_ops.lookup_table_find_v2( self._table_ref, key_tensor, self._default_value, name=scope) - # pylint: enable=protected-access values.set_shape(key_tensor.get_shape()) if isinstance(keys, sparse_tensor.SparseTensor): @@ -274,13 +270,11 @@ class HashTable(InitializableLookupTableBase): """ with ops.name_scope(name, "hash_table", (initializer, default_value)) as scope: - # pylint: disable=protected-access - table_ref = gen_lookup_ops._hash_table_v2( + table_ref = gen_lookup_ops.hash_table_v2( shared_name=shared_name, key_dtype=initializer.key_dtype, value_dtype=initializer.value_dtype, name=scope) - # pylint: enable=protected-access super(HashTable, self).__init__(table_ref, default_value, initializer) @@ -352,10 +346,8 @@ class KeyValueTensorInitializer(TableInitializerBase): with 
ops.name_scope( self._name, values=(table.table_ref, self._keys, self._values)) as scope: - # pylint: disable=protected-access - init_op = gen_lookup_ops._initialize_table_v2( + init_op = gen_lookup_ops.initialize_table_v2( table.table_ref, self._keys, self._values, name=scope) - # pylint: enable=protected-access ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op) return init_op @@ -518,8 +510,7 @@ class TextFileInitializer(TableInitializerBase): (table.table_ref,)) as scope: filename = ops.convert_to_tensor( self._filename, dtypes.string, name="asset_filepath") - # pylint: disable=protected-access - init_op = gen_lookup_ops._initialize_table_from_text_file_v2( + init_op = gen_lookup_ops.initialize_table_from_text_file_v2( table.table_ref, filename, self._key_index, @@ -527,7 +518,6 @@ class TextFileInitializer(TableInitializerBase): -1 if self._vocab_size is None else self._vocab_size, self._delimiter, name=scope) - # pylint: enable=protected-access ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op) # If the filename tensor is anything other than a string constant (e.g., if # it is a placeholder) then it does not make sense to track it as an asset. 
diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index bf28f74153..51e19b4ad3 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -302,16 +302,14 @@ def _NegGrad(_, grad): def _InvGrad(op, grad): """Returns -grad * (1 / x^2).""" y = op.outputs[0] # y = 1 / x - # pylint: disable=protected-access - return gen_math_ops._reciprocal_grad(y, grad) + return gen_math_ops.reciprocal_grad(y, grad) @ops.RegisterGradient("Reciprocal") def _ReciprocalGrad(op, grad): """Returns -grad * (1 / x^2).""" y = op.outputs[0] # y = 1 / x - # pylint: disable=protected-access - return gen_math_ops._reciprocal_grad(y, grad) + return gen_math_ops.reciprocal_grad(y, grad) @ops.RegisterGradient("InvGrad") @@ -321,8 +319,7 @@ def _InvGradGrad(op, grad): with ops.control_dependencies([grad]): ca = math_ops.conj(op.inputs[0]) cg = math_ops.conj(grad) - # pylint: disable=protected-access - return cg * -2.0 * b * ca, gen_math_ops._reciprocal_grad(ca, grad) + return cg * -2.0 * b * ca, gen_math_ops.reciprocal_grad(ca, grad) @ops.RegisterGradient("ReciprocalGrad") @@ -332,8 +329,7 @@ def _ReciprocalGradGrad(op, grad): with ops.control_dependencies([grad]): ca = math_ops.conj(op.inputs[0]) cg = math_ops.conj(grad) - # pylint: disable=protected-access - return cg * -2.0 * b * ca, gen_math_ops._reciprocal_grad(ca, grad) + return cg * -2.0 * b * ca, gen_math_ops.reciprocal_grad(ca, grad) @ops.RegisterGradient("Square") @@ -348,9 +344,7 @@ def _SquareGrad(op, grad): @ops.RegisterGradient("Sqrt") def _SqrtGrad(op, grad): y = op.outputs[0] # y = x^(1/2) - # pylint: disable=protected-access - return gen_math_ops._sqrt_grad(y, grad) - # pylint: enable=protected-access + return gen_math_ops.sqrt_grad(y, grad) @ops.RegisterGradient("SqrtGrad") @@ -366,9 +360,7 @@ def _SqrtGradGrad(op, grad): def _RsqrtGrad(op, grad): """Returns -0.5 * grad * conj(y)^3.""" y = op.outputs[0] # y = x^(-1/2) - # pylint: disable=protected-access - return 
gen_math_ops._rsqrt_grad(y, grad) - # pylint: enable=protected-access + return gen_math_ops.rsqrt_grad(y, grad) @ops.RegisterGradient("RsqrtGrad") @@ -380,8 +372,7 @@ def _RsqrtGradGrad(op, grad): ca = math_ops.conj(a) cg = math_ops.conj(grad) grad_a = -1.5 * cg * b * math_ops.square(ca) - # pylint: disable=protected-access - grad_b = gen_math_ops._rsqrt_grad(ca, grad) + grad_b = gen_math_ops.rsqrt_grad(ca, grad) return grad_a, grad_b @@ -446,8 +437,7 @@ def _TanhGrad(op, grad): y = op.outputs[0] # y = tanh(x) with ops.control_dependencies([grad]): y = math_ops.conj(y) - # pylint: disable=protected-access - return gen_math_ops._tanh_grad(y, grad) + return gen_math_ops.tanh_grad(y, grad) @ops.RegisterGradient("Asinh") @@ -485,8 +475,7 @@ def _TanhGradGrad(op, grad): with ops.control_dependencies([grad]): a = math_ops.conj(op.inputs[0]) b = math_ops.conj(op.inputs[1]) - # pylint: disable=protected-access - return grad * -2.0 * b * a, gen_math_ops._tanh_grad(a, grad) + return grad * -2.0 * b * a, gen_math_ops.tanh_grad(a, grad) @ops.RegisterGradient("Erf") @@ -634,8 +623,7 @@ def _SigmoidGrad(op, grad): y = op.outputs[0] # y = sigmoid(x) with ops.control_dependencies([grad]): y = math_ops.conj(y) - # pylint: disable=protected-access - return gen_math_ops._sigmoid_grad(y, grad) + return gen_math_ops.sigmoid_grad(y, grad) @ops.RegisterGradient("SigmoidGrad") @@ -644,8 +632,7 @@ def _SigmoidGradGrad(op, grad): a = math_ops.conj(op.inputs[0]) b = math_ops.conj(op.inputs[1]) gb = grad * b - # pylint: disable=protected-access - return gb - 2.0 * gb * a, gen_math_ops._sigmoid_grad(a, grad) + return gb - 2.0 * gb * a, gen_math_ops.sigmoid_grad(a, grad) @ops.RegisterGradient("Sign") @@ -792,7 +779,7 @@ def _MulGrad(op, grad): if (isinstance(grad, ops.Tensor) and _ShapesFullySpecifiedAndEqual(x, y, grad) and grad.dtype in (dtypes.int32, dtypes.float32)): - return gen_math_ops._mul(grad, y), gen_math_ops._mul(grad, x) + return gen_math_ops.mul(grad, y), gen_math_ops.mul(grad, x) 
assert x.dtype.base_dtype == y.dtype.base_dtype, (x.dtype, " vs. ", y.dtype) sx = array_ops.shape(x) sy = array_ops.shape(y) @@ -800,9 +787,9 @@ def _MulGrad(op, grad): x = math_ops.conj(x) y = math_ops.conj(y) return (array_ops.reshape( - math_ops.reduce_sum(gen_math_ops._mul(grad, y), rx), sx), + math_ops.reduce_sum(gen_math_ops.mul(grad, y), rx), sx), array_ops.reshape( - math_ops.reduce_sum(gen_math_ops._mul(x, grad), ry), sy)) + math_ops.reduce_sum(gen_math_ops.mul(x, grad), ry), sy)) # pylint: enable=protected-access @@ -976,20 +963,18 @@ def _MatMulGrad(op, grad): t_b = op.get_attr("transpose_b") a = math_ops.conj(op.inputs[0]) b = math_ops.conj(op.inputs[1]) - # pylint: disable=protected-access if not t_a and not t_b: - grad_a = gen_math_ops._mat_mul(grad, b, transpose_b=True) - grad_b = gen_math_ops._mat_mul(a, grad, transpose_a=True) + grad_a = gen_math_ops.mat_mul(grad, b, transpose_b=True) + grad_b = gen_math_ops.mat_mul(a, grad, transpose_a=True) elif not t_a and t_b: - grad_a = gen_math_ops._mat_mul(grad, b) - grad_b = gen_math_ops._mat_mul(grad, a, transpose_a=True) + grad_a = gen_math_ops.mat_mul(grad, b) + grad_b = gen_math_ops.mat_mul(grad, a, transpose_a=True) elif t_a and not t_b: - grad_a = gen_math_ops._mat_mul(b, grad, transpose_b=True) - grad_b = gen_math_ops._mat_mul(a, grad) + grad_a = gen_math_ops.mat_mul(b, grad, transpose_b=True) + grad_b = gen_math_ops.mat_mul(a, grad) elif t_a and t_b: - grad_a = gen_math_ops._mat_mul(b, grad, transpose_a=True, transpose_b=True) - grad_b = gen_math_ops._mat_mul(grad, a, transpose_a=True, transpose_b=True) - # pylint: enable=protected-access + grad_a = gen_math_ops.mat_mul(b, grad, transpose_a=True, transpose_b=True) + grad_b = gen_math_ops.mat_mul(grad, a, transpose_a=True, transpose_b=True) return grad_a, grad_b diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index c3899c7e12..14d6862919 100644 --- a/tensorflow/python/ops/math_ops.py +++ 
b/tensorflow/python/ops/math_ops.py @@ -89,8 +89,6 @@ See the @{$python/math_ops} guide. @@matrix_inverse @@cholesky @@cholesky_solve -@@matrix_exponential -@@matrix_logarithm @@matrix_solve @@matrix_triangular_solve @@matrix_solve_ls @@ -260,7 +258,7 @@ def abs(x, name=None): # pylint: disable=redefined-builtin with ops.name_scope(name, "Abs", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): if x.values.dtype.is_complex: - x_abs = gen_math_ops._complex_abs( + x_abs = gen_math_ops.complex_abs( x.values, Tout=x.values.dtype.real_dtype, name=name) return sparse_tensor.SparseTensor( indices=x.indices, values=x_abs, dense_shape=x.dense_shape) @@ -270,7 +268,7 @@ def abs(x, name=None): # pylint: disable=redefined-builtin else: x = ops.convert_to_tensor(x, name="x") if x.dtype.is_complex: - return gen_math_ops._complex_abs(x, Tout=x.dtype.real_dtype, name=name) + return gen_math_ops.complex_abs(x, Tout=x.dtype.real_dtype, name=name) return gen_math_ops._abs(x, name=name) @@ -279,7 +277,7 @@ def abs(x, name=None): # pylint: disable=redefined-builtin # pylint: disable=redefined-builtin def _bucketize(input, boundaries, name=None): - return gen_math_ops._bucketize(input=input, boundaries=boundaries, name=name) + return gen_math_ops.bucketize(input=input, boundaries=boundaries, name=name) # pylint: enable=redefined-builtin @@ -322,10 +320,10 @@ def divide(x, y, name=None): @tf_export("multiply") def multiply(x, y, name=None): - return gen_math_ops._mul(x, y, name) + return gen_math_ops.mul(x, y, name) -multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`") +multiply.__doc__ = gen_math_ops.mul.__doc__.replace("Mul", "`tf.multiply`") # TODO(aselle): put deprecation in after another round of global code changes @@ -333,19 +331,19 @@ multiply.__doc__ = gen_math_ops._mul.__doc__.replace("Mul", "`tf.multiply`") "2016-12-30", "`tf.mul(x, y)` is deprecated, please use `tf.multiply(x, y)` or `x * y`") def _mul(x, y, name=None): - return 
gen_math_ops._mul(x, y, name) + return gen_math_ops.mul(x, y, name) _mul.__doc__ = ( - gen_math_ops._mul.__doc__ + ("" if _mul.__doc__ is None else _mul.__doc__)) + gen_math_ops.mul.__doc__ + ("" if _mul.__doc__ is None else _mul.__doc__)) @tf_export("subtract") def subtract(x, y, name=None): - return gen_math_ops._sub(x, y, name) + return gen_math_ops.sub(x, y, name) -subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`") +subtract.__doc__ = gen_math_ops.sub.__doc__.replace("`Sub`", "`tf.subtract`") # TODO(aselle): put deprecation in after another round of global code changes @@ -353,11 +351,11 @@ subtract.__doc__ = gen_math_ops._sub.__doc__.replace("`Sub`", "`tf.subtract`") "2016-12-30", "`tf.sub(x, y)` is deprecated, please use `tf.subtract(x, y)` or `x - y`") def _sub(x, y, name=None): - return gen_math_ops._sub(x, y, name) + return gen_math_ops.sub(x, y, name) _sub.__doc__ = ( - gen_math_ops._sub.__doc__ + ("" if _sub.__doc__ is None else _sub.__doc__)) + gen_math_ops.sub.__doc__ + ("" if _sub.__doc__ is None else _sub.__doc__)) # pylint: disable=g-docstring-has-escape @@ -377,11 +375,11 @@ def negative(x, name=None): """ with ops.name_scope(name, "Neg", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): - x_neg = gen_math_ops._neg(x.values, name=name) + x_neg = gen_math_ops.neg(x.values, name=name) return sparse_tensor.SparseTensor( indices=x.indices, values=x_neg, dense_shape=x.dense_shape) else: - return gen_math_ops._neg(x, name=name) + return gen_math_ops.neg(x, name=name) # pylint: enable=g-docstring-has-escape @@ -895,7 +893,7 @@ def to_bfloat16(x, name="ToBFloat16"): return cast(x, dtypes.bfloat16, name=name) -ops.Tensor._override_operator("__neg__", gen_math_ops._neg) +ops.Tensor._override_operator("__neg__", gen_math_ops.neg) ops.Tensor._override_operator("__abs__", abs) # __invert__ corresponds to the ~ operator. Here we follow the numpy convention # ~ marks an elementwise bit-wise inverse. 
This is only implemented for boolean @@ -1024,7 +1022,7 @@ def _truediv_python3(x, y, name=None): if dtype is not None: x = cast(x, dtype) y = cast(y, dtype) - return gen_math_ops._real_div(x, y, name=name) + return gen_math_ops.real_div(x, y, name=name) def _div_python2(x, y, name=None): @@ -1047,9 +1045,9 @@ def _div_python2(x, y, name=None): raise TypeError("x and y must have the same dtype, got %r != %r" % (x_dtype, y_dtype)) if x_dtype.is_floating or x_dtype.is_complex: - return gen_math_ops._real_div(x, y, name=name) + return gen_math_ops.real_div(x, y, name=name) else: - return gen_math_ops._floor_div(x, y, name=name) + return gen_math_ops.floor_div(x, y, name=name) @tf_export("truediv") @@ -1107,7 +1105,7 @@ def div(x, y, name=None): # TODO(aselle): This should be removed -mod = gen_math_ops._floor_mod +mod = gen_math_ops.floor_mod # TODO(aselle): Deprecate this once all internal functionality uses @@ -1140,22 +1138,22 @@ def floordiv(x, y, name=None): TypeError: If the inputs are complex. """ with ops.name_scope(name, "floordiv", [x, y]) as name: - return gen_math_ops._floor_div(x, y, name=name) + return gen_math_ops.floor_div(x, y, name=name) -realdiv = gen_math_ops._real_div -truncatediv = gen_math_ops._truncate_div +realdiv = gen_math_ops.real_div +truncatediv = gen_math_ops.truncate_div # TODO(aselle): Rename this to floordiv when we can. -floor_div = gen_math_ops._floor_div -truncatemod = gen_math_ops._truncate_mod -floormod = gen_math_ops._floor_mod +floor_div = gen_math_ops.floor_div +truncatemod = gen_math_ops.truncate_mod +floormod = gen_math_ops.floor_mod def _mul_dispatch(x, y, name=None): """Dispatches cwise mul for "Dense*Dense" and "Dense*Sparse".""" is_tensor_y = isinstance(y, ops.Tensor) if is_tensor_y: - return gen_math_ops._mul(x, y, name=name) + return gen_math_ops.mul(x, y, name=name) else: assert isinstance(y, sparse_tensor.SparseTensor) # Case: Dense * Sparse. 
new_vals = gen_sparse_ops.sparse_dense_cwise_mul(y.indices, y.values, @@ -1174,12 +1172,12 @@ _OverrideBinaryOperatorHelper(gen_sparse_ops.sparse_dense_cwise_mul, "mul", sparse_tensor.SparseTensor) _OverrideBinaryOperatorHelper(gen_math_ops.add, "add") -_OverrideBinaryOperatorHelper(gen_math_ops._sub, "sub") +_OverrideBinaryOperatorHelper(gen_math_ops.sub, "sub") _OverrideBinaryOperatorHelper(_mul_dispatch, "mul") _OverrideBinaryOperatorHelper(_div_python2, "div") _OverrideBinaryOperatorHelper(_truediv_python3, "truediv") _OverrideBinaryOperatorHelper(floordiv, "floordiv") -_OverrideBinaryOperatorHelper(gen_math_ops._floor_mod, "mod") +_OverrideBinaryOperatorHelper(gen_math_ops.floor_mod, "mod") _OverrideBinaryOperatorHelper(pow, "pow") @@ -1501,7 +1499,7 @@ def reduce_mean(input_tensor, if keepdims is None: keepdims = False return _may_reduce_to_scalar(keepdims, axis, reduction_indices, - gen_math_ops._mean( + gen_math_ops.mean( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), @@ -1551,7 +1549,7 @@ def reduce_prod(input_tensor, if keepdims is None: keepdims = False return _may_reduce_to_scalar(keepdims, axis, reduction_indices, - gen_math_ops._prod( + gen_math_ops.prod( input_tensor, _ReductionDims(input_tensor, axis, reduction_indices), @@ -2020,7 +2018,7 @@ def matmul(a, if transpose_b: b = conj(b) adjoint_b = True - return gen_math_ops._batch_mat_mul( + return gen_math_ops.batch_mat_mul( a, b, adj_x=adjoint_a, adj_y=adjoint_b, name=name) # Neither matmul nor sparse_matmul support adjoint, so we conjugate @@ -2057,13 +2055,13 @@ def matmul(a, ret = cast(ret, dtypes.bfloat16) return ret else: - return gen_math_ops._mat_mul( + return gen_math_ops.mat_mul( a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name) _OverrideBinaryOperatorHelper(matmul, "matmul") -sparse_matmul = gen_math_ops._sparse_mat_mul +sparse_matmul = gen_math_ops.sparse_mat_mul @ops.RegisterStatistics("MatMul", "flops") @@ -2168,7 +2166,7 @@ def add_n(inputs, 
name=None): if name: return array_ops.identity(inputs[0], name=name) return inputs[0] - return gen_math_ops._add_n(inputs, name=name) + return gen_math_ops.add_n(inputs, name=name) @tf_export("accumulate_n") @@ -2246,7 +2244,7 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): # addressed return add_n(inputs, name=name) else: - return gen_math_ops._accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access + return gen_math_ops.accumulate_nv2(inputs, name=name, shape=shape) # pylint: disable=protected-access @ops.RegisterGradient("AccumulateNV2") @@ -2276,7 +2274,7 @@ def sigmoid(x, name=None): """ with ops.name_scope(name, "Sigmoid", [x]) as name: x = ops.convert_to_tensor(x, name="x") - return gen_math_ops._sigmoid(x, name=name) + return gen_math_ops.sigmoid(x, name=name) @tf_export("log_sigmoid") @@ -2295,7 +2293,7 @@ def log_sigmoid(x, name=None): """ with ops.name_scope(name, "LogSigmoid", [x]) as name: x = ops.convert_to_tensor(x, name="x") - return gen_math_ops._neg(gen_nn_ops.softplus(-x), name=name) + return gen_math_ops.neg(gen_nn_ops.softplus(-x), name=name) @tf_export("nn.tanh", "tanh") @@ -2312,11 +2310,11 @@ def tanh(x, name=None): """ with ops.name_scope(name, "Tanh", [x]) as name: if isinstance(x, sparse_tensor.SparseTensor): - x_tanh = gen_math_ops._tanh(x.values, name=name) + x_tanh = gen_math_ops.tanh(x.values, name=name) return sparse_tensor.SparseTensor( indices=x.indices, values=x_tanh, dense_shape=x.dense_shape) else: - return gen_math_ops._tanh(x, name=name) + return gen_math_ops.tanh(x, name=name) @tf_export("bincount") @@ -2505,7 +2503,7 @@ def conj(x, name=None): with ops.name_scope(name, "Conj", [x]) as name: x = ops.convert_to_tensor(x, name="x") if x.dtype.is_complex or x.dtype == dtypes.variant: - return gen_math_ops._conj(x, name=name) + return gen_math_ops.conj(x, name=name) elif x.dtype.is_floating or x.dtype.is_integer: return x else: diff --git 
a/tensorflow/python/ops/nn_batchnorm_test.py b/tensorflow/python/ops/nn_batchnorm_test.py index eebfb17085..3ac2c8eb17 100644 --- a/tensorflow/python/ops/nn_batchnorm_test.py +++ b/tensorflow/python/ops/nn_batchnorm_test.py @@ -57,7 +57,6 @@ class BatchNormalizationTest(test.TestCase): test_util.set_producer_version(ops.get_default_graph(), 8) return gen_nn_ops._batch_norm_with_global_normalization( x, m, v, beta, gamma, epsilon, scale_after_normalization) - # pylint: enable=protected-access def _tfBatchNormV1BW(self, x, m, v, beta, gamma, epsilon, scale_after_normalization): @@ -223,7 +222,7 @@ class BatchNormalizationTest(test.TestCase): for scale_after_normalization in [True, False]: # _batch_norm_with_global_normalization_grad is deprecated in v9 test_util.set_producer_version(ops.get_default_graph(), 8) - grad = gen_nn_ops._batch_norm_with_global_normalization_grad( + grad = gen_nn_ops.batch_norm_with_global_normalization_grad( x, m, v, gamma, backprop, epsilon, scale_after_normalization) dx, dm, dv, db, dg = grad self.assertEqual(grad.dx, dx) diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index dc24b821a5..5582daf2da 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -150,7 +150,7 @@ def _Conv3DBackpropFilterGrad(op, grad): @ops.RegisterGradient("AvgPool3D") def _AvgPool3DGrad(op, grad): - return gen_nn_ops._avg_pool3d_grad( + return gen_nn_ops.avg_pool3d_grad( array_ops.shape(op.inputs[0]), grad, ksize=op.get_attr("ksize"), @@ -172,7 +172,7 @@ def _AvgPool3DGradGrad(op, grad): @ops.RegisterGradient("MaxPool3D") def _MaxPool3DGrad(op, grad): - return gen_nn_ops._max_pool3d_grad( + return gen_nn_ops.max_pool3d_grad( op.inputs[0], op.outputs[0], grad, @@ -188,7 +188,7 @@ def _MaxPool3DGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool3d_grad_grad( + 
gen_nn_ops.max_pool3d_grad_grad( op.inputs[0], op.inputs[1], grad, @@ -204,7 +204,7 @@ def _MaxPool3DGradGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool3d_grad( + gen_nn_ops.max_pool3d_grad( op.inputs[0], op.inputs[1], grad, @@ -352,13 +352,13 @@ def _BiasAddGradV1(unused_bias_op, received_grad): @ops.RegisterGradient("Relu") def _ReluGrad(op, grad): - return gen_nn_ops._relu_grad(grad, op.outputs[0]) + return gen_nn_ops.relu_grad(grad, op.outputs[0]) @ops.RegisterGradient("EluGrad") def _EluGradGrad(op, grad): elu_x = op.inputs[1] - return (gen_nn_ops._elu_grad(grad, op.outputs[0]), + return (gen_nn_ops.elu_grad(grad, op.outputs[0]), array_ops.where(elu_x < 0, grad * op.inputs[0], array_ops.zeros( shape=array_ops.shape(elu_x), dtype=elu_x.dtype))) @@ -368,63 +368,63 @@ def _EluGradGrad(op, grad): def _SeluGradGrad(op, grad): x = op.inputs[1] scale_alpha = 1.7580993408473768599402175208123 - return (gen_nn_ops._elu_grad(grad, op.outputs[0]), + return (gen_nn_ops.elu_grad(grad, op.outputs[0]), array_ops.where(x < 0., - gen_nn_ops._elu_grad(grad, - op.outputs[0] + scale_alpha), + gen_nn_ops.elu_grad(grad, + op.outputs[0] + scale_alpha), array_ops.zeros( shape=array_ops.shape(x), dtype=x.dtype))) @ops.RegisterGradient("Relu6") def _Relu6Grad(op, grad): - return gen_nn_ops._relu6_grad(grad, op.outputs[0]) # pylint: disable=protected-access + return gen_nn_ops.relu6_grad(grad, op.outputs[0]) @ops.RegisterGradient("Relu6Grad") def _Relu6GradGrad(op, grad): x = op.inputs[1] - return (gen_nn_ops._relu6_grad(grad, x), + return (gen_nn_ops.relu6_grad(grad, x), array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) @ops.RegisterGradient("Elu") def _EluGrad(op, grad): - return gen_nn_ops._elu_grad(grad, op.outputs[0]) + return gen_nn_ops.elu_grad(grad, op.outputs[0]) @ops.RegisterGradient("Selu") def _SeluGrad(op, grad): - return 
gen_nn_ops._selu_grad(grad, op.outputs[0]) + return gen_nn_ops.selu_grad(grad, op.outputs[0]) @ops.RegisterGradient("Softplus") def _SoftplusGrad(op, grad): - return gen_nn_ops._softplus_grad(grad, op.inputs[0]) + return gen_nn_ops.softplus_grad(grad, op.inputs[0]) @ops.RegisterGradient("SoftplusGrad") def _SoftplusGradGrad(op, grad): # Let: # y = tf.nn.softplus(x) - # dx = gen_nn_ops._softplus_grad(dy, x) = dy / (1 + exp(-x)) + # dx = gen_nn_ops.softplus_grad(dy, x) = dy / (1 + exp(-x)) # This op computes (ddy, d2x) from op.inputs == [dy, x] and grad == ddx. dy, x = op.inputs with ops.control_dependencies([grad]): - ddy = gen_nn_ops._softplus_grad(grad, x) # pylint: disable=protected-access + ddy = gen_nn_ops.softplus_grad(grad, x) d2x = grad * dy / (math_ops.exp(-x) + 2.0 + math_ops.exp(x)) return (ddy, d2x) @ops.RegisterGradient("Softsign") def _SoftsignGrad(op, grad): - return gen_nn_ops._softsign_grad(grad, op.inputs[0]) + return gen_nn_ops.softsign_grad(grad, op.inputs[0]) @ops.RegisterGradient("ReluGrad") def _ReluGradGrad(op, grad): x = op.inputs[1] - return (gen_nn_ops._relu_grad(grad, x), + return (gen_nn_ops.relu_grad(grad, x), array_ops.zeros(shape=array_ops.shape(x), dtype=x.dtype)) @@ -565,14 +565,14 @@ def _LRNGrad(op, grad): alpha = op.get_attr("alpha") beta = op.get_attr("beta") return [ - gen_nn_ops._lrn_grad(grad, op.inputs[0], op.outputs[0], depth_radius, - bias, alpha, beta) + gen_nn_ops.lrn_grad(grad, op.inputs[0], op.outputs[0], depth_radius, bias, + alpha, beta) ] @ops.RegisterGradient("AvgPool") def _AvgPoolGrad(op, grad): - return gen_nn_ops._avg_pool_grad( + return gen_nn_ops.avg_pool_grad( array_ops.shape(op.inputs[0]), grad, op.get_attr("ksize"), @@ -584,7 +584,7 @@ def _AvgPoolGrad(op, grad): @ops.RegisterGradient("AvgPoolGrad") def _AvgPoolGradGrad(op, grad): return (array_ops.stop_gradient(op.inputs[0]), - gen_nn_ops._avg_pool( + gen_nn_ops.avg_pool( grad, op.get_attr("ksize"), op.get_attr("strides"), @@ -594,7 +594,7 @@ def 
_AvgPoolGradGrad(op, grad): @ops.RegisterGradient("MaxPool") def _MaxPoolGrad(op, grad): - return gen_nn_ops._max_pool_grad( + return gen_nn_ops.max_pool_grad( op.inputs[0], op.outputs[0], grad, @@ -620,7 +620,7 @@ def _MaxPoolGradV2(op, grad): @ops.RegisterGradient("MaxPoolWithArgmax") def _MaxPoolGradWithArgmax(op, grad, unused_argmax_grad): - return gen_nn_ops._max_pool_grad_with_argmax( + return gen_nn_ops.max_pool_grad_with_argmax( op.inputs[0], grad, op.outputs[1], @@ -635,7 +635,7 @@ def _MaxPoolGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool_grad_grad( + gen_nn_ops.max_pool_grad_grad( op.inputs[0], op.inputs[1], grad, @@ -669,7 +669,7 @@ def _MaxPoolGradGradGrad(op, grad): shape=array_ops.shape(op.inputs[0]), dtype=op.inputs[0].dtype), array_ops.zeros( shape=array_ops.shape(op.inputs[1]), dtype=op.inputs[1].dtype), - gen_nn_ops._max_pool_grad( + gen_nn_ops.max_pool_grad( op.inputs[0], op.inputs[1], grad, @@ -696,8 +696,7 @@ def _FractionalMaxPoolGrad(op, grad_0, unused_grad_1, unused_grad_2): Returns: Input backprop for FractionalMaxPool op. """ - # pylint: disable=protected-access - return gen_nn_ops._fractional_max_pool_grad( + return gen_nn_ops.fractional_max_pool_grad( op.inputs[0], op.outputs[0], grad_0, op.outputs[1], op.outputs[2], op.get_attr("overlapping")) @@ -719,10 +718,9 @@ def _FractionalAvgPoolGrad(op, grad_0, unused_grad_1, unused_grad_2): Returns: Input backprop for FractionalAvgPool op. 
""" - # pylint: disable=protected-access - return gen_nn_ops._fractional_avg_pool_grad(op.inputs[0].get_shape(), grad_0, - op.outputs[1], op.outputs[2], - op.get_attr("overlapping")) + return gen_nn_ops.fractional_avg_pool_grad(op.inputs[0].get_shape(), grad_0, + op.outputs[1], op.outputs[2], + op.get_attr("overlapping")) @ops.RegisterGradient("BatchNormWithGlobalNormalization") @@ -746,7 +744,7 @@ def _BatchNormWithGlobalNormalizationGrad(op, grad): last dimension. dg: Backprop for gamma, which is (grad * ((x - m) * rsqrt(v + epsilon))) """ - dx, dm, dv, db, dg = gen_nn_ops._batch_norm_with_global_normalization_grad( + dx, dm, dv, db, dg = gen_nn_ops.batch_norm_with_global_normalization_grad( op.inputs[0], op.inputs[1], op.inputs[2], op.inputs[4], grad, op.get_attr("variance_epsilon"), op.get_attr("scale_after_normalization")) return dx, dm, dv, db, dg diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 5fa5708114..7814a27311 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -888,12 +888,10 @@ def fused_batch_norm( # TODO(reedwm): In a few weeks, switch to using the V2 version exclusively. We # currently only use the V2 version for float16 inputs, which is not supported # by the V1 version. 
- # pylint: disable=protected-access if x.dtype == dtypes.float16 or x.dtype == dtypes.bfloat16: - fused_batch_norm_func = gen_nn_ops._fused_batch_norm_v2 + fused_batch_norm_func = gen_nn_ops.fused_batch_norm_v2 else: - fused_batch_norm_func = gen_nn_ops._fused_batch_norm - # pylint: enable=protected-access + fused_batch_norm_func = gen_nn_ops._fused_batch_norm # pylint: disable=protected-access y, batch_mean, batch_var, _, _ = fused_batch_norm_func( x, scale, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 8fbe698914..a0d500afce 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1481,7 +1481,6 @@ def conv3d_transpose( name=name) -# pylint: disable=protected-access @tf_export("nn.bias_add") def bias_add(value, bias, data_format=None, name=None): """Adds `bias` to `value`. @@ -1506,10 +1505,9 @@ def bias_add(value, bias, data_format=None, name=None): with ops.name_scope(name, "BiasAdd", [value, bias]) as name: value = ops.convert_to_tensor(value, name="input") bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") - return gen_nn_ops._bias_add(value, bias, data_format=data_format, name=name) + return gen_nn_ops.bias_add(value, bias, data_format=data_format, name=name) -# pylint: disable=protected-access def bias_add_v1(value, bias, name=None): """Adds `bias` to `value`. 
@@ -1534,7 +1532,7 @@ def bias_add_v1(value, bias, name=None): with ops.name_scope(name, "BiasAddV1", [value, bias]) as name: value = ops.convert_to_tensor(value, name="input") bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") - return gen_nn_ops._bias_add_v1(value, bias, name=name) + return gen_nn_ops.bias_add_v1(value, bias, name=name) @tf_export("nn.crelu") @@ -1580,7 +1578,7 @@ def relu6(features, name=None): """ with ops.name_scope(name, "Relu6", [features]) as name: features = ops.convert_to_tensor(features, name="features") - return gen_nn_ops._relu6(features, name=name) + return gen_nn_ops.relu6(features, name=name) @tf_export("nn.leaky_relu") @@ -1645,7 +1643,7 @@ def _softmax(logits, compute_op, dim=-1, name=None): Args: logits: A non-empty `Tensor`. Must be one of the following types: `half`, `float32`, `float64`. - compute_op: Either gen_nn_ops._softmax or gen_nn_ops._log_softmax + compute_op: Either gen_nn_ops.softmax or gen_nn_ops.log_softmax dim: The dimension softmax would be performed on. The default is -1 which indicates the last dimension. name: A name for the operation (optional). @@ -1739,7 +1737,7 @@ def softmax(logits, axis=None, name=None, dim=None): axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 - return _softmax(logits, gen_nn_ops._softmax, axis, name) + return _softmax(logits, gen_nn_ops.softmax, axis, name) @tf_export("nn.log_softmax") @@ -1769,7 +1767,7 @@ def log_softmax(logits, axis=None, name=None, dim=None): axis = deprecation.deprecated_argument_lookup("axis", axis, "dim", dim) if axis is None: axis = -1 - return _softmax(logits, gen_nn_ops._log_softmax, axis, name) + return _softmax(logits, gen_nn_ops.log_softmax, axis, name) def _ensure_xent_args(name, sentinel, labels, logits): @@ -1871,7 +1869,7 @@ def softmax_cross_entropy_with_logits_v2( # Do the actual op computation. # The second output tensor contains the gradients. 
We use it in # _CrossEntropyGrad() in nn_grad but not here. - cost, unused_backprop = gen_nn_ops._softmax_cross_entropy_with_logits( + cost, unused_backprop = gen_nn_ops.softmax_cross_entropy_with_logits( precise_logits, labels, name=name) # The output cost shape should be the input minus dim. @@ -2038,7 +2036,7 @@ def sparse_softmax_cross_entropy_with_logits( (labels_static_shape.ndims, logits.get_shape().ndims)) # Check if no reshapes are required. if logits.get_shape().ndims == 2: - cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( precise_logits, labels, name=name) if logits.dtype == dtypes.float16: return math_ops.cast(cost, dtypes.float16) @@ -2051,7 +2049,7 @@ def sparse_softmax_cross_entropy_with_logits( labels = array_ops.reshape(labels, [-1]) # The second output tensor contains the gradients. We use it in # _CrossEntropyGrad() in nn_grad but not here. - cost, _ = gen_nn_ops._sparse_softmax_cross_entropy_with_logits( + cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( precise_logits, labels, name=name) cost = array_ops.reshape(cost, labels_shape) cost.set_shape(labels_static_shape) @@ -2086,7 +2084,7 @@ def avg_pool(value, ksize, strides, padding, data_format="NHWC", name=None): """ with ops.name_scope(name, "AvgPool", [value]) as name: value = ops.convert_to_tensor(value, name="input") - return gen_nn_ops._avg_pool( + return gen_nn_ops.avg_pool( value, ksize=ksize, strides=strides, @@ -2116,12 +2114,13 @@ def max_pool(value, ksize, strides, padding, data_format="NHWC", name=None): """ with ops.name_scope(name, "MaxPool", [value]) as name: value = ops.convert_to_tensor(value, name="input") - return gen_nn_ops._max_pool(value, - ksize=ksize, - strides=strides, - padding=padding, - data_format=data_format, - name=name) + return gen_nn_ops.max_pool( + value, + ksize=ksize, + strides=strides, + padding=padding, + data_format=data_format, + name=name) 
@ops.RegisterStatistics("Conv2D", "flops") @@ -2331,7 +2330,7 @@ def top_k(input, k=1, sorted=True, name=None): # pylint: disable=redefined-buil values: The `k` largest elements along each last dimensional slice. indices: The indices of `values` within the last dimension of `input`. """ - return gen_nn_ops._top_kv2(input, k=k, sorted=sorted, name=name) + return gen_nn_ops.top_kv2(input, k=k, sorted=sorted, name=name) def nth_element(input, n, reverse=False, name=None): # pylint: disable=redefined-builtin @@ -2650,4 +2649,4 @@ def in_top_k(predictions, targets, k, name=None): A `Tensor` of type `bool`. Computed Precision at `k` as a `bool Tensor`. """ with ops.name_scope(name, "in_top_k"): - return gen_nn_ops._in_top_kv2(predictions, targets, k, name=name) + return gen_nn_ops.in_top_kv2(predictions, targets, k, name=name) diff --git a/tensorflow/python/ops/parsing_ops.py b/tensorflow/python/ops/parsing_ops.py index b0315ceee2..075b38d743 100644 --- a/tensorflow/python/ops/parsing_ops.py +++ b/tensorflow/python/ops/parsing_ops.py @@ -700,8 +700,7 @@ def _parse_example_raw(serialized, # Finally, convert dense_shapes to TensorShapeProto dense_shapes = [shape.as_proto() for shape in dense_shapes] - # pylint: disable=protected-access - outputs = gen_parsing_ops._parse_example( + outputs = gen_parsing_ops.parse_example( serialized=serialized, names=names, dense_defaults=dense_defaults_vec, @@ -710,7 +709,6 @@ def _parse_example_raw(serialized, dense_keys=dense_keys, dense_shapes=dense_shapes, name=name) - # pylint: enable=protected-access (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs @@ -1132,8 +1130,7 @@ def _parse_single_sequence_example_raw(serialized, feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto() for shape in feature_list_dense_shapes] - # pylint: disable=protected-access - outputs = gen_parsing_ops._parse_single_sequence_example( + outputs = gen_parsing_ops.parse_single_sequence_example( serialized=serialized, 
debug_name=debug_name, context_dense_defaults=context_dense_defaults_vec, @@ -1149,7 +1146,6 @@ def _parse_single_sequence_example_raw(serialized, feature_list_dense_missing_assumed_empty=( feature_list_dense_missing_assumed_empty), name=name) - # pylint: enable=protected-access (context_sparse_indices, context_sparse_values, context_sparse_shapes, context_dense_values, @@ -1182,7 +1178,6 @@ def _parse_single_sequence_example_raw(serialized, @tf_export("decode_csv") def decode_csv(records, record_defaults, field_delim=",", use_quote_delim=True, name=None, na_value=""): - # pylint: disable=protected-access """Convert CSV records to tensors. Each column maps to one tensor. RFC 4180 format is expected for the CSV records. @@ -1211,11 +1206,13 @@ def decode_csv(records, record_defaults, field_delim=",", Each tensor will have the same shape as records. """ # TODO(martinwicke), remove the wrapper when new Python API generator is done. - return gen_parsing_ops._decode_csv( - records=records, record_defaults=record_defaults, - field_delim=field_delim, use_quote_delim=use_quote_delim, - na_value=na_value, name=name) - # pylint: enable=protected-access + return gen_parsing_ops.decode_csv( + records=records, + record_defaults=record_defaults, + field_delim=field_delim, + use_quote_delim=use_quote_delim, + na_value=na_value, + name=name) # TODO(b/70890287): Combine the implementation of this op and @@ -1391,7 +1388,6 @@ def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types, # Finally, convert dense_shapes to TensorShapeProto dense_shapes = [shape.as_proto() for shape in dense_shapes] - # pylint: disable=protected-access outputs = gen_parsing_ops.parse_single_example( serialized=serialized, dense_defaults=dense_defaults_vec, @@ -1401,7 +1397,6 @@ def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types, dense_keys=dense_keys, dense_shapes=dense_shapes, name=name) - # pylint: enable=protected-access (sparse_indices, sparse_values, sparse_shapes, 
dense_values) = outputs diff --git a/tensorflow/python/ops/random_ops.py b/tensorflow/python/ops/random_ops.py index 2c86358d21..db8159579a 100644 --- a/tensorflow/python/ops/random_ops.py +++ b/tensorflow/python/ops/random_ops.py @@ -43,7 +43,6 @@ def _ShapeTensor(shape): return ops.convert_to_tensor(shape, dtype=dtype, name="shape") -# pylint: disable=protected-access @tf_export("random_normal") def random_normal(shape, mean=0.0, @@ -74,7 +73,7 @@ def random_normal(shape, mean_tensor = ops.convert_to_tensor(mean, dtype=dtype, name="mean") stddev_tensor = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") seed1, seed2 = random_seed.get_seed(seed) - rnd = gen_random_ops._random_standard_normal( + rnd = gen_random_ops.random_standard_normal( shape_tensor, dtype, seed=seed1, seed2=seed2) mul = rnd * stddev_tensor value = math_ops.add(mul, mean_tensor, name=name) @@ -126,7 +125,7 @@ def parameterized_truncated_normal(shape, minvals_tensor = ops.convert_to_tensor(minvals, dtype=dtype, name="minvals") maxvals_tensor = ops.convert_to_tensor(maxvals, dtype=dtype, name="maxvals") seed1, seed2 = random_seed.get_seed(seed) - rnd = gen_random_ops._parameterized_truncated_normal( + rnd = gen_random_ops.parameterized_truncated_normal( shape_tensor, means_tensor, stddevs_tensor, @@ -171,7 +170,7 @@ def truncated_normal(shape, mean_tensor = ops.convert_to_tensor(mean, dtype=dtype, name="mean") stddev_tensor = ops.convert_to_tensor(stddev, dtype=dtype, name="stddev") seed1, seed2 = random_seed.get_seed(seed) - rnd = gen_random_ops._truncated_normal( + rnd = gen_random_ops.truncated_normal( shape_tensor, dtype, seed=seed1, seed2=seed2) mul = rnd * stddev_tensor value = math_ops.add(mul, mean_tensor, name=name) @@ -237,11 +236,10 @@ def random_uniform(shape, maxval = ops.convert_to_tensor(maxval, dtype=dtype, name="max") seed1, seed2 = random_seed.get_seed(seed) if dtype.is_integer: - return gen_random_ops._random_uniform_int( + return gen_random_ops.random_uniform_int( 
shape, minval, maxval, seed=seed1, seed2=seed2, name=name) else: - rnd = gen_random_ops._random_uniform( - shape, dtype, seed=seed1, seed2=seed2) + rnd = gen_random_ops.random_uniform(shape, dtype, seed=seed1, seed2=seed2) return math_ops.add(rnd * (maxval - minval), minval, name=name) @@ -275,7 +273,7 @@ def random_shuffle(value, seed=None, name=None): dimension. """ seed1, seed2 = random_seed.get_seed(seed) - return gen_random_ops._random_shuffle( + return gen_random_ops.random_shuffle( value, seed=seed1, seed2=seed2, name=name) @@ -420,7 +418,7 @@ def random_gamma(shape, seed1, seed2 = random_seed.get_seed(seed) return math_ops.maximum( np.finfo(dtype.as_numpy_dtype).tiny, - gen_random_ops._random_gamma( + gen_random_ops.random_gamma( shape, alpha_broadcast, seed=seed1, seed2=seed2) / beta) ops.NotDifferentiable("RandomGamma") diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 6fe2f61016..01f0b81684 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -219,18 +219,16 @@ def _internal_py_func(func, inp, Tout, stateful=None, eager=False, name=None): graph._cleanup_py_funcs_used_in_graph.append(cleanup) # pylint: enable=protected-access - # pylint: disable=protected-access if eager: - result = gen_script_ops._eager_py_func( + result = gen_script_ops.eager_py_func( input=inp, token=token, Tout=Tout, name=name) else: if stateful: - result = gen_script_ops._py_func( + result = gen_script_ops.py_func( input=inp, token=token, Tout=Tout, name=name) else: - result = gen_script_ops._py_func_stateless( + result = gen_script_ops.py_func_stateless( input=inp, token=token, Tout=Tout, name=name) - # pylint: enable=protected-access return result if is_list_or_tuple else result[0] diff --git a/tensorflow/python/ops/session_ops.py b/tensorflow/python/ops/session_ops.py index cedd36c1de..ad38845153 100644 --- a/tensorflow/python/ops/session_ops.py +++ b/tensorflow/python/ops/session_ops.py @@ -16,7 
+16,6 @@ """Tensor Handle Operations. See the @{$python/session_ops} guide. @@get_session_handle -@@get_session_handle_v2 @@get_session_tensor @@delete_session_tensor """ @@ -182,7 +181,7 @@ def get_session_handle(data, name=None): # Colocate this operation with data. with ops.colocate_with(data): - return gen_data_flow_ops._get_session_handle(data, name=name) # pylint: disable=protected-access + return gen_data_flow_ops.get_session_handle(data, name=name) @tf_export("get_session_tensor") @@ -222,7 +221,7 @@ def get_session_tensor(handle, dtype, name=None): with ops.device(handle_device): holder = array_ops.placeholder(dtypes.string) _register_handle_feeder(holder.graph, holder, dtype) - tensor = gen_data_flow_ops._get_session_tensor(holder, dtype, name=name) + tensor = gen_data_flow_ops.get_session_tensor(holder, dtype, name=name) return (holder, tensor) @@ -246,7 +245,7 @@ def delete_session_tensor(handle, name=None): handle_device = TensorHandle._get_device_name(handle) with ops.device(handle_device): holder = array_ops.placeholder(dtypes.string) - deleter = gen_data_flow_ops._delete_session_tensor(holder, name=name) + deleter = gen_data_flow_ops.delete_session_tensor(holder, name=name) return (holder, deleter) @@ -268,7 +267,7 @@ def _get_handle_reader(graph, handle, dtype): with graph.as_default(), graph.device(handle_device): holder = array_ops.placeholder(dtypes.string) _register_handle_feeder(holder.graph, holder, dtype) - reader = gen_data_flow_ops._get_session_tensor(holder, dtype) + reader = gen_data_flow_ops.get_session_tensor(holder, dtype) result = (holder, reader) graph._handle_readers[graph_key] = result return result @@ -289,7 +288,7 @@ def _get_handle_mover(graph, feeder, handle): # Create mover if we haven't done it. 
holder, reader = _get_handle_reader(graph, handle, dtype) with graph.as_default(), graph.device(feeder.op.device): - mover = gen_data_flow_ops._get_session_handle(reader) # pylint: disable=protected-access + mover = gen_data_flow_ops.get_session_handle(reader) result = (holder, mover) graph._handle_movers[graph_key] = result return result @@ -303,7 +302,7 @@ def _get_handle_deleter(graph, deleter_key, handle): handle_device = TensorHandle._get_device_name(handle) with graph.as_default(), graph.device(handle_device): holder = array_ops.placeholder(dtypes.string) - deleter = gen_data_flow_ops._delete_session_tensor(holder) + deleter = gen_data_flow_ops.delete_session_tensor(holder) result = (holder, deleter) graph._handle_deleters[deleter_key] = result return result diff --git a/tensorflow/python/ops/sparse_grad.py b/tensorflow/python/ops/sparse_grad.py index 5295e7d21c..97353d6c74 100644 --- a/tensorflow/python/ops/sparse_grad.py +++ b/tensorflow/python/ops/sparse_grad.py @@ -88,10 +88,8 @@ def _SparseAddGrad(op, *grads): # the non-zero elements of the sum, and we will peek into `sum_indices` in the # gradient op. - # pylint: disable=protected-access - a_val_grad, b_val_grad = gen_sparse_ops._sparse_add_grad(val_grad, a_indices, - b_indices, - sum_indices) + a_val_grad, b_val_grad = gen_sparse_ops.sparse_add_grad( + val_grad, a_indices, b_indices, sum_indices) a_val_grad.set_shape(op.inputs[1].get_shape()) b_val_grad.set_shape(op.inputs[4].get_shape()) # (a_indices, a_values, a_shape, b_indices, b_values, b_shape, thresh) @@ -151,7 +149,7 @@ def _SparseTensorDenseMatMulGrad(op, grad): "complex gradients.") # gradient w.r.t. 
dense - b_grad = gen_sparse_ops._sparse_tensor_dense_mat_mul( # pylint: disable=protected-access + b_grad = gen_sparse_ops.sparse_tensor_dense_mat_mul( a_indices, a_values, a_shape, grad, adjoint_a=not adj_a) if adj_b: b_grad = array_ops.transpose(b_grad) @@ -278,8 +276,7 @@ def _SparseFillEmptyRowsGrad(op, unused_grad_output_indices, output_grad_values, """Gradients for SparseFillEmptyRows.""" reverse_index_map = op.outputs[3] - # pylint: disable=protected-access - d_values, d_default_value = gen_sparse_ops._sparse_fill_empty_rows_grad( + d_values, d_default_value = gen_sparse_ops.sparse_fill_empty_rows_grad( reverse_index_map=reverse_index_map, grad_values=output_grad_values) # d_indices, d_values, d_dense_shape, d_default_value. diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index 0fbbf5a805..a01bba632f 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -234,7 +234,7 @@ def sparse_concat(axis, ] output_ind, output_val, output_shape = ( - gen_sparse_ops._sparse_concat(inds, vals, shapes, axis, name=name)) + gen_sparse_ops.sparse_concat(inds, vals, shapes, axis, name=name)) return sparse_tensor.SparseTensor(output_ind, output_val, output_shape) @@ -302,8 +302,8 @@ def sparse_add(a, b, thresh=0): thresh = ops.convert_to_tensor( thresh, dtype=a.values.dtype.real_dtype.base_dtype, name="thresh") output_ind, output_val, output_shape = ( - gen_sparse_ops._sparse_add(a.indices, a.values, a.dense_shape, - b.indices, b.values, b.dense_shape, thresh)) + gen_sparse_ops.sparse_add(a.indices, a.values, a.dense_shape, + b.indices, b.values, b.dense_shape, thresh)) # Attempt to get output_shape statically. a.get_shape().assert_is_compatible_with(b.get_shape()) @@ -317,8 +317,8 @@ def sparse_add(a, b, thresh=0): # swap to make `a` the SparseTensor. 
if isinstance(b, sparse_classes): a, b = b, a - return gen_sparse_ops._sparse_tensor_dense_add(a.indices, a.values, - a.dense_shape, b) + return gen_sparse_ops.sparse_tensor_dense_add(a.indices, a.values, + a.dense_shape, b) def _sparse_cross(inputs, name=None): @@ -402,7 +402,7 @@ def _sparse_cross_internal(inputs, num_buckets=0, hash_key=None, name=None): - """See gen_sparse_ops._sparse_cross.""" + """See gen_sparse_ops.sparse_cross.""" if not isinstance(inputs, list): raise TypeError("Inputs must be a list") if not all( @@ -432,7 +432,7 @@ def _sparse_cross_internal(inputs, dense_inputs[i] = math_ops.to_int64(dense_inputs[i]) internal_type = dtypes.int64 - indices_out, values_out, shape_out = gen_sparse_ops._sparse_cross( + indices_out, values_out, shape_out = gen_sparse_ops.sparse_cross( indices=indices, values=values, shapes=shapes, @@ -511,7 +511,7 @@ def sparse_reorder(sp_input, name=None): sp_input = _convert_to_sparse_tensor(sp_input) reordered_ind, reordered_val = ( - gen_sparse_ops._sparse_reorder( + gen_sparse_ops.sparse_reorder( sp_input.indices, sp_input.values, sp_input.dense_shape, name=name)) if sp_input.get_shape().is_fully_defined(): @@ -575,7 +575,7 @@ def sparse_reshape(sp_input, shape, name=None): shape = math_ops.cast(shape, dtype=dtypes.int64) with ops.name_scope(name, "SparseReshape", [sp_input]) as name: - reshaped_ind, reshaped_shape = gen_sparse_ops._sparse_reshape( + reshaped_ind, reshaped_shape = gen_sparse_ops.sparse_reshape( sp_input.indices, sp_input.dense_shape, shape, name=name) reshaped_shape_const = tensor_util.constant_value(shape) @@ -671,7 +671,7 @@ def sparse_split(keyword_required=KeywordRequired(), sp_input = _convert_to_sparse_tensor(sp_input) output_inds, output_vals, output_shapes = ( - gen_sparse_ops._sparse_split( + gen_sparse_ops.sparse_split( axis, sp_input.indices, sp_input.values, @@ -782,7 +782,7 @@ def sparse_to_dense(sparse_indices, Dense `Tensor` of shape `output_shape`. Has the same type as `sparse_values`. 
""" - return gen_sparse_ops._sparse_to_dense( + return gen_sparse_ops.sparse_to_dense( sparse_indices, output_shape, sparse_values, @@ -1412,7 +1412,7 @@ def sparse_fill_empty_rows(sp_input, default_value, name=None): default_value = ops.convert_to_tensor( default_value, dtype=sp_input.values.dtype) (output_indices, output_values, empty_row_indicator, - unused_reverse_index_map) = gen_sparse_ops._sparse_fill_empty_rows( + unused_reverse_index_map) = gen_sparse_ops.sparse_fill_empty_rows( indices=sp_input.indices, values=sp_input.values, dense_shape=sp_input.dense_shape, @@ -1441,7 +1441,7 @@ def serialize_sparse(sp_input, name=None, out_type=dtypes.string): """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._serialize_sparse( + return gen_sparse_ops.serialize_sparse( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -1476,7 +1476,7 @@ def serialize_many_sparse(sp_input, name=None, out_type=dtypes.string): """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._serialize_many_sparse( + return gen_sparse_ops.serialize_many_sparse( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -1541,7 +1541,7 @@ def deserialize_sparse(serialized_sparse, dtype, rank=None, name=None): """ output_indices, output_values, output_shape = ( - gen_sparse_ops._deserialize_sparse(serialized_sparse, dtype, name=name)) + gen_sparse_ops.deserialize_sparse(serialized_sparse, dtype, name=name)) # Feed rank data back in, if available output_indices.set_shape([None, rank]) @@ -1610,7 +1610,7 @@ def deserialize_many_sparse(serialized_sparse, dtype, rank=None, name=None): All of the serialized `SparseTensor`s must have had the same rank and type. 
""" output_indices, output_values, output_shape = ( - gen_sparse_ops._deserialize_many_sparse( + gen_sparse_ops.deserialize_many_sparse( serialized_sparse, dtype, name=name)) # Feed rank data back in, if available @@ -1828,7 +1828,7 @@ def sparse_tensor_dense_matmul(sp_a, with ops.name_scope(name, "SparseTensorDenseMatMul", [sp_a.indices, sp_a.values, b]) as name: b = ops.convert_to_tensor(b, name="b") - return gen_sparse_ops._sparse_tensor_dense_mat_mul( + return gen_sparse_ops.sparse_tensor_dense_mat_mul( a_indices=sp_a.indices, a_values=sp_a.values, a_shape=sp_a.dense_shape, diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py index b62e556967..65b788c31a 100644 --- a/tensorflow/python/ops/standard_ops.py +++ b/tensorflow/python/ops/standard_ops.py @@ -186,7 +186,6 @@ _allowed_symbols_array_ops = [ "quantize_and_dequantize", # to-doc # TODO(drpng): legacy symbols to be removed. - "list_diff", # Use tf.listdiff instead. "batch_matrix_diag", "batch_matrix_band_part", "batch_matrix_diag_part", diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index 6c0a090d16..fd4419640a 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -99,8 +99,8 @@ def variable_op(shape, dtype, name="Variable", set_shape=True, container="", """Deprecated. Used variable_op_v2 instead.""" if not set_shape: shape = tensor_shape.unknown_shape() - ret = gen_state_ops._variable(shape=shape, dtype=dtype, name=name, - container=container, shared_name=shared_name) + ret = gen_state_ops.variable(shape=shape, dtype=dtype, name=name, + container=container, shared_name=shared_name) # TODO(mrry): Move this to where it is used, so we can get rid of this op # wrapper? if set_shape: @@ -127,11 +127,12 @@ def variable_op_v2(shape, dtype, name="Variable", container="", shared_name=""): Returns: A variable tensor. 
""" - return gen_state_ops._variable_v2(shape=shape, - dtype=dtype, - name=name, - container=container, - shared_name=shared_name) + return gen_state_ops.variable_v2( + shape=shape, + dtype=dtype, + name=name, + container=container, + shared_name=shared_name) def init_variable(v, init, name="init"): diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index b8c39d91b4..0335d2456a 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -93,10 +93,8 @@ def string_split(source, delimiter=" ", skip_empty=True): # pylint: disable=inv delimiter = ops.convert_to_tensor(delimiter, dtype=dtypes.string) source = ops.convert_to_tensor(source, dtype=dtypes.string) - # pylint: disable=protected-access - indices, values, shape = gen_string_ops._string_split( + indices, values, shape = gen_string_ops.string_split( source, delimiter=delimiter, skip_empty=skip_empty) - # pylint: enable=protected-access indices.set_shape([None, 2]) values.set_shape([None]) shape.set_shape([2]) diff --git a/tensorflow/python/ops/summary_ops.py b/tensorflow/python/ops/summary_ops.py index 7f4f4ce5ab..037bc9845a 100644 --- a/tensorflow/python/ops/summary_ops.py +++ b/tensorflow/python/ops/summary_ops.py @@ -13,7 +13,6 @@ # limitations under the License. 
# ============================================================================== """Summary Operations.""" -# pylint: disable=protected-access from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -74,7 +73,7 @@ def tensor_summary(name, with summary_op_util.summary_scope( name, family, values=[tensor]) as (tag, scope): - val = gen_logging_ops._tensor_summary_v2( + val = gen_logging_ops.tensor_summary_v2( tensor=tensor, tag=tag, name=scope, diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py index 3c08870146..6226f426be 100644 --- a/tensorflow/python/ops/tensor_array_ops.py +++ b/tensorflow/python/ops/tensor_array_ops.py @@ -148,7 +148,7 @@ class _GraphTensorArray(object): # will retroactively set the device value of this op. def create(): """Create the TensorArray op.""" - return gen_data_flow_ops._tensor_array_v3( + return gen_data_flow_ops.tensor_array_v3( dtype=dtype, size=size, element_shape=element_shape, @@ -237,7 +237,7 @@ class _GraphTensorArray(object): flow = self.flow with ops.name_scope(name, "TensorArrayGrad", [self._handle]): with ops.colocate_with(self._handle): - g_handle, unused_flow = gen_data_flow_ops._tensor_array_grad_v3( + g_handle, unused_flow = gen_data_flow_ops.tensor_array_grad_v3( handle=self._handle, source=source, flow_in=flow, name=name) with ops.control_dependencies([g_handle]): flow = array_ops.identity(flow, name="gradient_flow") @@ -252,7 +252,7 @@ class _GraphTensorArray(object): def read(self, index, name=None): """See TensorArray.""" - value = gen_data_flow_ops._tensor_array_read_v3( + value = gen_data_flow_ops.tensor_array_read_v3( handle=self._handle, index=index, flow_in=self._flow, @@ -270,7 +270,7 @@ class _GraphTensorArray(object): if self._infer_shape: self._merge_element_shape(value.shape) with self._maybe_colocate_with(value): - flow_out = gen_data_flow_ops._tensor_array_write_v3( + flow_out = 
gen_data_flow_ops.tensor_array_write_v3( handle=self._handle, index=index, value=value, @@ -296,7 +296,7 @@ class _GraphTensorArray(object): element_shape = self._element_shape[0] else: element_shape = tensor_shape.TensorShape(None) - value = gen_data_flow_ops._tensor_array_gather_v3( + value = gen_data_flow_ops.tensor_array_gather_v3( handle=self._handle, indices=indices, flow_in=self._flow, @@ -314,7 +314,7 @@ class _GraphTensorArray(object): tensor_shape.TensorShape(self._element_shape[0].dims[1:])) else: element_shape_except0 = tensor_shape.TensorShape(None) - value, _ = gen_data_flow_ops._tensor_array_concat_v3( + value, _ = gen_data_flow_ops.tensor_array_concat_v3( handle=self._handle, flow_in=self._flow, dtype=self._dtype, @@ -341,7 +341,7 @@ class _GraphTensorArray(object): if self._infer_shape and context.in_graph_mode(): self._merge_element_shape(value.shape[1:]) with self._maybe_colocate_with(value): - flow_out = gen_data_flow_ops._tensor_array_scatter_v3( + flow_out = gen_data_flow_ops.tensor_array_scatter_v3( handle=self._handle, indices=indices, value=value, @@ -370,7 +370,7 @@ class _GraphTensorArray(object): self._merge_element_shape( tensor_shape.TensorShape([clengths[0]]).concatenate( value.shape[1:])) - flow_out = gen_data_flow_ops._tensor_array_split_v3( + flow_out = gen_data_flow_ops.tensor_array_split_v3( handle=self._handle, value=value, lengths=lengths_64, @@ -386,13 +386,13 @@ class _GraphTensorArray(object): def size(self, name=None): """See TensorArray.""" - return gen_data_flow_ops._tensor_array_size_v3( + return gen_data_flow_ops.tensor_array_size_v3( handle=self._handle, flow_in=self.flow, name=name) @tf_should_use.should_use_result def close(self, name=None): """See TensorArray.""" - return gen_data_flow_ops._tensor_array_close_v3( + return gen_data_flow_ops.tensor_array_close_v3( handle=self._handle, name=name) # pylint: enable=protected-access diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py 
index b80ad79074..7ff633a654 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -152,8 +152,7 @@ def image(name, tensor, max_outputs=3, collections=None, family=None): """ with _summary_op_util.summary_scope( name, family, values=[tensor]) as (tag, scope): - # pylint: disable=protected-access - val = _gen_logging_ops._image_summary( + val = _gen_logging_ops.image_summary( tag=tag, tensor=tensor, max_images=max_outputs, name=scope) _summary_op_util.collect(val, collections, [_ops.GraphKeys.SUMMARIES]) return val @@ -237,10 +236,9 @@ def audio(name, tensor, sample_rate, max_outputs=3, collections=None, """ with _summary_op_util.summary_scope( name, family=family, values=[tensor]) as (tag, scope): - # pylint: disable=protected-access sample_rate = _ops.convert_to_tensor( sample_rate, dtype=_dtypes.float32, name='sample_rate') - val = _gen_logging_ops._audio_summary_v2( + val = _gen_logging_ops.audio_summary_v2( tag=tag, tensor=tensor, max_outputs=max_outputs, sample_rate=sample_rate, name=scope) _summary_op_util.collect(val, collections, [_ops.GraphKeys.SUMMARIES]) @@ -286,8 +284,7 @@ def merge(inputs, collections=None, name=None): 'Use tf.contrib.summary instead.') name = _summary_op_util.clean_tag(name) with _ops.name_scope(name, 'Merge', inputs): - # pylint: disable=protected-access - val = _gen_logging_ops._merge_summary(inputs=inputs, name=name) + val = _gen_logging_ops.merge_summary(inputs=inputs, name=name) _summary_op_util.collect(val, collections, []) return val diff --git a/tensorflow/python/training/checkpoint_ops.py b/tensorflow/python/training/checkpoint_ops.py index 7f92d94d2b..a6e9662b73 100644 --- a/tensorflow/python/training/checkpoint_ops.py +++ b/tensorflow/python/training/checkpoint_ops.py @@ -149,7 +149,7 @@ def _load_and_remap_matrix(ckpt_path, num_rows_present = num_rows_to_load if remap_rows: row_remapping, num_rows_present = ( - gen_checkpoint_ops._generate_vocab_remapping( # pylint: 
disable=protected-access + gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=new_row_vocab_file, old_vocab_file=old_row_vocab_file, new_vocab_offset=new_row_vocab_offset, @@ -168,7 +168,7 @@ def _load_and_remap_matrix(ckpt_path, num_cols_present = new_col_vocab_size if remap_cols: col_remapping, num_cols_present = ( - gen_checkpoint_ops._generate_vocab_remapping( # pylint: disable=protected-access + gen_checkpoint_ops.generate_vocab_remapping( new_vocab_file=new_col_vocab_file, old_vocab_file=old_col_vocab_file, new_vocab_offset=0, # Offset is unused for cols (no partitioning). @@ -178,7 +178,7 @@ def _load_and_remap_matrix(ckpt_path, num_rows_to_load * new_col_vocab_size - num_rows_present * num_cols_present, 1 ]) - return_tensor = gen_checkpoint_ops._load_and_remap_matrix( # pylint: disable=protected-access + return_tensor = gen_checkpoint_ops.load_and_remap_matrix( ckpt_path=ckpt_path, old_tensor_name=old_tensor_name, row_remapping=row_remapping, diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py index 1ce8c156a0..23b30632f6 100644 --- a/tensorflow/python/training/learning_rate_decay_test.py +++ b/tensorflow/python/training/learning_rate_decay_test.py @@ -43,8 +43,8 @@ class LRDecayTest(test_util.TensorFlowTestCase): def testStaircase(self): with self.test_session(): - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable(shape=[], dtype=dtypes.int32, + name="step", container="", shared_name="") assign_100 = state_ops.assign(step, 100) assign_1 = state_ops.assign(step, 1) assign_2 = state_ops.assign(step, 2) @@ -264,8 +264,8 @@ class ExponentialDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, 
name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.natural_exp_decay(initial_lr, step, @@ -281,8 +281,8 @@ class ExponentialDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.natural_exp_decay(initial_lr, @@ -304,8 +304,8 @@ class InverseDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.inverse_time_decay(initial_lr, @@ -323,8 +323,8 @@ class InverseDecayTest(test_util.TensorFlowTestCase): initial_lr = 0.1 k = 10 decay_rate = 0.96 - step = gen_state_ops._variable(shape=[], dtype=dtypes.int32, - name="step", container="", shared_name="") + step = gen_state_ops.variable( + shape=[], dtype=dtypes.int32, name="step", container="", shared_name="") assign_step = state_ops.assign(step, 0) increment_step = state_ops.assign_add(step, 1) decayed_lr = learning_rate_decay.inverse_time_decay(initial_lr, diff --git a/tensorflow/python/training/moving_averages_test.py b/tensorflow/python/training/moving_averages_test.py index 6efdeb2866..6717811bbb 100644 --- a/tensorflow/python/training/moving_averages_test.py +++ b/tensorflow/python/training/moving_averages_test.py @@ -376,7 +376,7 @@ class ExponentialMovingAverageTest(test.TestCase): with 
ops.device("/job:dev_v0"): v0 = variables.Variable(10.0, name="v0") with ops.device("/job:dev_v1"): - v1 = gen_state_ops._variable( + v1 = gen_state_ops.variable( shape=[1], dtype=dtypes.float32, name="v1", diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 9afd1e6643..e8ea5abfbd 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -311,8 +311,7 @@ class BaseSaverBuilder(object): Returns: A string tensor. """ - # pylint: disable=protected-access - return gen_io_ops._sharded_filename(filename_tensor, shard, num_shards) + return gen_io_ops.sharded_filename(filename_tensor, shard, num_shards) def _AddSaveOps(self, filename_tensor, saveables): """Add ops to save variables that are on the same shard. @@ -421,8 +420,7 @@ class BaseSaverBuilder(object): sharded_saves.append(self._AddSaveOps(sharded_filename, saveables)) # Return the sharded name for the save path. with ops.control_dependencies([x.op for x in sharded_saves]): - # pylint: disable=protected-access - return gen_io_ops._sharded_filespec(filename_tensor, num_shards_tensor) + return gen_io_ops.sharded_filespec(filename_tensor, num_shards_tensor) def _AddRestoreOps(self, filename_tensor, diff --git a/tensorflow/python/training/saver_test_utils.py b/tensorflow/python/training/saver_test_utils.py index 44b06b357e..0a8b7a09af 100644 --- a/tensorflow/python/training/saver_test_utils.py +++ b/tensorflow/python/training/saver_test_utils.py @@ -35,7 +35,7 @@ class CheckpointedOp(object): # pylint: disable=protected-access def __init__(self, name, table_ref=None): if table_ref is None: - self.table_ref = gen_lookup_ops._mutable_hash_table_v2( + self.table_ref = gen_lookup_ops.mutable_hash_table_v2( key_dtype=dtypes.string, value_dtype=dtypes.float32, name=name) else: self.table_ref = table_ref @@ -57,10 +57,10 @@ class CheckpointedOp(object): return CheckpointedOp.CustomSaveable(self, self.name) def insert(self, keys, values): - return 
gen_lookup_ops._lookup_table_insert_v2(self.table_ref, keys, values) + return gen_lookup_ops.lookup_table_insert_v2(self.table_ref, keys, values) def lookup(self, keys, default): - return gen_lookup_ops._lookup_table_find_v2(self.table_ref, keys, default) + return gen_lookup_ops.lookup_table_find_v2(self.table_ref, keys, default) def keys(self): return self._export()[0] @@ -69,8 +69,8 @@ class CheckpointedOp(object): return self._export()[1] def _export(self): - return gen_lookup_ops._lookup_table_export_v2(self.table_ref, dtypes.string, - dtypes.float32) + return gen_lookup_ops.lookup_table_export_v2(self.table_ref, dtypes.string, + dtypes.float32) class CustomSaveable(saver_module.BaseSaverBuilder.SaveableObject): """A custom saveable for CheckpointedOp.""" @@ -86,6 +86,6 @@ class CheckpointedOp(object): super(CheckpointedOp.CustomSaveable, self).__init__(table, specs, name) def restore(self, restore_tensors, shapes): - return gen_lookup_ops._lookup_table_import_v2( + return gen_lookup_ops.lookup_table_import_v2( self.op.table_ref, restore_tensors[0], restore_tensors[1]) # pylint: enable=protected-access diff --git a/tensorflow/python/user_ops/user_ops.py b/tensorflow/python/user_ops/user_ops.py index 17dbab706c..6f9b5d92bb 100644 --- a/tensorflow/python/user_ops/user_ops.py +++ b/tensorflow/python/user_ops/user_ops.py @@ -27,4 +27,4 @@ from tensorflow.python.ops.gen_user_ops import * # pylint: disable=wildcard-imp def my_fact(): """Example of overriding the generated code for an Op.""" - return _gen_user_ops._fact() # pylint: disable=protected-access + return _gen_user_ops.fact() -- GitLab From 8687aa6f7da68e378d5465914109498f23e300a0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 14:55:50 -0800 Subject: [PATCH 255/884] Remove unnecessary stack operations from graphs. This change implements the first such optimization that removes stack pushes without corresponding pops. 
PiperOrigin-RevId: 187387794 --- tensorflow/core/grappler/op_types.cc | 13 ++++ tensorflow/core/grappler/op_types.h | 4 ++ .../grappler/optimizers/loop_optimizer.cc | 62 ++++++++++++++++++- .../optimizers/loop_optimizer_test.cc | 59 ++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 2 +- tensorflow/core/grappler/utils.cc | 17 +++++ tensorflow/core/grappler/utils.h | 8 +++ 7 files changed, 161 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 9b3755ddce..fb46b584b2 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -300,6 +300,19 @@ bool IsSquaredDifference(const NodeDef& node) { bool IsSqueeze(const NodeDef& node) { return node.op() == "Squeeze"; } +bool IsStackOp(const NodeDef& node) { + return node.op() == "Stack" || node.op() == "StackV2"; +} +bool IsStackCloseOp(const NodeDef& node) { + return node.op() == "StackClose" || node.op() == "StackCloseV2"; +} +bool IsStackPushOp(const NodeDef& node) { + return node.op() == "StackPush" || node.op() == "StackPushV2"; +} +bool IsStackPopOp(const NodeDef& node) { + return node.op() == "StackPop" || node.op() == "StackPopV2"; +} + bool IsStopGradient(const NodeDef& node) { const auto& op = node.op(); return op == "StopGradient" || op == "PreventGradient"; diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 1fa43a9b66..a7c33ef97b 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -118,6 +118,10 @@ bool IsSplitV(const NodeDef& node); bool IsSqrtGrad(const NodeDef& node); bool IsSquaredDifference(const NodeDef& node); bool IsSqueeze(const NodeDef& node); +bool IsStackOp(const NodeDef& node); +bool IsStackCloseOp(const NodeDef& node); +bool IsStackPushOp(const NodeDef& node); +bool IsStackPopOp(const NodeDef& node); bool IsStopGradient(const NodeDef& node); bool IsStridedSlice(const NodeDef& node); bool 
IsStridedSliceGrad(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 102526e22f..cc226c01db 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -22,20 +22,76 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/strcat.h" namespace tensorflow { namespace grappler { +namespace { -Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, - GraphDef* optimized_graph) { - *optimized_graph = item.graph; +Status RemoveStackOps(const GraphDef& graph, GraphDef* optimized_graph) { + SimpleGraphView graph_view; + TF_RETURN_IF_ERROR(graph_view.Initialize(graph)); + const std::unordered_set op_types_to_traverse( + {"Stack", "StackV2", "Enter", "Switch", "RefSwitch", "Identity"}); + std::set nodes_to_delete; + for (int node_idx = 0; node_idx < graph.node_size(); ++node_idx) { + const NodeDef& node = graph.node(node_idx); + if (IsStackOp(node)) { + std::set nodes_found; + graph_view.DepthFirstSearch(op_types_to_traverse, node_idx, &nodes_found); + bool found_pop = false; + bool found_unexpected = false; + for (int found_idx : nodes_found) { + const NodeDef& node = graph.node(found_idx); + if (IsStackPushOp(node) || IsStackOp(node) || IsStackCloseOp(node)) { + continue; + } else if (IsStackPopOp(node)) { + found_pop = true; + } else { + // Don't modify the graph if we found an unexpected op. There may be + // a pop hiding behind it. 
+ found_unexpected = true; + } + } + if (!found_unexpected && !found_pop) { + VLOG(1) << "Found stack node with no pop: " << node.DebugString(); + // Remove all pushes. + for (int found_idx : nodes_found) { + const NodeDef& node = graph.node(found_idx); + if (IsStackPushOp(node)) { + nodes_to_delete.insert(found_idx); + } + } + } + } + } + *optimized_graph = graph; + if (!nodes_to_delete.empty()) { + int last = optimized_graph->node_size() - 1; + for (auto it = nodes_to_delete.rbegin(); it != nodes_to_delete.rend(); + ++it) { + const int node_to_delete = *it; + optimized_graph->mutable_node()->SwapElements(node_to_delete, last); + --last; + } + optimized_graph->mutable_node()->DeleteSubrange(last + 1, + nodes_to_delete.size()); + } return Status::OK(); } +} // namespace + +Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, + GraphDef* optimized_graph) { + Status status = RemoveStackOps(item.graph, optimized_graph); + return status; +} + void LoopOptimizer::Feedback(Cluster* /*cluster*/, const GrapplerItem& /*item*/, const GraphDef& /*optimized_graph*/, double /*result*/) { diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index c09434f609..bb2ee6b02b 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -57,6 +57,65 @@ TEST_F(LoopOptimizerTest, NoOp) { VerifyGraphsEqual(item.graph, output, __FUNCTION__); } +namespace { +NodeDef* AddNode(const string& name, const string& op, + const std::vector& inputs, GraphDef* graph) { + NodeDef* node = graph->add_node(); + node->set_name(name); + node->set_op(op); + for (const string& input : inputs) { + node->add_input(input); + } + return node; +} +} // namespace + +TEST_F(LoopOptimizerTest, RemovePush_NoOp) { + GrapplerItem item; + GraphDef& graph = item.graph; + // Stack with corresponding push/pop. 
+ AddNode("stack1", "StackV2", {}, &graph); + AddNode("push1", "StackPushV2", {"stack1"}, &graph); + AddNode("pop1", "StackPopV2", {"stack1"}, &graph); + // Stack with corresponding push/pop behind Enter. + AddNode("stack2", "StackV2", {}, &graph); + AddNode("push_enter", "Enter", {"stack1"}, &graph); + AddNode("push2", "StackPushV2", {"push_enter"}, &graph); + AddNode("pop_enter", "Enter", {"stack1"}, &graph); + AddNode("pop2", "StackPopV2", {"pop_enter"}, &graph); + // Stack with unexpected op type in fanout of Stack. + AddNode("stack3", "StackV2", {}, &graph); + AddNode("push3", "StackPushV2", {"stack3"}, &graph); + AddNode("stop", "StopGradient", {"stack3"}, &graph); + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + VerifyGraphsEqual(item.graph, output, __FUNCTION__); +} + +TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { + GrapplerItem item; + GraphDef& graph = item.graph; + AddNode("stack1", "StackV2", {}, &graph); + AddNode("push1", "StackPushV2", {"stack1"}, &graph); + AddNode("stack2", "StackV2", {}, &graph); + AddNode("push_enter", "Enter", {"stack2"}, &graph); + AddNode("push2", "StackPushV2", {"push_enter"}, &graph); + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + EXPECT_EQ(3, output.node_size()); + int found = 0; + for (int i = 0; i < output.node_size(); ++i) { + if (output.node(i).name() == "stack1") ++found; + if (output.node(i).name() == "push_enter") ++found; + if (output.node(i).name() == "stack2") ++found; + } + EXPECT_EQ(3, found); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 93658a6475..b674ee1553 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ 
b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -110,7 +110,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new DependencyOptimizer(cfg_.dependency_optimization()))); } - if (cfg_.loop_optimization() != RewriterConfig::OFF) { + if (cfg_.loop_optimization() == RewriterConfig::ON) { optimizers.push_back(std::unique_ptr( new LoopOptimizer(cfg_.loop_optimization()))); } diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index 81bb5e6c3b..a611a93086 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -348,6 +348,7 @@ inline void STLSortAndRemoveDuplicates(T* v) { Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs, bool dedup_outputs) { + graph_ = &graph; const int num_nodes = graph.node_size(); inputs_.clear(); inputs_.resize(num_nodes); @@ -394,6 +395,22 @@ Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs, return Status::OK(); } +void SimpleGraphView::DepthFirstSearch( + const std::unordered_set& op_types_to_traverse, int node_idx, + std::set* nodes_found) const { + const NodeDef& node = graph_->node(node_idx); + if (op_types_to_traverse.find(node.op()) == op_types_to_traverse.end()) { + nodes_found->insert(node_idx); + return; + } + if (nodes_found->find(node_idx) != nodes_found->end()) { + return; + } + for (auto output_idx : this->outputs(node_idx)) { + DepthFirstSearch(op_types_to_traverse, output_idx, nodes_found); + } +} + string SimpleGraphView::PrintToString() const { string str; for (int i = 0; i < num_nodes(); ++i) { diff --git a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index 255319693a..1b91a57154 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -194,9 +194,17 @@ class SimpleGraphView { return outputs_[node_idx]; } + // Traverse the graph starting at `node_idx`, collecting indices of nodes + // 
visited in nodes_found. If a node has an op in `op_types_to_traverse`, the + // walk continues to its children. It is assumed that *graph_ was not modified + // after the call to Initialize(). + void DepthFirstSearch(const std::unordered_set& op_types_to_traverse, + int node_idx, std::set* nodes_found) const; + string PrintToString() const; private: + const GraphDef* graph_; // Not owned. std::vector index_to_name_; std::unordered_map name_to_index_; std::vector> inputs_; -- GitLab From d3c8659b27c644268156d15ec4b556e60db21491 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 28 Feb 2018 15:18:29 -0800 Subject: [PATCH 256/884] keras: Avoid unnecessary call to .call() when building models with subclassing. This fixes a regression in the defun microbenchmarks (ResNet50Benchmarks.eager_train_with_defun_gpu_batch_32_channels_first etc.) in tensorflow/contrib/eager/python/examples/resnet50/resnet50_test.py seen after https://github.com/tensorflow/tensorflow/commit/9a84277be2cb8233c5c14270db6fcdff31ab4d93 (which embeds a model in model) Without this change, converting a model call to a graph function using something like: model.call = tfe.defun(model.call) could result in redundant nodes being added to the graph function as the model._set_inputs() call would invoke model.call() again.
PiperOrigin-RevId: 187391494 --- .../keras/_impl/keras/engine/base_layer.py | 7 +++-- .../keras/_impl/keras/engine/training.py | 28 +++++++++++-------- 2 files changed, 20 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py index 142325041b..7f215f5645 100644 --- a/tensorflow/python/keras/_impl/keras/engine/base_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -240,9 +240,10 @@ class Layer(tf_base_layers.Layer): if context.in_eager_mode(): return output - # Un-built subclassed network: build it - if hasattr(self, '_set_inputs') and not self.inputs: - self._set_inputs(inputs, training=kwargs.get('training')) + if hasattr(self, '_symbolic_set_inputs') and not self.inputs: + # Subclassed network: explicitly set metadata normally set by a call to + # self._set_inputs(). + self._symbolic_set_inputs(inputs, output) # Update learning phase info. output_tensors = generic_utils.to_list(output) diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 63bea08ac5..c121d819ff 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -1835,14 +1835,17 @@ class Model(Network): 'output_%d' % (i + 1) for i in range(len(dummy_output_values))] self.built = True - def _symbolic_set_inputs(self, inputs, training=None): - """Set model's inputs based on the input data received from the user. + def _symbolic_set_inputs(self, inputs, outputs=None, training=None): + """Set model's inputs and output specs based. This is to be used for Model subclasses, which do not know at instantiation time what their inputs look like. Args: inputs: Argument `x` (input data) passed by the user upon first model use. + outputs: None, a data tensor, or a list of data tensors. 
If None, the + outputs will be determined by invoking self.call(), otherwise the + provided value will be used. training: Boolean or None. Only relevant in symbolic mode. Specifies whether to build the model's graph in inference mode (False), training mode (True), or using the Keras learning phase (None). @@ -1892,17 +1895,18 @@ class Model(Network): self._feed_input_names.append(name) self._feed_input_shapes.append(K.int_shape(v)) - # Obtain symbolic outputs by calling the model. - if len(self.inputs) == 1: - if self._expects_training_arg: - outputs = self.call(self.inputs[0], training=training) - else: - outputs = self.call(self.inputs[0]) - else: - if self._expects_training_arg: - outputs = self.call(self.inputs, training=training) + if outputs is None: + # Obtain symbolic outputs by calling the model. + if len(self.inputs) == 1: + if self._expects_training_arg: + outputs = self.call(self.inputs[0], training=training) + else: + outputs = self.call(self.inputs[0]) else: - outputs = self.call(self.inputs) + if self._expects_training_arg: + outputs = self.call(self.inputs, training=training) + else: + outputs = self.call(self.inputs) if isinstance(outputs, (list, tuple)): outputs = list(outputs) else: -- GitLab From 656055e0c9acd944b7a34bfe01c06ad122f87da8 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Wed, 28 Feb 2018 15:36:39 -0800 Subject: [PATCH 257/884] Exclude more tests for cuda_on_cpu project. 
PiperOrigin-RevId: 187394209 --- tensorflow/core/grappler/optimizers/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 7b801db2c8..b8995ef365 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -382,6 +382,7 @@ cc_library( tf_cc_test_gpu( name = "memory_optimizer_test", srcs = ["memory_optimizer_test.cc"], + tags = ["no_cuda_on_cpu_tap"], deps = [ ":memory_optimizer", "//tensorflow/cc:cc_ops", -- GitLab From b98a1f31bca1e773ee215f2c32aa0509843c1247 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 15:44:55 -0800 Subject: [PATCH 258/884] Propagate NaNs for floating point min/max operations. PiperOrigin-RevId: 187395444 --- .../compiler/xla/service/hlo_evaluator.cc | 39 ++++++++--- .../compiler/xla/service/llvm_ir/llvm_util.cc | 12 ++-- .../xla/tests/array_elementwise_ops_test.cc | 70 +++---------------- .../xla/tests/scalar_computations_test.cc | 12 ++++ 4 files changed, 59 insertions(+), 74 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index edb1ad2360..42de7ada61 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -613,14 +613,25 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return Status::OK(); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + template ::value>::type* = + nullptr> + Status HandleMaximum(HloInstruction* maximum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[maximum], + ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { + return std::max(lhs, rhs); + })); + return Status::OK(); + } + + template ::value>::type* = nullptr> Status HandleMaximum(HloInstruction* maximum) { TF_ASSIGN_OR_RETURN( parent_->evaluated_[maximum], 
ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) { - return std::fmax(lhs, rhs); + return ((lhs >= rhs) || std::isnan(lhs)) ? lhs : rhs; })); return Status::OK(); } @@ -636,18 +647,30 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { return HandleMaximum(maximum); } - template < - typename NativeT, - typename std::enable_if::value>::type* = nullptr> + template ::value>::type* = + nullptr> Status HandleMinimum(HloInstruction* minimum) { TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum], ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, ElementwiseT rhs_el) { - return std::fmin(lhs_el, rhs_el); + return std::min(lhs_el, rhs_el); })); return Status::OK(); } + template ::value>::type* = nullptr> + Status HandleMinimum(HloInstruction* minimum) { + TF_ASSIGN_OR_RETURN( + parent_->evaluated_[minimum], + ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el, + ElementwiseT rhs_el) { + return ((lhs_el <= rhs_el) || std::isnan(lhs_el)) ? lhs_el : rhs_el; + })); + return Status::OK(); + } + template < typename NativeT, typename std::enable_if::value>::type* = nullptr> diff --git a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc index 5c1866311d..2a282f3be7 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/llvm_util.cc @@ -106,8 +106,10 @@ llvm::Value* EmitFloatMax(llvm::Value* lhs_value, llvm::Value* rhs_value, auto cmp = ir_builder->CreateFCmpUGE(lhs_value, rhs_value); return ir_builder->CreateSelect(cmp, lhs_value, rhs_value); } else { - return EmitCallToIntrinsic(llvm::Intrinsic::maxnum, {lhs_value, rhs_value}, - {lhs_value->getType()}, ir_builder); + auto cmp_ge = ir_builder->CreateFCmpOGE(lhs_value, rhs_value); + auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value); + auto sel_lhs = ir_builder->CreateOr(cmp_ge, lhs_is_nan); + return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value); } } @@ 
-117,8 +119,10 @@ llvm::Value* EmitFloatMin(llvm::Value* lhs_value, llvm::Value* rhs_value, auto cmp = ir_builder->CreateFCmpULE(lhs_value, rhs_value); return ir_builder->CreateSelect(cmp, lhs_value, rhs_value); } else { - return EmitCallToIntrinsic(llvm::Intrinsic::minnum, {lhs_value, rhs_value}, - {lhs_value->getType()}, ir_builder); + auto cmp_le = ir_builder->CreateFCmpOLE(lhs_value, rhs_value); + auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value); + auto sel_lhs = ir_builder->CreateOr(cmp_le, lhs_is_nan); + return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value); } } diff --git a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc index 8b35259013..6e21dda25d 100644 --- a/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc +++ b/tensorflow/compiler/xla/tests/array_elementwise_ops_test.cc @@ -1648,33 +1648,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4DZeroElements) { ComputeAndCompareR4(&builder, expected, {}, error_spec_); } -// GPU backend emits nvvm intrinsic for fmin and fmax, whose semantics is NOT -// such -// * fmin(NaN, x) = x -// * fmax(NaN, x) = x -// so we only test NAN on CPU. -// -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. 
XLA_TEST_F(ArrayElementwiseOpTest, MinF32s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f}); - auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); -#endif auto minimum = builder.Min(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {1.0f, -5.0f, 1.0f}, -#else - {1.0f, -5.0f, 1.0f, 10.0f, 6.0f}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {1.0f, -5.0f, 1.0f, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) { @@ -1685,50 +1667,26 @@ XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) { ComputeAndCompareR1(&builder, {}, {}, error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. XLA_TEST_F(ArrayElementwiseOpTest, MinF64s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0, 1.0, 2.25}); - auto rhs = builder.ConstantR1({2.0, -5.0, 1.0}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); -#endif auto minimum = builder.Min(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {1.0, -5.0, 1.0}, -#else - {1.0, -5.0, 1.0, 10.0, 6.0}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {1.0, -5.0, 1.0, NAN, NAN}, {}, + error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. 
XLA_TEST_F(ArrayElementwiseOpTest, MaxF32s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f}); - auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0f, 1.0f, 2.25f, NAN, 6.0f}); auto rhs = builder.ConstantR1({2.0f, -5.0f, 1.0f, 10.0f, NAN}); -#endif auto maximum = builder.Max(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {2.0f, 1.0f, 2.25f}, -#else - {2.0f, 1.0f, 2.25f, 10.0f, 6.0f}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {2.0f, 1.0f, 2.25f, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { @@ -1739,27 +1697,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) { ComputeAndCompareR1(&builder, {}, {}, error_spec_); } -// TODO(b/28180546): Make this compile in a way that is consistent -// among backends. See comment on MinF32s test above. 
XLA_TEST_F(ArrayElementwiseOpTest, MaxF64s) { ComputationBuilder builder(client_, TestName()); -#if !defined(XLA_TEST_BACKEND_CPU) - auto lhs = builder.ConstantR1({1.0, 1.0, 2.25}); - auto rhs = builder.ConstantR1({2.0, -5.0, 1.0}); -#else SetFastMathDisabled(true); auto lhs = builder.ConstantR1({1.0, 1.0, 2.25, NAN, 6.0}); auto rhs = builder.ConstantR1({2.0, -5.0, 1.0, 10.0, NAN}); -#endif auto maximum = builder.Max(lhs, rhs); - ComputeAndCompareR1(&builder, -#if !defined(XLA_TEST_BACKEND_CPU) - {2.0, 1.0, 2.25}, -#else - {2.0, 1.0, 2.25, 10.0, 6.0}, -#endif - {}, error_spec_); + ComputeAndCompareR1(&builder, {2.0, 1.0, 2.25, NAN, NAN}, {}, + error_spec_); } XLA_TEST_F(ArrayElementwiseOpTest, MaxS32s) { diff --git a/tensorflow/compiler/xla/tests/scalar_computations_test.cc b/tensorflow/compiler/xla/tests/scalar_computations_test.cc index d7bda77e87..0c88bef69d 100644 --- a/tensorflow/compiler/xla/tests/scalar_computations_test.cc +++ b/tensorflow/compiler/xla/tests/scalar_computations_test.cc @@ -860,6 +860,12 @@ XLA_TEST_F(ScalarComputationsTest, MinF32Below) { TestMinMax(-100.1f, 3.1f, -100.1f, &ComputationBuilder::Min); } +XLA_TEST_F(ScalarComputationsTest, MinPropagatesNan) { + SetFastMathDisabled(true); + TestMinMax(NAN, 3.1f, NAN, &ComputationBuilder::Min); + TestMinMax(-3.1f, NAN, NAN, &ComputationBuilder::Min); +} + XLA_TEST_F(ScalarComputationsTest, MaxF32Above) { TestMinMax(10.1f, 3.1f, 10.1f, &ComputationBuilder::Max); } @@ -868,6 +874,12 @@ XLA_TEST_F(ScalarComputationsTest, MaxF32Below) { TestMinMax(-100.1f, 3.1f, 3.1f, &ComputationBuilder::Max); } +XLA_TEST_F(ScalarComputationsTest, MaxPropagatesNan) { + SetFastMathDisabled(true); + TestMinMax(NAN, 3.1f, NAN, &ComputationBuilder::Max); + TestMinMax(-3.1f, NAN, NAN, &ComputationBuilder::Max); +} + XLA_TEST_F(ScalarComputationsTest, ComplicatedArithmeticExpressionF32) { // Compute the expression (1 * (3 - 1) * (7 + 0) - 4) / 20. 
ComputationBuilder b(client_, TestName()); -- GitLab From 9a52edb4760f13dda1b27f9126f8117d6c4f9bc9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 15:47:01 -0800 Subject: [PATCH 259/884] Update a few tests to work with Grappler constant folding. PiperOrigin-RevId: 187395886 --- tensorflow/python/kernel_tests/pooling_ops_test.py | 8 ++++++-- tensorflow/python/kernel_tests/reduction_ops_test.py | 4 +++- tensorflow/python/kernel_tests/softmax_op_test.py | 3 +-- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/pooling_ops_test.py b/tensorflow/python/kernel_tests/pooling_ops_test.py index a0ac355b60..2f3bea5825 100644 --- a/tensorflow/python/kernel_tests/pooling_ops_test.py +++ b/tensorflow/python/kernel_tests/pooling_ops_test.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variables import tensorflow.python.ops.nn_grad # pylint: disable=unused-import from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging @@ -731,7 +732,8 @@ class PoolingTest(test.TestCase): [1, 1, 1, 3], "evenly divide") if test.is_gpu_available(): with self.test_session(use_gpu=True): - t = constant_op.constant(1.0, shape=[1, 2, 2, 4]) + t = variables.Variable(np.ones([1, 2, 2, 4])) + variables.global_variables_initializer().run() with self.assertRaisesOpError("for CPU devices"): nn_ops.max_pool( t, ksize=[1, 1, 1, 2], strides=[1, 1, 1, 2], @@ -1210,7 +1212,9 @@ class PoolingTest(test.TestCase): padding, use_gpu, v2): pool_func = gen_nn_ops.max_pool_v2 if v2 else nn_ops.max_pool with self.test_session(use_gpu=use_gpu): - input_tensor = constant_op.constant(input_data, shape=input_sizes) + input_tensor = variables.Variable( + np.array(input_data, dtype=np.float32).reshape(input_sizes)) + 
variables.global_variables_initializer().run() output_tensor = pool_func(input_tensor, [1, window_rows, window_cols, 1], [1, row_stride, col_stride, 1], padding) output_backprop_tensor = constant_op.constant( diff --git a/tensorflow/python/kernel_tests/reduction_ops_test.py b/tensorflow/python/kernel_tests/reduction_ops_test.py index d306d1b8d6..589ea54973 100644 --- a/tensorflow/python/kernel_tests/reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/reduction_ops_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables from tensorflow.python.platform import test # The maximum input rank to test. @@ -212,7 +213,8 @@ class SumReductionTest(BaseReductionTest): arr = np.ones([68000], dtype=np.float16) with self.test_session(graph=ops.Graph(), use_gpu=True) as sess: - tf_arr = array_ops.constant(arr) + tf_arr = variables.Variable(arr) + variables.global_variables_initializer().run() tf_mean = math_ops.reduce_mean(tf_arr, 0, False) tf_out_mean = sess.run(tf_mean) self.assertAllClose(tf_out_mean, 1.) 
diff --git a/tensorflow/python/kernel_tests/softmax_op_test.py b/tensorflow/python/kernel_tests/softmax_op_test.py index 4d89831aae..2b8e99e18e 100644 --- a/tensorflow/python/kernel_tests/softmax_op_test.py +++ b/tensorflow/python/kernel_tests/softmax_op_test.py @@ -20,7 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors_impl from tensorflow.python.framework import test_util @@ -166,7 +165,7 @@ class SoftmaxTest(test.TestCase): def testEmptyInput(self): with self.test_session(): - x = constant_op.constant([[]], shape=[0, 3]) + x = array_ops.placeholder(dtypes.float32, shape=[0, 3]) self.assertEqual(0, array_ops.size(x).eval()) # reshape would raise if logits is empty with self.assertRaises(errors_impl.InvalidArgumentError): -- GitLab From 8be4ab7b2d2ad00ffa84da82e9cbba88c677877d Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 28 Feb 2018 15:51:18 -0800 Subject: [PATCH 260/884] Add all_files target to gcs_smoke_test BUILD file. 
PiperOrigin-RevId: 187396477 --- tensorflow/BUILD | 1 + .../integration_tests/gcs_smoke_test/BUILD.bazel | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/BUILD b/tensorflow/BUILD index a4e7602bea..4b2facd6b3 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -674,6 +674,7 @@ filegroup( "//tensorflow/tools/docs:all_files", "//tensorflow/tools/git:all_files", "//tensorflow/tools/graph_transforms:all_files", + "//tensorflow/tools/integration_tests/gcs_smoke_test:all_files", "//tensorflow/tools/mlpbtxt:all_files", "//tensorflow/tools/proto_text:all_files", "//tensorflow/tools/quantization:all_files", diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel b/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel index 439d86c5d2..0acc139df9 100755 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel +++ b/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel @@ -54,3 +54,14 @@ integration_test( test_docker_image = toolchain_container_images()["tensorflow"], test_type = "MultiMachine", ) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), +) -- GitLab From a5b336194f4fd1a26bcd5dfd159d6edf4dfdd081 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Wed, 28 Feb 2018 15:59:33 -0800 Subject: [PATCH 261/884] Remove record_gradient param from benchmark function PiperOrigin-RevId: 187397610 --- tensorflow/python/eager/benchmarks_test.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 228ff62b20..527a919ab0 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -35,7 +35,6 @@ from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop # pylint: disable=unused-import from tensorflow.python.eager import context from tensorflow.python.eager 
import core -from tensorflow.python.eager import execute from tensorflow.python.eager import function from tensorflow.python.eager import test from tensorflow.python.framework import dtypes @@ -60,7 +59,7 @@ def c_tfe_py_fastpath_execute(a, ), "The prototype doesn't contain C code for graph construction" try: return pywrap_tensorflow.TFE_Py_FastPathExecute( - ctx._handle, ctx.device_name, "MatMul", execute.record_gradient, name, + ctx._handle, ctx.device_name, "MatMul", name, ctx._post_execution_callbacks, a, b, "transpose_a", transpose_a, "transpose_b", transpose_b) except core._NotOkStatusException as e: -- GitLab From e670c81d85f3353ea3b701569f8f5126714a02bf Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 28 Feb 2018 16:22:42 -0800 Subject: [PATCH 262/884] GCS: HTTP error code 308 retries during upload. Previously, it would only permit 308 when getting the status of an upload. This matches the behavior of the official library: https://github.com/google/apitools/blob/master/apitools/base/py/transfer.py#L925 And the general description here: https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload PiperOrigin-RevId: 187400843 --- .../core/platform/cloud/curl_http_request.cc | 8 +++-- .../platform/cloud/gcs_file_system_test.cc | 33 +++++++++++++------ 2 files changed, 29 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 88a5d1e96d..4b5f6974c1 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -493,14 +493,18 @@ Status CurlHttpRequest::Send() { case 303: // See Other case 304: // Not Modified case 307: // Temporary Redirect - case 308: // Resume Incomplete case 412: // Precondition Failed case 413: // Payload Too Large result = errors::FailedPrecondition(error_message); break; // UNAVAILABLE indicates a problem that can go away if the request - // is just retried 
without any modification. + // is just retried without any modification. 308 return codes are intended + // for write requests that can be retried. See the documentation and the + // official library: + // https://cloud.google.com/storage/docs/json_api/v1/how-tos/resumable-upload + // https://github.com/google/apitools/blob/master/apitools/base/py/transfer.py + case 308: // Resume Incomplete case 409: // Conflict case 429: // Too Many Requests case 500: // Internal Server Error diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index d452074ce3..cd9fd3adea 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -393,7 +393,7 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) { "Timeouts: 5 1 10\n" "Header Content-Range: bytes */17\n" "Put: yes\n", - "", errors::FailedPrecondition("308"), nullptr, + "", errors::Unavailable("308"), nullptr, {{"Range", "0-10"}}, 308), new FakeHttpRequest("Uri: https://custom/upload/location\n" "Auth Token: fake_token\n" @@ -406,13 +406,26 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadSucceeds) { "Timeouts: 5 1 10\n" "Header Content-Range: bytes */17\n" "Put: yes\n", - "", errors::FailedPrecondition("308"), nullptr, + "", errors::Unavailable("308"), nullptr, {{"Range", "bytes=0-12"}}, 308), new FakeHttpRequest("Uri: https://custom/upload/location\n" "Auth Token: fake_token\n" "Header Content-Range: bytes 13-16/17\n" "Timeouts: 5 1 30\n" "Put body: ent2\n", + "", errors::Unavailable("308"), 308), + new FakeHttpRequest("Uri: https://custom/upload/location\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n" + "Header Content-Range: bytes */17\n" + "Put: yes\n", + "", errors::Unavailable("308"), nullptr, + {{"Range", "bytes=0-14"}}, 308), + new FakeHttpRequest("Uri: https://custom/upload/location\n" + "Auth Token: fake_token\n" + "Header Content-Range: bytes 
15-16/17\n" + "Timeouts: 5 1 30\n" + "Put body: t2\n", "")}); GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), std::unique_ptr( @@ -521,14 +534,14 @@ TEST(GcsFileSystemTest, NewWritableFile_ResumeUploadAllAttemptsFail) { "Put body: content1,content2\n", "", errors::Unavailable("503"), 503)}); for (int i = 0; i < 10; i++) { - requests.emplace_back(new FakeHttpRequest( - "Uri: https://custom/upload/location\n" - "Auth Token: fake_token\n" - "Timeouts: 5 1 10\n" - "Header Content-Range: bytes */17\n" - "Put: yes\n", - "", errors::FailedPrecondition("important HTTP error 308"), nullptr, - {{"Range", "0-10"}}, 308)); + requests.emplace_back( + new FakeHttpRequest("Uri: https://custom/upload/location\n" + "Auth Token: fake_token\n" + "Timeouts: 5 1 10\n" + "Header Content-Range: bytes */17\n" + "Put: yes\n", + "", errors::Unavailable("important HTTP error 308"), + nullptr, {{"Range", "0-10"}}, 308)); requests.emplace_back(new FakeHttpRequest( "Uri: https://custom/upload/location\n" "Auth Token: fake_token\n" -- GitLab From 86061c8e8034c5bee955659bdda8366f640f543d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 16:41:17 -0800 Subject: [PATCH 263/884] Adding the documentation for building the iOS demo for TensorFlow Lite. 
PiperOrigin-RevId: 187403346 --- tensorflow/docs_src/mobile/leftnav_files | 1 + tensorflow/docs_src/mobile/tflite/demo_ios.md | 68 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 tensorflow/docs_src/mobile/tflite/demo_ios.md diff --git a/tensorflow/docs_src/mobile/leftnav_files b/tensorflow/docs_src/mobile/leftnav_files index ac50f528ba..4cf134cc3c 100644 --- a/tensorflow/docs_src/mobile/leftnav_files +++ b/tensorflow/docs_src/mobile/leftnav_files @@ -2,6 +2,7 @@ index.md ### TensorFlow Lite tflite/index.md tflite/demo_android.md +tflite/demo_ios.md >>> ### TensorFlow Mobile mobile_intro.md diff --git a/tensorflow/docs_src/mobile/tflite/demo_ios.md b/tensorflow/docs_src/mobile/tflite/demo_ios.md new file mode 100644 index 0000000000..3ee9b1cbca --- /dev/null +++ b/tensorflow/docs_src/mobile/tflite/demo_ios.md @@ -0,0 +1,68 @@ +# TensorFlow Lite Demo for iOS + +The TensorFlow Lite demo is a camera app that continuously classifies whatever +it sees from your device's back camera, using a quantized MobileNet model. These +instructions walk you through building and running the demo on an iOS device. + +## Prerequisites + +* You must have [Xcode](https://developer.apple.com/xcode/) installed and have a + valid Apple Developer ID, and have an iOS device set up and linked to your + developer account with all of the appropriate certificates. For these + instructions, we assume that you have already been able to build and deploy an + app to an iOS device with your current developer environment. + +* The demo app requires a camera and must be executed on a real iOS device. You + can build it and run with the iPhone Simulator but it won't have any camera + information to classify. 
+ +* You don't need to build the entire TensorFlow library to run the demo, but you + will need to clone the TensorFlow repository if you haven't already: + + git clone https://github.com/tensorflow/tensorflow + +* You'll also need the Xcode command-line tools: + + xcode-select --install + + If this is a new install, you will need to run the Xcode application once to + agree to the license before continuing. + +## Building the iOS Demo App + +1. Install CocoaPods if you don't have it: + + sudo gem install cocoapods + +2. Download the model files used by the demo app (this is done from inside the + cloned directory): + + sh tensorflow/contrib/lite/examples/ios/download_models.sh + +3. Install the pod to generate the workspace file: + + cd tensorflow/contrib/lite/examples/ios/camera + pod install + + If you have installed this pod before and that command doesn't work, try + + pod update + + At the end of this step you should have a file called + `tflite_camera_example.xcworkspace`. + +4. Open the project in Xcode by typing this on the command line: + + open tflite_camera_example.xcworkspace + + This launches Xcode if it isn't open already and opens the + `tflite_camera_example` project. + +5. Build and run the app in Xcode. + + Note that as mentioned earlier, you must already have a device set up and + linked to your Apple Developer account in order to deploy the app on a + device. + +You'll have to grant permissions for the app to use the device's camera. Point +the camera at various objects and enjoy seeing how the model classifies things! -- GitLab From 6a2bb85654655d7dc6e5017de6586e76634ebcd1 Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Wed, 28 Feb 2018 17:18:52 -0800 Subject: [PATCH 264/884] Docs: Update Fixed Point Quantization in performance. 
PiperOrigin-RevId: 187408106 --- tensorflow/docs_src/performance/leftnav_files | 4 +- .../docs_src/performance/quantization.md | 461 +++++++++--------- 2 files changed, 245 insertions(+), 220 deletions(-) diff --git a/tensorflow/docs_src/performance/leftnav_files b/tensorflow/docs_src/performance/leftnav_files index 316f023f43..d11a7e5d07 100644 --- a/tensorflow/docs_src/performance/leftnav_files +++ b/tensorflow/docs_src/performance/leftnav_files @@ -2,6 +2,7 @@ performance_guide.md datasets_performance.md performance_models.md benchmarks.md +quantization.md ### XLA xla/index.md @@ -11,6 +12,3 @@ xla/jit.md xla/operation_semantics.md xla/shapes.md xla/tfcompile.md - -### Quantization -quantization.md diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 544274cab6..63448c2ebe 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -1,226 +1,253 @@ -# How to Quantize Neural Networks with TensorFlow - -When modern neural networks were being developed, the biggest challenge was -getting them to work at all! That meant that accuracy and speed during training -were the top priorities. Using floating point arithmetic was the easiest way to -preserve accuracy, and GPUs were well-equipped to accelerate those calculations, -so it's natural that not much attention was paid to other numerical formats. - -These days, we actually have a lot of models being deployed in commercial -applications. The computation demands of training grow with the number of -researchers, but the cycles needed for inference expand in proportion to users. -That means pure inference efficiency has become a burning issue for a lot of -teams. - -That is where quantization comes in. It's an umbrella term that covers a lot of -different techniques to store numbers and perform calculations on them in more -compact formats than 32-bit floating point. 
I am going to focus on eight-bit -fixed point, for reasons I'll go into more detail on later. - -[TOC] - -## Why does Quantization Work? - -Training neural networks is done by applying many tiny nudges to the weights, -and these small increments typically need floating point precision to work -(though there are research efforts to use quantized representations here too). - -Taking a pre-trained model and running inference is very different. One of the -magical qualities of deep networks is that they tend to cope very well with high -levels of noise in their inputs. If you think about recognizing an object in a -photo you've just taken, the network has to ignore all the CCD noise, lighting -changes, and other non-essential differences between it and the training -examples it's seen before, and focus on the important similarities instead. This -ability means that they seem to treat low-precision calculations as just another -source of noise, and still produce accurate results even with numerical formats -that hold less information. - -## Why Quantize? - -Neural network models can take up a lot of space on disk, with the original -AlexNet being over 200 MB in float format for example. Almost all of that size -is taken up with the weights for the neural connections, since there are often -many millions of these in a single model. Because they're all slightly different -floating point numbers, simple compression formats like zip don't compress them -well. They are arranged in large layers though, and within each layer the -weights tend to be normally distributed within a certain range, for example -3.0 -to 6.0. - -The simplest motivation for quantization is to shrink file sizes by storing the -min and max for each layer, and then compressing each float value to an -eight-bit integer representing the closest real number in a linear set of 256 -within the range. 
For example with the -3.0 to 6.0 range, a 0 byte would -represent -3.0, a 255 would stand for 6.0, and 128 would represent about 1.5. -I'll go into the exact calculations later, since there's some subtleties, but -this means you can get the benefit of a file on disk that's shrunk by 75%, and -then convert back to float after loading so that your existing floating-point -code can work without any changes. - -Another reason to quantize is to reduce the computational resources you need to -do the inference calculations, by running them entirely with eight-bit inputs -and outputs. This is a lot more difficult since it requires changes everywhere -you do calculations, but offers a lot of potential rewards. Fetching eight-bit -values only requires 25% of the memory bandwidth of floats, so you'll make much -better use of caches and avoid bottlenecking on RAM access. You can also -typically use SIMD operations that do many more operations per clock cycle. In -some case you'll have a DSP chip available that can accelerate eight-bit -calculations too, which can offer a lot of advantages. - -Moving calculations over to eight bit will help you run your models faster, and -use less power (which is especially important on mobile devices). It also opens -the door to a lot of embedded systems that can't run floating point code -efficiently, so it can enable a lot of applications in the IoT world. - -## Why Not Train in Lower Precision Directly? - -There have been some experiments training at lower bit depths, but the results -seem to indicate that you need higher than eight bit to handle the back -propagation and gradients. That makes implementing the training more -complicated, and so starting with inference made sense. We also already have a -lot of float models already that we use and know well, so being able to convert -them directly is very convenient. - -## How Can You Quantize Your Models? - -TensorFlow has production-grade support for eight-bit calculations built in. 
It -also has a process for converting many models trained in floating-point over to -equivalent graphs using quantized calculations for inference. For example, -here's how you can translate the latest GoogLeNet model into a version that uses -eight-bit computations: - -```sh -curl -L "https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz" | - tar -C tensorflow/examples/label_image/data -xz -bazel build tensorflow/tools/graph_transforms:transform_graph -bazel-bin/tensorflow/tools/graph_transforms/transform_graph \ - --in_graph=tensorflow/examples/label_image/data/inception_v3_2016_08_28_frozen.pb \ - --out_graph=/tmp/quantized_graph.pb \ - --inputs=input \ - --outputs=InceptionV3/Predictions/Reshape_1 \ - --transforms='add_default_attributes strip_unused_nodes(type=float, shape="1,299,299,3") - remove_nodes(op=Identity, op=CheckNumerics) fold_constants(ignore_errors=true) - fold_batch_norms fold_old_batch_norms quantize_weights quantize_nodes - strip_unused_nodes sort_by_execution_order' +# Fixed Point Quantization + +Quantization techniques store and calculate numbers in more compact formats. +[TensorFlow Lite](/mobile/tflite/) adds quantization that uses an 8-bit fixed +point representation. + +Since a challenge for modern neural networks is optimizing for high accuracy, the +priority has been improving accuracy and speed during training. Using floating +point arithmetic is an easy way to preserve accuracy and GPUs are designed to +accelerate these calculations. + +However, as more machine learning models are deployed to mobile devices, +inference efficiency has become a critical issue. Where the computational demand +for *training* grows with the number of models trained on different +architectures, the computational demand for *inference* grows in proportion to +the number of users. + +## Quantization benefits + + +Using 8-bit calculations helps your models run faster and use less power.
This is +especially important for mobile devices and embedded applications that can't run +floating point code efficiently, for example, Internet of Things (IoT) and +robotics devices. There are additional opportunities to extend this support to +more backends and research lower precision networks. + +### Smaller file sizes {: .hide-from-toc} + +Neural network models require a lot of space on disk. For example, the original +AlexNet requires over 200 MB for the float format—almost all of that for the +model's millions of weights. Because the weights are slightly different +floating point numbers, simple compression formats perform poorly (like zip). + +Weights fall in large layers of numerical values. For each layer, weights tend to +be normally distributed within a range. Quantization can shrink file sizes by +storing the minimum and maximum weight for each layer, then compress each +weight's float value to an 8-bit integer representing the closest real number in +a linear set of 256 within the range. + +### Faster inference {: .hide-from-toc} + +Since calculations are run entirely on 8-bit inputs and outputs, quantization +reduces the computational resources needed for inference calculations. This is +more involved, requiring changes to all floating point calculations, but results +in a large speed-up for inference time. + +### Memory efficiency {: .hide-from-toc} + +Since fetching 8-bit values only requires 25% of the memory bandwidth of floats, +more efficient caches avoid bottlenecks for RAM access. In many cases, the power +consumption for running a neural network is dominated by memory access. The +savings from using fixed-point 8-bit weights and activations are significant. + +Typically, SIMD operations are available that run more operations per clock +cycle. In some cases, a DSP chip is available that accelerates 8-bit calculations +resulting in a massive speedup. 
+ +## Fixed point quantization techniques + +The goal is to use the same precision for weights and activations during both +training and inference. But an important difference is that training consists of +a forward pass and a backward pass, while inference only uses a forward pass. +When we train the model with quantization in the loop, we ensure that the forward +pass matches precision for both training and inference. + +To minimize the loss in accuracy for fully fixed point models (weights and +activations), train the model with quantization in the loop. This simulates +quantization in the forward pass of a model so weights tend towards values that +perform better during quantized inference. The backward pass uses quantized +weights and activations and models quantization as a straight through estimator. +(See Bengio et al., [2013](https://arxiv.org/abs/1308.3432)) + +Additionally, the minimum and maximum values for activations are determined +during training. This allows a model trained with quantization in the loop to be +converted to a fixed point inference model with little effort, eliminating the +need for a separate calibration step. + +## Quantization training with TensorFlow + +TensorFlow can train models with quantization in the loop. Because training +requires small gradient adjustments, floating point values are still used. To +keep models as floating point while adding the quantization error in the training +loop, @{$array_ops#Fake_quantization} nodes simulate the effect of quantization +in the forward and backward passes. + +Since it's difficult to add these fake quantization operations to all the +required locations in the model, there's a function available that rewrites the +training graph. To create a fake quantized training graph: + +``` +# Build forward pass of model. +loss = tf.losses.get_total_loss() + +# Call the training rewrite which rewrites the graph in-place with +# FakeQuantization nodes and folds batchnorm for training. 
It is +# often needed to fine tune a floating point model for quantization +# with this training tool. When training from scratch, quant_delay +# can be used to activate quantization after training to converge +# with the float graph, effectively fine-tuning the model. +tf.contrib.quantize.create_training_graph(quant_delay=2000000) + +# Call backward pass optimizer as usual. +optimizer = tf.train.GradientDescentOptimizer(learning_rate) +optimizer.minimize(loss) ``` -This will produce a new model that runs the same operations as the original, but -with eight bit calculations internally, and all weights quantized as well. If -you look at the file size, you'll see it's about a quarter of the original (23MB -versus 91MB). You can still run this model using exactly the same inputs and -outputs though, and you should get equivalent results. Here's an example: +The rewritten *eval graph* is non-trivially different from the *training graph* +since the quantization ops affect the batch normalization step. Because of this, +we've added a separate rewrite for the *eval graph*: -```sh -bazel build tensorflow/examples/label_image:label_image -bazel-bin/tensorflow/examples/label_image/label_image \ ---graph=/tmp/quantized_graph.pb \ +``` +# Build eval model +logits = tf.nn.softmax_cross_entropy_with_logits(...) + +# Call the eval rewrite which rewrites the graph in-place with +# FakeQuantization nodes and folds batchnorm for eval. +tf.contrib.quantize.create_eval_graph() + +# Save the checkpoint and eval graph proto to disk for freezing +# and providing to TFLite. +with open(eval_graph_file, 'w') as f: + f.write(str(g.as_graph_def())) +saver = tf.train.Saver() +saver.save(sess, checkpoint_name) +``` + +Methods to rewrite the training and eval graphs are an active area of research +and experimentation. Although rewrites and quantized training might not work or +improve performance for all models, we are working to generalize these +techniques.
+ +## Generating fully quantized models + +The previously demonstrated after-rewrite eval graph only *simulates* +quantization. To generate real fixed point computations from a trained +quantization model, convert it to a fixed point kernel. TensorFlow Lite supports +this conversion from the graph resulting from `create_eval_graph`. + +First, create a frozen graph that will be the input for the TensorFlow Lite +toolchain: + +``` +bazel build tensorflow/python/tools:freeze_graph && \ + bazel-bin/tensorflow/python/tools/freeze_graph \ + --input_graph=eval_graph_def.pb \ + --input_checkpoint=checkpoint \ + --output_graph=frozen_eval_graph.pb --output_node_names=outputs ``` -You'll see that this runs the newly-quantized graph, and outputs a very similar -answer to the original. - -You can run the same process on your own models saved out as GraphDefs, with the -input and output names adapted to those your network requires. I recommend that -you run them through the freeze_graph script first, to convert checkpoints into -constants stored in the file. - -## How Does the Quantization Process Work? - -We've implemented quantization by writing equivalent eight-bit versions of -operations that are commonly used during inference. These include convolution, -matrix multiplication, activation functions, pooling operations and -concatenation. The conversion script first replaces all the individual ops it -knows about with quantized equivalents. These are small sub-graphs that have -conversion functions before and after to move the data between float and -eight-bit. Below is an example of what they look like. First here's the original -Relu operation, with float inputs and outputs: - -![Relu Diagram](https://www.tensorflow.org/images/quantization0.png) - -Then, this is the equivalent converted subgraph, still with float inputs and -outputs, but with internal conversions so the calculations are done in eight -bit.
- -![Converted Diagram](https://www.tensorflow.org/images/quantization1.png) - -The min and max operations actually look at the values in the input float -tensor, and then feeds them into the Dequantize operation that converts the -tensor into eight-bits. There are more details on how the quantized representation -works later on. - -Once the individual operations have been converted, the next stage is to remove -unnecessary conversions to and from float. If there are consecutive sequences of -operations that all have float equivalents, then there will be a lot of adjacent -Dequantize/Quantize ops. This stage spots that pattern, recognizes that they -cancel each other out, and removes them, like this: - -![Stripping Diagram](https://www.tensorflow.org/images/quantization2.png) - -Applied on a large scale to models where all of the operations have quantized -equivalents, this gives a graph where all of the tensor calculations are done in -eight bit, without having to convert to float. - -## What Representation is Used for Quantized Tensors? - -We approach converting floating-point arrays of numbers into eight-bit -representations as a compression problem. We know that the weights and -activation tensors in trained neural network models tend to have values that are -distributed across comparatively small ranges (for example you might have -15 to -+15 for weights, -500 to 1000 for activations on an image model, though the -exact numbers will vary). We also know from experiment that neural nets tend to -be very robust in the face of noise, and so the noise-like error produced by -quantizing down to a small set of values will not hurt the precision of the -overall results very much. We also want to pick a representation that's easy to -perform calculations on, especially the large matrix multiplications that form -the bulk of the work that's needed to run a model. 
- -These led us to pick a representation that has two floats to store the overall -minimum and maximum values that are represented by the lowest and highest -quantized value. Each entry in the quantized array represents a float value in -that range, distributed linearly between the minimum and maximum. For example, -if we have minimum = -10.0, and maximum = 30.0f, and an eight-bit array, here's -what the quantized values represent: +Provide this to the TensorFlow Lite Optimizing Converter (TOCO) to get a fully +quantized TensorFlow Lite model: ``` -Quantized | Float ---------- | ----- -0 | -10.0 -255 | 30.0 -128 | 10.0 +bazel build tensorflow/contrib/lite/toco:toco && \ + ./bazel-bin/tensorflow/contrib/lite/toco/toco \ + --input_file=frozen_eval_graph.pb \ + --output_file=tflite_model.tflite \ + --input_format=TENSORFLOW_GRAPHDEF --output_format=TFLITE \ + --inference_type=QUANTIZED_UINT8 \ + --input_shape="1,224,224,3" \ + --input_array=input \ + --output_array=outputs \ + --std_value=127.5 --mean_value=127.5 ``` -The advantages of this format are that it can represent arbitrary magnitudes of -ranges, they don't have to be symmetrical, it can represent signed and unsigned -values, and the linear spread makes doing multiplications straightforward. There -are alternatives like [Song Han's code books](http://arxiv.org/pdf/1510.00149.pdf) -that can use lower bit depths by non-linearly distributing the float values -across the representation, but these tend to be more expensive to calculate on. - -The advantage of having a strong and clear definition of the quantized format is -that it's always possible to convert back and forth from float for operations -that aren't quantization-ready, or to inspect the tensors for debugging -purposes.
One implementation detail in TensorFlow that we're hoping to improve -in the future is that the minimum and maximum float values need to be passed as -separate tensors to the one holding the quantized values, so graphs can get a -bit dense! - -The nice thing about the minimum and maximum ranges is that they can often be -pre-calculated. Weight parameters are constants known at load time, so their -ranges can also be stored as constants. We often know the ranges for inputs (for -examples images are usually RGB values in the range 0.0 to 255.0), and many -activation functions have known ranges too. This can avoid having to analyze the -outputs of an operation to determine the range, which we need to do for math ops -like convolution or matrix multiplication which produce 32-bit accumulated -results from 8-bit inputs. - -## What's Next? - -We've found that we can get extremely good performance on mobile and embedded -devices by using eight-bit arithmetic rather than floating-point. You can see -the framework we use to optimize matrix multiplications at -[gemmlowp](https://github.com/google/gemmlowp). We still need to apply all the -lessons we've learned to the TensorFlow ops to get maximum performance on -mobile, but we're actively working on that. Right now, this quantized -implementation is a reasonably fast and accurate reference implementation that -we're hoping will enable wider support for our eight-bit models on a wider -variety of devices. We also hope that this demonstration will encourage the -community to explore what's possible with low-precision neural networks. +See the documentation for @{tf.contrib.quantize} and +[TensorFlow Lite](/mobile/tflite/). + +## Quantized accuracy + +Fixed point [MobileNet](https://arxiv.org/abs/1704.04861) models are released with +8-bit weights and activations. Using the rewriters, these models achieve the +Top-1 accuracies listed in Table 1. For comparison, the floating point accuracies +are listed for the same models.
The code used to generate these models +[is available](https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md) +along with links to all of the pretrained mobilenet_v1 models. + +
+
Version:CPU/GPU:Python Version:Compiler:Build Tools:cuDNN:CUDA:
tensorflow-1.6.0rc1CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0rc1GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.6.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.6.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.5.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
tensorflow_gpu-1.5.0GPU3.5-3.6MSVC 2015 update 3Cmake v3.6.379
tensorflow-1.4.0CPU3.5-3.6MSVC 2015 update 3Cmake v3.6.3N/AN/A
+ + + + + + + + + + + + + + + + + + + + + + +
Image SizeDepthTop-1 Accuracy:
Floating point
Top-1 Accuracy:
Fixed point: 8 bit weights and activations
1280.250.4150.399
1280.50.5630.549
1280.750.6210.598
12810.6520.64
1600.250.4550.435
1600.50.5910.577
1600.750.6530.639
16010.680.673
1920.250.4770.458
1920.50.6170.604
1920.750.6720.662
19210.70.69
2240.250.4980.482
2240.50.6330.622
2240.750.6840.679
22410.7090.697
+
+ Table 1: MobileNet Top-1 accuracy on Imagenet Validation dataset. +
+ + +## Representation for quantized tensors + +TensorFlow approaches the conversion of floating-point arrays of numbers into +8-bit representations as a compression problem. The weights and activation +tensors in trained neural network models tend to have values that are distributed +across comparatively small ranges (for example, -15 to +15 for weights or -500 to +1000 for image model activations). And since neural nets tend to be robust +when handling noise, the error introduced by quantizing to a small set of values +maintains the precision of the overall results within an acceptable threshold. A +chosen representation must perform fast calculations, especially the large matrix +multiplications that comprise the bulk of the computations while running a model. + +This is represented with two floats that store the overall minimum and maximum +values corresponding to the lowest and highest quantized value. Each entry in the +quantized array represents a float value in that range, distributed linearly +between the minimum and maximum. For example, with a minimum of -10.0 and maximum +of 30.0, and an 8-bit array, the quantized values represent the following: + +
+ + + + + +
QuantizedFloat
0-10.0
25530.0
12810.0
+
+ Table 2: Example quantized value range +
+
+ +The advantages of this representation format are: + +* It efficiently represents an arbitrary magnitude of ranges. +* The values don't have to be symmetrical. +* The format represents both signed and unsigned values. +* The linear spread makes multiplications straightforward. + +Alternative techniques use lower bit depths by non-linearly distributing the +float values across the representation, but currently are more expensive in terms +of computation time. (See Han et al., +[2016](https://arxiv.org/abs/1510.00149).) + +The advantage of having a clear definition of the quantized format is that it's +always possible to convert back and forth from fixed-point to floating-point for +operations that aren't quantization-ready, or to inspect the tensors for +debugging. -- GitLab From 6fdb9ad1baf7686a75f9e660178f7ac595e7fc2e Mon Sep 17 00:00:00 2001 From: 4d55397500 <4d55397500@users.noreply.github.com> Date: Wed, 28 Feb 2018 17:57:35 -0800 Subject: [PATCH 265/884] Fix return value in sampled_softmax_loss --- tensorflow/python/ops/nn_impl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/nn_impl.py b/tensorflow/python/ops/nn_impl.py index 5fa5708114..254f0051a4 100644 --- a/tensorflow/python/ops/nn_impl.py +++ b/tensorflow/python/ops/nn_impl.py @@ -1345,4 +1345,4 @@ def sampled_softmax_loss(weights, sampled_losses = nn_ops.softmax_cross_entropy_with_logits( labels=labels, logits=logits) # sampled_losses is a [batch_size] tensor. - return sampled_losses + return sampled_losses \ No newline at end of file -- GitLab From f5e2a70e0363c1b08a342e395c4e040114b7a424 Mon Sep 17 00:00:00 2001 From: Anna R Date: Wed, 28 Feb 2018 17:54:56 -0800 Subject: [PATCH 266/884] Set generated ops to hidden if they are not included in TensorFlow Python API. Also, update endpoints in ApiDef files for a few ops. 
PiperOrigin-RevId: 187412039 --- tensorflow/core/api_def/python_api/api_def_Abort.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorApplyGradient.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorNumAccumulated.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorSetGlobalStep.pbtxt | 4 ++++ .../python_api/api_def_AccumulatorTakeGradient.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_AdjustContrast.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_AdjustHue.pbtxt | 4 ++++ .../api_def/python_api/api_def_AdjustSaturation.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdadelta.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdagrad.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAdam.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyAddSign.pbtxt | 4 ++++ .../python_api/api_def_ApplyCenteredRMSProp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyFtrl.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt | 4 ++++ .../python_api/api_def_ApplyGradientDescent.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyMomentum.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyPowerSign.pbtxt | 4 ++++ .../python_api/api_def_ApplyProximalAdagrad.pbtxt | 4 ++++ .../api_def_ApplyProximalGradientDescent.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ApplyRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_ApproximateEqual.pbtxt | 4 ++++ .../api_def/python_api/api_def_AssignAddVariableOp.pbtxt | 4 ++++ .../api_def/python_api/api_def_AssignSubVariableOp.pbtxt | 4 ++++ .../api_def/python_api/api_def_AssignVariableOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_AvgPool3D.pbtxt | 6 ++++++ .../core/api_def/python_api/api_def_BatchDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_BatchMatrixBandPart.pbtxt | 4 ++++ .../api_def/python_api/api_def_BatchMatrixDiag.pbtxt | 4 ++++ .../api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt | 4 ++++ 
.../api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_BiasAddGrad.pbtxt | 4 ++++ .../python_api/api_def_BytesProducedStatsDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_CacheDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_CholeskyGrad.pbtxt | 4 ++++ .../api_def/python_api/api_def_CompareAndBitpack.pbtxt | 4 ++++ .../api_def/python_api/api_def_ConcatenateDataset.pbtxt | 4 ++++ .../python_api/api_def_ConditionalAccumulator.pbtxt | 4 ++++ .../api_def/python_api/api_def_ConsumeMutexLock.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ControlTrigger.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt | 6 ++++++ .../python_api/api_def_Conv2DBackpropFilter.pbtxt | 6 ++++++ .../api_def/python_api/api_def_Conv2DBackpropInput.pbtxt | 6 ++++++ tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt | 6 ++++++ .../python_api/api_def_Conv3DBackpropFilter.pbtxt | 4 ++++ .../python_api/api_def_Conv3DBackpropFilterV2.pbtxt | 6 ++++++ .../api_def/python_api/api_def_Conv3DBackpropInput.pbtxt | 4 ++++ .../python_api/api_def_Conv3DBackpropInputV2.pbtxt | 4 ++++ .../python_api/api_def_CropAndResizeGradBoxes.pbtxt | 4 ++++ .../python_api/api_def_CropAndResizeGradImage.pbtxt | 4 ++++ .../api_def/python_api/api_def_DataFormatDimMap.pbtxt | 4 ++++ .../python_api/api_def_DataFormatVecPermute.pbtxt | 4 ++++ .../python_api/api_def_DatasetToSingleElement.pbtxt | 4 ++++ .../api_def/python_api/api_def_DecodeCompressed.pbtxt | 4 ++++ .../python_api/api_def_DenseToDenseSetOperation.pbtxt | 4 ++++ .../python_api/api_def_DenseToSparseBatchDataset.pbtxt | 4 ++++ .../python_api/api_def_DenseToSparseSetOperation.pbtxt | 4 ++++ .../api_def/python_api/api_def_DeserializeIterator.pbtxt | 4 ++++ .../api_def/python_api/api_def_DestroyResourceOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_Dilation2D.pbtxt | 6 ++++++ .../python_api/api_def_Dilation2DBackpropFilter.pbtxt | 4 ++++ 
.../python_api/api_def_Dilation2DBackpropInput.pbtxt | 4 ++++ .../python_api/api_def_EnqueueInQueueDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt | 9 +++++++++ tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt | 9 +++++++++ .../core/api_def/python_api/api_def_FilterDataset.pbtxt | 4 ++++ .../python_api/api_def_FixedLengthRecordDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_FlatMapDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_FusedBatchNormGrad.pbtxt | 4 ++++ .../python_api/api_def_FusedBatchNormGradV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_FusedPadConv2D.pbtxt | 4 ++++ .../python_api/api_def_FusedResizeAndPadConv2D.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_GatherV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_GeneratorDataset.pbtxt | 4 ++++ .../python_api/api_def_GroupByWindowDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt | 9 +++++++++ tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt | 9 +++++++++ tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ImmutableConst.pbtxt | 4 ++++ .../api_def/python_api/api_def_InterleaveDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Inv.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_Iterator.pbtxt | 4 ++++ .../python_api/api_def_IteratorFromStringHandle.pbtxt | 4 ++++ .../api_def/python_api/api_def_IteratorGetNext.pbtxt | 4 ++++ .../api_def/python_api/api_def_IteratorGetNextSync.pbtxt | 4 ++++ .../python_api/api_def_IteratorSetStatsAggregator.pbtxt | 4 ++++ .../python_api/api_def_IteratorToStringHandle.pbtxt | 4 ++++ .../api_def/python_api/api_def_LatencyStatsDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_LoopCond.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MakeIterator.pbtxt | 4 ++++ 
.../api_def/python_api/api_def_MapAndBatchDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapClear.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_MapIncompleteSize.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapStage.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MapUnstage.pbtxt | 4 ++++ .../api_def/python_api/api_def_MapUnstageNoKey.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MaxPool3D.pbtxt | 6 ++++++ .../api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_MergeV2Checkpoints.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_MutexLock.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_NextIteration.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_NthElement.pbtxt | 4 ++++ .../api_def/python_api/api_def_OneShotIterator.pbtxt | 4 ++++ .../api_def/python_api/api_def_OrderedMapClear.pbtxt | 4 ++++ .../python_api/api_def_OrderedMapIncompleteSize.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_OrderedMapPeek.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_OrderedMapSize.pbtxt | 4 ++++ .../api_def/python_api/api_def_OrderedMapStage.pbtxt | 4 ++++ .../api_def/python_api/api_def_OrderedMapUnstage.pbtxt | 4 ++++ .../python_api/api_def_OrderedMapUnstageNoKey.pbtxt | 4 ++++ .../api_def/python_api/api_def_PaddedBatchDataset.pbtxt | 4 ++++ .../python_api/api_def_ParallelDynamicStitch.pbtxt | 4 ++++ .../python_api/api_def_ParallelInterleaveDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_ParallelMapDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_PlaceholderV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_PopulationCount.pbtxt | 4 ++++ 
.../api_def/python_api/api_def_PrefetchDataset.pbtxt | 4 ++++ .../api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_PreventGradient.pbtxt | 4 ++++ .../python_api/api_def_QuantizeAndDequantize.pbtxt | 4 ++++ .../python_api/api_def_QuantizeAndDequantizeV2.pbtxt | 4 ++++ .../python_api/api_def_QuantizeAndDequantizeV3.pbtxt | 4 ++++ .../python_api/api_def_QuantizeDownAndShrinkRange.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedAdd.pbtxt | 4 ++++ ...i_def_QuantizedBatchNormWithGlobalNormalization.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedBiasAdd.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedConv2D.pbtxt | 6 ++++++ .../python_api/api_def_QuantizedInstanceNorm.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedMatMul.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedMul.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedRelu.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QuantizedRelu6.pbtxt | 4 ++++ .../api_def/python_api/api_def_QuantizedReshape.pbtxt | 4 ++++ .../python_api/api_def_QuantizedResizeBilinear.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_QueueIsClosed.pbtxt | 4 ++++ .../api_def/python_api/api_def_QueueIsClosedV2.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RandomDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_RandomPoissonV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RangeDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ReadVariableOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RecordInput.pbtxt | 4 ++++ .../api_def/python_api/api_def_RefNextIteration.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RefSelect.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RefSwitch.pbtxt | 4 ++++ 
.../core/api_def/python_api/api_def_RemoteCall.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RepeatDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_RequantizationRange.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_Requantize.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAdadelta.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAdagrad.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAdagradDA.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceApplyAdam.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyAddSign.pbtxt | 4 ++++ .../api_def_ResourceApplyCenteredRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceApplyFtrl.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt | 4 ++++ .../api_def_ResourceApplyGradientDescent.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyMomentum.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyPowerSign.pbtxt | 4 ++++ .../api_def_ResourceApplyProximalAdagrad.pbtxt | 4 ++++ .../api_def_ResourceApplyProximalGradientDescent.pbtxt | 4 ++++ .../python_api/api_def_ResourceApplyRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceCountUpTo.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ResourceGather.pbtxt | 4 ++++ .../api_def/python_api/api_def_ResourceScatterAdd.pbtxt | 4 ++++ .../python_api/api_def_ResourceScatterNdUpdate.pbtxt | 4 ++++ .../python_api/api_def_ResourceScatterUpdate.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyAdadelta.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyAdagrad.pbtxt | 4 ++++ .../api_def_ResourceSparseApplyAdagradDA.pbtxt | 4 ++++ .../api_def_ResourceSparseApplyCenteredRMSProp.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyFtrl.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt | 4 ++++ .../python_api/api_def_ResourceSparseApplyMomentum.pbtxt | 4 ++++ .../api_def_ResourceSparseApplyProximalAdagrad.pbtxt | 4 ++++ ..._def_ResourceSparseApplyProximalGradientDescent.pbtxt | 4 ++++ 
.../python_api/api_def_ResourceSparseApplyRMSProp.pbtxt | 4 ++++ .../python_api/api_def_ResourceStridedSliceAssign.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_RestoreV2.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Roll.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ScanDataset.pbtxt | 4 ++++ .../python_api/api_def_ScatterNdNonAliasingAdd.pbtxt | 4 ++++ .../api_def/python_api/api_def_SerializeIterator.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt | 4 ++++ .../python_api/api_def_ShuffleAndRepeatDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ShuffleDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SkipDataset.pbtxt | 4 ++++ .../api_def_SparseAccumulatorApplyGradient.pbtxt | 4 ++++ .../api_def_SparseAccumulatorTakeGradient.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyAdadelta.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyAdagrad.pbtxt | 4 ++++ .../python_api/api_def_SparseApplyAdagradDA.pbtxt | 4 ++++ .../python_api/api_def_SparseApplyCenteredRMSProp.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyFtrl.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyMomentum.pbtxt | 4 ++++ .../python_api/api_def_SparseApplyProximalAdagrad.pbtxt | 4 ++++ .../api_def_SparseApplyProximalGradientDescent.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseApplyRMSProp.pbtxt | 4 ++++ .../api_def_SparseConditionalAccumulator.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt | 4 ++++ .../python_api/api_def_SparseSegmentMeanGrad.pbtxt | 4 ++++ .../api_def_SparseSegmentMeanWithNumSegments.pbtxt | 4 ++++ .../python_api/api_def_SparseSegmentSqrtNGrad.pbtxt | 4 ++++ 
.../api_def_SparseSegmentSqrtNWithNumSegments.pbtxt | 4 ++++ .../api_def_SparseSegmentSumWithNumSegments.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseSparseMaximum.pbtxt | 4 ++++ .../api_def/python_api/api_def_SparseSparseMinimum.pbtxt | 4 ++++ .../python_api/api_def_SparseTensorSliceDataset.pbtxt | 4 ++++ .../python_api/api_def_SparseToSparseSetOperation.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SqlDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Stage.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_StageClear.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_StagePeek.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_StageSize.pbtxt | 4 ++++ .../python_api/api_def_StatsAggregatorHandle.pbtxt | 4 ++++ .../python_api/api_def_StatsAggregatorSummary.pbtxt | 4 ++++ .../api_def/python_api/api_def_StridedSliceAssign.pbtxt | 4 ++++ .../api_def/python_api/api_def_StridedSliceGrad.pbtxt | 4 ++++ .../api_def/python_api/api_def_TFRecordDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_TakeDataset.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_TensorDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_TensorSliceDataset.pbtxt | 4 ++++ .../api_def/python_api/api_def_TextLineDataset.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_VarHandleOp.pbtxt | 4 ++++ .../api_def/python_api/api_def_VarIsInitializedOp.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_VariableShape.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ZipDataset.pbtxt | 4 ++++ 243 files changed, 1010 insertions(+) create mode 100644 tensorflow/core/api_def/python_api/api_def_Abort.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt 
create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt create mode 
100644 tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Inv.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt create 
mode 100644 tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt 
create mode 100644 tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Roll.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt create 
mode 100644 tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Stage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt create mode 100644 
tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt diff --git a/tensorflow/core/api_def/python_api/api_def_Abort.pbtxt b/tensorflow/core/api_def/python_api/api_def_Abort.pbtxt new file mode 100644 index 0000000000..3f95aaf12c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Abort.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Abort" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt new file mode 100644 index 0000000000..1e76d6dadc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorApplyGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorApplyGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt new file mode 100644 index 0000000000..fbe971ab2e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorNumAccumulated.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorNumAccumulated" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt 
new file mode 100644 index 0000000000..0047b25af6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorSetGlobalStep.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorSetGlobalStep" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt new file mode 100644 index 0000000000..860fbe1245 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AccumulatorTakeGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AccumulatorTakeGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt b/tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt new file mode 100644 index 0000000000..0311ad92b7 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AdjustContrast.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AdjustContrast" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt b/tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt new file mode 100644 index 0000000000..b441167711 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AdjustHue.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AdjustHue" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt b/tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt new file mode 100644 index 0000000000..893219e17a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AdjustSaturation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AdjustSaturation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt new file mode 100644 index 0000000000..d8776b19f1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: 
"ApplyAdadelta" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt new file mode 100644 index 0000000000..7e659c1bb3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..d647c5eb0a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt new file mode 100644 index 0000000000..66d9095c8f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAdam.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAdam" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt new file mode 100644 index 0000000000..b7fe1aa654 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyAddSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyAddSign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..56003c5e6f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt new file mode 100644 index 0000000000..680b3ef480 --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..5ab3bb6efd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt new file mode 100644 index 0000000000..467bf7db55 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt new file mode 100644 index 0000000000..7c3f0fef95 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt new file mode 100644 index 0000000000..f376b1dc6e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyPowerSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyPowerSign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..0c6e2a4bb1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyProximalAdagrad" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..90c1655fe9 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt new file mode 100644 index 0000000000..18cce1915a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt b/tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt new file mode 100644 index 0000000000..707f6716f9 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ApproximateEqual.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ApproximateEqual" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt new file mode 100644 index 0000000000..e30ec092e6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AssignAddVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AssignAddVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt new file mode 100644 index 0000000000..81290a56ec --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AssignSubVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AssignSubVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt new file mode 100644 index 0000000000..3ffa4a11c4 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AssignVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "AssignVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt new file mode 100644 index 0000000000..cc16523a15 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_AvgPool3D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "AvgPool3D" + endpoint { + name: "nn.avg_pool3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt new file mode 100644 index 0000000000..4289c1daf9 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt new file mode 100644 index 0000000000..0a699e2050 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchMatrixBandPart.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixBandPart" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt new file mode 100644 index 0000000000..40be51eccc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiag.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixDiag" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt new file mode 100644 index 0000000000..1ef78fa5ec --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_BatchMatrixDiagPart.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixDiagPart" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt b/tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt new file mode 100644 index 0000000000..644c1270a2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BatchMatrixSetDiag.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BatchMatrixSetDiag" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt new file mode 100644 index 0000000000..9226c6791c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BiasAddGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BiasAddGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt new file mode 100644 index 0000000000..fcf541f903 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_BytesProducedStatsDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "BytesProducedStatsDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt new file mode 100644 index 0000000000..2bbb4ff9e3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CacheDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CacheDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt new file mode 100644 index 0000000000..3538afb2a7 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CholeskyGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CholeskyGrad" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt b/tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt new file mode 100644 index 0000000000..493a7e4866 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CompareAndBitpack.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CompareAndBitpack" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt new file mode 100644 index 0000000000..c005a4da0f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ConcatenateDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ConcatenateDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt b/tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt new file mode 100644 index 0000000000..a4663e8eb3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ConditionalAccumulator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ConditionalAccumulator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt b/tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt new file mode 100644 index 0000000000..9559947490 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ConsumeMutexLock.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ConsumeMutexLock" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt b/tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt new file mode 100644 index 0000000000..33941493af --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ControlTrigger.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ControlTrigger" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt new file mode 100644 index 
0000000000..2ae75d6da2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv2D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv2D" + endpoint { + name: "nn.conv2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt new file mode 100644 index 0000000000..6f21d8c880 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropFilter.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv2DBackpropFilter" + endpoint { + name: "nn.conv2d_backprop_filter" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt new file mode 100644 index 0000000000..ea976799cb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv2DBackpropInput.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv2DBackpropInput" + endpoint { + name: "nn.conv2d_backprop_input" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt new file mode 100644 index 0000000000..ba8d178263 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv3D" + endpoint { + name: "nn.conv3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt new file mode 100644 index 0000000000..634545f427 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Conv3DBackpropFilter" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt new file mode 100644 index 0000000000..1da8ee3a25 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropFilterV2.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Conv3DBackpropFilterV2" + endpoint { + name: "nn.conv3d_backprop_filter_v2" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt new file mode 100644 index 0000000000..e2b0a0d19f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInput.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Conv3DBackpropInput" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt new file mode 100644 index 0000000000..4e5c4f74fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Conv3DBackpropInputV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Conv3DBackpropInputV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt new file mode 100644 index 0000000000..ac44494193 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradBoxes.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CropAndResizeGradBoxes" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt new file mode 100644 index 0000000000..eecd0536f2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_CropAndResizeGradImage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "CropAndResizeGradImage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt b/tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt new file mode 100644 index 0000000000..82a39cfc59 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_DataFormatDimMap.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DataFormatDimMap" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt b/tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt new file mode 100644 index 0000000000..9ec292df8f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DataFormatVecPermute.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DataFormatVecPermute" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt b/tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt new file mode 100644 index 0000000000..e3d34cc15b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DatasetToSingleElement.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DatasetToSingleElement" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt b/tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt new file mode 100644 index 0000000000..f0b7539918 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DecodeCompressed.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DecodeCompressed" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt b/tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt new file mode 100644 index 0000000000..1c47ec09c5 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DenseToDenseSetOperation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DenseToDenseSetOperation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt new file mode 100644 index 0000000000..0a8e068afb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DenseToSparseBatchDataset.pbtxt @@ -0,0 
+1,4 @@ +op { + graph_op_name: "DenseToSparseBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt b/tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt new file mode 100644 index 0000000000..a30757df4d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DenseToSparseSetOperation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DenseToSparseSetOperation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt new file mode 100644 index 0000000000..170d37be4e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DeserializeIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DeserializeIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt new file mode 100644 index 0000000000..b9dde0080a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_DestroyResourceOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DestroyResourceOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt new file mode 100644 index 0000000000..6d73ecf1bb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Dilation2D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "Dilation2D" + endpoint { + name: "nn.dilation2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt new file mode 100644 index 0000000000..feb9f083db --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropFilter.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Dilation2DBackpropFilter" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt new file mode 100644 index 0000000000..9a6b09f5cc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Dilation2DBackpropInput.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Dilation2DBackpropInput" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt new file mode 100644 index 0000000000..051cf14c0e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_EnqueueInQueueDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "EnqueueInQueueDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt new file mode 100644 index 0000000000..9ed1341dfe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FFT2D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "FFT2D" + endpoint { + name: "spectral.fft2d" + } + endpoint { + name: "fft2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt new file mode 100644 index 0000000000..5a4e1d6adf --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FFT3D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "FFT3D" + endpoint { + name: "spectral.fft3d" + } + endpoint { + name: "fft3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt new file mode 100644 index 0000000000..6f91b84218 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FilterDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FilterDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt new file mode 100644 index 0000000000..d0703471d3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FixedLengthRecordDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FixedLengthRecordDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt new file mode 100644 index 0000000000..9de61ac263 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FlatMapDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FlatMapDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt new file mode 100644 index 0000000000..56409f32d8 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedBatchNormGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt new file mode 100644 index 0000000000..f5a4200b76 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedBatchNormGradV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedBatchNormGradV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt new file mode 100644 index 0000000000..03b5fdd5a1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedPadConv2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedPadConv2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt new file mode 100644 index 0000000000..52165d9b4d --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_FusedResizeAndPadConv2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "FusedResizeAndPadConv2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt new file mode 100644 index 0000000000..029bc59b51 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_GatherV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "GatherV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt new file mode 100644 index 0000000000..9dcfa0f7d2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_GeneratorDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "GeneratorDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt new file mode 100644 index 0000000000..8d40208e61 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_GroupByWindowDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "GroupByWindowDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt new file mode 100644 index 0000000000..d6b36a314b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IFFT2D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "IFFT2D" + endpoint { + name: "spectral.ifft2d" + } + endpoint { + name: "ifft2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt new file mode 100644 index 0000000000..6def5b36da --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IFFT3D.pbtxt @@ -0,0 +1,9 @@ +op { + graph_op_name: "IFFT3D" + endpoint { + name: "spectral.ifft3d" + } + endpoint { + 
name: "ifft3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt b/tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt new file mode 100644 index 0000000000..8fa74a4317 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IRFFT.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IRFFT" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt new file mode 100644 index 0000000000..2021cad639 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IRFFT2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IRFFT2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt new file mode 100644 index 0000000000..5d1eab6003 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IRFFT3D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IRFFT3D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt b/tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt new file mode 100644 index 0000000000..997013914b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ImmutableConst.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ImmutableConst" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt new file mode 100644 index 0000000000..ef1b06b19c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_InterleaveDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "InterleaveDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Inv.pbtxt b/tensorflow/core/api_def/python_api/api_def_Inv.pbtxt new file mode 100644 index 0000000000..ed58a276f6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Inv.pbtxt @@ -0,0 +1,4 @@ +op { + 
graph_op_name: "Inv" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt new file mode 100644 index 0000000000..a021db1534 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Iterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Iterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt new file mode 100644 index 0000000000..f9efe2d144 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorFromStringHandle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorFromStringHandle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt new file mode 100644 index 0000000000..f7066484ce --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorGetNext.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorGetNext" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt new file mode 100644 index 0000000000..d94edbc71d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorGetNextSync.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorGetNextSync" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt new file mode 100644 index 0000000000..db51ae3873 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorSetStatsAggregator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorSetStatsAggregator" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt b/tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt new file mode 100644 index 0000000000..8a4251f76b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IteratorToStringHandle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IteratorToStringHandle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt new file mode 100644 index 0000000000..94bf6106ad --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_LatencyStatsDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "LatencyStatsDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt b/tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt new file mode 100644 index 0000000000..4cfa295b2a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_LoopCond.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "LoopCond" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt new file mode 100644 index 0000000000..acc3342c9b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MakeIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MakeIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt new file mode 100644 index 0000000000..cffd2910fb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapAndBatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapAndBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt new file mode 100644 index 0000000000..67c1c3e2dd --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapClear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapClear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt new file mode 100644 index 0000000000..0b1d2f2c73 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt new file mode 100644 index 0000000000..db7921e13b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapIncompleteSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapIncompleteSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt new file mode 100644 index 0000000000..85fab17229 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapPeek.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapPeek" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt new file mode 100644 index 0000000000..8b6ed1a0cf --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt new file mode 100644 index 0000000000..3ae70d5d57 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapStage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapStage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt new file mode 
100644 index 0000000000..e5f92e37db --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapUnstage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapUnstage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt b/tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt new file mode 100644 index 0000000000..2c2a25db21 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MapUnstageNoKey.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MapUnstageNoKey" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt new file mode 100644 index 0000000000..e8576c9ff2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MaxPool3D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "MaxPool3D" + endpoint { + name: "nn.max_pool3d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt new file mode 100644 index 0000000000..534cc90e41 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradGradV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MaxPoolGradGradV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt new file mode 100644 index 0000000000..e79f839686 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MaxPoolGradV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MaxPoolGradV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt b/tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt new file mode 100644 index 0000000000..ca9f74e0c1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MergeV2Checkpoints.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MergeV2Checkpoints" + visibility: 
HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt b/tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt new file mode 100644 index 0000000000..74e6e10357 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MutexLock.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MutexLock" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt new file mode 100644 index 0000000000..013f42d855 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_MutexV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "MutexV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt b/tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt new file mode 100644 index 0000000000..28ac301e41 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_NextIteration.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "NextIteration" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt b/tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt new file mode 100644 index 0000000000..ec83858510 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_NthElement.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "NthElement" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt new file mode 100644 index 0000000000..ee9d777b4e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OneShotIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OneShotIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt new file mode 100644 index 0000000000..b8276b964a --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_OrderedMapClear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapClear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt new file mode 100644 index 0000000000..1ba6c5b2fc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapIncompleteSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapIncompleteSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt new file mode 100644 index 0000000000..8f0c7afd46 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapPeek.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapPeek" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt new file mode 100644 index 0000000000..2e155726da --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt new file mode 100644 index 0000000000..6222c1fc4c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapStage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapStage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt new file mode 100644 index 0000000000..5cca8d9f93 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapUnstage" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt new file mode 100644 index 0000000000..d67b95b65b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OrderedMapUnstageNoKey.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OrderedMapUnstageNoKey" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt new file mode 100644 index 0000000000..c6223b3132 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PaddedBatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PaddedBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt new file mode 100644 index 0000000000..a36ad27364 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParallelDynamicStitch.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParallelDynamicStitch" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt new file mode 100644 index 0000000000..93cd5719fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParallelInterleaveDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParallelInterleaveDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt new file mode 100644 index 0000000000..09d200dd24 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParallelMapDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParallelMapDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt new file mode 100644 index 0000000000..a30360d2de --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PlaceholderV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PlaceholderV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt b/tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt new file mode 100644 index 0000000000..d35550236a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PopulationCount.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PopulationCount" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt new file mode 100644 index 0000000000..ec4e214eb5 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PrefetchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PrefetchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt new file mode 100644 index 0000000000..228c4047d2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PrependFromQueueAndPaddedBatchDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PrependFromQueueAndPaddedBatchDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt new file mode 100644 index 0000000000..9565f5632b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_PreventGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "PreventGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt new file mode 100644 index 
0000000000..d2468f1b24 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeAndDequantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt new file mode 100644 index 0000000000..15e181be20 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeAndDequantizeV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt new file mode 100644 index 0000000000..f1edc6f5fa --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeAndDequantizeV3.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeAndDequantizeV3" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt new file mode 100644 index 0000000000..9a2a86d25d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeDownAndShrinkRange.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeDownAndShrinkRange" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt new file mode 100644 index 0000000000..b952d6eccb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt new file mode 100644 index 0000000000..e009ada553 --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedBatchNormWithGlobalNormalization.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedBatchNormWithGlobalNormalization" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt new file mode 100644 index 0000000000..3432962e59 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedBiasAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedBiasAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt new file mode 100644 index 0000000000..2409d12abe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedConv2D.pbtxt @@ -0,0 +1,6 @@ +op { + graph_op_name: "QuantizedConv2D" + endpoint { + name: "nn.quantized_conv2d" + } +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt new file mode 100644 index 0000000000..47a4931a05 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedInstanceNorm.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedInstanceNorm" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt new file mode 100644 index 0000000000..3ca9d2ae07 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedMatMul.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedMatMul" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt new file mode 100644 index 0000000000..c026fba194 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedMul.pbtxt @@ -0,0 +1,4 @@ 
+op { + graph_op_name: "QuantizedMul" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt new file mode 100644 index 0000000000..e5da4f25f0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedRelu" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt new file mode 100644 index 0000000000..ef1e648312 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedRelu6.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedRelu6" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt new file mode 100644 index 0000000000..7e6d9ed718 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedReshape.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedReshape" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt new file mode 100644 index 0000000000..a8da4128c2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizedResizeBilinear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizedResizeBilinear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt b/tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt new file mode 100644 index 0000000000..f1d2ef63f1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QueueIsClosed.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QueueIsClosed" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt new file mode 100644 index 0000000000..07cf1a7497 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QueueIsClosedV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QueueIsClosedV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt b/tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt new file mode 100644 index 0000000000..e9719255ae --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RFFT.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RFFT" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt b/tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt new file mode 100644 index 0000000000..1336a64408 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RFFT2D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RFFT2D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt b/tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt new file mode 100644 index 0000000000..978b5814ff --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RFFT3D.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RFFT3D" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt new file mode 100644 index 0000000000..a5f6f8c6f1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RandomDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RandomDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt new file mode 100644 index 0000000000..8cc217c50e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RandomPoissonV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RandomPoissonV2" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt new file mode 100644 index 0000000000..4cd8296b22 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RangeDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RangeDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt new file mode 100644 index 0000000000..e250b78eff --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ReadVariableOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ReadVariableOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt b/tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt new file mode 100644 index 0000000000..29f798050e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RecordInput.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RecordInput" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt b/tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt new file mode 100644 index 0000000000..f9dfcf5e97 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RefNextIteration.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RefNextIteration" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt b/tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt new file mode 100644 index 0000000000..8f9909aa86 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RefSelect.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RefSelect" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt b/tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt new file mode 100644 index 0000000000..68b0f4a694 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_RefSwitch.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RefSwitch" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt b/tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt new file mode 100644 index 0000000000..fc069d857d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RemoteCall.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RemoteCall" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt new file mode 100644 index 0000000000..be301da838 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RepeatDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RepeatDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt b/tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt new file mode 100644 index 0000000000..e327595a38 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_RequantizationRange.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RequantizationRange" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt b/tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt new file mode 100644 index 0000000000..f26f0611ba --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Requantize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Requantize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt new file mode 100644 index 0000000000..e0413a67a3 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdadelta" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt new file mode 100644 index 0000000000..52b8ba0b0e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..edfc0a733f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt new file mode 100644 index 0000000000..ca2713b533 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAdam.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAdam" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt new file mode 100644 index 0000000000..50dd643953 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyAddSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyAddSign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..20592e38c8 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt new file mode 100644 index 0000000000..72b49e09d6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..af1d24c344 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt new file mode 100644 index 0000000000..75d6afd426 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt new file mode 100644 index 0000000000..3e499cf72e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt new file mode 100644 index 0000000000..b23ad0d061 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyPowerSign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyPowerSign" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..6ad124c590 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyProximalAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..d684a5dd67 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt new file mode 100644 index 0000000000..c4c20e1382 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt new file mode 100644 index 0000000000..87376b7447 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceCountUpTo.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceCountUpTo" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt new file mode 100644 index 0000000000..714ba4a7ca --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceGather.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceGather" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt new file mode 100644 index 0000000000..4d4601cafd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt new file mode 100644 index 0000000000..54c66708ae --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterNdUpdate.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterNdUpdate" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt new file mode 100644 index 0000000000..30f885bee0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceScatterUpdate.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceScatterUpdate" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt new file mode 100644 index 0000000000..a7e4dad138 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyAdadelta" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt new file mode 100644 index 0000000000..1388da789c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyAdagrad" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..c5beaa4f58 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..f3de3d93df --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt new file mode 100644 index 0000000000..f83833d351 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..71adbb0bcd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt new file mode 100644 index 0000000000..28a19caacc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: 
"ResourceSparseApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..e8cda7f4ed --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyProximalAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..5fa1ade669 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt new file mode 100644 index 0000000000..86cc9a41ae --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceSparseApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceSparseApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt b/tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt new file mode 100644 index 0000000000..ef6e19fea0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ResourceStridedSliceAssign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ResourceStridedSliceAssign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt new file mode 100644 index 0000000000..34d07239a1 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_RestoreV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "RestoreV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Roll.pbtxt b/tensorflow/core/api_def/python_api/api_def_Roll.pbtxt new file mode 100644 index 0000000000..9cc919f36f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Roll.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Roll" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt new file mode 100644 index 0000000000..617897ee44 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SaveV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SaveV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt new file mode 100644 index 0000000000..e71b655c22 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ScanDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ScanDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt new file mode 100644 index 0000000000..ecf71cd625 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ScatterNdNonAliasingAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ScatterNdNonAliasingAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt b/tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt new file mode 100644 index 0000000000..07d2f200fe --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SerializeIterator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SerializeIterator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt new file mode 100644 index 0000000000..ee9c71036b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SetSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SetSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt new file mode 100644 index 0000000000..7b0d2994f0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ShuffleAndRepeatDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ShuffleAndRepeatDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt new file mode 100644 index 0000000000..8f0be9197a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ShuffleDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ShuffleDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt new file mode 100644 index 0000000000..96a551c5b6 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SkipDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SkipDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt new file mode 100644 index 0000000000..5e158c9ca0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorApplyGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseAccumulatorApplyGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt new file mode 100644 index 0000000000..5326f23def 
--- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseAccumulatorTakeGradient.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseAccumulatorTakeGradient" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt new file mode 100644 index 0000000000..d30a8676e0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdadelta.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyAdadelta" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt new file mode 100644 index 0000000000..cb5ddef212 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt new file mode 100644 index 0000000000..c3b87b0953 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyAdagradDA.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyAdagradDA" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt new file mode 100644 index 0000000000..db47328738 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyCenteredRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyCenteredRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt new file mode 100644 index 0000000000..14e37b8ba2 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrl.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyFtrl" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt new file mode 100644 index 0000000000..0d307af9b4 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyFtrlV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyFtrlV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt new file mode 100644 index 0000000000..ed34c0485d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyMomentum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyMomentum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt new file mode 100644 index 0000000000..ff2d3b6731 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalAdagrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyProximalAdagrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt new file mode 100644 index 0000000000..f342a611bb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseApplyProximalGradientDescent.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyProximalGradientDescent" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt new file mode 100644 index 0000000000..7f337d50e5 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_SparseApplyRMSProp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseApplyRMSProp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt new file mode 100644 index 0000000000..bad4120795 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseConditionalAccumulator.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseConditionalAccumulator" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt new file mode 100644 index 0000000000..c5e7c9851f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseAdd.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseDenseCwiseAdd" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt new file mode 100644 index 0000000000..f72031cf68 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseDiv.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseDenseCwiseDiv" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt new file mode 100644 index 0000000000..a87004ee5f --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseDenseCwiseMul.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseDenseCwiseMul" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt new file mode 100644 index 0000000000..771083cd51 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanGrad.pbtxt @@ 
-0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentMeanGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt new file mode 100644 index 0000000000..fcb029535c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMeanWithNumSegments.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentMeanWithNumSegments" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt new file mode 100644 index 0000000000..0682a597bb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSqrtNGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt new file mode 100644 index 0000000000..7311a093df --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtNWithNumSegments.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSqrtNWithNumSegments" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt new file mode 100644 index 0000000000..81c2b8554e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSumWithNumSegments.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSumWithNumSegments" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt new file mode 100644 index 0000000000..0dbadc01ed --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_SparseSparseMaximum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSparseMaximum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt new file mode 100644 index 0000000000..0e3ffcbddf --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSparseMinimum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSparseMinimum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt new file mode 100644 index 0000000000..19c0c7f199 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseTensorSliceDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseTensorSliceDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt new file mode 100644 index 0000000000..735ee18e14 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseToSparseSetOperation.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseToSparseSetOperation" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt new file mode 100644 index 0000000000..2ab4c3e441 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SqlDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SqlDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Stage.pbtxt b/tensorflow/core/api_def/python_api/api_def_Stage.pbtxt new file mode 100644 index 0000000000..66de5901bc --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Stage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Stage" + visibility: HIDDEN +} 
diff --git a/tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt b/tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt new file mode 100644 index 0000000000..f54a1c1c04 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StageClear.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StageClear" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt b/tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt new file mode 100644 index 0000000000..710394d30d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StagePeek.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StagePeek" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt b/tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt new file mode 100644 index 0000000000..472032ac42 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StageSize.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StageSize" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt new file mode 100644 index 0000000000..f7bed36602 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorHandle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StatsAggregatorHandle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt new file mode 100644 index 0000000000..8b1bab2440 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StatsAggregatorSummary.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StatsAggregatorSummary" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt b/tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt new file mode 100644 index 0000000000..bcf1df228e --- 
/dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StridedSliceAssign.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StridedSliceAssign" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt b/tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt new file mode 100644 index 0000000000..05d7d57511 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StridedSliceGrad.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "StridedSliceGrad" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt new file mode 100644 index 0000000000..3c270ada3c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TFRecordDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TFRecordDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt new file mode 100644 index 0000000000..711b335dc1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TakeDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TakeDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt new file mode 100644 index 0000000000..5bc3920c56 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TensorDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TensorDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt new file mode 100644 index 0000000000..89ad016483 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TensorSliceDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TensorSliceDataset" + visibility: HIDDEN +} diff --git 
a/tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt new file mode 100644 index 0000000000..08d785191b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_TextLineDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "TextLineDataset" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt b/tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt new file mode 100644 index 0000000000..65eb756b87 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Unstage.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Unstage" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt new file mode 100644 index 0000000000..2c93a6db93 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_VarHandleOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "VarHandleOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt b/tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt new file mode 100644 index 0000000000..de5d9850ac --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_VarIsInitializedOp.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "VarIsInitializedOp" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt b/tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt new file mode 100644 index 0000000000..9b317152dd --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_VariableShape.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "VariableShape" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt b/tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt new file mode 100644 index 0000000000..dd1459521f --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_ZipDataset.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ZipDataset" + visibility: HIDDEN +} -- GitLab From 8c557a579384e2665fd438a944fd416f544a2a81 Mon Sep 17 00:00:00 2001 From: Yuefeng Zhou Date: Wed, 28 Feb 2018 18:36:57 -0800 Subject: [PATCH 267/884] Use NodeExecStats's output_slot field to identify output instead of just using proto index. PiperOrigin-RevId: 187416101 --- tensorflow/core/common_runtime/step_stats_collector.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/step_stats_collector.cc b/tensorflow/core/common_runtime/step_stats_collector.cc index cb900db10a..f21536d586 100644 --- a/tensorflow/core/common_runtime/step_stats_collector.cc +++ b/tensorflow/core/common_runtime/step_stats_collector.cc @@ -226,13 +226,14 @@ void StepStatsCollector::BuildCostModel( if (node) { for (int i = 0; i < stats.output_size(); ++i) { const auto& output = stats.output(i); - cm->RecordMaxMemorySize(node, i, + int output_slot = output.slot(); + cm->RecordMaxMemorySize(node, output_slot, Bytes(output.tensor_description() .allocation_description() .allocated_bytes()), - stats.output(i).tensor_description().shape(), - node->output_types()[i]); - cm->RecordAllocationId(node, i, + output.tensor_description().shape(), + node->output_types()[output_slot]); + cm->RecordAllocationId(node, output_slot, output.tensor_description() .allocation_description() .allocation_id()); -- GitLab From af6cdb9e5eae7e5e41824336fa5b3084402d43e9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 18:56:01 -0800 Subject: [PATCH 268/884] Use half_val instead of int_val to get the bfloat16 tensor value in MakeNdarray. 
PiperOrigin-RevId: 187417908 --- tensorflow/python/framework/tensor_util.py | 9 ++++----- .../python/framework/tensor_util_test.py | 20 +++++++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 27afaa074a..135562e831 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -559,16 +559,16 @@ def MakeNdarray(tensor): if tensor.tensor_content: return (np.frombuffer(tensor.tensor_content, dtype=dtype).copy() .reshape(shape)) - elif tensor_dtype == dtypes.float16: + elif tensor_dtype == dtypes.float16 or tensor_dtype == dtypes.bfloat16: # the half_val field of the TensorProto stores the binary representation # of the fp16: we need to reinterpret this as a proper float16 if len(tensor.half_val) == 1: tmp = np.array(tensor.half_val[0], dtype=np.uint16) - tmp.dtype = np.float16 + tmp.dtype = tensor_dtype.as_numpy_dtype return np.repeat(tmp, num_elements).reshape(shape) else: tmp = np.fromiter(tensor.half_val, dtype=np.uint16) - tmp.dtype = np.float16 + tmp.dtype = tensor_dtype.as_numpy_dtype return tmp.reshape(shape) elif tensor_dtype == dtypes.float32: if len(tensor.float_val) == 1: @@ -586,8 +586,7 @@ def MakeNdarray(tensor): return np.fromiter(tensor.double_val, dtype=dtype).reshape(shape) elif tensor_dtype in [ dtypes.int32, dtypes.uint8, dtypes.uint16, dtypes.int16, dtypes.int8, - dtypes.qint32, dtypes.quint8, dtypes.qint8, dtypes.qint16, dtypes.quint16, - dtypes.bfloat16 + dtypes.qint32, dtypes.quint8, dtypes.qint8, dtypes.qint16, dtypes.quint16 ]: if len(tensor.int_val) == 1: return np.repeat(np.array(tensor.int_val[0], dtype=dtype), diff --git a/tensorflow/python/framework/tensor_util_test.py b/tensorflow/python/framework/tensor_util_test.py index 6b1b3dd40c..35fff80c61 100644 --- a/tensorflow/python/framework/tensor_util_test.py +++ b/tensorflow/python/framework/tensor_util_test.py @@ -235,6 
+235,26 @@ class TensorUtilTest(test.TestCase): self.assertEquals(np.float16, a.dtype) self.assertAllClose(np.array([10.0, 20.0], dtype=np.float16), a) + def testBfloat16(self): + test_type = dtypes.bfloat16.as_numpy_dtype + t = tensor_util.make_tensor_proto(np.array([10.0, 20.0], dtype=test_type)) + # 10.0: 16672 = 010000010(130) 0100000: (1+0/2+1/4) * 2^(130-127) + # 20.0: 16800 = 010000011(131) 0100000: (1+0/2+1/4) * 2^(131-127) + self.assertProtoEquals(""" + dtype: DT_BFLOAT16 + tensor_shape { + dim { + size: 2 + } + } + half_val: 16672 + half_val: 16800 + """, t) + + a = tensor_util.MakeNdarray(t) + self.assertEquals(test_type, a.dtype) + self.assertAllClose(np.array([10.0, 20.0], dtype=test_type), a) + def testInt(self): t = tensor_util.make_tensor_proto(10) self.assertProtoEquals(""" -- GitLab From 63646c32c629f750706c9c63f87735bdbcec4963 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 28 Feb 2018 18:59:41 -0800 Subject: [PATCH 269/884] Add bfloat16 random_op for CPU. 
PiperOrigin-RevId: 187418131 --- tensorflow/core/kernels/random_op.cc | 1 + .../core/lib/random/random_distributions.h | 119 ++++++++++++++++++ .../lib/random/random_distributions_test.cc | 24 +++- 3 files changed, 142 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/random_op.cc b/tensorflow/core/kernels/random_op.cc index 78ff7948fb..e37232539f 100644 --- a/tensorflow/core/kernels/random_op.cc +++ b/tensorflow/core/kernels/random_op.cc @@ -495,6 +495,7 @@ class RandomGammaOp : public OpKernel { RandomUniformIntOp); TF_CALL_half(REGISTER); +TF_CALL_bfloat16(REGISTER); TF_CALL_float(REGISTER); TF_CALL_double(REGISTER); TF_CALL_int32(REGISTER_INT); diff --git a/tensorflow/core/lib/random/random_distributions.h b/tensorflow/core/lib/random/random_distributions.h index 3fe1f9bc6c..2ebe608fc9 100644 --- a/tensorflow/core/lib/random/random_distributions.h +++ b/tensorflow/core/lib/random/random_distributions.h @@ -32,6 +32,8 @@ namespace random { // Helper function to convert a 16-bit integer to a half between [0..1). PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x); +// Helper function to convert a 16-bit integer to a bfloat16 between [0..1). +PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x); // Helper function to convert a 32-bit integer to a float between [0..1). PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x); // Helper function to convert two 32-bit integers to a double between [0..1). @@ -75,6 +77,30 @@ class UniformDistribution { } }; +template +class UniformDistribution { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount; + // Cost of generation of a single element (in cycles). + static const int kElementCost = 3; + // Indicate that this distribution may take variable number of samples + // during the runtime. 
+ static const bool kVariableSamplesPerOutput = false; + typedef Array ResultType; + typedef bfloat16 ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + for (int i = 0; i < kResultElementCount; ++i) { + result[i] = Uint16ToGfloat16(sample[i]); + } + return result; + } +}; + template class UniformDistribution { public: @@ -305,6 +331,36 @@ class NormalDistribution { } }; +template +class NormalDistribution { + public: + // The number of elements that will be returned. + static const int kResultElementCount = Generator::kResultElementCount; + // Cost of generation of a single element (in cycles). + static const int kElementCost = 70; + // Indicate that this distribution may take variable number of samples + // during the runtime. + static const bool kVariableSamplesPerOutput = false; + typedef Array ResultType; + typedef bfloat16 ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(Generator* gen) { + typename Generator::ResultType sample = (*gen)(); + ResultType result; + static_assert(kResultElementCount % 2 == 0, + "kResultElementCount should be an even number"); + for (int i = 0; i < kResultElementCount; i += 2) { + float f[2]; + // Box-Muller transform requires processing 2 elements at a time. + BoxMullerFloat(sample[i], sample[i + 1], &f[0], &f[1]); + result[i] = bfloat16(f[0]); + result[i + 1] = bfloat16(f[1]); + } + return result; + } +}; + template class NormalDistribution { public: @@ -414,6 +470,48 @@ class TruncatedNormalDistribution { } }; +template +class TruncatedNormalDistribution { + public: + // The number of elements that will be returned. + static const int kResultElementCount = + SingleSampleGenerator::kNativeElementCount; + // Cost of generation of a single element (in cycles). + static const int kElementCost = 90; + // Indicate that this distribution may take variable number of samples + // during the runtime. 
+ static const bool kVariableSamplesPerOutput = true; + // The threshold where the normal distribution is truncated. + const float kTruncateValue = 2.0f; + + typedef Array ResultType; + typedef bfloat16 ResultElementType; + + PHILOX_DEVICE_INLINE + ResultType operator()(SingleSampleGenerator* gen) { + ResultType results; + int index = 0; + while (true) { + // Repeatedly take samples from the normal distribution, until we have + // the desired number of elements that fall within the pre-defined cutoff + // threshold. + const uint32 x0 = (*gen)(); + const uint32 x1 = (*gen)(); + float f[2]; + BoxMullerFloat(x0, x1, &f[0], &f[1]); + + for (int i = 0; i < 2; ++i) { + if (Eigen::numext::abs(f[i]) < kTruncateValue) { + results[index++] = bfloat16(f[i]); + if (index >= kResultElementCount) { + return results; + } + } + } + } + } +}; + // Partial specialization for float. template class TruncatedNormalDistribution { @@ -567,6 +665,27 @@ PHILOX_DEVICE_INLINE Eigen::half Uint16ToHalf(uint16 x) { return result - Eigen::half(1.0); } +// Helper function to convert an 16-bit integer to a bfloat16 between [0..1). +// This can create a uniform distribution of values between [0..1). +PHILOX_DEVICE_INLINE bfloat16 Uint16ToGfloat16(uint16 x) { + // bfloat are formatted as follows (MSB first): + // sign(1) exponent(8) mantissa(7) + // Conceptually construct the following: + // sign == 0 + // exponent == 127 -- an excess 127 representation of a zero exponent + // mantissa == 7 random bits + const uint16 man = x & 0x7fu; // 7 bit mantissa + const uint16 exp = static_cast(127); + const uint16 val = (exp << 7) | man; + + bfloat16 result; + memcpy(&result, &val, sizeof(val)); + // The mantissa has an implicit leading 1, so the above code creates a value + // in [1, 2). The minus will not cause a rounding that makes the result 1. + // Instead it will just be close to 1. + return result - bfloat16(1.0); +} + // Helper function to convert an 32-bit integer to a float between [0..1). 
PHILOX_DEVICE_INLINE float Uint32ToFloat(uint32 x) { // IEEE754 floats are formatted as follows (MSB first): diff --git a/tensorflow/core/lib/random/random_distributions_test.cc b/tensorflow/core/lib/random/random_distributions_test.cc index 85d68f456e..8868672a10 100644 --- a/tensorflow/core/lib/random/random_distributions_test.cc +++ b/tensorflow/core/lib/random/random_distributions_test.cc @@ -37,6 +37,10 @@ namespace { // unit normal distribution, it should almost definitely never exceed 6. static constexpr float kZLimit = 6.0; +// As bfloat16 has much less precision, the largest z-value should be +// larger than for float32. +static constexpr float kZLimitBfloat16 = 20.0; + // A utility function to fill the given array with samples from the given // distribution, using the single adapter of the underlying generator template @@ -93,7 +97,7 @@ bool CheckSamplesMoments(const std::vector& samples, // mode, given the large number of samples. moments_data[i] += moment; ++moments_sample_count_data[i]; - moment *= samples_data[index]; + moment *= static_cast(samples_data[index]); } } @@ -125,7 +129,7 @@ bool CheckSamplesMoments(const std::vector& samples, const double z_test = fabs((moments[i] - moments_i_mean) / sqrt(total_variance)); - if (z_test > z_limit) { + if (z_test > static_cast(z_limit)) { LOG(ERROR) << "failing z_test:" << " moment: " << i << " stride: " << stride << " z_test: " << z_test << " z_limit: " << z_limit @@ -252,6 +256,22 @@ void RandomParametersMomentsTest(int count, int max_moments, } } +TEST(PhiloxRandomTest, UniformBfloat16MomentsTest) { + const std::vector strides = {0, 1, 4, 17}; + UniformMomentsTest(1 << 20, 40, strides, bfloat16(kZLimitBfloat16)); +} + +TEST(PhiloxRandomTest, NormalBfloat16MomentsTest) { + const std::vector strides = {0, 1, 4, 17}; + NormalMomentsTest(8 << 20, 25, strides, bfloat16(kZLimitBfloat16)); +} + +TEST(PhiloxRandomTest, RandomParametersBfloat16MomentsTest) { + const std::vector strides = {0, 1, 4, 17}; +
RandomParametersMomentsTest(1 << 20, 40, strides, + bfloat16(kZLimitBfloat16)); +} + TEST(PhiloxRandomTest, UniformFloatMomentsTest) { const std::vector strides = {0, 1, 4, 17}; UniformMomentsTest(1 << 20, 40, strides, kZLimit); -- GitLab From 1927250a3c2388631583c855ce04a836a084e7ca Mon Sep 17 00:00:00 2001 From: Clayne Robison Date: Wed, 28 Feb 2018 19:14:34 -0800 Subject: [PATCH 270/884] Removing unnecessary check for reorder --- tensorflow/core/kernels/mkl_input_conversion_op.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/mkl_input_conversion_op.cc b/tensorflow/core/kernels/mkl_input_conversion_op.cc index e9a2376b54..d91f7107c5 100644 --- a/tensorflow/core/kernels/mkl_input_conversion_op.cc +++ b/tensorflow/core/kernels/mkl_input_conversion_op.cc @@ -442,12 +442,11 @@ class MklInputConversionOp : public OpKernel { auto input_tf_md = mkl_output_mkl_shape.GetTfLayout(); tf_input.SetUsrMem(input_tf_md, tf_tensor); - // Create reorder between tensorflow layout and Mkl layout. + // Create reorder between tensorflow layout and Mkl layout if necessary std::vector net; - CHECK_EQ(tf_input.CheckReorderToOpMem( + tf_input.CheckReorderToOpMem( memory::primitive_desc(output_mkl_md, cpu_engine), - tensor_out, &net), - true); + tensor_out, &net); stream(stream::kind::eager).submit(net).wait(); // -- The tensor in MKL format passes through -- -- GitLab From 16b4fbd56f1b460cefa41c6c50864c0245ecad91 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Wed, 28 Feb 2018 21:07:39 -0800 Subject: [PATCH 271/884] [XLA] Reshape/Transpose should not be bitcast if element type changes. 
PiperOrigin-RevId: 187427133 --- tensorflow/compiler/xla/shape_util.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 3152789016..9810e818f6 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -1076,6 +1076,10 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, CHECK(LayoutUtil::HasLayout(input_shape) && LayoutUtil::HasLayout(output_shape)); + if (!SameElementType(input_shape, output_shape)) { + return false; + } + // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) && LayoutUtil::IsPadded(output_shape)) { return false; @@ -1106,6 +1110,10 @@ ShapeUtil::DimensionsUnmodifiedByReshape(const Shape& input_shape, CHECK(LayoutUtil::HasLayout(input_shape) && LayoutUtil::HasLayout(output_shape)); + if (!SameElementType(input_shape, output_shape)) { + return false; + } + // Padding is not handled. if (LayoutUtil::IsPadded(input_shape) || LayoutUtil::IsPadded(output_shape)) { return false; -- GitLab From dab98b7a93105a7b3d0a5e015453e895049d160f Mon Sep 17 00:00:00 2001 From: june-one Date: Thu, 1 Mar 2018 15:54:33 +0900 Subject: [PATCH 272/884] Fix error : ConvNDLSTMCell does not pass name parameter --- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index a6c2d9cdbb..675b4f9f64 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2131,7 +2131,7 @@ class Conv1DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_1d_lstm_cell", **kwargs): """Construct Conv1DLSTM. 
See `ConvLSTMCell` for more details.""" - super(Conv1DLSTMCell, self).__init__(conv_ndims=1, **kwargs) + super(Conv1DLSTMCell, self).__init__(conv_ndims=1, name=name, **kwargs) class Conv2DLSTMCell(ConvLSTMCell): @@ -2142,7 +2142,7 @@ class Conv2DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_2d_lstm_cell", **kwargs): """Construct Conv2DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv2DLSTMCell, self).__init__(conv_ndims=2, **kwargs) + super(Conv2DLSTMCell, self).__init__(conv_ndims=2, name=name, **kwargs) class Conv3DLSTMCell(ConvLSTMCell): @@ -2153,7 +2153,7 @@ class Conv3DLSTMCell(ConvLSTMCell): def __init__(self, name="conv_3d_lstm_cell", **kwargs): """Construct Conv3DLSTM. See `ConvLSTMCell` for more details.""" - super(Conv3DLSTMCell, self).__init__(conv_ndims=3, **kwargs) + super(Conv3DLSTMCell, self).__init__(conv_ndims=3, name=name, **kwargs) def _conv(args, filter_size, num_features, bias, bias_start=0.0): -- GitLab From 6c6bd9524764c1b15d2dc791f88f5de8cf0b51c1 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 28 Feb 2018 22:58:19 -0800 Subject: [PATCH 273/884] [tf.data] Add optional `shuffle` argument to `Dataset.list_files()`. This option makes it easier to shuffle a set of filenames on each iteration, and default to true to match the recommended best practices when training on a large dataset. 
PiperOrigin-RevId: 187434282 --- .../list_files_dataset_op_test.py | 49 ++++++++++++++++--- tensorflow/python/data/ops/dataset_ops.py | 22 +++++++-- .../api/golden/tensorflow.data.-dataset.pbtxt | 2 +- ...ow.data.-fixed-length-record-dataset.pbtxt | 2 +- .../tensorflow.data.-t-f-record-dataset.pbtxt | 2 +- .../tensorflow.data.-text-line-dataset.pbtxt | 2 +- 6 files changed, 66 insertions(+), 13 deletions(-) diff --git a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py index 4e7691ee81..6442eb9ff5 100644 --- a/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/list_files_dataset_op_test.py @@ -46,8 +46,9 @@ class ListFilesDatasetOpTest(test.TestCase): dataset = dataset_ops.Dataset.list_files(path.join(self.tmp_dir, '*')) with self.test_session() as sess: itr = dataset.make_one_shot_iterator() + next_element = itr.get_next() with self.assertRaises(errors.OutOfRangeError): - sess.run(itr.get_next()) + sess.run(next_element) def testSimpleDirectory(self): filenames = ['a', 'b', 'c'] @@ -56,13 +57,14 @@ class ListFilesDatasetOpTest(test.TestCase): dataset = dataset_ops.Dataset.list_files(path.join(self.tmp_dir, '*')) with self.test_session() as sess: itr = dataset.make_one_shot_iterator() + next_element = itr.get_next() full_filenames = [] produced_filenames = [] for filename in filenames: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) with self.assertRaises(errors.OutOfRangeError): sess.run(itr.get_next()) @@ -73,12 +75,13 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, 
feed_dict={filename_placeholder: path.join(self.tmp_dir, '*')}) with self.assertRaises(errors.OutOfRangeError): - sess.run(itr.get_next()) + sess.run(next_element) def testSimpleDirectoryInitializer(self): filenames = ['a', 'b', 'c'] @@ -89,6 +92,7 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, feed_dict={filename_placeholder: path.join(self.tmp_dir, '*')}) @@ -98,7 +102,7 @@ class ListFilesDatasetOpTest(test.TestCase): for filename in filenames: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) @@ -114,6 +118,7 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py')}) @@ -123,7 +128,7 @@ class ListFilesDatasetOpTest(test.TestCase): for filename in filenames[1:-1]: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) with self.assertRaises(errors.OutOfRangeError): @@ -138,6 +143,7 @@ class ListFilesDatasetOpTest(test.TestCase): with self.test_session() as sess: itr = dataset.make_initializable_iterator() + next_element = itr.get_next() sess.run( itr.initializer, feed_dict={filename_placeholder: path.join(self.tmp_dir, '*.py*')}) @@ -147,13 +153,44 @@ class ListFilesDatasetOpTest(test.TestCase): for filename in filenames[1:]: full_filenames.append( compat.as_bytes(path.join(self.tmp_dir, filename))) - 
produced_filenames.append(compat.as_bytes(sess.run(itr.get_next()))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) self.assertItemsEqual(full_filenames, produced_filenames) with self.assertRaises(errors.OutOfRangeError): sess.run(itr.get_next()) + def testNoShuffle(self): + filenames = ['a', 'b', 'c'] + self._touchTempFiles(filenames) + + # Repeat the list twice and ensure that the order is the same each time. + # NOTE(mrry): This depends on an implementation detail of `list_files()`, + # which is that the list of files is captured when the iterator is + # initialized. Otherwise, or if e.g. the iterator were initialized more than + # once, it's possible that the non-determinism of `tf.matching_files()` + # would cause this test to fail. However, it serves as a useful confirmation + # that the `shuffle=False` argument is working as intended. + # TODO(b/73959787): Provide some ordering guarantees so that this test is + # more meaningful. + dataset = dataset_ops.Dataset.list_files( + path.join(self.tmp_dir, '*'), shuffle=False).repeat(2) + with self.test_session() as sess: + itr = dataset.make_one_shot_iterator() + next_element = itr.get_next() + + full_filenames = [] + produced_filenames = [] + for filename in filenames * 2: + full_filenames.append( + compat.as_bytes(path.join(self.tmp_dir, filename))) + produced_filenames.append(compat.as_bytes(sess.run(next_element))) + with self.assertRaises(errors.OutOfRangeError): + sess.run(itr.get_next()) + self.assertItemsEqual(full_filenames, produced_filenames) + self.assertEqual(produced_filenames[:len(filenames)], + produced_filenames[len(filenames):]) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 5751f35fe1..7c5aa4c767 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -36,6 +36,7 @@ from tensorflow.python.framework import ops from 
tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import gen_io_ops from tensorflow.python.ops import math_ops @@ -557,7 +558,7 @@ class Dataset(object): return PrefetchDataset(self, buffer_size) @staticmethod - def list_files(file_pattern): + def list_files(file_pattern, shuffle=None): """A dataset of all files matching a pattern. Example: @@ -570,16 +571,31 @@ class Dataset(object): - /path/to/dir/b.py - /path/to/dir/c.py - NOTE: The order of the file names returned can be non-deterministic. + NOTE: The order of the file names returned can be non-deterministic even + when `shuffle` is `False`. Args: file_pattern: A string or scalar string `tf.Tensor`, representing the filename pattern that will be matched. + shuffle: (Optional.) If `True`, the file names will be shuffled randomly. + Defaults to `True`. Returns: Dataset: A `Dataset` of strings corresponding to file names. """ - return Dataset.from_tensor_slices(gen_io_ops.matching_files(file_pattern)) + # TODO(b/73959787): Add a `seed` argument and make the `shuffle=False` + # behavior deterministic (e.g. by sorting the filenames). + if shuffle is None: + shuffle = True + matching_files = gen_io_ops.matching_files(file_pattern) + dataset = Dataset.from_tensor_slices(matching_files) + if shuffle: + # NOTE(mrry): The shuffle buffer size must be greater than zero, but the + # list of files might be empty. + buffer_size = math_ops.maximum( + array_ops.shape(matching_files, out_type=dtypes.int64)[0], 1) + dataset = dataset.shuffle(buffer_size) + return dataset def repeat(self, count=None): """Repeats this dataset `count` times. 
diff --git a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt index 42de5c0c80..0900adaf76 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-dataset.pbtxt @@ -64,7 +64,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt index e2fc8d6cb1..7b16ac90c9 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-fixed-length-record-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" diff --git a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt index 709ec127ce..9cf5f2ae20 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-t-f-record-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" diff --git 
a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt index 7263230c1c..8c3d669143 100644 --- a/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.data.-text-line-dataset.pbtxt @@ -65,7 +65,7 @@ tf_class { } member_method { name: "list_files" - argspec: "args=[\'file_pattern\'], varargs=None, keywords=None, defaults=None" + argspec: "args=[\'file_pattern\', \'shuffle\'], varargs=None, keywords=None, defaults=[\'None\'], " } member_method { name: "make_initializable_iterator" -- GitLab From 65011bd51dcae889e631c6db46e7bcbf0d6843d1 Mon Sep 17 00:00:00 2001 From: Penghao Cen Date: Thu, 1 Mar 2018 16:16:37 +0800 Subject: [PATCH 274/884] Add default whl file location and minor update comments --- tensorflow/tools/dist_test/README.md | 8 ++++++++ tensorflow/tools/dist_test/local_test.sh | 22 ++++++++-------------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/tensorflow/tools/dist_test/README.md b/tensorflow/tools/dist_test/README.md index c1b1f79bbd..228d5ee35d 100644 --- a/tensorflow/tools/dist_test/README.md +++ b/tensorflow/tools/dist_test/README.md @@ -17,6 +17,14 @@ cesnsu model: ./local_test.sh --model_name CENSUS_WIDENDEEP +You can test specify version of TensorFlow: + +```shell +./local_test.sh ${whl_file_url} +``` + +For example, you can find these TensorFlow python package URLs from [here](https://www.tensorflow.org/install/install_linux#the_url_of_the_tensorflow_python_package) for Ubuntu. 
+ **2) Launch a remote k8s cluster on Google Kubernetes Engine (GKE) and run the test suite on it** diff --git a/tensorflow/tools/dist_test/local_test.sh b/tensorflow/tools/dist_test/local_test.sh index 435f9d0dc9..caae7fd530 100755 --- a/tensorflow/tools/dist_test/local_test.sh +++ b/tensorflow/tools/dist_test/local_test.sh @@ -16,12 +16,11 @@ # # Tests distributed TensorFlow on a locally running TF GRPC cluster. # -# This script peforms the following steps: -# 1) Build the docker-in-docker (dind) image capable of running docker and -# Kubernetes (k8s) cluster inside. +# This script performs the following steps: +# 1) Build the docker image capable of running distributed TensorFlow in docker. # 2) Run a container from the aforementioned image and start docker service # in it -# 3) Call a script to launch a k8s TensorFlow GRPC cluster inside the container +# 3) Call a script to launch a distributed TensorFlow GRPC cluster inside the container # and run the distributed test suite. # # Usage: local_test.sh @@ -64,15 +63,9 @@ die() { # Configurations DOCKER_IMG_NAME="tensorflow/tf-dist-test-local-cluster" -LOCAL_K8S_CACHE=${HOME}/kubernetes -# Helper function -get_container_id_by_image_name() { - # Get the id of a container by image name - # Usage: get_docker_container_id_by_image_name - - docker ps | grep $1 | awk '{print $1}' -} +# Use TensorFlow v1.5.0 for Python 2.7 and CPU only as we set num_gpus to 0 in the below +DEFAULT_WHL_FILE_LOCATION="https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.5.0-cp27-none-linux_x86_64.whl" # Parse input arguments LEAVE_CONTAINER_RUNNING=0 @@ -84,7 +77,8 @@ SYNC_REPLICAS_FLAG="" WHL_FILE_LOCATION=${1} if [[ -z "${WHL_FILE_LOCATION}" ]]; then - die "whl file location is not specified" + WHL_FILE_LOCATION=${DEFAULT_WHL_FILE_LOCATION} + echo "use default whl file location" fi while true; do @@ -121,7 +115,7 @@ DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # Get utility functions source ${DIR}/scripts/utils.sh -# 
Build docker-in-docker image for local k8s cluster. +# Build docker image for local distributed TensorFlow cluster. NO_CACHE_FLAG="" if [[ ! -z "${TF_DIST_DOCKER_NO_CACHE}" ]] && [[ "${TF_DIST_DOCKER_NO_CACHE}" != "0" ]]; then -- GitLab From 46355f9065967dd39cd340b17d91a91f70d2c0c1 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Thu, 1 Mar 2018 01:44:33 -0800 Subject: [PATCH 275/884] Ensure folding of batch norms is idempotent. Added more rigorous testing. (Also fixed a couple of naming nits in the code as I looked through) PiperOrigin-RevId: 187446976 --- .../quantize/python/fold_batch_norms.py | 40 +++++++++++++------ .../python/quantize_parameterized_test.py | 23 ++++++++--- 2 files changed, 46 insertions(+), 17 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 75d9eb0e58..1f0648bbb6 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -194,7 +194,7 @@ def _FindFusedBatchNorms(graph): layer_op = match_result.get_op(layer_pattern) layer_tensor = match_result.get_tensor(layer_pattern) bn_op = match_result.get_op(batch_norm_pattern) - batch_epsilon_tensor = bn_op.get_attr('epsilon') + batch_epsilon = bn_op.get_attr('epsilon') # In the MatMul case, the output of batch norm is reshaped back into a # 2D tensor, so the output_tensor is the output of the Reshape op. @@ -207,6 +207,11 @@ def _FindFusedBatchNorms(graph): continue output_tensor = output_reshape_op.outputs[0] + # Ensure that the output tensor has consumers, otherwise this is a dangling + # node and not a match. 
+ if not output_tensor.consumers(): + continue + input_tensor = match_result.get_tensor(input_pattern) weight_tensor = match_result.get_tensor(weight_pattern) gamma_tensor = match_result.get_tensor(gamma_pattern) @@ -270,7 +275,7 @@ def _FindFusedBatchNorms(graph): moving_variance_tensor=moving_variance_tensor, bn_decay_mean_tensor=bn_decay_mean_tensor, bn_decay_var_tensor=bn_decay_var_tensor, - batch_epsilon_tensor=batch_epsilon_tensor) + batch_epsilon=batch_epsilon) def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, @@ -313,9 +318,8 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, g = ops.get_default_graph() with g.name_scope(context + '/batch_norm_correction'): recip_sigma_mv = math_ops.rsqrt( - match.moving_variance_tensor + match.batch_epsilon_tensor) - recip_sigma = math_ops.rsqrt( - match.variance_tensor + match.batch_epsilon_tensor) + match.moving_variance_tensor + match.batch_epsilon) + recip_sigma = math_ops.rsqrt(match.variance_tensor + match.batch_epsilon) correction_scale = math_ops.divide( recip_sigma_mv, recip_sigma, name='scale_compute') correction_scale = array_ops.identity( @@ -434,6 +438,9 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay): for bn in common.BatchNormGroups(graph): has_scaling = _HasScaling(graph, input_to_ops_map, bn) + if not _IsValidUnfusedBatchNorm(graph, bn): + continue + # The mangling code intimately depends on BatchNorm node's internals. 
original_op, folded_op = _CreateFoldedOp( graph, @@ -462,6 +469,15 @@ def _FoldUnfusedBatchNorms(graph, is_training, freeze_batch_norm_delay): raise ValueError('Unexpected inputs to op: %s' % add_bypass.name) +def _IsValidUnfusedBatchNorm(graph, context): + """Checks that the output of the unfused batch norm has consumers.""" + add_shift = graph.get_operation_by_name( + context + '/BatchNorm/batchnorm/add_1') + # Ensure that the output tensor of batch norm has consumers, otherwise this + # is a dangling node and not a match. + return bool(add_shift.outputs[0].consumers()) + + def _GetBatchNormParams(graph, context, has_scaling): """Extracts relevant tensors for folding batch norms. @@ -478,7 +494,7 @@ def _GetBatchNormParams(graph, context, has_scaling): batch_variance_tensor = None moving_mean_tensor = None moving_variance_tensor = None - batch_epsilon_tensor = None + batch_epsilon = None bn_decay_mean_tensor = None bn_decay_var_tensor = None @@ -509,7 +525,7 @@ def _GetBatchNormParams(graph, context, has_scaling): if op.name.endswith(op_suffix_moving_variance): moving_variance_tensor = graph.get_tensor_by_name(op.name + ':0') if op.name.endswith(op_suffix_epsilon): - batch_epsilon_tensor = graph.get_tensor_by_name(op.name + ':0') + batch_epsilon = graph.get_tensor_by_name(op.name + ':0') if op.name.endswith(op_suffix_bn_decay_mean): bn_decay_mean_tensor = graph.get_tensor_by_name(op.name + ':0') if op.name.endswith(op_suffix_bn_decay_var): @@ -535,7 +551,7 @@ def _GetBatchNormParams(graph, context, has_scaling): moving_variance_tensor=moving_variance_tensor, bn_decay_mean_tensor=bn_decay_mean_tensor, bn_decay_var_tensor=bn_decay_var_tensor, - batch_epsilon_tensor=batch_epsilon_tensor) + batch_epsilon=batch_epsilon) def _CreateFoldedOp(graph, context, has_scaling, freeze_batch_norm_delay, @@ -816,7 +832,7 @@ class _BatchNormMatch(object): def __init__(self, layer_op, bn_op, output_tensor, input_tensor, weight_tensor, gamma_tensor, beta_tensor, mean_tensor, 
variance_tensor, moving_mean_tensor, moving_variance_tensor, - bn_decay_mean_tensor, bn_decay_var_tensor, batch_epsilon_tensor): + bn_decay_mean_tensor, bn_decay_var_tensor, batch_epsilon): self._layer_op = layer_op self._bn_op = bn_op self._output_tensor = output_tensor @@ -830,7 +846,7 @@ class _BatchNormMatch(object): self._moving_variance_tensor = moving_variance_tensor self._bn_decay_mean_tensor = bn_decay_mean_tensor self._bn_decay_var_tensor = bn_decay_var_tensor - self._batch_epsilon_tensor = batch_epsilon_tensor + self._batch_epsilon = batch_epsilon @property def layer_op(self): @@ -877,8 +893,8 @@ class _BatchNormMatch(object): return self._moving_variance_tensor @property - def batch_epsilon_tensor(self): - return self._batch_epsilon_tensor + def batch_epsilon(self): + return self._batch_epsilon @property def bn_decay_mean_tensor(self): diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 639a7454a9..dd73f6c860 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -87,8 +87,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): update_barrier = control_flow_ops.no_op(name='update_barrier') with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, True, quant_delay=delay) + quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + quantization_node_name) @@ -130,6 +130,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_Conv2dWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( @@ -163,7 +164,6 @@ class 
QuantizeTest(test_util.TensorFlowTestCase): update_barrier = control_flow_ops.no_op(name='update_barrier') with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' @@ -205,6 +205,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_FCWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( @@ -239,7 +240,6 @@ class QuantizeTest(test_util.TensorFlowTestCase): update_barrier = control_flow_ops.no_op(name='update_barrier') with ops.control_dependencies([update_barrier]): array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' @@ -282,6 +282,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_DepthwiseConv2dWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( @@ -364,7 +365,6 @@ class QuantizeTest(test_util.TensorFlowTestCase): array_ops.identity(node, name='control_dependency') fold_batch_norms.FoldBatchNorms(graph, is_training=True) - quantize.Quantize(graph, True, quant_delay=delay) quantization_node_name = 'FakeQuantWithMinMaxVars' @@ -404,6 +404,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_FCWithBatchNorm(self): 
self._RunBatchNormTestOverParameters(self._TestQuantize_FCWithBatchNorm) @@ -487,6 +488,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) def testQuantize_DepthwiseConv2dWithBatchNorm(self): self._RunBatchNormTestOverParameters( @@ -535,8 +537,8 @@ class QuantizeTest(test_util.TensorFlowTestCase): array_ops.identity(node, name='control_dependency') fold_batch_norms.FoldBatchNorms(graph, is_training=True) - quantize.Quantize(graph, True, quant_delay=delay) + quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + quantization_node_name) @@ -574,6 +576,17 @@ class QuantizeTest(test_util.TensorFlowTestCase): output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._TestIdempotent(graph) + + def _TestIdempotent(self, graph): + # Ensure that calling the rewrite again doesn't change the graph. + graph_def_before = str(graph.as_graph_def()) + with graph.as_default(): + # Ensuring that calling the rewrite again doesn't add more nodes. + fold_batch_norms.FoldBatchNorms(graph, is_training=True) + quantize.Quantize(graph, True) + graph_def_after = str(graph.as_graph_def()) + self.assertEqual(graph_def_before, graph_def_after) def _BatchNormParams(self, fused=False): return {'center': True, 'scale': True, 'decay': 1.0 - 0.003, 'fused': fused} -- GitLab From 2b7a7ee30666d160929c9aa3e941fbc94c17cc52 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:03:38 -0800 Subject: [PATCH 276/884] Add RegexReplace Op that internally calls RE2::Replace. 
PiperOrigin-RevId: 187467840 --- .../base_api/api_def_RegexReplace.pbtxt | 25 ++++++ tensorflow/core/kernels/BUILD | 8 ++ tensorflow/core/kernels/regex_replace_op.cc | 76 +++++++++++++++++++ tensorflow/core/ops/string_ops.cc | 14 ++++ tensorflow/python/kernel_tests/BUILD | 12 +++ .../kernel_tests/regex_replace_op_test.py | 71 +++++++++++++++++ tensorflow/python/ops/string_ops.py | 2 + tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + 8 files changed, 212 insertions(+) create mode 100644 tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt create mode 100644 tensorflow/core/kernels/regex_replace_op.cc create mode 100644 tensorflow/python/kernel_tests/regex_replace_op_test.py diff --git a/tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt b/tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt new file mode 100644 index 0000000000..70ad521926 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_RegexReplace.pbtxt @@ -0,0 +1,25 @@ +op { + graph_op_name: "RegexReplace" + in_arg { + name: "input" + description: "The text to be processed." + } + in_arg { + name: "pattern" + description: "The regular expression to match the input." + } + in_arg { + name: "rewrite" + description: "The rewrite to be applied to the matched expresion." + } + out_arg { + name: "output" + description: "The text after applying pattern and rewrite." + } + attr { + name: "replace_global" + description: "If True, the replacement is global, otherwise the replacement\nis done only on the first match." + } + summary: "Replaces the match of pattern in input with rewrite." 
+ description: "It follows the re2 syntax (https://github.com/google/re2/wiki/Syntax)" +} diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 3426cf6e40..feacee5d63 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -4155,6 +4155,7 @@ cc_library( ":as_string_op", ":base64_ops", ":reduce_join_op", + ":regex_replace_op", ":string_join_op", ":string_split_op", ":string_to_hash_bucket_op", @@ -4189,6 +4190,12 @@ tf_kernel_library( deps = STRING_DEPS, ) +tf_kernel_library( + name = "regex_replace_op", + prefix = "regex_replace_op", + deps = STRING_DEPS + ["@com_googlesource_code_re2//:re2"], +) + tf_kernel_library( name = "string_split_op", prefix = "string_split_op", @@ -5063,6 +5070,7 @@ filegroup( "scatter_nd_op*", "mutex_ops.*", "batch_kernels.*", + "regex_replace_op.cc", ], ), visibility = ["//visibility:public"], diff --git a/tensorflow/core/kernels/regex_replace_op.cc b/tensorflow/core/kernels/regex_replace_op.cc new file mode 100644 index 0000000000..59ec854a79 --- /dev/null +++ b/tensorflow/core/kernels/regex_replace_op.cc @@ -0,0 +1,76 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include <string> + +#include "re2/re2.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { + +class RegexReplaceOp : public OpKernel { + public: + explicit RegexReplaceOp(OpKernelConstruction* ctx) : OpKernel(ctx) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("replace_global", &replace_global_)); + } + + void Compute(OpKernelContext* ctx) override { + const Tensor* input_tensor; + OP_REQUIRES_OK(ctx, ctx->input("input", &input_tensor)); + const auto& input_flat = input_tensor->flat<string>(); + + const Tensor* pattern_tensor; + OP_REQUIRES_OK(ctx, ctx->input("pattern", &pattern_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(pattern_tensor->shape()), + errors::InvalidArgument("Pattern must be scalar, but received ", + pattern_tensor->shape().DebugString())); + const string pattern = pattern_tensor->flat<string>()(0); + const RE2 match(pattern); + OP_REQUIRES(ctx, match.ok(), + errors::InvalidArgument("Invalid pattern: ", pattern, + ", error: ", match.error())); + + const Tensor* rewrite_tensor; + OP_REQUIRES_OK(ctx, ctx->input("rewrite", &rewrite_tensor)); + OP_REQUIRES(ctx, TensorShapeUtils::IsScalar(rewrite_tensor->shape()), + errors::InvalidArgument("Rewrite must be scalar, but received ", + rewrite_tensor->shape().DebugString())); + const string rewrite = rewrite_tensor->flat<string>()(0); + + Tensor* output_tensor = nullptr; + OP_REQUIRES_OK(ctx, ctx->allocate_output("output", input_tensor->shape(), + &output_tensor)); + auto output_flat = output_tensor->flat<string>(); + for (size_t i = 0; i < input_flat.size(); ++i) { + output_flat(i) = input_flat(i); + if (replace_global_) { + RE2::GlobalReplace(&output_flat(i), match, rewrite); + } else { + RE2::Replace(&output_flat(i), match, rewrite); + } + } + } + + private: + bool replace_global_; +}; +
+REGISTER_KERNEL_BUILDER(Name("RegexReplace").Device(DEVICE_CPU), + RegexReplaceOp); + +} // namespace tensorflow diff --git a/tensorflow/core/ops/string_ops.cc b/tensorflow/core/ops/string_ops.cc index e4c5bcfb54..05f216a83e 100644 --- a/tensorflow/core/ops/string_ops.cc +++ b/tensorflow/core/ops/string_ops.cc @@ -23,6 +23,20 @@ using shape_inference::DimensionHandle; using shape_inference::InferenceContext; using shape_inference::ShapeHandle; +REGISTER_OP("RegexReplace") + .Input("input: string") + .Input("pattern: string") + .Input("rewrite: string") + .Output("output: string") + .Attr("replace_global: bool = true") + .SetShapeFn([](InferenceContext* c) { + ShapeHandle unused; + TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused)); + TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused)); + c->set_output(0, c->input(0)); + return Status::OK(); + }); + REGISTER_OP("StringToHashBucketFast") .Input("input: string") .Output("output: int64") diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index c9aa4a252d..0f13e8bba5 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -712,6 +712,18 @@ cuda_py_test( ], ) +tf_py_test( + name = "regex_replace_op_test", + size = "small", + srcs = ["regex_replace_op_test.py"], + additional_deps = [ + "//tensorflow/python:client_testlib", + "//tensorflow/python:constant_op", + "//tensorflow/python:dtypes", + "//tensorflow/python:string_ops", + ], +) + tf_py_test( name = "save_restore_ops_test", size = "small", diff --git a/tensorflow/python/kernel_tests/regex_replace_op_test.py b/tensorflow/python/kernel_tests/regex_replace_op_test.py new file mode 100644 index 0000000000..6739ac3224 --- /dev/null +++ b/tensorflow/python/kernel_tests/regex_replace_op_test.py @@ -0,0 +1,71 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for RegexReplace op from string_ops.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import string_ops +from tensorflow.python.platform import test + + +class RegexReplaceOpTest(test.TestCase): + + def testRemovePrefix(self): + values = ["a:foo", "a:bar", "a:foo", "b:baz", "b:qux", "ca:b"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace( + input_vector, "^(a:|b:)", "", replace_global=False).eval() + self.assertAllEqual([b"foo", b"bar", b"foo", b"baz", b"qux", b"ca:b"], + stripped) + + def testRegexReplace(self): + values = ["aba\naba", "abcdabcde"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace(input_vector, "a.*a", "(\\0)").eval() + self.assertAllEqual([b"(aba)\n(aba)", b"(abcda)bcde"], stripped) + + def testEmptyMatch(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace(input_vector, "", "x").eval() + self.assertAllEqual([b"xaxbxcx", b"x1x"], stripped) + + def 
testInvalidPattern(self): + values = ["abc", "1"] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + invalid_pattern = "A[" + replace = string_ops.regex_replace(input_vector, invalid_pattern, "x") + with self.assertRaisesOpError("Invalid pattern"): + replace.eval() + + def testGlobal(self): + values = ["ababababab", "abcabcabc", ""] + with self.test_session(): + input_vector = constant_op.constant(values, dtypes.string) + stripped = string_ops.regex_replace(input_vector, "ab", "abc", + True).eval() + self.assertAllEqual([b"abcabcabcabcabc", b"abccabccabcc", b""], stripped) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/python/ops/string_ops.py b/tensorflow/python/ops/string_ops.py index 0335d2456a..5bd75b9215 100644 --- a/tensorflow/python/ops/string_ops.py +++ b/tensorflow/python/ops/string_ops.py @@ -17,6 +17,7 @@ See the @{$python/string_ops} guide. +@@regex_replace @@string_to_hash_bucket_fast @@string_to_hash_bucket_strong @@string_to_hash_bucket @@ -139,6 +140,7 @@ def reduce_join(inputs, axis=None, reduce_join.__doc__ = deprecation.rewrite_argument_docstring( gen_string_ops.reduce_join.__doc__, "reduction_indices", "axis") +ops.NotDifferentiable("RegexReplace") ops.NotDifferentiable("StringToHashBucket") ops.NotDifferentiable("StringToHashBucketFast") ops.NotDifferentiable("StringToHashBucketStrong") diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 2333736583..8c9e7af89b 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -1600,6 +1600,10 @@ tf_module { name: "reduce_sum" argspec: "args=[\'input_tensor\', \'axis\', \'keepdims\', \'name\', \'reduction_indices\', \'keep_dims\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "regex_replace" + argspec: "args=[\'input\', \'pattern\', \'rewrite\', 
\'replace_global\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " + } member_method { name: "register_tensor_conversion_function" argspec: "args=[\'base_type\', \'conversion_func\', \'priority\'], varargs=None, keywords=None, defaults=[\'100\'], " -- GitLab From 8a06526e9ac4cd47c14975bd75640966bd11daf9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:18:11 -0800 Subject: [PATCH 277/884] Update ops-related pbtxt files. PiperOrigin-RevId: 187468981 --- .../core/ops/compat/ops_history.v1.pbtxt | 26 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 26 +++++++++++++++++++ 2 files changed, 52 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index dddde1624a..35c49658b3 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -37666,6 +37666,32 @@ op { } allows_uninitialized_input: true } +op { + name: "RegexReplace" + input_arg { + name: "input" + type: DT_STRING + } + input_arg { + name: "pattern" + type: DT_STRING + } + input_arg { + name: "rewrite" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_STRING + } + attr { + name: "replace_global" + type: "bool" + default_value { + b: true + } + } +} op { name: "Relu" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index 55be0519a7..bf7682712c 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -19353,6 +19353,32 @@ op { } allows_uninitialized_input: true } +op { + name: "RegexReplace" + input_arg { + name: "input" + type: DT_STRING + } + input_arg { + name: "pattern" + type: DT_STRING + } + input_arg { + name: "rewrite" + type: DT_STRING + } + output_arg { + name: "output" + type: DT_STRING + } + attr { + name: "replace_global" + type: "bool" + default_value { + b: true + } + } +} op { name: "Relu" input_arg { -- GitLab From 
166980803009ec4577806b4437579159f5e9dd5a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:25:38 -0800 Subject: [PATCH 278/884] Support 0 size literals in Literal::Slice PiperOrigin-RevId: 187469563 --- tensorflow/compiler/xla/literal_util.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index c3eb8caa57..a345e95a8b 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -813,7 +813,7 @@ std::unique_ptr<Literal> Literal::Slice( CHECK_GE(start_indices[dnum], 0); CHECK_LE(limit_indices[dnum], shape().dimensions(dnum)); int64 dimension = limit_indices[dnum] - start_indices[dnum]; - CHECK_GT(dimension, 0); + CHECK_GE(dimension, 0); result_dimensions.push_back(dimension); } const auto result_shape = -- GitLab From bf048d60fbf68fd731df6b2f2ff36a5722b73bb8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 06:45:58 -0800 Subject: [PATCH 279/884] Go: Update generated wrapper functions for TensorFlow ops. PiperOrigin-RevId: 187471483 --- tensorflow/go/op/wrappers.go | 1486 +++++++++++++++++----------------- 1 file changed, 743 insertions(+), 743 deletions(-) diff --git a/tensorflow/go/op/wrappers.go b/tensorflow/go/op/wrappers.go index d9e684a661..336df7c2f7 100644 --- a/tensorflow/go/op/wrappers.go +++ b/tensorflow/go/op/wrappers.go @@ -384,122 +384,6 @@ func FakeQuantWithMinMaxVarsGradient(scope *Scope, gradients tf.Output, inputs t return op.Output(0), op.Output(1), op.Output(2) } -// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. -type MutableHashTableOfTensorsV2Attr func(optionalAttr) - -// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used.
-// If not specified, defaults to "" -func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. -// If not specified, defaults to false -func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. -// If not specified, defaults to <> -func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { - return func(m optionalAttr) { - m["value_shape"] = value - } -} - -// Creates an empty hash table. -// -// This op creates a mutable hash table, specifying the type of its keys and -// values. Each value must be a vector. Data can be inserted into the table using -// the insert operations. It does not support the initialization operation. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. 
-func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MutableHashTableOfTensorsV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. -type ResourceApplyProximalAdagradAttr func(optionalAttr) - -// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If True, updating of the var and accum tensors will be protected by -// a lock; otherwise the behavior is undefined, but may exhibit less contention. -// If not specified, defaults to false -func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. -// -// accum += grad * grad -// prox_v = var - lr * grad * (1 / sqrt(accum)) -// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// l1: L1 regularization. Must be a scalar. -// l2: L2 regularization. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. 
-func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyProximalAdagrad", - Input: []tf.Input{ - var_, accum, lr, l1, l2, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) -} - // MutableHashTableV2Attr is an optional argument to MutableHashTableV2. type MutableHashTableV2Attr func(optionalAttr) @@ -564,142 +448,6 @@ func MutableHashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.Data return op.Output(0) } -// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. -type MapUnstageNoKeyAttr func(optionalAttr) - -// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// MapUnstageNoKeyContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. 
-// If not specified, defaults to "" -func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op removes and returns a random (key, value) -// -// from the underlying container. If the underlying container -// does not contain elements, the op will block until it does. -func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "MapUnstageNoKey", - Input: []tf.Input{ - indices, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - key = op.Output(idx) - if values, idx, err = makeOutputList(op, idx, "values"); err != nil { - scope.UpdateErr("MapUnstageNoKey", err) - return - } - return key, values -} - -// HashTableV2Attr is an optional argument to HashTableV2. -type HashTableV2Attr func(optionalAttr) - -// HashTableV2Container sets the optional container attribute to value. -// -// value: If non-empty, this table is placed in the given container. -// Otherwise, a default container is used. -// If not specified, defaults to "" -func HashTableV2Container(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// HashTableV2SharedName sets the optional shared_name attribute to value. -// -// value: If non-empty, this table is shared under the given name across -// multiple sessions. -// If not specified, defaults to "" -func HashTableV2SharedName(value string) HashTableV2Attr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. 
-// -// value: If true and shared_name is empty, the table is shared -// using the node name. -// If not specified, defaults to false -func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { - return func(m optionalAttr) { - m["use_node_name_sharing"] = value - } -} - -// Creates a non-initialized hash table. -// -// This op creates a hash table, specifying the type of its keys and values. -// Before using the table you will have to initialize it. After initialization the -// table will be immutable. -// -// Arguments: -// key_dtype: Type of the table keys. -// value_dtype: Type of the table values. -// -// Returns Handle to a table. -func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "HashTableV2", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // Replaces the contents of the table with the specified keys and values. // // The tensor `keys` must be of the same type as the keys of the table. @@ -5642,113 +5390,8 @@ func QuantizedReluX(scope *Scope, features tf.Output, max_value tf.Output, min_f return op.Output(0), op.Output(1), op.Output(2) } -// SummaryWriterAttr is an optional argument to SummaryWriter. -type SummaryWriterAttr func(optionalAttr) - -// SummaryWriterSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func SummaryWriterSharedName(value string) SummaryWriterAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// SummaryWriterContainer sets the optional container attribute to value. 
-// If not specified, defaults to "" -func SummaryWriterContainer(value string) SummaryWriterAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// Returns a handle to be used to access a summary writer. -// -// The summary writer is an in-graph resource which can be used by ops to write -// summaries to event files. -// -// Returns the summary writer resource. Scalar handle. -func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "SummaryWriter", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes gradients for SparseSegmentMean. -// -// Returns tensor "output" with same shape as grad, except for dimension 0 whose -// value is output_dim0. -// -// Arguments: -// grad: gradient propagated to the SparseSegmentMean op. -// indices: indices passed to the corresponding SparseSegmentMean op. -// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. -// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. -func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSegmentMeanGrad", - Input: []tf.Input{ - grad, indices, segment_ids, output_dim0, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Applies softmax to a batched N-D `SparseTensor`. -// -// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` -// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. 
-// -// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost -// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly -// zero elements do not participate*. Specifically, the algorithm is equivalent -// to the following: -// -// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix -// with shape `[B, C]`, along the size-C dimension; -// (2) Masks out the original implicitly-zero locations; -// (3) Renormalizes the remaining elements. -// -// Hence, the `SparseTensor` result has exactly the same non-zero indices and -// shape. -// -// Arguments: -// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a -// SparseTensor, in canonical ordering. -// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. -// sp_shape: 1-D. Shape of the input SparseTensor. -// -// Returns 1-D. The `NNZ` values for the result `SparseTensor`. -func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { - if scope.Err() != nil { - return - } - opspec := tf.OpSpec{ - Type: "SparseSoftmax", - Input: []tf.Input{ - sp_indices, sp_values, sp_shape, - }, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - -// RandomPoissonAttr is an optional argument to RandomPoisson. -type RandomPoissonAttr func(optionalAttr) +// RandomPoissonAttr is an optional argument to RandomPoisson. +type RandomPoissonAttr func(optionalAttr) // RandomPoissonSeed sets the optional seed attribute to value. // If not specified, defaults to 0 @@ -7025,67 +6668,32 @@ func DestroyResourceOp(scope *Scope, resource tf.Output, optional ...DestroyReso return scope.AddOperation(opspec) } -// CumprodAttr is an optional argument to Cumprod. -type CumprodAttr func(optionalAttr) +// SummaryWriterAttr is an optional argument to SummaryWriter. +type SummaryWriterAttr func(optionalAttr) -// CumprodExclusive sets the optional exclusive attribute to value. 
-// -// value: If `True`, perform exclusive cumprod. -// If not specified, defaults to false -func CumprodExclusive(value bool) CumprodAttr { +// SummaryWriterSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func SummaryWriterSharedName(value string) SummaryWriterAttr { return func(m optionalAttr) { - m["exclusive"] = value + m["shared_name"] = value } } -// CumprodReverse sets the optional reverse attribute to value. -// -// value: A `bool` (default: False). -// If not specified, defaults to false -func CumprodReverse(value bool) CumprodAttr { +// SummaryWriterContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func SummaryWriterContainer(value string) SummaryWriterAttr { return func(m optionalAttr) { - m["reverse"] = value + m["container"] = value } } -// Compute the cumulative product of the tensor `x` along `axis`. -// -// By default, this op performs an inclusive cumprod, which means that the first -// element of the input is identical to the first element of the output: -// -// ```python -// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] -// ``` -// -// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is -// performed instead: -// -// ```python -// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] -// ``` -// -// By setting the `reverse` kwarg to `True`, the cumprod is performed in the -// opposite direction: -// -// ```python -// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] -// ``` -// -// This is more efficient than using separate `tf.reverse` ops. -// -// The `reverse` and `exclusive` kwargs can also be combined: +// Returns a handle to be used to access a summary writer. // -// ```python -// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] -// ``` +// The summary writer is an in-graph resource which can be used by ops to write +// summaries to event files. // -// Arguments: -// x: A `Tensor`. 
Must be one of the following types: `float32`, `float64`, -// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, -// `complex128`, `qint8`, `quint8`, `qint32`, `half`. -// axis: A `Tensor` of type `int32` (default: 0). Must be in the range -// `[-rank(x), rank(x))`. -func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { +// Returns the summary writer resource. Scalar handle. +func SummaryWriter(scope *Scope, optional ...SummaryWriterAttr) (writer tf.Output) { if scope.Err() != nil { return } @@ -7094,93 +6702,347 @@ func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) a(attrs) } opspec := tf.OpSpec{ - Type: "Cumprod", - Input: []tf.Input{ - x, axis, - }, + Type: "SummaryWriter", + Attrs: attrs, } op := scope.AddOperation(opspec) return op.Output(0) } -// Computes the mean along segments of a tensor. -// -// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of -// segments. -// -// Computes a tensor such that -// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is -// over `j` such that `segment_ids[j] == i` and `N` is the total number of -// values summed. -// -// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// Computes gradients for SparseSegmentMean. // -//
-// -//
+// Returns tensor "output" with same shape as grad, except for dimension 0 whose +// value is output_dim0. // // Arguments: -// -// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s -// first dimension. Values should be sorted and can be repeated. -// -// Returns Has same shape as data, except for dimension 0 which -// has size `k`, the number of segments. -func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { +// grad: gradient propagated to the SparseSegmentMean op. +// indices: indices passed to the corresponding SparseSegmentMean op. +// segment_ids: segment_ids passed to the corresponding SparseSegmentMean op. +// output_dim0: dimension 0 of "data" passed to SparseSegmentMean op. +func SparseSegmentMeanGrad(scope *Scope, grad tf.Output, indices tf.Output, segment_ids tf.Output, output_dim0 tf.Output) (output tf.Output) { if scope.Err() != nil { return } opspec := tf.OpSpec{ - Type: "SegmentMean", + Type: "SparseSegmentMeanGrad", Input: []tf.Input{ - data, segment_ids, + grad, indices, segment_ids, output_dim0, }, } op := scope.AddOperation(opspec) return op.Output(0) } -// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. -type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) - -// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var, mg, ms, and mom tensors is -// protected by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the centered RMSProp algorithm. +// Applies softmax to a batched N-D `SparseTensor`. 
// -// The centered RMSProp algorithm uses an estimate of the centered second moment -// (i.e., the variance) for normalization, as opposed to regular RMSProp, which -// uses the (uncentered) second moment. This often helps with training, but is -// slightly more expensive in terms of computation and memory. +// The inputs represent an N-D SparseTensor with logical shape `[..., B, C]` +// (where `N >= 2`), and with indices sorted in the canonical lexicographic order. // -// Note that in dense implementation of this algorithm, mg, ms, and mom will -// update even if the grad is zero, but in this sparse implementation, mg, ms, -// and mom will not update in iterations during which the grad is zero. +// This op is equivalent to applying the normal `tf.nn.softmax()` to each innermost +// logical submatrix with shape `[B, C]`, but with the catch that *the implicitly +// zero elements do not participate*. Specifically, the algorithm is equivalent +// to the following: // -// mean_square = decay * mean_square + (1-decay) * gradient ** 2 -// mean_grad = decay * mean_grad + (1-decay) * gradient -// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// (1) Applies `tf.nn.softmax()` to a densified view of each innermost submatrix +// with shape `[B, C]`, along the size-C dimension; +// (2) Masks out the original implicitly-zero locations; +// (3) Renormalizes the remaining elements. // -// ms <- rho * ms_{t-1} + (1-rho) * grad * grad -// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) -// var <- var - mom +// Hence, the `SparseTensor` result has exactly the same non-zero indices and +// shape. // // Arguments: -// var_: Should be from a Variable(). -// mg: Should be from a Variable(). -// ms: Should be from a Variable(). -// mom: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. +// sp_indices: 2-D. `NNZ x R` matrix with the indices of non-empty values in a +// SparseTensor, in canonical ordering. 
+// sp_values: 1-D. `NNZ` non-empty values corresponding to `sp_indices`. +// sp_shape: 1-D. Shape of the input SparseTensor. +// +// Returns 1-D. The `NNZ` values for the result `SparseTensor`. +func SparseSoftmax(scope *Scope, sp_indices tf.Output, sp_values tf.Output, sp_shape tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SparseSoftmax", + Input: []tf.Input{ + sp_indices, sp_values, sp_shape, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Partitions `data` into `num_partitions` tensors using indices from `partitions`. +// +// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` +// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` +// are placed in `outputs[i]` in lexicographic order of `js`, and the first +// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. +// In detail, +// +// ```python +// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] +// +// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) +// ``` +// +// `data.shape` must start with `partitions.shape`. +// +// For example: +// +// ```python +// # Scalar partitions. +// partitions = 1 +// num_partitions = 2 +// data = [10, 20] +// outputs[0] = [] # Empty with shape [0, 2] +// outputs[1] = [[10, 20]] +// +// # Vector partitions. +// partitions = [0, 0, 1, 1, 0] +// num_partitions = 2 +// data = [10, 20, 30, 40, 50] +// outputs[0] = [10, 20, 50] +// outputs[1] = [30, 40] +// ``` +// +// See `dynamic_stitch` for an example on how to merge partitions back. +// +//
+// +//
+// +// Arguments: +// +// partitions: Any shape. Indices in the range `[0, num_partitions)`. +// num_partitions: The number of partitions to output. +func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"num_partitions": num_partitions} + opspec := tf.OpSpec{ + Type: "DynamicPartition", + Input: []tf.Input{ + data, partitions, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + if scope.Err() != nil { + return + } + var idx int + var err error + if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { + scope.UpdateErr("DynamicPartition", err) + return + } + return outputs +} + +// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. +type ResourceApplyAdagradAttr func(optionalAttr) + +// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var and accum tensors will be protected +// by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the adagrad scheme. +// +// accum += grad * grad +// var -= lr * grad * (1 / sqrt(accum)) +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. 
+func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyAdagrad", + Input: []tf.Input{ + var_, accum, lr, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// CumprodAttr is an optional argument to Cumprod. +type CumprodAttr func(optionalAttr) + +// CumprodExclusive sets the optional exclusive attribute to value. +// +// value: If `True`, perform exclusive cumprod. +// If not specified, defaults to false +func CumprodExclusive(value bool) CumprodAttr { + return func(m optionalAttr) { + m["exclusive"] = value + } +} + +// CumprodReverse sets the optional reverse attribute to value. +// +// value: A `bool` (default: False). +// If not specified, defaults to false +func CumprodReverse(value bool) CumprodAttr { + return func(m optionalAttr) { + m["reverse"] = value + } +} + +// Compute the cumulative product of the tensor `x` along `axis`. +// +// By default, this op performs an inclusive cumprod, which means that the first +// element of the input is identical to the first element of the output: +// +// ```python +// tf.cumprod([a, b, c]) # => [a, a * b, a * b * c] +// ``` +// +// By setting the `exclusive` kwarg to `True`, an exclusive cumprod is +// performed instead: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True) # => [1, a, a * b] +// ``` +// +// By setting the `reverse` kwarg to `True`, the cumprod is performed in the +// opposite direction: +// +// ```python +// tf.cumprod([a, b, c], reverse=True) # => [a * b * c, b * c, c] +// ``` +// +// This is more efficient than using separate `tf.reverse` ops. 
+// +// The `reverse` and `exclusive` kwargs can also be combined: +// +// ```python +// tf.cumprod([a, b, c], exclusive=True, reverse=True) # => [b * c, c, 1] +// ``` +// +// Arguments: +// x: A `Tensor`. Must be one of the following types: `float32`, `float64`, +// `int64`, `int32`, `uint8`, `uint16`, `int16`, `int8`, `complex64`, +// `complex128`, `qint8`, `quint8`, `qint32`, `half`. +// axis: A `Tensor` of type `int32` (default: 0). Must be in the range +// `[-rank(x), rank(x))`. +func Cumprod(scope *Scope, x tf.Output, axis tf.Output, optional ...CumprodAttr) (out tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "Cumprod", + Input: []tf.Input{ + x, axis, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the mean along segments of a tensor. +// +// Read @{$math_ops#segmentation$the section on segmentation} for an explanation of +// segments. +// +// Computes a tensor such that +// \\(output_i = \frac{\sum_j data_j}{N}\\) where `mean` is +// over `j` such that `segment_ids[j] == i` and `N` is the total number of +// values summed. +// +// If the mean is empty for a given segment ID `i`, `output[i] = 0`. +// +//
+// +//
+// +// Arguments: +// +// segment_ids: A 1-D tensor whose rank is equal to the rank of `data`'s +// first dimension. Values should be sorted and can be repeated. +// +// Returns Has same shape as data, except for dimension 0 which +// has size `k`, the number of segments. +func SegmentMean(scope *Scope, data tf.Output, segment_ids tf.Output) (output tf.Output) { + if scope.Err() != nil { + return + } + opspec := tf.OpSpec{ + Type: "SegmentMean", + Input: []tf.Input{ + data, segment_ids, + }, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceSparseApplyCenteredRMSPropAttr is an optional argument to ResourceSparseApplyCenteredRMSProp. +type ResourceSparseApplyCenteredRMSPropAttr func(optionalAttr) + +// ResourceSparseApplyCenteredRMSPropUseLocking sets the optional use_locking attribute to value. +// +// value: If `True`, updating of the var, mg, ms, and mom tensors is +// protected by a lock; otherwise the behavior is undefined, but may exhibit less +// contention. +// If not specified, defaults to false +func ResourceSparseApplyCenteredRMSPropUseLocking(value bool) ResourceSparseApplyCenteredRMSPropAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' according to the centered RMSProp algorithm. +// +// The centered RMSProp algorithm uses an estimate of the centered second moment +// (i.e., the variance) for normalization, as opposed to regular RMSProp, which +// uses the (uncentered) second moment. This often helps with training, but is +// slightly more expensive in terms of computation and memory. +// +// Note that in dense implementation of this algorithm, mg, ms, and mom will +// update even if the grad is zero, but in this sparse implementation, mg, ms, +// and mom will not update in iterations during which the grad is zero. 
+// +// mean_square = decay * mean_square + (1-decay) * gradient ** 2 +// mean_grad = decay * mean_grad + (1-decay) * gradient +// Delta = learning_rate * gradient / sqrt(mean_square + epsilon - mean_grad ** 2) +// +// ms <- rho * ms_{t-1} + (1-rho) * grad * grad +// mom <- momentum * mom_{t-1} + lr * grad / sqrt(ms + epsilon) +// var <- var - mom +// +// Arguments: +// var_: Should be from a Variable(). +// mg: Should be from a Variable(). +// ms: Should be from a Variable(). +// mom: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. // rho: Decay rate. Must be a scalar. // // epsilon: Ridge term. Must be a scalar. @@ -7909,63 +7771,6 @@ func ResourceScatterNdUpdate(scope *Scope, ref tf.Output, indices tf.Output, upd return scope.AddOperation(opspec) } -// StageSizeAttr is an optional argument to StageSize. -type StageSizeAttr func(optionalAttr) - -// StageSizeCapacity sets the optional capacity attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageSizeCapacity(value int64) StageSizeAttr { - return func(m optionalAttr) { - m["capacity"] = value - } -} - -// StageSizeMemoryLimit sets the optional memory_limit attribute to value. -// If not specified, defaults to 0 -// -// REQUIRES: value >= 0 -func StageSizeMemoryLimit(value int64) StageSizeAttr { - return func(m optionalAttr) { - m["memory_limit"] = value - } -} - -// StageSizeContainer sets the optional container attribute to value. -// If not specified, defaults to "" -func StageSizeContainer(value string) StageSizeAttr { - return func(m optionalAttr) { - m["container"] = value - } -} - -// StageSizeSharedName sets the optional shared_name attribute to value. -// If not specified, defaults to "" -func StageSizeSharedName(value string) StageSizeAttr { - return func(m optionalAttr) { - m["shared_name"] = value - } -} - -// Op returns the number of elements in the underlying container. 
-func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"dtypes": dtypes} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "StageSize", - - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} - // NonMaxSuppressionAttr is an optional argument to NonMaxSuppression. type NonMaxSuppressionAttr func(optionalAttr) @@ -8702,121 +8507,7 @@ func SparseReduceSum(scope *Scope, input_indices tf.Output, input_values tf.Outp Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Partitions `data` into `num_partitions` tensors using indices from `partitions`. -// -// For each index tuple `js` of size `partitions.ndim`, the slice `data[js, ...]` -// becomes part of `outputs[partitions[js]]`. The slices with `partitions[js] = i` -// are placed in `outputs[i]` in lexicographic order of `js`, and the first -// dimension of `outputs[i]` is the number of entries in `partitions` equal to `i`. -// In detail, -// -// ```python -// outputs[i].shape = [sum(partitions == i)] + data.shape[partitions.ndim:] -// -// outputs[i] = pack([data[js, ...] for js if partitions[js] == i]) -// ``` -// -// `data.shape` must start with `partitions.shape`. -// -// For example: -// -// ```python -// # Scalar partitions. -// partitions = 1 -// num_partitions = 2 -// data = [10, 20] -// outputs[0] = [] # Empty with shape [0, 2] -// outputs[1] = [[10, 20]] -// -// # Vector partitions. -// partitions = [0, 0, 1, 1, 0] -// num_partitions = 2 -// data = [10, 20, 30, 40, 50] -// outputs[0] = [10, 20, 50] -// outputs[1] = [30, 40] -// ``` -// -// See `dynamic_stitch` for an example on how to merge partitions back. -// -//
-// -//
-// -// Arguments: -// -// partitions: Any shape. Indices in the range `[0, num_partitions)`. -// num_partitions: The number of partitions to output. -func DynamicPartition(scope *Scope, data tf.Output, partitions tf.Output, num_partitions int64) (outputs []tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{"num_partitions": num_partitions} - opspec := tf.OpSpec{ - Type: "DynamicPartition", - Input: []tf.Input{ - data, partitions, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - if scope.Err() != nil { - return - } - var idx int - var err error - if outputs, idx, err = makeOutputList(op, idx, "outputs"); err != nil { - scope.UpdateErr("DynamicPartition", err) - return - } - return outputs -} - -// ResourceApplyAdagradAttr is an optional argument to ResourceApplyAdagrad. -type ResourceApplyAdagradAttr func(optionalAttr) - -// ResourceApplyAdagradUseLocking sets the optional use_locking attribute to value. -// -// value: If `True`, updating of the var and accum tensors will be protected -// by a lock; otherwise the behavior is undefined, but may exhibit less -// contention. -// If not specified, defaults to false -func ResourceApplyAdagradUseLocking(value bool) ResourceApplyAdagradAttr { - return func(m optionalAttr) { - m["use_locking"] = value - } -} - -// Update '*var' according to the adagrad scheme. -// -// accum += grad * grad -// var -= lr * grad * (1 / sqrt(accum)) -// -// Arguments: -// var_: Should be from a Variable(). -// accum: Should be from a Variable(). -// lr: Scaling factor. Must be a scalar. -// grad: The gradient. -// -// Returns the created operation. 
-func ResourceApplyAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, grad tf.Output, optional ...ResourceApplyAdagradAttr) (o *tf.Operation) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "ResourceApplyAdagrad", - Input: []tf.Input{ - var_, accum, lr, grad, - }, - Attrs: attrs, - } - return scope.AddOperation(opspec) + return op.Output(0) } // Returns element-wise remainder of division. This emulates C semantics in that @@ -9482,83 +9173,335 @@ func TensorArrayV3(scope *Scope, size tf.Output, dtype tf.DataType, optional ... Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0), op.Output(1) + return op.Output(0), op.Output(1) +} + +// MaxPool3DAttr is an optional argument to MaxPool3D. +type MaxPool3DAttr func(optionalAttr) + +// MaxPool3DDataFormat sets the optional data_format attribute to value. +// +// value: The data format of the input and output data. With the +// default format "NDHWC", the data is stored in the order of: +// [batch, in_depth, in_height, in_width, in_channels]. +// Alternatively, the format could be "NCDHW", the data storage order is: +// [batch, in_channels, in_depth, in_height, in_width]. +// If not specified, defaults to "NDHWC" +func MaxPool3DDataFormat(value string) MaxPool3DAttr { + return func(m optionalAttr) { + m["data_format"] = value + } +} + +// Performs 3D max pooling on the input. +// +// Arguments: +// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. +// ksize: 1-D tensor of length 5. The size of the window for each dimension of +// the input tensor. Must have `ksize[0] = ksize[4] = 1`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +// +// Returns The max pooled output tensor. 
+func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MaxPool3D", + Input: []tf.Input{ + input, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// Computes the gradients of 3-D convolution with respect to the input. +// +// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 +// +// Arguments: +// input: Shape `[batch, depth, rows, cols, in_channels]`. +// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. +// `in_channels` must match between `input` and `filter`. +// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, +// out_channels]`. +// strides: 1-D tensor of length 5. The stride of the sliding window for each +// dimension of `input`. Must have `strides[0] = strides[4] = 1`. +// padding: The type of padding algorithm to use. +func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"strides": strides, "padding": padding} + opspec := tf.OpSpec{ + Type: "Conv3DBackpropInput", + Input: []tf.Input{ + input, filter, out_backprop, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// ResourceApplyProximalAdagradAttr is an optional argument to ResourceApplyProximalAdagrad. +type ResourceApplyProximalAdagradAttr func(optionalAttr) + +// ResourceApplyProximalAdagradUseLocking sets the optional use_locking attribute to value. +// +// value: If True, updating of the var and accum tensors will be protected by +// a lock; otherwise the behavior is undefined, but may exhibit less contention. 
+// If not specified, defaults to false +func ResourceApplyProximalAdagradUseLocking(value bool) ResourceApplyProximalAdagradAttr { + return func(m optionalAttr) { + m["use_locking"] = value + } +} + +// Update '*var' and '*accum' according to FOBOS with Adagrad learning rate. +// +// accum += grad * grad +// prox_v = var - lr * grad * (1 / sqrt(accum)) +// var = sign(prox_v)/(1+lr*l2) * max{|prox_v|-lr*l1,0} +// +// Arguments: +// var_: Should be from a Variable(). +// accum: Should be from a Variable(). +// lr: Scaling factor. Must be a scalar. +// l1: L1 regularization. Must be a scalar. +// l2: L2 regularization. Must be a scalar. +// grad: The gradient. +// +// Returns the created operation. +func ResourceApplyProximalAdagrad(scope *Scope, var_ tf.Output, accum tf.Output, lr tf.Output, l1 tf.Output, l2 tf.Output, grad tf.Output, optional ...ResourceApplyProximalAdagradAttr) (o *tf.Operation) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "ResourceApplyProximalAdagrad", + Input: []tf.Input{ + var_, accum, lr, l1, l2, grad, + }, + Attrs: attrs, + } + return scope.AddOperation(opspec) +} + +// MutableHashTableOfTensorsV2Attr is an optional argument to MutableHashTableOfTensorsV2. +type MutableHashTableOfTensorsV2Attr func(optionalAttr) + +// MutableHashTableOfTensorsV2Container sets the optional container attribute to value. +// +// value: If non-empty, this table is placed in the given container. +// Otherwise, a default container is used. +// If not specified, defaults to "" +func MutableHashTableOfTensorsV2Container(value string) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MutableHashTableOfTensorsV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this table is shared under the given name across +// multiple sessions. 
+// If not specified, defaults to "" +func MutableHashTableOfTensorsV2SharedName(value string) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// MutableHashTableOfTensorsV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// If not specified, defaults to false +func MutableHashTableOfTensorsV2UseNodeNameSharing(value bool) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// MutableHashTableOfTensorsV2ValueShape sets the optional value_shape attribute to value. +// If not specified, defaults to <> +func MutableHashTableOfTensorsV2ValueShape(value tf.Shape) MutableHashTableOfTensorsV2Attr { + return func(m optionalAttr) { + m["value_shape"] = value + } +} + +// Creates an empty hash table. +// +// This op creates a mutable hash table, specifying the type of its keys and +// values. Each value must be a vector. Data can be inserted into the table using +// the insert operations. It does not support the initialization operation. +// +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func MutableHashTableOfTensorsV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...MutableHashTableOfTensorsV2Attr) (table_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "MutableHashTableOfTensorsV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + +// HashTableV2Attr is an optional argument to HashTableV2. +type HashTableV2Attr func(optionalAttr) + +// HashTableV2Container sets the optional container attribute to value. +// +// value: If non-empty, this table is placed in the given container. 
+// Otherwise, a default container is used. +// If not specified, defaults to "" +func HashTableV2Container(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// HashTableV2SharedName sets the optional shared_name attribute to value. +// +// value: If non-empty, this table is shared under the given name across +// multiple sessions. +// If not specified, defaults to "" +func HashTableV2SharedName(value string) HashTableV2Attr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// HashTableV2UseNodeNameSharing sets the optional use_node_name_sharing attribute to value. +// +// value: If true and shared_name is empty, the table is shared +// using the node name. +// If not specified, defaults to false +func HashTableV2UseNodeNameSharing(value bool) HashTableV2Attr { + return func(m optionalAttr) { + m["use_node_name_sharing"] = value + } +} + +// Creates a non-initialized hash table. +// +// This op creates a hash table, specifying the type of its keys and values. +// Before using the table you will have to initialize it. After initialization the +// table will be immutable. +// +// Arguments: +// key_dtype: Type of the table keys. +// value_dtype: Type of the table values. +// +// Returns Handle to a table. +func HashTableV2(scope *Scope, key_dtype tf.DataType, value_dtype tf.DataType, optional ...HashTableV2Attr) (table_handle tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"key_dtype": key_dtype, "value_dtype": value_dtype} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "HashTableV2", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) } -// MaxPool3DAttr is an optional argument to MaxPool3D. -type MaxPool3DAttr func(optionalAttr) +// MapUnstageNoKeyAttr is an optional argument to MapUnstageNoKey. 
+type MapUnstageNoKeyAttr func(optionalAttr) -// MaxPool3DDataFormat sets the optional data_format attribute to value. +// MapUnstageNoKeyCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 // -// value: The data format of the input and output data. With the -// default format "NDHWC", the data is stored in the order of: -// [batch, in_depth, in_height, in_width, in_channels]. -// Alternatively, the format could be "NCDHW", the data storage order is: -// [batch, in_channels, in_depth, in_height, in_width]. -// If not specified, defaults to "NDHWC" -func MaxPool3DDataFormat(value string) MaxPool3DAttr { +// REQUIRES: value >= 0 +func MapUnstageNoKeyCapacity(value int64) MapUnstageNoKeyAttr { return func(m optionalAttr) { - m["data_format"] = value + m["capacity"] = value } } -// Performs 3D max pooling on the input. +// MapUnstageNoKeyMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 // -// Arguments: -// input: Shape `[batch, depth, rows, cols, channels]` tensor to pool over. -// ksize: 1-D tensor of length 5. The size of the window for each dimension of -// the input tensor. Must have `ksize[0] = ksize[4] = 1`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. +// REQUIRES: value >= 0 +func MapUnstageNoKeyMemoryLimit(value int64) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// MapUnstageNoKeyContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func MapUnstageNoKeyContainer(value string) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// MapUnstageNoKeySharedName sets the optional shared_name attribute to value. 
+// If not specified, defaults to "" +func MapUnstageNoKeySharedName(value string) MapUnstageNoKeyAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op removes and returns a random (key, value) // -// Returns The max pooled output tensor. -func MaxPool3D(scope *Scope, input tf.Output, ksize []int64, strides []int64, padding string, optional ...MaxPool3DAttr) (output tf.Output) { +// from the underlying container. If the underlying container +// does not contain elements, the op will block until it does. +func MapUnstageNoKey(scope *Scope, indices tf.Output, dtypes []tf.DataType, optional ...MapUnstageNoKeyAttr) (key tf.Output, values []tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"ksize": ksize, "strides": strides, "padding": padding} + attrs := map[string]interface{}{"dtypes": dtypes} for _, a := range optional { a(attrs) } opspec := tf.OpSpec{ - Type: "MaxPool3D", + Type: "MapUnstageNoKey", Input: []tf.Input{ - input, + indices, }, Attrs: attrs, } op := scope.AddOperation(opspec) - return op.Output(0) -} - -// Computes the gradients of 3-D convolution with respect to the input. -// -// DEPRECATED at GraphDef version 10: Use Conv3DBackpropInputV2 -// -// Arguments: -// input: Shape `[batch, depth, rows, cols, in_channels]`. -// filter: Shape `[depth, rows, cols, in_channels, out_channels]`. -// `in_channels` must match between `input` and `filter`. -// out_backprop: Backprop signal of shape `[batch, out_depth, out_rows, out_cols, -// out_channels]`. -// strides: 1-D tensor of length 5. The stride of the sliding window for each -// dimension of `input`. Must have `strides[0] = strides[4] = 1`. -// padding: The type of padding algorithm to use. 
-func Conv3DBackpropInput(scope *Scope, input tf.Output, filter tf.Output, out_backprop tf.Output, strides []int64, padding string) (output tf.Output) { if scope.Err() != nil { return } - attrs := map[string]interface{}{"strides": strides, "padding": padding} - opspec := tf.OpSpec{ - Type: "Conv3DBackpropInput", - Input: []tf.Input{ - input, filter, out_backprop, - }, - Attrs: attrs, + var idx int + var err error + key = op.Output(idx) + if values, idx, err = makeOutputList(op, idx, "values"); err != nil { + scope.UpdateErr("MapUnstageNoKey", err) + return } - op := scope.AddOperation(opspec) - return op.Output(0) + return key, values } // Inverse 2D fast Fourier transform. @@ -12257,6 +12200,63 @@ func MutableDenseHashTableV2(scope *Scope, empty_key tf.Output, value_dtype tf.D return op.Output(0) } +// StageSizeAttr is an optional argument to StageSize. +type StageSizeAttr func(optionalAttr) + +// StageSizeCapacity sets the optional capacity attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageSizeCapacity(value int64) StageSizeAttr { + return func(m optionalAttr) { + m["capacity"] = value + } +} + +// StageSizeMemoryLimit sets the optional memory_limit attribute to value. +// If not specified, defaults to 0 +// +// REQUIRES: value >= 0 +func StageSizeMemoryLimit(value int64) StageSizeAttr { + return func(m optionalAttr) { + m["memory_limit"] = value + } +} + +// StageSizeContainer sets the optional container attribute to value. +// If not specified, defaults to "" +func StageSizeContainer(value string) StageSizeAttr { + return func(m optionalAttr) { + m["container"] = value + } +} + +// StageSizeSharedName sets the optional shared_name attribute to value. +// If not specified, defaults to "" +func StageSizeSharedName(value string) StageSizeAttr { + return func(m optionalAttr) { + m["shared_name"] = value + } +} + +// Op returns the number of elements in the underlying container. 
+func StageSize(scope *Scope, dtypes []tf.DataType, optional ...StageSizeAttr) (size tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{"dtypes": dtypes} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "StageSize", + + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Produces the max pool of the input tensor for quantized types. // // Arguments: @@ -12999,6 +12999,56 @@ func Neg(scope *Scope, x tf.Output) (y tf.Output) { return op.Output(0) } +// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. +type FakeQuantWithMinMaxVarsAttr func(optionalAttr) + +// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. +// If not specified, defaults to 8 +func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { + return func(m optionalAttr) { + m["num_bits"] = value + } +} + +// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. +// If not specified, defaults to false +func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { + return func(m optionalAttr) { + m["narrow_range"] = value + } +} + +// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` +// +// and `max` to 'outputs' tensor of same shape as `inputs`. +// +// `[min; max]` define the clamping range for the `inputs` data. +// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` +// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and +// then de-quantized and output as floats in `[min; max]` interval. +// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. +// +// This operation has a gradient and thus allows for training `min` and `max` +// values. 
+func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { + if scope.Err() != nil { + return + } + attrs := map[string]interface{}{} + for _, a := range optional { + a(attrs) + } + opspec := tf.OpSpec{ + Type: "FakeQuantWithMinMaxVars", + Input: []tf.Input{ + inputs, min, max, + }, + Attrs: attrs, + } + op := scope.AddOperation(opspec) + return op.Output(0) +} + // Writes a `Summary` protocol buffer with a histogram. // // The generated @@ -28230,53 +28280,3 @@ func QuantizedInstanceNorm(scope *Scope, x tf.Output, x_min tf.Output, x_max tf. op := scope.AddOperation(opspec) return op.Output(0), op.Output(1), op.Output(2) } - -// FakeQuantWithMinMaxVarsAttr is an optional argument to FakeQuantWithMinMaxVars. -type FakeQuantWithMinMaxVarsAttr func(optionalAttr) - -// FakeQuantWithMinMaxVarsNumBits sets the optional num_bits attribute to value. -// If not specified, defaults to 8 -func FakeQuantWithMinMaxVarsNumBits(value int64) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["num_bits"] = value - } -} - -// FakeQuantWithMinMaxVarsNarrowRange sets the optional narrow_range attribute to value. -// If not specified, defaults to false -func FakeQuantWithMinMaxVarsNarrowRange(value bool) FakeQuantWithMinMaxVarsAttr { - return func(m optionalAttr) { - m["narrow_range"] = value - } -} - -// Fake-quantize the 'inputs' tensor of type float via global float scalars `min` -// -// and `max` to 'outputs' tensor of same shape as `inputs`. -// -// `[min; max]` define the clamping range for the `inputs` data. -// `inputs` values are quantized into the quantization range (`[0; 2^num_bits - 1]` -// when `narrow_range` is false and `[1; 2^num_bits - 1]` when it is true) and -// then de-quantized and output as floats in `[min; max]` interval. -// `num_bits` is the bitwidth of the quantization; between 2 and 8, inclusive. 
-// -// This operation has a gradient and thus allows for training `min` and `max` -// values. -func FakeQuantWithMinMaxVars(scope *Scope, inputs tf.Output, min tf.Output, max tf.Output, optional ...FakeQuantWithMinMaxVarsAttr) (outputs tf.Output) { - if scope.Err() != nil { - return - } - attrs := map[string]interface{}{} - for _, a := range optional { - a(attrs) - } - opspec := tf.OpSpec{ - Type: "FakeQuantWithMinMaxVars", - Input: []tf.Input{ - inputs, min, max, - }, - Attrs: attrs, - } - op := scope.AddOperation(opspec) - return op.Output(0) -} -- GitLab From 5110763dc8e71ca5331144e6a837d0f3886bcbd9 Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Fri, 2 Mar 2018 00:34:36 +0800 Subject: [PATCH 280/884] Fix some minor typos in get started docs to keep consistent (#17357) --- tensorflow/docs_src/get_started/checkpoints.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/get_started/checkpoints.md b/tensorflow/docs_src/get_started/checkpoints.md index dfa2110e69..4aa07c7f2a 100644 --- a/tensorflow/docs_src/get_started/checkpoints.md +++ b/tensorflow/docs_src/get_started/checkpoints.md @@ -154,7 +154,7 @@ classifier = tf.estimator.DNNClassifier( The first time you call an Estimator's `train` method, TensorFlow saves a checkpoint to the `model_dir`. Each subsequent call to the Estimator's -`train`, `eval`, or `predict` method causes the following: +`train`, `evaluate`, or `predict` method causes the following: 1. The Estimator builds the model's [graph](https://developers.google.com/machine-learning/glossary/#graph) @@ -222,7 +222,7 @@ does not match the shape stored in checkpoint: [20] To run experiments in which you train and compare slightly different versions of a model, save a copy of the code that created each -`model-dir`, possibly by creating a separate git branch for each version. +`model_dir`, possibly by creating a separate git branch for each version. This separation will keep your checkpoints recoverable. 
## Summary -- GitLab From 873768ca8e9eebb1e0985b6fd4fe8d56ad2389ff Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Thu, 1 Mar 2018 08:41:55 -0800 Subject: [PATCH 281/884] Fix link text PiperOrigin-RevId: 187483166 --- tensorflow/docs_src/performance/quantization.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/performance/quantization.md b/tensorflow/docs_src/performance/quantization.md index 63448c2ebe..411889cb1c 100644 --- a/tensorflow/docs_src/performance/quantization.md +++ b/tensorflow/docs_src/performance/quantization.md @@ -80,8 +80,8 @@ need for a separate calibration step. TensorFlow can train models with quantization in the loop. Because training requires small gradient adjustments, floating point values are still used. To keep models as floating point while adding the quantization error in the training -loop, @{$array_ops#Fake_quantization} nodes simulate the effect of quantization -in the forward and backward passes. +loop, @{$array_ops#Fake_quantization$fake quantization} nodes simulate the +effect of quantization in the forward and backward passes. Since it's difficult to add these fake quantization operations to all the required locations in the model, there's a function available that rewrites the -- GitLab From 88a13b85c9559e1a14e25f36c26fb4f95fd63dde Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 1 Mar 2018 08:44:45 -0800 Subject: [PATCH 282/884] [XLA] Fix signatures of c_foo functions and add c_any_of. Embarrassingly, we were often copying the container in c_foo. Oops. This fixes that, and also adds some perfect forwarding that was missing. It also adds a c_any_of function. 
PiperOrigin-RevId: 187483435 --- tensorflow/compiler/xla/util.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index e14c8cefa1..82e5a59da0 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -427,30 +427,37 @@ std::vector> CommonFactors( string SanitizeFileName(string file_name); template -bool c_all_of(Container container, Predicate&& predicate) { +bool c_all_of(const Container& container, Predicate&& predicate) { return std::all_of(std::begin(container), std::end(container), std::forward(predicate)); } +template +bool c_any_of(const Container& container, Predicate&& predicate) { + return std::any_of(std::begin(container), std::end(container), + std::forward(predicate)); +} + template -OutputIterator c_transform(InputContainer input_container, +OutputIterator c_transform(const InputContainer& input_container, OutputIterator output_iterator, - UnaryOperation unary_op) { + UnaryOperation&& unary_op) { return std::transform(std::begin(input_container), std::end(input_container), - output_iterator, unary_op); + output_iterator, + std::forward(unary_op)); } template -OutputIterator c_copy_if(InputContainer input_container, +OutputIterator c_copy_if(const InputContainer& input_container, OutputIterator output_iterator, - UnaryPredicate predicate) { + UnaryPredicate&& predicate) { return std::copy_if(std::begin(input_container), std::end(input_container), - output_iterator, predicate); + output_iterator, std::forward(predicate)); } template -OutputIterator c_copy(InputContainer input_container, +OutputIterator c_copy(const InputContainer& input_container, OutputIterator output_iterator) { return std::copy(std::begin(input_container), std::end(input_container), output_iterator); @@ -468,7 +475,7 @@ void c_sort(InputContainer& input_container, Comparator&& comparator) { } template -bool c_binary_search(Sequence& sequence, T&& value) 
{ +bool c_binary_search(const Sequence& sequence, T&& value) { return std::binary_search(std::begin(sequence), std::end(sequence), std::forward(value)); } -- GitLab From c4cc731f4f92f76dfd5f09b87c9c4acbabaace46 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 08:55:41 -0800 Subject: [PATCH 283/884] Fix TF doc style. PiperOrigin-RevId: 187484534 --- tensorflow/docs_src/community/roadmap.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/docs_src/community/roadmap.md b/tensorflow/docs_src/community/roadmap.md index 1f934acab6..a3170a10f2 100644 --- a/tensorflow/docs_src/community/roadmap.md +++ b/tensorflow/docs_src/community/roadmap.md @@ -75,8 +75,7 @@ across image recognition, speech, object detection, and ### Community and Partner Engagement #### Special Interest Groups: * Mobilizing the community to work together in focused domains -* [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute) -: build and packaging of TensorFlow +* [tf-distribute](https://groups.google.com/a/tensorflow.org/forum/#!forum/tf-distribute): build and packaging of TensorFlow * More to be identified and launched #### Community: -- GitLab From 03de984caa1f1403d4417357b67e96dfb7edbc3e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 09:10:17 -0800 Subject: [PATCH 284/884] Correct struct array initialization syntax. 
PiperOrigin-RevId: 187486332 --- tensorflow/python/eager/pywrap_tensor.cc | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 3ec2109d32..d3aaede749 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -520,16 +520,11 @@ PyTypeObject* EagerTensorType = nullptr; #if PY_MAJOR_VERSION >= 3 static PyType_Slot EagerTensor_Type_slots[] = { - Py_tp_dealloc, - reinterpret_cast(EagerTensor_dealloc), - Py_tp_methods, - reinterpret_cast(EagerTensor_methods), - Py_tp_getset, - reinterpret_cast(EagerTensor_getseters), - Py_tp_init, - reinterpret_cast(EagerTensor_init), - 0, - nullptr, + {Py_tp_dealloc, reinterpret_cast(EagerTensor_dealloc)}, + {Py_tp_methods, reinterpret_cast(EagerTensor_methods)}, + {Py_tp_getset, reinterpret_cast(EagerTensor_getseters)}, + {Py_tp_init, reinterpret_cast(EagerTensor_init)}, + {0, nullptr}, }; PyType_Spec EagerTensor_Type_spec = {"EagerTensor", sizeof(EagerTensor), 0, -- GitLab From c65343d282cdf5ccf4f7d3229f6c492fec344f8d Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 1 Mar 2018 09:27:57 -0800 Subject: [PATCH 285/884] Keep track of eager op device for tensor handles. Force-colocates ops using resources with the resources. 
PiperOrigin-RevId: 187488175 --- tensorflow/c/eager/c_api.cc | 37 +++++++++++++++++++++------ tensorflow/c/eager/c_api_internal.h | 10 ++++++-- tensorflow/python/eager/core_test.py | 14 ++++++++++ tensorflow/python/lib/core/py_func.cc | 5 ++-- 4 files changed, 54 insertions(+), 12 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 29c709b06d..252ceab54a 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -159,7 +159,7 @@ TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { tensorflow::Tensor tensor; status->status = tensorflow::TF_TensorToTensor(t, &tensor); if (!status->status.ok()) return nullptr; - return new TFE_TensorHandle(tensor, nullptr); + return new TFE_TensorHandle(tensor, nullptr, nullptr); } void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { delete h; } @@ -222,7 +222,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, // has device type XLA_CPU, and the other CPU. const bool both_on_cpu = src_cpu && dst_cpu; if (is_same_device || both_on_cpu) { - return new TFE_TensorHandle(h->t, dst_cpu ? nullptr : dstd); + dstd = dst_cpu ? nullptr : dstd; + return new TFE_TensorHandle(h->t, dstd, dstd); } tensorflow::Tensor* src = &(h->t); if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT && @@ -241,7 +242,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, } tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape()); if (src->shape().num_elements() == 0) { - return new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd); + dstd = dst_cpu ? nullptr : dstd; + return new TFE_TensorHandle(dst, dstd, dstd); } tensorflow::DeviceContext* src_device_context = nullptr; if (!src_cpu) { @@ -269,7 +271,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, }); n.WaitForNotification(); return (TF_GetCode(status) == TF_OK) - ? new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd) + ? new TFE_TensorHandle(dst, dst_cpu ? 
nullptr : dstd, + dst_cpu ? nullptr : dstd) : nullptr; } @@ -325,6 +328,7 @@ void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { if (!status->status.ok()) return; op->inputs.push_back(h->t); op->input_devices.push_back(h->d); + op->input_op_devices.push_back(h->op_device); op->attrs.NumInputs(op->inputs.size()); } @@ -540,7 +544,8 @@ tensorflow::Status ValidateInputTypeAndPlacement( } // We are only here if the policy is warn or silent copies, so we should // trigger a copy. - TFE_TensorHandle original{op->inputs[i], op->input_devices[i]}; + TFE_TensorHandle original{op->inputs[i], op->input_devices[i], + op->device}; TF_Status* s = TF_NewStatus(); TFE_TensorHandle* copied_tensor = TFE_TensorHandleCopyToDevice( &original, ctx, expected_device->name().c_str(), s); @@ -744,6 +749,7 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { // via `op_input_to_func_input`, adjust the actual inputs accordingly. launch_op->inputs = op->inputs; launch_op->input_devices = op->input_devices; + launch_op->input_op_devices = op->input_op_devices; if (!op_input_to_func_input.empty()) { DCHECK_EQ(op->inputs.size(), op_input_to_func_input.size()); if (!op->input_devices.empty()) { @@ -832,9 +838,24 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, op = xla_launch_op.get(); } #endif // TENSORFLOW_EAGER_USE_XLA - TFE_Context* ctx = op->ctx; tensorflow::Device* device = op->device; + // Ensure all resource-touching ops run in the device the resource is, + // regardless of anything else that has been specified. This is identical to + // the graph mode behavior. + for (int i = 0; i < op->inputs.size(); ++i) { + if (op->inputs[i].dtype() == tensorflow::DT_RESOURCE && + op->input_op_devices[i] != device) { + tensorflow::Device* d = op->input_op_devices[i] == nullptr + ? 
ctx->devices()[0] + : op->input_op_devices[i]; + VLOG(1) << "Changing device of operation " << op->name << " to " + << d->name() << " because input #" << i + << " is a resource in this device."; + device = d; + op->device = d; + } + } if (!ctx->soft_placement && device == nullptr) { // TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU device = ctx->devices()[0]; @@ -968,7 +989,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, (*output_memory_types)[i] == tensorflow::HOST_MEMORY) { d = nullptr; } - retvals[i] = new TFE_TensorHandle(outputs[i], d); + retvals[i] = new TFE_TensorHandle(outputs[i], d, device); } } @@ -994,7 +1015,7 @@ void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function, } // extern "C" TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) { - return new TFE_TensorHandle(t, nullptr); + return new TFE_TensorHandle(t, nullptr, nullptr); } const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 53c21b64cb..145e4c95cf 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -101,8 +101,9 @@ struct TFE_Context { }; struct TFE_TensorHandle { - TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d) - : t(t), d(d) {} + TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d, + tensorflow::Device* op_device) + : t(t), d(d), op_device(op_device) {} tensorflow::Tensor t; // TODO(ashankar): d == nullptr iff local CPU @@ -114,6 +115,10 @@ struct TFE_TensorHandle { // TODO(ashankar): Reference count TFE_Context to ensure that 'd' of a // TFE_TensorHandle does not outlive the TFE_Context from which it came? tensorflow::Device* d; + + // Device in which the op producing this tensor was executed. Equals to d for + // constant tensors. 
+ tensorflow::Device* op_device; }; struct TFE_Op { @@ -130,6 +135,7 @@ struct TFE_Op { const tensorflow::AttrTypeMap* attr_types; std::vector inputs; std::vector input_devices; + std::vector input_op_devices; tensorflow::Device* device; bool use_xla = false; }; diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index 0e40d8a5c0..e418be5fae 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -34,7 +34,9 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import resource_variable_ops def execute(op_name, num_outputs, inputs, attrs=None): @@ -181,6 +183,18 @@ class TFETest(test_util.TensorFlowTestCase): attrs=('T', x.dtype.as_datatype_enum))[0].cpu().numpy() self.assertEqual(3, result) + def testResourceTensorPlacement(self): + if not context.context().num_gpus(): + self.skipTest('No GPUs found') + + with context.device('gpu:0'): + v = resource_variable_ops.ResourceVariable(1.0) + with context.device('cpu:0'): + # Check that even though we specified the cpu device we'll run the read op + # in the device where the handle is. 
+ self.assertAllEqual( + gen_resource_variable_ops.read_variable_op(v.handle, v.dtype), 1.0) + def testCopyBetweenDevices(self): if not context.context().num_gpus(): self.skipTest('No GPUs found') diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index e0422ef80a..343415b264 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -79,10 +79,11 @@ Status MakeArgTuple(const PyCall* call, PyObject** tuple) { const Tensor& t = call->ins[i]; if (call->eager) { if (call->gpu) { - arg = EagerTensorFromHandle(new TFE_TensorHandle(t, call->device)); + arg = EagerTensorFromHandle( + new TFE_TensorHandle(t, call->device, call->device)); } else { // TFE_TensorHandle assumes that CPU is identified by `nullptr`. - arg = EagerTensorFromHandle(new TFE_TensorHandle(t, nullptr)); + arg = EagerTensorFromHandle(new TFE_TensorHandle(t, nullptr, nullptr)); } if (arg == nullptr) { return errors::Internal("Unable to procure EagerTensor from Tensor."); -- GitLab From 2c4eca575e1fc36c7b2f1d1c312426ff4c4cec16 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Thu, 1 Mar 2018 09:31:20 -0800 Subject: [PATCH 286/884] [XLA] Don't dump the "contents" of constants with a zero-sized dimension in the HLO graph dumper. Previously we'd dump e.g. "{ {}, {}, ... }" for an f32[100, 0], which is just noise. 
PiperOrigin-RevId: 187488625 --- tensorflow/compiler/xla/service/hlo_graph_dumper.cc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 2861fec39e..99c4932a38 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -782,6 +782,14 @@ string HloDotDumper::GetInstructionNodeInlinedOperands( auto stringify_constant = [](const HloInstruction* constant) { const auto& shape = constant->shape(); + // If the shape has a dimension of size zero, print it as e.g. + // "{} (f32[42, 0, 10])". The alternative, calling Literal::ToString(), + // enumerates all of its empty dimensions (e.g. "{ { {}, {} }, ..."), which + // is just noise. + if (ShapeUtil::HasZeroElements(shape)) { + return Printf("{} (%s)", ShapeUtil::HumanString(constant->shape())); + } + // Print the literal value of constants with <= K elements. optional elem_count; if (!ShapeUtil::IsOpaque(shape) && !ShapeUtil::IsTuple(shape)) { -- GitLab From 7129d6a0746d0798e0a3015f645697b0fee12c37 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 09:52:00 -0800 Subject: [PATCH 287/884] Fixed tf.reduce_sum usage on 2-D tensors. 
PiperOrigin-RevId: 187491311 --- .../resolve_constant_unary.cc | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index f227554bc5..d96b3d522d 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -138,12 +138,32 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { memcpy(output_float_data.data(), (*input_float_data).data(), output_buffer_size * sizeof(output_float_data[0])); } else if (unary_op->type == OperatorType::kTensorFlowSum) { - // At the moment only full reduction across all dimensions is supported. - float sum = 0.f; - for (int i = 0; i < input_buffer_size; i++) { - sum += (*input_float_data)[i]; + CHECK_EQ(unary_op->inputs.size(), 2) << "Sum needs 2 inputs"; + if (!IsConstantParameterArray(*model, unary_op->inputs[1])) { + AddMessageF("Axis input is non-constant"); + return false; } - for (int i = 0; i < output_buffer_size; ++i) { + auto& axis_array = model->GetArray(unary_op->inputs[1]); + CHECK(axis_array.data_type == ArrayDataType::kInt32); + int axis = axis_array.GetBuffer().data[0]; + CHECK_LT(axis, input_shape.dimensions_count()) << "Axis out of bounds"; + + // We currently only handle reduction on axis 0. + CHECK_EQ(axis, 0) << "Only reduction along axis 0 is supported"; + // We currently only handle 1-D and 2-D input tensors. + CHECK_LE(input_shape.dimensions_count(), 2) << "Rank >2 not yet supported"; + // We only support keep_dims=true; shape prop will need to change otherwise. 
+ auto sum_op = static_cast(unary_op); + CHECK(sum_op->keep_dims) << "Only keep_dims=true is supported"; + + std::vector indices(input_shape.dimensions_count()); + for (int i = 0; i < input_shape.dims(1); ++i) { + indices[1] = i; + float sum = 0.f; + for (int j = 0; j < input_shape.dims(0); ++j) { + indices[0] = j; + sum += (*input_float_data)[Offset(input_shape, indices)]; + } output_float_data[i] = sum; } } else if (unary_op->type == OperatorType::kTensorFlowMin) { -- GitLab From 02b5fe290aea0e3cb8680d9e484f2b485bc92042 Mon Sep 17 00:00:00 2001 From: imsheridan Date: Fri, 2 Mar 2018 01:58:06 +0800 Subject: [PATCH 288/884] Fix the error activation function link in custom_estimators --- tensorflow/docs_src/get_started/custom_estimators.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md index 42a246678a..ae89b639b4 100644 --- a/tensorflow/docs_src/get_started/custom_estimators.md +++ b/tensorflow/docs_src/get_started/custom_estimators.md @@ -213,7 +213,7 @@ is connected to every node in the preceding layer. Here's the relevant code: ``` * The `units` parameter defines the number of output neurons in a given layer. -* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#a) — +* The `activation` parameter defines the [activation function](https://developers.google.com/machine-learning/glossary/#activation_function) — [Relu](https://developers.google.com/machine-learning/glossary/#ReLU) in this case. -- GitLab From 0265b5e632b35c2a5dff30e72e06aa5229bf0d45 Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Thu, 1 Mar 2018 10:23:57 -0800 Subject: [PATCH 289/884] [XLA] Update operation semantics doc for BatchNorm operations - Update formulas for BatchNormGrad. 
The rendered version of the new formulas can be found here: https://latexbase.com/d/1ad54ff9-f9d5-4479-beef-156ea26a0632 - Update output table to include the symbol name for each output. - Fix a typo in BatchNormGrad's input table to correct display the symbol `beta`. PiperOrigin-RevId: 187496086 --- .../performance/xla/operation_semantics.md | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index eaf6aeba3d..8162382846 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -45,27 +45,30 @@ feature dimension in `operand`), the operation calculates the gradients with respect to `operand`, `offset` and `scale` across all the other dimensions. The `feature_index` must be a valid index for the feature dimension in `operand`. -The three gradients are defined by the following formulas: +The three gradients are defined by the following formulas (Assuming a +4-dimensional tensor as `operand` and (l) is the index for feature dimension): -\\( \nabla x = \nabla y * \gamma * \sqrt{\sigma^2+\epsilon} \\) +\\( coef_l = \frac{1}{mwh}\sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h (\nabla y_{ijkl} * (x_{ijkl} - \mu_l) / (\sigma^2_{l}+\epsilon)) \\) -\\( \nabla \gamma = sum(\nabla y * (x - \mu) * \sqrt{\sigma^2 + \epsilon}) \\) +\\( \nabla x_{ijkl} = \gamma_{l} * (1/\sqrt{\sigma^2_{l}+\epsilon}) * [\nabla y_{ijkl} - mean(\nabla y) - (x_{ijkl} - \mu_{l}) * coef_l] \\) -\\( \nabla \beta = sum(\nabla y) \\) +\\( \nabla \beta_l = \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl} \\) + +\\( \nabla \gamma_l = \sum_{i=1}^m\sum_{j=1}^w\sum_{k=1}^h \nabla y_{ijkl} * ((x_{ijkl} - \mu_l) / \sqrt{\sigma^2_{l}+\epsilon}) \\) The inputs `mean` and `variance` represents moments value across batch and spatial dimensions. 
The output type is a tuple of three handles: -|Outputs | Type | Semantics | -|------------- | ----------------------- | ------------------------------------| -|`grad_operand`| `ComputationDataHandle` | gradient with respect to input | -: : : `operand` : -|`grad_scale` | `ComputationDataHandle` | gradient with respect to input | -: : : `scale` : -|`grad_offset` | `ComputationDataHandle` | gradient with respect to input | -: : : `offset` : +|Outputs | Type | Semantics | +|------------- | ----------------------- | ------------------------------------ | +|`grad_operand`| `ComputationDataHandle` | gradient with respect to input | +: : : `operand` (\\( \nabla x\\)) : +|`grad_scale` | `ComputationDataHandle` | gradient with respect to input | +: : : `scale` (\\( \nabla \gamma\\)) : +|`grad_offset` | `ComputationDataHandle` | gradient with respect to input | +: : : `offset`(\\( \nabla \beta\\)) : ## BatchNormInference @@ -119,11 +122,11 @@ Normalizes an array across batch and spatial dimensions. 
| Arguments | Type | Semantics | | --------------- | ----------------------- | -------------------------------- | | `operand` | `ComputationDataHandle` | n dimensional array to be | -: : : normalized : +: : : normalized (x) : | `scale` | `ComputationDataHandle` | 1 dimensional array | : : : (\\(\gamma\\)) : | `offset` | `ComputationDataHandle` | 1 dimensional array | -: : : (\\(\beta\\ ) : +: : : (\\(\beta\\)) : | `epsilon` | `float` | Epsilon value (\\(\epsilon\\)) | | `feature_index` | `int64` | Index to feature dimension | : : : in `operand` : -- GitLab From 16f7cb272f4810cb09f8238ba6b87f5945cd2b03 Mon Sep 17 00:00:00 2001 From: Guozhong Zhuang Date: Thu, 1 Mar 2018 10:29:11 -0800 Subject: [PATCH 290/884] Fix improper comments such as tf --> TensorFlow --- tensorflow/core/kernels/mkl_fused_batch_norm_op.cc | 4 ++-- tensorflow/core/kernels/mkl_relu_op.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index b7dee3fb3e..eccdece5e3 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -1249,8 +1249,8 @@ class MklFusedBatchNormGradOp : public OpKernel { tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); - // both src and diff_dst are tf layout, - // so get tf shape from anyont should be ok + // both src and diff_dst are TensorFlow layout, + // so it is OK to get TensorFlow shape. 
tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, kDiffSrcIndex, &diff_src_tensor, diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 924b9da7e0..6c873af566 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -600,8 +600,8 @@ class MklReluGradOpBase : public OpKernel { tf_shape_diff_src.AddDim(diff_src_pd.get_size() / sizeof(T)); } else { dnn_shape_diff_src.SetMklTensor(false); - // both src and diff_dst are tf layout, - // so get tf shape from anyone should be ok + // both src and diff_dst are TensorFlow layout, + // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, -- GitLab From ce8783a0d535b4657ecaab8e621ab7de568b80d6 Mon Sep 17 00:00:00 2001 From: Vijay Vasudevan Date: Thu, 1 Mar 2018 10:37:45 -0800 Subject: [PATCH 291/884] Remove old note that no longer applies. PiperOrigin-RevId: 187498339 --- tensorflow/core/BUILD | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 08832b58da..3271825251 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2224,8 +2224,6 @@ tf_cuda_library( alwayslink = 1, ) -# This library is deprecated and no longer publicly available. -# Do not add more uses of it. cc_library( name = "regexp_internal", hdrs = [ -- GitLab From 006d228201a1e9e140aa0651a59c51d3396a2d12 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 1 Mar 2018 10:38:27 -0800 Subject: [PATCH 292/884] Fixed the typo in RunConfig pydoc. 
PiperOrigin-RevId: 187498424 --- tensorflow/python/estimator/run_config.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/estimator/run_config.py b/tensorflow/python/estimator/run_config.py index 3e021242c4..62f035bce5 100644 --- a/tensorflow/python/estimator/run_config.py +++ b/tensorflow/python/estimator/run_config.py @@ -345,7 +345,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'worker', 'index': 1}}) - config = ClusterConfig() + config = RunConfig() assert config.master == 'host4:2222' assert config.task_id == 1 assert config.num_ps_replicas == 2 @@ -363,7 +363,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'chief', 'index': 0}}) - config = ClusterConfig() + config = RunConfig() assert config.master == 'host0:2222' assert config.task_id == 0 assert config.num_ps_replicas == 2 @@ -381,7 +381,7 @@ class RunConfig(object): os.environ['TF_CONFIG'] = json.dumps( {'cluster': cluster, 'task': {'type': 'evaluator', 'index': 0}}) - config = ClusterConfig() + config = RunConfig() assert config.master == '' assert config.evaluator_master == '' assert config.task_id == 0 -- GitLab From 12bd86fb45d1b5981896ea7500a465cc017c3ab8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 11:16:18 -0800 Subject: [PATCH 293/884] Internal cleanup. 
PiperOrigin-RevId: 187504966 --- .../java/org/tensorflow/lite/Interpreter.java | 6 + .../lite/NativeInterpreterWrapper.java | 25 +++- .../native/nativeinterpreterwrapper_jni.cc | 107 ++++++++++++++---- .../native/nativeinterpreterwrapper_jni.h | 11 +- .../lite/NativeInterpreterWrapperTest.java | 24 ++++ 5 files changed, 140 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index dd883d69d2..9286814b74 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -80,6 +80,9 @@ public final class Interpreter implements AutoCloseable { /** * Runs model inference if the model takes only one input, and provides only one output. * + *

Warning: The API runs much faster if {@link ByteBuffer} is used as input data type. Please + * consider using {@link ByteBuffer} to feed input data for better performance. + * * @param input an array or multidimensional array, or a {@link ByteBuffer} of primitive types * including int, float, long, and byte. {@link ByteBuffer} is the preferred way to pass large * input data. When {@link ByteBuffer} is used, its content should remain unchanged until @@ -96,6 +99,9 @@ public final class Interpreter implements AutoCloseable { /** * Runs model inference if the model takes multiple inputs, or returns multiple outputs. * + *

Warning: The API runs much faster if {@link ByteBuffer} is used as input data type. Please + * consider using {@link ByteBuffer} to feed input data for better performance. + * * @param inputs an array of input data. The inputs should be in the same order as inputs of the * model. Each input can be an array or multidimensional array, or a {@link ByteBuffer} of * primitive types including int, float, long, and byte. {@link ByteBuffer} is the preferred diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java index 7612be0ddd..bca4a3cae6 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/NativeInterpreterWrapper.java @@ -35,6 +35,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); modelHandle = createModel(modelPath, errorHandle); interpreterHandle = createInterpreter(modelHandle, errorHandle); + isMemoryAllocated = true; } /** @@ -47,6 +48,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { errorHandle = createErrorReporter(ERROR_BUFFER_SIZE); modelHandle = createModelWithBuffer(modelByteBuffer, errorHandle); interpreterHandle = createInterpreter(modelHandle, errorHandle); + isMemoryAllocated = true; } /** Releases resources associated with this {@code NativeInterpreterWrapper}. */ @@ -59,6 +61,7 @@ final class NativeInterpreterWrapper implements AutoCloseable { modelByteBuffer = null; inputsIndexes = null; outputsIndexes = null; + isMemoryAllocated = false; } /** Sets inputs, runs model inference and returns outputs. 
*/ @@ -93,10 +96,19 @@ final class NativeInterpreterWrapper implements AutoCloseable { } inferenceDurationNanoseconds = -1; long[] outputsHandles = - run(interpreterHandle, errorHandle, sizes, dataTypes, numsOfBytes, inputs, this); + run( + interpreterHandle, + errorHandle, + sizes, + dataTypes, + numsOfBytes, + inputs, + this, + isMemoryAllocated); if (outputsHandles == null || outputsHandles.length == 0) { throw new IllegalStateException("Interpreter has no outputs."); } + isMemoryAllocated = true; Tensor[] outputs = new Tensor[outputsHandles.length]; for (int i = 0; i < outputsHandles.length; ++i) { outputs[i] = Tensor.fromHandle(outputsHandles[i]); @@ -111,14 +123,17 @@ final class NativeInterpreterWrapper implements AutoCloseable { int[] dtypes, int[] numsOfBytes, Object[] values, - NativeInterpreterWrapper wrapper); + NativeInterpreterWrapper wrapper, + boolean memoryAllocated); /** Resizes dimensions of a specific input. */ void resizeInput(int idx, int[] dims) { - resizeInput(interpreterHandle, errorHandle, idx, dims); + if (resizeInput(interpreterHandle, errorHandle, idx, dims)) { + isMemoryAllocated = false; + } } - private static native void resizeInput( + private static native boolean resizeInput( long interpreterHandle, long errorHandle, int inputIdx, int[] dims); void setUseNNAPI(boolean useNNAPI) { @@ -264,6 +279,8 @@ final class NativeInterpreterWrapper implements AutoCloseable { private Map outputsIndexes; + private boolean isMemoryAllocated = false; + private static native String[] getInputNames(long interpreterHandle); private static native String[] getOutputNames(long interpreterHandle); diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index e405df0745..47bf4c9c9d 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ 
b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -149,6 +149,45 @@ TfLiteStatus checkInputs(JNIEnv* env, tflite::Interpreter* interpreter, return kTfLiteOk; } +// Checks whether there is any difference between dimensions of a tensor and a +// given dimensions. Returns true if there is difference, else false. +bool areDimsDifferent(JNIEnv* env, TfLiteTensor* tensor, jintArray dims) { + int num_dims = static_cast(env->GetArrayLength(dims)); + jint* ptr = env->GetIntArrayElements(dims, nullptr); + if (ptr == nullptr) { + throwException(env, kIllegalArgumentException, + "Empty dimensions of input array."); + return true; + } + if (tensor->dims->size != num_dims) { + return true; + } + for (int i = 0; i < num_dims; ++i) { + if (ptr[i] != tensor->dims->data[i]) { + return true; + } + } + env->ReleaseIntArrayElements(dims, ptr, JNI_ABORT); + return false; +} + +bool areInputDimensionsTheSame(JNIEnv* env, tflite::Interpreter* interpreter, + int input_size, jobjectArray sizes) { + if (interpreter->inputs().size() != input_size) { + return false; + } + for (int i = 0; i < input_size; ++i) { + int input_idx = interpreter->inputs()[i]; + jintArray dims = + static_cast(env->GetObjectArrayElement(sizes, i)); + TfLiteTensor* target = interpreter->tensor(input_idx); + if (areDimsDifferent(env, target, dims)) return false; + env->DeleteLocalRef(dims); + if (env->ExceptionCheck()) return false; + } + return true; +} + TfLiteStatus resizeInputs(JNIEnv* env, tflite::Interpreter* interpreter, int input_size, jobjectArray sizes) { for (int i = 0; i < input_size; ++i) { @@ -344,6 +383,15 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( throwException(env, kIllegalArgumentException, "Cannot create interpreter: %s", error_reporter->CachedErrorMessage()); + return 0; + } + // allocates memory + status = interpreter->AllocateTensors(); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, + "Can not allocate memory 
for the interpreter", + error_reporter->CachedErrorMessage()); + return 0; } return reinterpret_cast(interpreter.release()); } @@ -353,7 +401,7 @@ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values, jobject wrapper) { + jobjectArray values, jobject wrapper, jboolean memory_allocated) { tflite::Interpreter* interpreter = convertLongToInterpreter(env, interpreter_handle); if (interpreter == nullptr) return nullptr; @@ -365,20 +413,23 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( TfLiteStatus status = checkInputs(env, interpreter, input_size, data_types, nums_of_bytes, values, sizes); if (status != kTfLiteOk) return nullptr; - // resizes inputs - status = resizeInputs(env, interpreter, input_size, sizes); - if (status != kTfLiteOk) { - throwException(env, kNullPointerException, "Can not resize the input: %s", - error_reporter->CachedErrorMessage()); - return nullptr; - } - // allocates memory - status = interpreter->AllocateTensors(); - if (status != kTfLiteOk) { - throwException(env, kNullPointerException, - "Can not allocate memory for the given inputs: %s", - error_reporter->CachedErrorMessage()); - return nullptr; + if (!memory_allocated || + !areInputDimensionsTheSame(env, interpreter, input_size, sizes)) { + // resizes inputs + status = resizeInputs(env, interpreter, input_size, sizes); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, "Can not resize the input: %s", + error_reporter->CachedErrorMessage()); + return nullptr; + } + // allocates memory + status = interpreter->AllocateTensors(); + if (status != kTfLiteOk) { + throwException(env, kNullPointerException, + "Can not allocate memory for the given inputs: %s", + error_reporter->CachedErrorMessage()); + return nullptr; + } } // sets inputs status = setInputs(env, interpreter, 
input_size, data_types, nums_of_bytes, @@ -448,29 +499,37 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( return outputs; } -JNIEXPORT void JNICALL +JNIEXPORT jboolean JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jint input_idx, jintArray dims) { BufferErrorReporter* error_reporter = convertLongToErrorReporter(env, error_handle); - if (error_reporter == nullptr) return; + if (error_reporter == nullptr) return JNI_FALSE; tflite::Interpreter* interpreter = convertLongToInterpreter(env, interpreter_handle); - if (interpreter == nullptr) return; + if (interpreter == nullptr) return JNI_FALSE; const int idx = static_cast(input_idx); if (idx < 0 || idx >= interpreter->inputs().size()) { throwException(env, kIllegalArgumentException, "Can not resize %d-th input for a model having %d inputs.", idx, interpreter->inputs().size()); + return JNI_FALSE; } - TfLiteStatus status = interpreter->ResizeInputTensor( - interpreter->inputs()[idx], convertJIntArrayToVector(env, dims)); - if (status != kTfLiteOk) { - throwException(env, kIllegalArgumentException, - "Failed to resize %d-th input: %s", idx, - error_reporter->CachedErrorMessage()); + // check whether it is resizing with the same dimensions. + TfLiteTensor* target = interpreter->tensor(input_idx); + bool is_changed = areDimsDifferent(env, target, dims); + if (is_changed) { + TfLiteStatus status = interpreter->ResizeInputTensor( + interpreter->inputs()[idx], convertJIntArrayToVector(env, dims)); + if (status != kTfLiteOk) { + throwException(env, kIllegalArgumentException, + "Failed to resize %d-th input: %s", idx, + error_reporter->CachedErrorMessage()); + return JNI_FALSE; + } } + return is_changed ? 
JNI_TRUE : JNI_FALSE; } JNIEXPORT void JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_delete( diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h index 31c8f1bc88..f7c2d9bf82 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.h @@ -109,13 +109,13 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_createInterpreter( * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: * Signature: - * (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;Lorg/tensorflow/lite/NativeInterpreterWrapper;)[J + * (JJ[Ljava/lang/Object;[I[I[Ljava/lang/Object;Ljava/lang/Object;Z)[J */ JNIEXPORT jlongArray JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_run( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jobjectArray sizes, jintArray data_types, jintArray nums_of_bytes, - jobjectArray values, jobject wrapper); + jobjectArray values, jobject wrapper, jboolean memory_allocated); /* * Class: org_tensorflow_lite_NativeInterpreterWrapper @@ -132,11 +132,12 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_getInputDims( /* * Class: org_tensorflow_lite_NativeInterpreterWrapper * Method: - * Signature: (JJI[I) + * Signature: (JJI[I)Z * - * It resizes dimensions of a input. + * It returns true if resizing input tensor to different dimensions, else return + * false. 
*/ -JNIEXPORT void JNICALL +JNIEXPORT jboolean JNICALL Java_org_tensorflow_lite_NativeInterpreterWrapper_resizeInput( JNIEnv* env, jclass clazz, jlong interpreter_handle, jlong error_handle, jint input_idx, jintArray dims); diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java index 8c1f2406f7..6371fb59dc 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/NativeInterpreterWrapperTest.java @@ -94,6 +94,30 @@ public final class NativeInterpreterWrapperTest { wrapper.close(); } + @Test + public void testRunWithInputsOfSameDims() { + NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(FLOAT_MODEL_PATH); + float[] oneD = {1.23f, -6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + Object[] inputs = {fourD}; + Tensor[] outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + float[][][][] parsedOutputs = new float[2][8][8][3]; + outputs[0].copyTo(parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, -19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + outputs = wrapper.run(inputs); + assertThat(outputs.length).isEqualTo(1); + parsedOutputs = new float[2][8][8][3]; + outputs[0].copyTo(parsedOutputs); + outputOneD = parsedOutputs[0][0][0]; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + wrapper.close(); + } + @Test public void testRunWithInt() { NativeInterpreterWrapper wrapper = new NativeInterpreterWrapper(INT_MODEL_PATH); -- GitLab From 34eddebe5127a984a058cb7c2b003c2fd49f5c82 Mon Sep 17 
00:00:00 2001 From: Nick Desaulniers Date: Thu, 1 Mar 2018 11:46:56 -0800 Subject: [PATCH 294/884] [XLA] Optimize away DynamicUpdateSlice with update parameter with a dimension of zero. A zero sized update has no effect. PiperOrigin-RevId: 187510099 --- .../xla/service/algebraic_simplifier.cc | 8 +++++++ .../xla/service/algebraic_simplifier_test.cc | 23 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index 5ddd8ec377..ecaa474336 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -1625,6 +1625,14 @@ Status AlgebraicSimplifierVisitor::HandleDynamicUpdateSlice( if (IsAll(start_indices, 0) && SameShape(dynamic_update_slice, update)) { return ReplaceInstruction(dynamic_update_slice, update); } + + // If any dimension of update is 0, elide the DynamicUpdateSlice. This + // optimization becomes invalid should we later prefer to warn about out of + // bound indices. + if (ShapeUtil::HasZeroElements(update->shape())) { + return ReplaceInstruction(dynamic_update_slice, + dynamic_update_slice->mutable_operand(0)); + } return Status::OK(); } diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc index 667ae01993..451294ef5d 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier_test.cc @@ -2800,6 +2800,29 @@ DotOfConcatTestSpec kDotOfConcatTestSpecs[] = { {/*m=*/1, /*k=*/16, /*n=*/1}, // }; +// Test that DynamicUpdateSlice update param with any dimension equal to zero +// gets removed. 
+TEST_F(AlgebraicSimplifierTest, DynamicUpdateSliceZeroUpdate) { + HloComputation::Builder builder(TestName()); + const Shape dslice_shape = ShapeUtil::MakeShape(F32, {10}); + HloInstruction* const operand = builder.AddInstruction( + HloInstruction::CreateParameter(0, dslice_shape, "operand")); + const Shape update_shape = ShapeUtil::MakeShape(F32, {0}); + HloInstruction* const update = builder.AddInstruction( + HloInstruction::CreateParameter(1, update_shape, "update")); + HloInstruction* const start_indices = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateR1({0}))); + builder.AddInstruction(HloInstruction::CreateDynamicUpdateSlice( + dslice_shape, operand, update, start_indices)); + const HloComputation* const computation = + module().AddEntryComputation(builder.Build()); + + AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false, + non_bitcasting_callback()); + ASSERT_TRUE(simplifier.Run(&module()).ValueOrDie()); + EXPECT_THAT(computation->root_instruction(), operand); +} + INSTANTIATE_TEST_CASE_P(DotOfConcatSimplificationTestInstantiation, DotOfConcatSimplificationTest, ::testing::ValuesIn(kDotOfConcatTestSpecs)); -- GitLab From f176a611605bb26b17ef16d096e66d9d9ab2bda9 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 1 Mar 2018 11:59:14 -0800 Subject: [PATCH 295/884] Refactor training part of the Keras engine. Also add support for sample/class weights with eager execution. Structure before: engine/training.py engine/training_eager.py After: engine/training.py engine/training_arrays.py engine/training_eager.py engine/training_generator.py engine/training_utils.py All new files are about 500 lines long. training.py is now 1700 lines long (about 1000 lines of logic). It was previously 3000 lines long. 
PiperOrigin-RevId: 187511923 --- tensorflow/python/keras/BUILD | 9 +- .../keras/_impl/keras/engine/training.py | 1494 +---------------- .../_impl/keras/engine/training_arrays.py | 495 ++++++ .../_impl/keras/engine/training_eager.py | 314 ++-- .../_impl/keras/engine/training_eager_test.py | 223 +++ .../_impl/keras/engine/training_generator.py | 439 +++++ .../keras/_impl/keras/engine/training_test.py | 14 +- .../_impl/keras/engine/training_utils.py | 534 ++++++ .../keras/_impl/keras/utils/__init__.py | 2 +- .../{training_utils.py => multi_gpu_utils.py} | 0 ..._utils_test.py => multi_gpu_utils_test.py} | 0 tensorflow/python/keras/utils/__init__.py | 2 +- 12 files changed, 1966 insertions(+), 1560 deletions(-) create mode 100644 tensorflow/python/keras/_impl/keras/engine/training_arrays.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/training_generator.py create mode 100644 tensorflow/python/keras/_impl/keras/engine/training_utils.py rename tensorflow/python/keras/_impl/keras/utils/{training_utils.py => multi_gpu_utils.py} (100%) rename tensorflow/python/keras/_impl/keras/utils/{training_utils_test.py => multi_gpu_utils_test.py} (100%) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index a98d08f928..bd1aac5eae 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -45,7 +45,10 @@ py_library( "_impl/keras/engine/saving.py", "_impl/keras/engine/sequential.py", "_impl/keras/engine/training.py", + "_impl/keras/engine/training_arrays.py", "_impl/keras/engine/training_eager.py", + "_impl/keras/engine/training_generator.py", + "_impl/keras/engine/training_utils.py", "_impl/keras/estimator.py", "_impl/keras/initializers.py", "_impl/keras/layers/__init__.py", @@ -78,8 +81,8 @@ py_library( "_impl/keras/utils/generic_utils.py", "_impl/keras/utils/io_utils.py", "_impl/keras/utils/layer_utils.py", + "_impl/keras/utils/multi_gpu_utils.py", "_impl/keras/utils/np_utils.py", - "_impl/keras/utils/training_utils.py", 
"_impl/keras/utils/vis_utils.py", "_impl/keras/wrappers/__init__.py", "_impl/keras/wrappers/scikit_learn.py", @@ -646,9 +649,9 @@ py_test( ) py_test( - name = "training_utils_test", + name = "multi_gpu_utils_test", size = "medium", - srcs = ["_impl/keras/utils/training_utils_test.py"], + srcs = ["_impl/keras/utils/multi_gpu_utils_test.py"], srcs_version = "PY2AND3", tags = ["multi_gpu"], deps = [ diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index c121d819ff..2d040e7c0f 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -18,26 +18,21 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import copy - import numpy as np from tensorflow.python.eager import context from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.keras._impl.keras import backend as K -from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module from tensorflow.python.keras._impl.keras import optimizers +from tensorflow.python.keras._impl.keras.engine import training_arrays from tensorflow.python.keras._impl.keras.engine import training_eager +from tensorflow.python.keras._impl.keras.engine import training_generator +from tensorflow.python.keras._impl.keras.engine import training_utils from tensorflow.python.keras._impl.keras.engine.base_layer import Layer from tensorflow.python.keras._impl.keras.engine.network import Network -from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer -from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer -from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence -from 
tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches -from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.layers.base import _DeferredTensor from tensorflow.python.ops import array_ops @@ -45,472 +40,6 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import optimizer as tf_optimizer_module from tensorflow.python.util.tf_export import tf_export -try: - from scipy.sparse import issparse # pylint: disable=g-import-not-at-top -except ImportError: - issparse = None - - -def _standardize_input_data(data, - names, - shapes=None, - check_batch_axis=True, - exception_prefix=''): - """Normalizes inputs and targets provided by users. - - Users may pass data as a list of arrays, dictionary of arrays, - or as a single array. We normalize this to an ordered list of - arrays (same order as `names`), while checking that the provided - arrays have shapes that match the network's expectations. - - Arguments: - data: User-provided input data (polymorphic). - names: List of expected array names. - shapes: Optional list of expected array shapes. - check_batch_axis: Boolean; whether to check that - the batch axis of the arrays matches the expected - value found in `shapes`. - exception_prefix: String prefix used for exception formatting. - - Returns: - List of standardized input arrays (one array per model input). - - Raises: - ValueError: in case of improperly formatted user-provided data. 
- """ - if not names: - if data is not None and hasattr(data, '__len__') and len(data): - raise ValueError('Error when checking model ' + exception_prefix + ': ' - 'expected no data, but got:', data) - return [] - if data is None: - return [None for _ in range(len(names))] - - if isinstance(data, dict): - try: - data = [ - data[x].values - if data[x].__class__.__name__ == 'DataFrame' else data[x] - for x in names - ] - except KeyError as e: - raise ValueError('No data provided for "' + e.args[0] + '". Need data ' - 'for each key in: ' + str(names)) - elif isinstance(data, list): - if isinstance(data[0], list): - data = [np.asarray(d) for d in data] - elif len(names) == 1 and isinstance(data[0], (float, int)): - data = [np.asarray(data)] - else: - data = [ - x.values if x.__class__.__name__ == 'DataFrame' else x for x in data - ] - else: - data = data.values if data.__class__.__name__ == 'DataFrame' else data - data = [data] - data = [ - np.expand_dims(x, 1) if x is not None and x.ndim == 1 else x for x in data - ] - - if len(data) != len(names): - if data and hasattr(data[0], 'shape'): - raise ValueError('Error when checking model ' + exception_prefix + - ': the list of Numpy arrays that you are passing to ' - 'your model is not the size the model expected. ' - 'Expected to see ' + str(len(names)) + ' array(s), ' - 'but instead got the following list of ' + - str(len(data)) + ' arrays: ' + str(data)[:200] + '...') - elif len(names) > 1: - raise ValueError( - 'Error when checking model ' + exception_prefix + - ': you are passing a list as input to your model, ' - 'but the model expects a list of ' + str(len(names)) + - ' Numpy arrays instead. The list you passed was: ' + str(data)[:200]) - elif len(data) == 1 and not hasattr(data[0], 'shape'): - raise TypeError('Error when checking model ' + exception_prefix + - ': data should be a Numpy array, or list/dict of ' - 'Numpy arrays. 
Found: ' + str(data)[:200] + '...') - elif len(names) == 1: - data = [np.asarray(data)] - - # Check shapes compatibility. - if shapes: - for i in range(len(names)): - if shapes[i] is not None: - data_shape = data[i].shape - shape = shapes[i] - if data[i].ndim != len(shape): - raise ValueError('Error when checking ' + exception_prefix + - ': expected ' + names[i] + ' to have ' + - str(len(shape)) + ' dimensions, but got array ' - 'with shape ' + str(data_shape)) - if not check_batch_axis: - data_shape = data_shape[1:] - shape = shape[1:] - for dim, ref_dim in zip(data_shape, shape): - if ref_dim != dim and ref_dim: - raise ValueError( - 'Error when checking ' + exception_prefix + ': expected ' + - names[i] + ' to have shape ' + str(shape) + - ' but got array with shape ' + str(data_shape)) - return data - - -def _standardize_sample_or_class_weights(x_weight, output_names, weight_type): - """Maps `sample_weight` or `class_weight` to model outputs. - - Arguments: - x_weight: User-provided `sample_weight` or `class_weight` argument. - output_names: List of output names (strings) in the model. - weight_type: A string used purely for exception printing. - - Returns: - A list of `sample_weight` or `class_weight` where there are exactly - one element per model output. - - Raises: - ValueError: In case of invalid user-provided argument. - """ - if x_weight is None or len(x_weight) == 0: # pylint: disable=g-explicit-length-test - return [None for _ in output_names] - if len(output_names) == 1: - if isinstance(x_weight, list) and len(x_weight) == 1: - return x_weight - if isinstance(x_weight, dict) and output_names[0] in x_weight: - return [x_weight[output_names[0]]] - else: - return [x_weight] - if isinstance(x_weight, list): - if len(x_weight) != len(output_names): - raise ValueError('Provided `' + weight_type + '` was a list of ' + - str(len(x_weight)) + ' elements, but the model has ' + - str(len(output_names)) + ' outputs. 
' - 'You should provide one `' + weight_type + '`' - 'array per model output.') - return x_weight - if isinstance(x_weight, dict): - x_weights = [] - for name in output_names: - x_weights.append(x_weight.get(name)) - return x_weights - else: - raise TypeError( - 'The model has multiple outputs, so `' + weight_type + '` ' - 'should be either a list or a dict. ' - 'Provided `' + weight_type + '` type not understood: ' + str(x_weight)) - - -def _standardize_class_weights(class_weight, output_names): - return _standardize_sample_or_class_weights(class_weight, output_names, - 'class_weight') - - -def _standardize_sample_weights(sample_weight, output_names): - return _standardize_sample_or_class_weights(sample_weight, output_names, - 'sample_weight') - - -def _check_array_lengths(inputs, targets, weights=None): - """Does user input validation for numpy arrays. - - Arguments: - inputs: list of Numpy arrays of inputs. - targets: list of Numpy arrays of targets. - weights: list of Numpy arrays of sample weights. - - Raises: - ValueError: in case of incorrectly formatted data. - """ - - def set_of_lengths(x): - # return a set with the variation between - # different shapes, with None => 0 - if x is None: - return {} - else: - return set([y.shape[0] for y in x if y is not None]) - - set_x = set_of_lengths(inputs) - set_y = set_of_lengths(targets) - set_w = set_of_lengths(weights) - if len(set_x) > 1: - raise ValueError('All input arrays (x) should have ' - 'the same number of samples. Got array shapes: ' + - str([x.shape for x in inputs])) - if len(set_y) > 1: - raise ValueError('All target arrays (y) should have ' - 'the same number of samples. Got array shapes: ' + - str([y.shape for y in targets])) - if set_x and set_y and list(set_x)[0] != list(set_y)[0]: - raise ValueError('Input arrays should have ' - 'the same number of samples as target arrays. 
' - 'Found ' + str(list(set_x)[0]) + ' input samples ' - 'and ' + str(list(set_y)[0]) + ' target samples.') - if len(set_w) > 1: - raise ValueError('All sample_weight arrays should have ' - 'the same number of samples. Got array shapes: ' + - str([w.shape for w in weights])) - if set_y and set_w and list(set_y)[0] != list(set_w)[0]: - raise ValueError('Sample_weight arrays should have ' - 'the same number of samples as target arrays. Got ' + - str(list(set_y)[0]) + ' input samples and ' + - str(list(set_w)[0]) + ' target samples.') - - -def _check_loss_and_target_compatibility(targets, loss_fns, output_shapes): - """Does validation on the compatibility of targets and loss functions. - - This helps prevent users from using loss functions incorrectly. This check - is purely for UX purposes. - - Arguments: - targets: list of Numpy arrays of targets. - loss_fns: list of loss functions. - output_shapes: list of shapes of model outputs. - - Raises: - ValueError: if a loss function or target array - is incompatible with an output. - """ - key_losses = { - losses.mean_squared_error, losses.binary_crossentropy, - losses.categorical_crossentropy - } - for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None or tensor_util.is_tensor(y): - continue - if loss is losses.categorical_crossentropy: - if y.shape[-1] == 1: - raise ValueError('You are passing a target array of shape ' + str( - y.shape) + ' while using as loss `categorical_crossentropy`. ' - '`categorical_crossentropy` expects ' - 'targets to be binary matrices (1s and 0s) ' - 'of shape (samples, classes). 
' - 'If your targets are integer classes, ' - 'you can convert them to the expected format via:\n' - '```\n' - 'from keras.utils import to_categorical\n' - 'y_binary = to_categorical(y_int)\n' - '```\n' - '\n' - 'Alternatively, you can use the loss function ' - '`sparse_categorical_crossentropy` instead, ' - 'which does expect integer targets.') - if loss in key_losses: - for target_dim, out_dim in zip(y.shape[1:], shape[1:]): - if out_dim is not None and target_dim != out_dim: - raise ValueError('A target array with shape ' + str(y.shape) + - ' was passed for an output of shape ' + str(shape) + - ' while using as loss `' + loss.__name__ + '`. ' - 'This loss expects ' - 'targets to have the same shape ' - 'as the output.') - - -def _collect_metrics(metrics, output_names): - """Maps metric functions to model outputs. - - Arguments: - metrics: a list or dict of metric functions. - output_names: a list of the names (strings) of model outputs. - - Returns: - A list (one entry per model output) of lists of metric functions. - For instance, if the model has 2 outputs, and for the first output - we want to compute "binary_accuracy" and "binary_crossentropy", - and just "binary_accuracy" for the second output, - the list would look like: - `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]` - - Raises: - TypeError: if an incorrect type is passed for the `metrics` argument. - """ - if not metrics: - return [[] for _ in output_names] - if isinstance(metrics, list): - # we then apply all metrics to all outputs. - return [copy.copy(metrics) for _ in output_names] - elif isinstance(metrics, dict): - nested_metrics = [] - for name in output_names: - output_metrics = metrics.get(name, []) - if not isinstance(output_metrics, list): - output_metrics = [output_metrics] - nested_metrics.append(output_metrics) - return nested_metrics - else: - raise TypeError('Type of `metrics` argument not understood. 
' - 'Expected a list or dictionary, found: ' + str(metrics)) - - -def _batch_shuffle(index_array, batch_size): - """Shuffles an array in a batch-wise fashion. - - Useful for shuffling HDF5 arrays - (where one cannot access arbitrary indices). - - Arguments: - index_array: array of indices to be shuffled. - batch_size: integer. - - Returns: - The `index_array` array, shuffled in a batch-wise fashion. - """ - batch_count = int(len(index_array) / batch_size) - # to reshape we need to be cleanly divisible by batch size - # we stash extra items and reappend them after shuffling - last_batch = index_array[batch_count * batch_size:] - index_array = index_array[:batch_count * batch_size] - index_array = index_array.reshape((batch_count, batch_size)) - np.random.shuffle(index_array) - index_array = index_array.flatten() - return np.append(index_array, last_batch) - - -def _weighted_masked_objective(fn): - """Adds support for masking and sample-weighting to an objective function. - - It transforms an objective function `fn(y_true, y_pred)` - into a sample-weighted, cost-masked objective function - `fn(y_true, y_pred, weights, mask)`. - - Arguments: - fn: The objective function to wrap, - with signature `fn(y_true, y_pred)`. - - Returns: - A function with signature `fn(y_true, y_pred, weights, mask)`. - """ - if fn is None: - return None - - def weighted(y_true, y_pred, weights, mask=None): - """Wrapper function. - - Arguments: - y_true: `y_true` argument of `fn`. - y_pred: `y_pred` argument of `fn`. - weights: Weights tensor. - mask: Mask tensor. - - Returns: - Scalar tensor. - """ - # score_array has ndim >= 2 - score_array = fn(y_true, y_pred) - if mask is not None: - # Cast the mask to floatX to avoid float64 upcasting in theano - mask = K.cast(mask, K.floatx()) - # mask should have the same shape as score_array - score_array *= mask - # the loss per batch should be proportional - # to the number of unmasked samples. 
- score_array /= K.mean(mask) - - # apply sample weighting - if weights is not None: - # reduce score_array to same ndim as weight array - ndim = K.ndim(score_array) - weight_ndim = K.ndim(weights) - score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim))) - score_array *= weights - score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx())) - return K.mean(score_array) - - return weighted - - -def _standardize_weights(y, - sample_weight=None, - class_weight=None, - sample_weight_mode=None): - """Performs sample weight validation and standardization. - - Everything gets normalized to a single sample-wise (or timestep-wise) - weight array. - - Arguments: - y: Numpy array of model targets to be weighted. - sample_weight: User-provided `sample_weight` argument. - class_weight: User-provided `class_weight` argument. - sample_weight_mode: One of `None` or `"temporal"`. - `"temporal"` indicated that we expect 2D weight data - that will be applied to the last 2 dimensions of - the targets (i.e. we are weighting timesteps, not samples). - - Returns: - A numpy array of target weights, one entry per sample to weight. - - Raises: - ValueError: In case of invalid user-provided arguments. - """ - if sample_weight_mode is not None: - if sample_weight_mode != 'temporal': - raise ValueError('"sample_weight_mode ' - 'should be None or "temporal". ' - 'Found: ' + str(sample_weight_mode)) - if len(y.shape) < 3: - raise ValueError('Found a sample_weight array for ' - 'an input with shape ' + str(y.shape) + '. ' - 'Timestep-wise sample weighting (use of ' - 'sample_weight_mode="temporal") is restricted to ' - 'outputs that are at least 3D, i.e. that have ' - 'a time dimension.') - if sample_weight is not None and len(sample_weight.shape) != 2: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + '. 
' - 'In order to use timestep-wise sample weighting, ' - 'you should pass a 2D sample_weight array.') - else: - if sample_weight is not None and len(sample_weight.shape) != 1: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + '. ' - 'In order to use timestep-wise sample weights, ' - 'you should specify ' - 'sample_weight_mode="temporal" ' - 'in compile(). If you just mean to use ' - 'sample-wise weights, make sure your ' - 'sample_weight array is 1D.') - - if sample_weight is not None: - if len(sample_weight.shape) > len(y.shape): - raise ValueError( - 'Found a sample_weight with shape' + str(sample_weight.shape) + '.' - 'Expected sample_weight with rank ' - 'less than or equal to ' + str(len(y.shape))) - - if y.shape[:sample_weight.ndim] != sample_weight.shape: - raise ValueError( - 'Found a sample_weight array with shape ' + str(sample_weight.shape) + - ' for an input with shape ' + str(y.shape) + '. ' - 'sample_weight cannot be broadcast.') - return sample_weight - elif isinstance(class_weight, dict): - if len(y.shape) > 2: - raise ValueError('`class_weight` not supported for ' - '3+ dimensional targets.') - if y.shape[1] > 1: - y_classes = np.argmax(y, axis=1) - elif y.shape[1] == 1: - y_classes = np.reshape(y, y.shape[0]) - else: - y_classes = y - - weights = np.asarray( - [class_weight[cls] for cls in y_classes if cls in class_weight]) - - if len(weights) != len(y_classes): - # subtract the sets to pick all missing classes - existing_classes = set(y_classes) - existing_class_weight = set(class_weight.keys()) - raise ValueError('`class_weight` must contain all classes in the data.' - ' The classes %s exist in the data but not in ' - '`class_weight`.' 
% - (existing_classes - existing_class_weight)) - return weights - else: - return None - @tf_export('keras.models.Model', 'keras.Model') class Model(Network): @@ -687,7 +216,8 @@ class Model(Network): loss_functions = [loss_function for _ in range(len(self.outputs))] self.loss_functions = loss_functions - weighted_losses = [_weighted_masked_objective(fn) for fn in loss_functions] + weighted_losses = [training_utils.weighted_masked_objective(fn) + for fn in loss_functions] skip_target_indices = [] skip_target_weighing_indices = [] self._feed_outputs = [] @@ -744,7 +274,8 @@ class Model(Network): for i in range(len(self.outputs)): if len(self.outputs) > 1: self.metrics_names.append(self.output_names[i] + '_loss') - self.nested_metrics = _collect_metrics(metrics, self.output_names) + self.nested_metrics = training_utils.collect_metrics(metrics, + self.output_names) self._feed_sample_weight_modes = [] for i in range(len(self.outputs)): self._feed_sample_weight_modes.append(None) @@ -914,9 +445,9 @@ class Model(Network): # List of same size as output_names. # contains tuples (metrics for output, names of metrics). 
- nested_metrics = _collect_metrics(metrics, self.output_names) - nested_weighted_metrics = _collect_metrics(weighted_metrics, - self.output_names) + nested_metrics = training_utils.collect_metrics(metrics, self.output_names) + nested_weighted_metrics = training_utils.collect_metrics(weighted_metrics, + self.output_names) self.metrics_updates = [] self.stateful_metric_names = [] with K.name_scope('metrics'): @@ -962,11 +493,13 @@ class Model(Network): suffix = 'acc' elif metric in ('crossentropy', 'ce'): suffix = 'ce' - weighted_metric_fn = _weighted_masked_objective(metric_fn) + weighted_metric_fn = training_utils.weighted_masked_objective( + metric_fn) metric_name = metric_name_prefix + suffix else: metric_fn = metrics_module.get(metric) - weighted_metric_fn = _weighted_masked_objective(metric_fn) + weighted_metric_fn = training_utils.weighted_masked_objective( + metric_fn) # Get metric name as string if hasattr(metric_fn, 'name'): metric_name = metric_fn.name @@ -1104,451 +637,6 @@ class Model(Network): name='predict_function', **kwargs) - def _check_num_samples(self, - ins, - batch_size=None, - steps=None, - steps_name='steps'): - """Determine the number of samples provided for training and evaluation. - - The number of samples is not defined when running with `steps`, - in which case the number of samples is set to `None`. - - Arguments: - ins: List of tensors to be fed to the Keras function. - batch_size: Integer batch size or `None` if not defined. - steps: Total number of steps (batches of samples) - before declaring `_predict_loop` finished. - Ignored with the default value of `None`. - steps_name: The public API's parameter name for `steps`. - - Raises: - ValueError: when `steps` is `None` and the attribute `ins.shape` - does not exist. Also raises ValueError when `steps` is not `None` - and `batch_size` is not `None` because they are mutually - exclusive. 
- - Returns: - When steps is `None`, returns the number of samples to be - processed based on the size of the first dimension of the - first input numpy array. When steps is not `None` and - `batch_size` is `None`, returns `None`. - - Raises: - ValueError: In case of invalid arguments. - """ - if steps is not None: - num_samples = None - if batch_size is not None: - raise ValueError( - 'If ' + steps_name + ' is set, the `batch_size` must be None.') - elif ins and hasattr(ins[0], 'shape'): - num_samples = ins[0].shape[0] - else: - raise ValueError( - 'Either the input data should have ' - 'a defined shape, or ' + steps_name + ' should be specified.') - return num_samples - - def _fit_loop(self, - f, - ins, - out_labels=None, - batch_size=None, - epochs=100, - verbose=1, - callbacks=None, - val_f=None, - val_ins=None, - shuffle=True, - callback_metrics=None, - initial_epoch=0, - steps_per_epoch=None, - validation_steps=None): - """Abstract fit function for `f(ins)`. - - Assume that f returns a list, labeled by out_labels. - - Arguments: - f: Keras function returning a list of tensors - ins: List of tensors to be fed to `f` - out_labels: List of strings, display names of - the outputs of `f` - batch_size: Integer batch size or None if unknown. - epochs: Number of times to iterate over the data - verbose: Verbosity mode, 0, 1 or 2 - callbacks: List of callbacks to be called during training - val_f: Keras function to call for validation - val_ins: List of tensors to be fed to `val_f` - shuffle: Whether to shuffle the data at the beginning of each epoch - callback_metrics: List of strings, the display names of the metrics - passed to the callbacks. They should be the - concatenation of list the display names of the outputs of - `f` and the list of display names of the outputs of `f_val`. 
- initial_epoch: Epoch at which to start training - (useful for resuming a previous training run) - steps_per_epoch: Total number of steps (batches of samples) - before declaring one epoch finished and starting the - next epoch. Ignored with the default value of `None`. - validation_steps: Number of steps to run validation for - (only if doing validation from data tensors). - Ignored with the default value of `None`. - - Returns: - `History` object. - - Raises: - ValueError: in case of invalid arguments. - """ - do_validation = False - if val_f and val_ins: - do_validation = True - if verbose and ins and hasattr(ins[0], 'shape') and hasattr( - val_ins[0], 'shape'): - print('Train on %d samples, validate on %d samples' % - (ins[0].shape[0], val_ins[0].shape[0])) - if validation_steps: - do_validation = True - if steps_per_epoch is None: - raise ValueError('Can only use `validation_steps` ' - 'when doing step-wise ' - 'training, i.e. `steps_per_epoch` ' - 'must be set.') - - num_train_samples = self._check_num_samples( - ins, batch_size, steps_per_epoch, 'steps_per_epoch') - if num_train_samples is not None: - index_array = np.arange(num_train_samples) - - self.history = cbks.History() - all_callbacks = [cbks.BaseLogger( - stateful_metrics=self.stateful_metric_names)] - if verbose: - if steps_per_epoch is not None: - count_mode = 'steps' - else: - count_mode = 'samples' - all_callbacks.append( - cbks.ProgbarLogger( - count_mode, stateful_metrics=self.stateful_metric_names)) - all_callbacks += (callbacks or []) + [self.history] - callbacks = cbks.CallbackList(all_callbacks) - out_labels = out_labels or [] - - # it's possible to callback a different model than self - # (used by Sequential models) - if hasattr(self, 'callback_model') and self.callback_model: - callback_model = self.callback_model - else: - callback_model = self - - callbacks.set_model(callback_model) - - callbacks.set_params({ - 'batch_size': batch_size, - 'epochs': epochs, - 'steps': steps_per_epoch, - 
'samples': num_train_samples, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics or [], - }) - callbacks.on_train_begin() - callback_model.stop_training = False - for cbk in callbacks: - cbk.validation_data = val_ins - - # To prevent a slowdown, we find beforehand the arrays that need conversion. - feed = self._feed_inputs + self._feed_targets + self._feed_sample_weights - indices_for_conversion_to_dense = [] - for i in range(len(feed)): - if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): - indices_for_conversion_to_dense.append(i) - - for epoch in range(initial_epoch, epochs): - # Reset stateful metrics - for m in self.metrics: - if isinstance(m, Layer): - m.reset_states() - # Update callbacks - callbacks.on_epoch_begin(epoch) - epoch_logs = {} - if steps_per_epoch is not None: - for step_index in range(steps_per_epoch): - batch_logs = {} - batch_logs['batch'] = step_index - batch_logs['size'] = 1 - callbacks.on_batch_begin(step_index, batch_logs) - outs = f(ins) - - if not isinstance(outs, list): - outs = [outs] - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks.on_batch_end(step_index, batch_logs) - if callback_model.stop_training: - break - - if do_validation: - val_outs = self._test_loop( - val_f, - val_ins, - batch_size=batch_size, - steps=validation_steps, - verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - else: - if shuffle == 'batch': - index_array = _batch_shuffle(index_array, batch_size) - elif shuffle: - np.random.shuffle(index_array) - - batches = make_batches(num_train_samples, batch_size) - - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - try: - if isinstance(ins[-1], int): - # Do not slice the training phase flag. 
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - except TypeError: - raise TypeError('TypeError while preparing batch. ' - 'If using HDF5 input data, ' - 'pass shuffle="batch".') - batch_logs = {} - batch_logs['batch'] = batch_index - batch_logs['size'] = len(batch_ids) - callbacks.on_batch_begin(batch_index, batch_logs) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - outs = f(ins_batch) - if not isinstance(outs, list): - outs = [outs] - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks.on_batch_end(batch_index, batch_logs) - if callback_model.stop_training: - break - - if batch_index == len(batches) - 1: # Last batch. - if do_validation: - val_outs = self._test_loop( - val_f, val_ins, batch_size=batch_size, verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - callbacks.on_epoch_end(epoch, epoch_logs) - if callback_model.stop_training: - break - callbacks.on_train_end() - return self.history - - def _predict_loop(self, f, ins, batch_size=32, verbose=0, steps=None): - """Abstract method to loop over some data in batches. - - Arguments: - f: Keras function returning a list of tensors. - ins: list of tensors to be fed to `f`. - batch_size: integer batch size. - verbose: verbosity mode. - steps: Total number of steps (batches of samples) - before declaring `_predict_loop` finished. - Ignored with the default value of `None`. - - Returns: - Array of predictions (if the model has a single output) - or list of arrays of predictions - (if the model has multiple outputs). 
- """ - if hasattr(self, 'metrics'): - for m in self.metrics: - if isinstance(m, Layer): - m.reset_states() - - num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') - if verbose == 1: - if steps is not None: - progbar = Progbar(target=steps, - stateful_metrics=self.stateful_metric_names) - else: - progbar = Progbar(target=num_samples, - stateful_metrics=self.stateful_metric_names) - - indices_for_conversion_to_dense = [] - for i in range(len(self._feed_inputs)): - if (issparse is not None and issparse(ins[i]) and - not K.is_sparse(self._feed_inputs[i])): - indices_for_conversion_to_dense.append(i) - - if steps is not None: - # Step-based predictions. - # Since we do not know how many samples - # we will see, we cannot pre-allocate - # the returned Numpy arrays. - # Instead, we store one array per batch seen - # and concatenate them upon returning. - unconcatenated_outs = [] - for step in range(steps): - batch_outs = f(ins) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - if step == 0: - for batch_out in batch_outs: - unconcatenated_outs.append([]) - for i, batch_out in enumerate(batch_outs): - unconcatenated_outs[i].append(batch_out) - if verbose == 1: - progbar.update(step + 1) - if len(unconcatenated_outs) == 1: - return np.concatenate(unconcatenated_outs[0], axis=0) - return [ - np.concatenate(unconcatenated_outs[i], axis=0) - for i in range(len(unconcatenated_outs)) - ] - else: - # Sample-based predictions. - outs = [] - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], int): - # Do not slice the training phase flag. 
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - batch_outs = f(ins_batch) - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - if batch_index == 0: - # Pre-allocate the results arrays. - for batch_out in batch_outs: - shape = (num_samples,) + batch_out.shape[1:] - outs.append(np.zeros(shape, dtype=batch_out.dtype)) - for i, batch_out in enumerate(batch_outs): - outs[i][batch_start:batch_end] = batch_out - if verbose == 1: - progbar.update(batch_end) - if len(outs) == 1: - return outs[0] - return outs - - def _test_loop(self, f, ins, batch_size=None, verbose=0, steps=None): - """Abstract method to loop over some data in batches. - - Arguments: - f: Keras function returning a list of tensors. - ins: list of tensors to be fed to `f`. - batch_size: integer batch size or `None`. - verbose: verbosity mode. - steps: Total number of steps (batches of samples) - before declaring predictions finished. - Ignored with the default value of `None`. - - Returns: - Scalar loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - """ - if hasattr(self, 'metrics'): - for m in self.metrics: - if isinstance(m, Layer): - m.reset_states() - stateful_metric_indices = [ - i for i, name in enumerate(self.metrics_names) - if str(name) in self.stateful_metric_names - ] - else: - stateful_metric_indices = [] - - num_samples = self._check_num_samples(ins, batch_size, steps, 'steps') - outs = [] - if verbose == 1: - if steps is not None: - progbar = Progbar(target=steps) - else: - progbar = Progbar(target=num_samples) - - # To prevent a slowdown, we find beforehand the arrays that need conversion. 
- feed = self._feed_inputs + self._feed_targets + self._feed_sample_weights - indices_for_conversion_to_dense = [] - for i in range(len(feed)): - if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): - indices_for_conversion_to_dense.append(i) - - if steps is not None: - for step in range(steps): - batch_outs = f(ins) - if isinstance(batch_outs, list): - if step == 0: - for _ in enumerate(batch_outs): - outs.append(0.) - for i, batch_out in enumerate(batch_outs): - if i in stateful_metric_indices: - outs[i] = batch_out - else: - outs[i] += batch_out - else: - if step == 0: - outs.append(0.) - outs[0] += batch_outs - if verbose == 1: - progbar.update(step + 1) - for i in range(len(outs)): - if i not in stateful_metric_indices: - outs[i] /= steps - else: - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], int): - # Do not slice the training phase flag. - ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - batch_outs = f(ins_batch) - - if isinstance(batch_outs, list): - if batch_index == 0: - for batch_out in enumerate(batch_outs): - outs.append(0.) - for i, batch_out in enumerate(batch_outs): - if i in stateful_metric_indices: - outs[i] = batch_out - else: - outs[i] += batch_out * len(batch_ids) - else: - if batch_index == 0: - outs.append(0.) - outs[0] += batch_outs * len(batch_ids) - if verbose == 1: - progbar.update(batch_end) - for i in range(len(outs)): - if i not in stateful_metric_indices: - outs[i] /= num_samples - if len(outs) == 1: - return outs[0] - return outs - def _standardize_user_data(self, x, y=None, @@ -1688,7 +776,7 @@ class Model(Network): feed_input_shapes = self._feed_input_shapes # Standardize the inputs. 
- x = _standardize_input_data( + x = training_utils.standardize_input_data( x, feed_input_names, feed_input_shapes, @@ -1727,7 +815,7 @@ class Model(Network): feed_output_shapes.append(output_shape) # Standardize the outputs. - y = _standardize_input_data( + y = training_utils.standardize_input_data( y, feed_output_names, feed_output_shapes, @@ -1736,21 +824,21 @@ class Model(Network): # Generate sample-wise weight values given the `sample_weight` and # `class_weight` arguments. - sample_weights = _standardize_sample_weights(sample_weight, - feed_output_names) - class_weights = _standardize_class_weights(class_weight, - feed_output_names) + sample_weights = training_utils.standardize_sample_weights( + sample_weight, feed_output_names) + class_weights = training_utils.standardize_class_weights( + class_weight, feed_output_names) sample_weights = [ - _standardize_weights(ref, sw, cw, mode) + training_utils.standardize_weights(ref, sw, cw, mode) for (ref, sw, cw, mode) in zip(y, sample_weights, class_weights, feed_sample_weight_modes) ] # Check that all arrays have the same length. - _check_array_lengths(x, y, sample_weights) + training_utils.check_array_lengths(x, y, sample_weights) if self._is_graph_network and not context.in_eager_mode(): # Additional checks to avoid users mistakenly using improper loss fns. - _check_loss_and_target_compatibility(y, self._feed_loss_fns, - feed_output_shapes) + training_utils.check_loss_and_target_compatibility( + y, self._feed_loss_fns, feed_output_shapes) else: y = [] sample_weights = [] @@ -2052,10 +1140,7 @@ class Model(Network): class_weight=class_weight, batch_size=batch_size) # Prepare validation data. - do_validation = False - val_ins = [] if validation_data: - do_validation = True if len(validation_data) == 2: val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence val_sample_weight = None @@ -2075,7 +1160,6 @@ class Model(Network): batch_size=batch_size) elif validation_split and 0. 
< validation_split < 1.: - do_validation = True if hasattr(x[0], 'shape'): split_at = int(x[0].shape[0] * (1. - validation_split)) else: @@ -2088,78 +1172,40 @@ class Model(Network): val_x = [] val_y = [] val_sample_weights = [] - do_validation = True - - # Prepare display labels. - out_labels = self.metrics_names + else: + val_x = None + val_y = None + val_sample_weights = None if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') - - if do_validation: - if any([w is not None for w in val_sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported' - ' when eager execution is enabled, for now.') - callback_metrics = copy.copy(out_labels) + [ - 'val_' + n for n in out_labels - ] - val_ins = val_x + val_y - else: - callback_metrics = copy.copy(out_labels) - return training_eager.fit_loop( self, - x + y, - out_labels=out_labels, + inputs=x, + targets=y, + sample_weights=sample_weights, batch_size=batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, - val_ins=val_ins, + val_inputs=val_x, + val_targets=val_y, + val_sample_weights=val_sample_weights, shuffle=shuffle, - callback_metrics=callback_metrics, initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) else: - # Prepare input arrays and training function. 
- if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [1] - else: - ins = x + y + sample_weights - - self._make_train_function() - f = self.train_function - - if do_validation: - self._make_test_function() - val_f = self.test_function - callback_metrics = copy.copy(out_labels) + [ - 'val_' + n for n in out_labels - ] - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - val_ins = val_x + val_y + val_sample_weights + [0] - else: - val_ins = val_x + val_y + val_sample_weights - else: - val_f = None - callback_metrics = copy.copy(out_labels) - - # Delegate logic to `_fit_loop`. - return self._fit_loop( - f, - ins, - out_labels=out_labels, + return training_arrays.fit_loop( + self, x, y, + sample_weights=sample_weights, batch_size=batch_size, epochs=epochs, verbose=verbose, callbacks=callbacks, - val_f=val_f, - val_ins=val_ins, + val_inputs=val_x, + val_targets=val_y, + val_sample_weights=val_sample_weights, shuffle=shuffle, - callback_metrics=callback_metrics, initial_epoch=initial_epoch, steps_per_epoch=steps_per_epoch, validation_steps=validation_steps) @@ -2235,22 +1281,13 @@ class Model(Network): batch_size=batch_size) if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') return training_eager.test_loop( - self, x + y, batch_size=batch_size, verbose=verbose, steps=steps) + self, inputs=x, targets=y, sample_weights=sample_weights, + batch_size=batch_size, verbose=verbose, steps=steps) else: - # Prepare inputs, delegate logic to `_test_loop`. 
- if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + y + sample_weights + [0] - else: - ins = x + y + sample_weights - - self._make_test_function() - f = self.test_function - return self._test_loop( - f, ins, batch_size=batch_size, verbose=verbose, steps=steps) + return training_arrays.test_loop( + self, inputs=x, targets=y, sample_weights=sample_weights, + batch_size=batch_size, verbose=verbose, steps=steps) def predict(self, x, batch_size=None, verbose=0, steps=None): """Generates output predictions for the input samples. @@ -2288,17 +1325,8 @@ class Model(Network): return training_eager.predict_loop( self, x, batch_size=batch_size, verbose=verbose, steps=steps) else: - # Prepare inputs, delegate logic to `_predict_loop`. - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - ins = x + [0] - else: - ins = x - - self._make_predict_function() - f = self.predict_function - - return self._predict_loop( - f, ins, batch_size=batch_size, verbose=verbose, steps=steps) + return training_arrays.predict_loop( + self, x, batch_size=batch_size, verbose=verbose, steps=steps) def train_on_batch(self, x, y, sample_weight=None, class_weight=None): """Runs a single gradient update on a single batch of data. 
@@ -2345,10 +1373,8 @@ class Model(Network): class_weight=class_weight) if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') - outputs = training_eager.train_on_batch(self, x + y) + outputs = training_eager.train_on_batch( + self, x, y, sample_weights=sample_weights) else: if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + y + sample_weights + [1] @@ -2397,10 +1423,8 @@ class Model(Network): x, y, sample_weight=sample_weight) if context.in_eager_mode(): - if any([w is not None for w in sample_weights]): - raise ValueError('`sample_weight` and `class_weight` is not supported ' - 'when eager execution is enabled, for now.') - outputs = training_eager.test_on_batch(self, x + y) + outputs = training_eager.test_on_batch( + self, x, y, sample_weights=sample_weights) else: if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + y + sample_weights + [0] @@ -2426,16 +1450,8 @@ class Model(Network): x, _, _ = self._standardize_user_data(x) if context.in_eager_mode(): - ins_batch_converted = [] - for ib in x: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - - eager_model_inputs = [] - for i in range(len(self.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - - outs = self(eager_model_inputs) # pylint: disable=not-callable - return outs + inputs = [ops.convert_to_tensor(val, dtype=K.floatx()) for val in x] + return self(inputs) # pylint: disable=not-callable if context.in_graph_mode(): if self.uses_learning_phase and not isinstance(K.learning_phase(), int): @@ -2445,6 +1461,7 @@ class Model(Network): self._make_predict_function() outputs = self.predict_function(ins) + if len(outputs) == 1: return outputs[0] return outputs @@ -2560,213 +1577,21 @@ class Model(Network): raise NotImplementedError( '`fit_generator` is not yet enabled for 
Model subclasses') - wait_time = 0.01 # in seconds - epoch = initial_epoch - - do_validation = bool(validation_data) - self._make_train_function() - if do_validation: - self._make_test_function() - - is_sequence = isinstance(generator, Sequence) - if not is_sequence and use_multiprocessing and workers > 1: - logging.warning( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the`keras.utils.Sequence' - ' class.')) - if steps_per_epoch is None: - if is_sequence: - steps_per_epoch = len(generator) - else: - raise ValueError('`steps_per_epoch=None` is only valid for a' - ' generator based on the `keras.utils.Sequence`' - ' class. Please specify `steps_per_epoch` or use' - ' the `keras.utils.Sequence` class.') - - # python 2 has 'next', 3 has '__next__' - # avoid any explicit version checks - val_gen = ( - hasattr(validation_data, 'next') or - hasattr(validation_data, '__next__') or - isinstance(validation_data, Sequence)) - if (val_gen and not isinstance(validation_data, Sequence) and - not validation_steps): - raise ValueError('`validation_steps=None` is only valid for a' - ' generator based on the `keras.utils.Sequence`' - ' class. Please specify `validation_steps` or use' - ' the `keras.utils.Sequence` class.') - - # Prepare display labels. 
- out_labels = self.metrics_names - callback_metrics = out_labels + ['val_%s' % n for n in out_labels] - - # prepare callbacks - self.history = cbks.History() - callbacks = [cbks.BaseLogger()] + (callbacks or []) + [self.history] - if verbose: - callbacks += [cbks.ProgbarLogger(count_mode='steps')] - callbacks = cbks.CallbackList(callbacks) - - # it's possible to callback a different model than self: - if hasattr(self, 'callback_model') and self.callback_model: - callback_model = self.callback_model - else: - callback_model = self - callbacks.set_model(callback_model) - callbacks.set_params({ - 'epochs': epochs, - 'steps': steps_per_epoch, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics, - }) - callbacks.on_train_begin() - - enqueuer = None - val_enqueuer = None - - try: - if do_validation: - if val_gen: - if workers > 0: - if isinstance(validation_data, Sequence): - val_enqueuer = OrderedEnqueuer( - validation_data, use_multiprocessing=use_multiprocessing) - if validation_steps is None: - validation_steps = len(validation_data) - else: - val_enqueuer = GeneratorEnqueuer( - validation_data, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - val_enqueuer.start(workers=workers, max_queue_size=max_queue_size) - validation_generator = val_enqueuer.get() - else: - validation_generator = validation_data - else: - if len(validation_data) == 2: - val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence - val_sample_weight = None - elif len(validation_data) == 3: - val_x, val_y, val_sample_weight = validation_data # pylint: disable=unpacking-non-sequence - else: - raise ValueError( - '`validation_data` should be a tuple ' - '`(val_x, val_y, val_sample_weight)` ' - 'or `(val_x, val_y)`. 
Found: ' + str(validation_data)) - val_x, val_y, val_sample_weights = self._standardize_user_data( - val_x, val_y, val_sample_weight) - val_data = val_x + val_y + val_sample_weights - if self.uses_learning_phase and not isinstance( - K.learning_phase(), int): - val_data += [0] - for cbk in callbacks: - cbk.validation_data = val_data - - if workers > 0: - if is_sequence: - enqueuer = OrderedEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - output_generator = generator - - callback_model.stop_training = False - # Construct epoch logs. - epoch_logs = {} - while epoch < epochs: - callbacks.on_epoch_begin(epoch) - steps_done = 0 - batch_index = 0 - while steps_done < steps_per_epoch: - generator_output = next(output_generator) - - if not hasattr(generator_output, '__len__'): - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. Found: ' + str(generator_output)) - - if len(generator_output) == 2: - x, y = generator_output - sample_weight = None - elif len(generator_output) == 3: - x, y, sample_weight = generator_output - else: - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. 
Found: ' + str(generator_output)) - # build batch logs - batch_logs = {} - if isinstance(x, list): - batch_size = x[0].shape[0] - elif isinstance(x, dict): - batch_size = list(x.values())[0].shape[0] - else: - batch_size = x.shape[0] - batch_logs['batch'] = batch_index - batch_logs['size'] = batch_size - callbacks.on_batch_begin(batch_index, batch_logs) - - outs = self.train_on_batch( - x, y, sample_weight=sample_weight, class_weight=class_weight) - - if not isinstance(outs, list): - outs = [outs] - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks.on_batch_end(batch_index, batch_logs) - - batch_index += 1 - steps_done += 1 - - # Epoch finished. - if steps_done >= steps_per_epoch and do_validation: - if val_gen: - val_outs = self.evaluate_generator( - validation_generator, validation_steps, workers=0) - else: - # No need for try/except because - # data has already been validated. - val_outs = self.evaluate( - val_x, - val_y, - batch_size=batch_size, - sample_weight=val_sample_weights, - verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. 
- for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - - if callback_model.stop_training: - break - - callbacks.on_epoch_end(epoch, epoch_logs) - epoch += 1 - if callback_model.stop_training: - break - - finally: - try: - if enqueuer is not None: - enqueuer.stop() - finally: - if val_enqueuer is not None: - val_enqueuer.stop() - - callbacks.on_train_end() - return self.history + return training_generator.fit_generator( + self, + generator, + steps_per_epoch=steps_per_epoch, + epochs=epochs, + verbose=verbose, + callbacks=callbacks, + validation_data=validation_data, + validation_steps=validation_steps, + class_weight=class_weight, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle, + initial_epoch=initial_epoch) def evaluate_generator(self, generator, @@ -2819,87 +1644,13 @@ class Model(Network): raise NotImplementedError( '`evaluate_generator` is not yet enabled for Model subclasses') - self._make_test_function() - - steps_done = 0 - wait_time = 0.01 - all_outs = [] - batch_sizes = [] - is_sequence = isinstance(generator, Sequence) - if not is_sequence and use_multiprocessing and workers > 1: - logging.warning( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the`keras.utils.Sequence' - ' class.')) - if steps is None: - if is_sequence: - steps = len(generator) - else: - raise ValueError('`steps=None` is only valid for a generator' - ' based on the `keras.utils.Sequence` class.' 
- ' Please specify `steps` or use the' - ' `keras.utils.Sequence` class.') - enqueuer = None - - try: - if workers > 0: - if is_sequence: - enqueuer = OrderedEnqueuer( - generator, use_multiprocessing=use_multiprocessing) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - output_generator = generator - - while steps_done < steps: - generator_output = next(output_generator) - if not hasattr(generator_output, '__len__'): - raise ValueError('Output of generator should be a tuple ' - '(x, y, sample_weight) ' - 'or (x, y). Found: ' + str(generator_output)) - if len(generator_output) == 2: - x, y = generator_output - sample_weight = None - elif len(generator_output) == 3: - x, y, sample_weight = generator_output - else: - raise ValueError('Output of generator should be a tuple ' - '(x, y, sample_weight) ' - 'or (x, y). Found: ' + str(generator_output)) - outs = self.test_on_batch(x, y, sample_weight=sample_weight) - - if isinstance(x, list): - batch_size = x[0].shape[0] - elif isinstance(x, dict): - batch_size = list(x.values())[0].shape[0] - else: - batch_size = x.shape[0] - if batch_size == 0: - raise ValueError('Received an empty batch. 
' - 'Batches should at least contain one item.') - all_outs.append(outs) - - steps_done += 1 - batch_sizes.append(batch_size) - - finally: - if enqueuer is not None: - enqueuer.stop() - - if not isinstance(outs, list): - return np.average(np.asarray(all_outs), weights=batch_sizes) - else: - averages = [] - for i in range(len(outs)): - averages.append( - np.average([out[i] for out in all_outs], weights=batch_sizes)) - return averages + return training_generator.evaluate_generator( + self, + generator, + steps=steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing) def predict_generator(self, generator, @@ -2947,88 +1698,11 @@ class Model(Network): raise NotImplementedError( '`predict_generator` is not yet enabled for Model subclasses') - self._make_predict_function() - - steps_done = 0 - wait_time = 0.01 - all_outs = [] - is_sequence = isinstance(generator, Sequence) - if not is_sequence and use_multiprocessing and workers > 1: - logging.warning( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the`keras.utils.Sequence' - ' class.')) - if steps is None: - if is_sequence: - steps = len(generator) - else: - raise ValueError('`steps=None` is only valid for a generator' - ' based on the `keras.utils.Sequence` class.' 
- ' Please specify `steps` or use the' - ' `keras.utils.Sequence` class.') - enqueuer = None - - try: - if workers > 0: - if is_sequence: - enqueuer = OrderedEnqueuer( - generator, use_multiprocessing=use_multiprocessing) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - output_generator = generator - - if verbose == 1: - progbar = Progbar(target=steps) - - while steps_done < steps: - generator_output = next(output_generator) - if isinstance(generator_output, tuple): - # Compatibility with the generators - # used for training. - if len(generator_output) == 2: - x, _ = generator_output - elif len(generator_output) == 3: - x, _, _ = generator_output - else: - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. Found: ' + str(generator_output)) - else: - # Assumes a generator that only - # yields inputs (not targets and sample weights). 
- x = generator_output - - outs = self.predict_on_batch(x) - if not isinstance(outs, list): - outs = [outs] - - if not all_outs: - for out in outs: - all_outs.append([]) - - for i, out in enumerate(outs): - all_outs[i].append(out) - steps_done += 1 - if verbose == 1: - progbar.update(steps_done) - - finally: - if enqueuer is not None: - enqueuer.stop() - - if len(all_outs) == 1: - if steps_done == 1: - return all_outs[0][0] - else: - return np.concatenate(all_outs[0]) - if steps_done == 1: - return [out[0] for out in all_outs] - else: - return [np.concatenate(out) for out in all_outs] + return training_generator.predict_generator( + self, + generator, + steps=steps, + max_queue_size=max_queue_size, + workers=workers, + use_multiprocessing=use_multiprocessing, + verbose=verbose) diff --git a/tensorflow/python/keras/_impl/keras/engine/training_arrays.py b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py new file mode 100644 index 0000000000..9291ef5fe6 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py @@ -0,0 +1,495 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Part of the Keras training engine related to plain array data. 
+""" +# pylint: disable=protected-access +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy + +import numpy as np + +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import callbacks as cbks +from tensorflow.python.keras._impl.keras.engine import training_utils +from tensorflow.python.keras._impl.keras.engine.base_layer import Layer +from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches +from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar +from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays + +try: + from scipy.sparse import issparse # pylint: disable=g-import-not-at-top +except ImportError: + issparse = None + + +def fit_loop(model, + inputs, + targets, + sample_weights=None, + batch_size=None, + epochs=100, + verbose=1, + callbacks=None, + val_inputs=None, + val_targets=None, + val_sample_weights=None, + shuffle=True, + callback_metrics=None, + initial_epoch=0, + steps_per_epoch=None, + validation_steps=None): + """Abstract fit function for arrays of data. + + Arguments: + model: Keras Model instance. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + batch_size: Integer batch size or None if unknown. + epochs: Number of times to iterate over the data + verbose: Verbosity mode, 0, 1 or 2 + callbacks: List of callbacks to be called during training + val_inputs: List of input arrays. + val_targets: List of target arrays. + val_sample_weights: Optional list of sample weight arrays. + shuffle: Whether to shuffle the data at the beginning of each epoch + callback_metrics: List of strings, the display names of the metrics + passed to the callbacks. They should be the + concatenation of list the display names of the outputs of + `f` and the list of display names of the outputs of `f_val`. 
+ initial_epoch: Epoch at which to start training + (useful for resuming a previous training run) + steps_per_epoch: Total number of steps (batches of samples) + before declaring one epoch finished and starting the + next epoch. Ignored with the default value of `None`. + validation_steps: Number of steps to run validation for + (only if doing validation from data tensors). + Ignored with the default value of `None`. + + Returns: + `History` object. + + Raises: + ValueError: in case of invalid arguments. + """ + model._make_train_function() + f = model.train_function + + sample_weights = sample_weights or [] + val_sample_weights = val_sample_weights or [] + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = inputs + targets + sample_weights + [1] + if val_inputs: + val_ins = val_inputs + val_targets + val_sample_weights + [1] + else: + ins = inputs + targets + sample_weights + if val_inputs: + val_ins = val_inputs + val_targets + val_sample_weights + if not val_inputs: + val_ins = [] + + do_validation = False + if val_inputs: + do_validation = True + if verbose and inputs and hasattr(inputs[0], 'shape') and hasattr( + val_inputs[0], 'shape'): + print('Train on %d samples, validate on %d samples' % + (inputs[0].shape[0], val_inputs[0].shape[0])) + if validation_steps: + do_validation = True + if steps_per_epoch is None: + raise ValueError('Can only use `validation_steps` ' + 'when doing step-wise ' + 'training, i.e. 
`steps_per_epoch` ' + 'must be set.') + + out_labels = model.metrics_names + if do_validation: + callback_metrics = copy.copy(out_labels) + [ + 'val_' + n for n in out_labels + ] + else: + callback_metrics = copy.copy(out_labels) + + num_train_samples = training_utils.check_num_samples( + ins, batch_size, steps_per_epoch, 'steps_per_epoch') + if num_train_samples is not None: + index_array = np.arange(num_train_samples) + + model.history = cbks.History() + all_callbacks = [cbks.BaseLogger( + stateful_metrics=model.stateful_metric_names)] + if verbose: + if steps_per_epoch is not None: + count_mode = 'steps' + else: + count_mode = 'samples' + all_callbacks.append( + cbks.ProgbarLogger( + count_mode, stateful_metrics=model.stateful_metric_names)) + all_callbacks += (callbacks or []) + [model.history] + callbacks = cbks.CallbackList(all_callbacks) + out_labels = out_labels or [] + + # it's possible to callback a different model than self + # (used by Sequential models) + if hasattr(model, 'callback_model') and model.callback_model: + callback_model = model.callback_model + else: + callback_model = model + + callbacks.set_model(callback_model) + + callbacks.set_params({ + 'batch_size': batch_size, + 'epochs': epochs, + 'steps': steps_per_epoch, + 'samples': num_train_samples, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': callback_metrics or [], + }) + callbacks.on_train_begin() + callback_model.stop_training = False + for cbk in callbacks: + cbk.validation_data = val_ins + + # To prevent a slowdown, we find beforehand the arrays that need conversion. 
+ feed = model._feed_inputs + model._feed_targets + model._feed_sample_weights + indices_for_conversion_to_dense = [] + for i in range(len(feed)): + if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): + indices_for_conversion_to_dense.append(i) + + for epoch in range(initial_epoch, epochs): + # Reset stateful metrics + for m in model.metrics: + if isinstance(m, Layer): + m.reset_states() + # Update callbacks + callbacks.on_epoch_begin(epoch) + epoch_logs = {} + if steps_per_epoch is not None: + for step_index in range(steps_per_epoch): + batch_logs = {} + batch_logs['batch'] = step_index + batch_logs['size'] = 1 + callbacks.on_batch_begin(step_index, batch_logs) + outs = f(ins) + + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(step_index, batch_logs) + if callback_model.stop_training: + break + + if do_validation: + val_outs = test_loop( + model, + val_inputs, + val_targets, + sample_weights=val_sample_weights, + batch_size=batch_size, + steps=validation_steps, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + else: + if shuffle == 'batch': + index_array = training_utils.batch_shuffle(index_array, batch_size) + elif shuffle: + np.random.shuffle(index_array) + + batches = make_batches(num_train_samples, batch_size) + + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + try: + if isinstance(ins[-1], int): + # Do not slice the training phase flag. + ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = slice_arrays(ins, batch_ids) + except TypeError: + raise TypeError('TypeError while preparing batch. 
' + 'If using HDF5 input data, ' + 'pass shuffle="batch".') + batch_logs = {} + batch_logs['batch'] = batch_index + batch_logs['size'] = len(batch_ids) + callbacks.on_batch_begin(batch_index, batch_logs) + for i in indices_for_conversion_to_dense: + ins_batch[i] = ins_batch[i].toarray() + + outs = f(ins_batch) + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(batch_index, batch_logs) + if callback_model.stop_training: + break + + if batch_index == len(batches) - 1: # Last batch. + if do_validation: + val_outs = test_loop( + model, + val_inputs, + val_targets, + sample_weights=val_sample_weights, + batch_size=batch_size, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + callbacks.on_epoch_end(epoch, epoch_logs) + if callback_model.stop_training: + break + callbacks.on_train_end() + return model.history + + +def predict_loop(model, inputs, batch_size=32, verbose=0, steps=None): + """Abstract method to loop over some data in batches. + + Arguments: + model: Keras Model instance. + inputs: list of tensors to be fed to `f`. + batch_size: integer batch size. + verbose: verbosity mode. + steps: Total number of steps (batches of samples) + before declaring `_predict_loop` finished. + Ignored with the default value of `None`. + + Returns: + Array of predictions (if the model has a single output) + or list of arrays of predictions + (if the model has multiple outputs). 
+ """ + model._make_predict_function() + f = model.predict_function + + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = inputs + [0] + else: + ins = inputs + + if hasattr(model, 'metrics'): + for m in model.metrics: + if isinstance(m, Layer): + m.reset_states() + + num_samples = training_utils.check_num_samples( + inputs, batch_size, steps, 'steps') + if verbose == 1: + if steps is not None: + progbar = Progbar(target=steps, + stateful_metrics=model.stateful_metric_names) + else: + progbar = Progbar(target=num_samples, + stateful_metrics=model.stateful_metric_names) + + indices_for_conversion_to_dense = [] + for i in range(len(model._feed_inputs)): + if (issparse is not None and issparse(inputs[i]) and + not K.is_sparse(model._feed_inputs[i])): + indices_for_conversion_to_dense.append(i) + + if steps is not None: + # Step-based predictions. + # Since we do not know how many samples + # we will see, we cannot pre-allocate + # the returned Numpy arrays. + # Instead, we store one array per batch seen + # and concatenate them upon returning. + unconcatenated_outs = [] + for step in range(steps): + batch_outs = f(ins) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if step == 0: + for batch_out in batch_outs: + unconcatenated_outs.append([]) + for i, batch_out in enumerate(batch_outs): + unconcatenated_outs[i].append(batch_out) + if verbose == 1: + progbar.update(step + 1) + if len(unconcatenated_outs) == 1: + return np.concatenate(unconcatenated_outs[0], axis=0) + return [ + np.concatenate(unconcatenated_outs[i], axis=0) + for i in range(len(unconcatenated_outs)) + ] + else: + # Sample-based predictions. + outs = [] + batches = make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + if ins and isinstance(ins[-1], int): + # Do not slice the training phase flag. 
+ ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = slice_arrays(ins, batch_ids) + for i in indices_for_conversion_to_dense: + ins_batch[i] = ins_batch[i].toarray() + + batch_outs = f(ins_batch) + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if batch_index == 0: + # Pre-allocate the results arrays. + for batch_out in batch_outs: + shape = (num_samples,) + batch_out.shape[1:] + outs.append(np.zeros(shape, dtype=batch_out.dtype)) + for i, batch_out in enumerate(batch_outs): + outs[i][batch_start:batch_end] = batch_out + if verbose == 1: + progbar.update(batch_end) + if len(outs) == 1: + return outs[0] + return outs + + +def test_loop(model, inputs, targets, + sample_weights=None, + batch_size=None, + verbose=0, + steps=None): + """Abstract method to loop over some data in batches. + + Arguments: + model: Keras Model instance. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + batch_size: integer batch size or `None`. + verbose: verbosity mode. + steps: Total number of steps (batches of samples) + before declaring predictions finished. + Ignored with the default value of `None`. + + Returns: + Scalar loss (if the model has a single output and no metrics) + or list of scalars (if the model has multiple outputs + and/or metrics). The attribute `model.metrics_names` will give you + the display labels for the scalar outputs. 
+ """ + model._make_test_function() + f = model.test_function + + sample_weights = sample_weights or [] + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + ins = inputs + targets + sample_weights + [0] + else: + ins = inputs + targets + sample_weights + + if hasattr(model, 'metrics'): + for m in model.metrics: + if isinstance(m, Layer): + m.reset_states() + stateful_metric_indices = [ + i for i, name in enumerate(model.metrics_names) + if str(name) in model.stateful_metric_names + ] + else: + stateful_metric_indices = [] + + num_samples = training_utils.check_num_samples( + ins, batch_size, steps, 'steps') + outs = [] + if verbose == 1: + if steps is not None: + progbar = Progbar(target=steps) + else: + progbar = Progbar(target=num_samples) + + # To prevent a slowdown, we find beforehand the arrays that need conversion. + feed = model._feed_inputs + model._feed_targets + model._feed_sample_weights + indices_for_conversion_to_dense = [] + for i in range(len(feed)): + if issparse is not None and issparse(ins[i]) and not K.is_sparse(feed[i]): + indices_for_conversion_to_dense.append(i) + + if steps is not None: + for step in range(steps): + batch_outs = f(ins) + if isinstance(batch_outs, list): + if step == 0: + for _ in enumerate(batch_outs): + outs.append(0.) + for i, batch_out in enumerate(batch_outs): + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out + else: + if step == 0: + outs.append(0.) + outs[0] += batch_outs + if verbose == 1: + progbar.update(step + 1) + for i in range(len(outs)): + if i not in stateful_metric_indices: + outs[i] /= steps + else: + batches = make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + if isinstance(ins[-1], int): + # Do not slice the training phase flag. 
+ ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + else: + ins_batch = slice_arrays(ins, batch_ids) + for i in indices_for_conversion_to_dense: + ins_batch[i] = ins_batch[i].toarray() + + batch_outs = f(ins_batch) + + if isinstance(batch_outs, list): + if batch_index == 0: + for batch_out in enumerate(batch_outs): + outs.append(0.) + for i, batch_out in enumerate(batch_outs): + if i in stateful_metric_indices: + outs[i] = batch_out + else: + outs[i] += batch_out * len(batch_ids) + else: + if batch_index == 0: + outs.append(0.) + outs[0] += batch_outs * len(batch_ids) + if verbose == 1: + progbar.update(batch_end) + for i in range(len(outs)): + if i not in stateful_metric_indices: + outs[i] /= num_samples + if len(outs) == 1: + return outs[0] + return outs diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index cdf189adef..75c96e6916 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -12,13 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== -"""Keras training and evaluation routines. +"""Keras training and evaluation routines for eager execution. 
""" # pylint: disable=protected-access from __future__ import absolute_import from __future__ import division from __future__ import print_function + +import copy + import numpy as np + from tensorflow.python.eager.backprop import GradientTape from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util @@ -26,6 +30,7 @@ from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module +from tensorflow.python.keras._impl.keras.engine import training_utils from tensorflow.python.keras._impl.keras.utils.generic_utils import make_batches from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays @@ -99,15 +104,15 @@ def _eager_metrics_fn(model, outputs, targets): return metric_names, metric_results -def _model_loss(model, inputs, targets, training=False): +def _model_loss(model, inputs, targets, sample_weights=None, training=False): """Calculates the loss for a given model. Arguments: - model: The model on which metrics are being calculated. - inputs: The inputs of the given model. This is typically the mini batch of - data that is fed to the model. - targets: The predictions or targets of the given model. - training: Whether the model should be run in inference or training mode. + model: The model on which metrics are being calculated. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + training: Whether the model should be run in inference or training mode. 
Returns: Returns the model output, total loss and loss value calculated using the @@ -134,23 +139,20 @@ def _model_loss(model, inputs, targets, training=False): loss_metrics = [] with K.name_scope('loss'): for i, loss_fn in enumerate(model.loss_functions): - # compute the loss - output_loss = _eager_loss_fn(outs[i], targets[i], loss_fn, - model.output_names[i]) - loss_metrics.append(K.mean(output_loss)) + if sample_weights: + weights = sample_weights[i] + else: + weights = None # TODO(fchollet): support masking; in practice `_keras_mask` is never # set in this context currently. mask = outs[i]._keras_mask - # adapted from weighted_loss_fn - if mask is not None: - # mask should have the same shape as output_loss - output_loss *= mask - # the loss per batch should be proportional - # to the number of unmasked samples. - output_loss /= K.mean(mask) - # TODO(fchollet): support sample weighting + weighted_masked_fn = training_utils.weighted_masked_objective(loss_fn) + with K.name_scope(model.output_names[i] + '_loss'): + output_loss = weighted_masked_fn( + outs[i], targets[i], weights, mask=mask) + loss_metrics.append(K.mean(output_loss)) loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -171,16 +173,20 @@ def _model_loss(model, inputs, targets, training=False): return outs, total_loss, loss_metrics -def _process_single_batch(eager_model_inputs, eager_model_outputs, model, +def _process_single_batch(model, + inputs, + targets, + sample_weights=None, training=False): """Calculate the loss and gradient for one input batch. The model weights are updated if training is set to True. Arguments: - eager_model_inputs: Input batch data. - eager_model_outputs: Output batch data. model: Model whose loss has to be calculated. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. training: The boolean represents if the weights of the model are updated. 
'fit' methods will set this to True while 'evaluate' methods will set this to False. @@ -193,8 +199,8 @@ def _process_single_batch(eager_model_inputs, eager_model_outputs, model, """ K.set_learning_phase(training) with GradientTape() as tape: - outs, loss, loss_metrics = _model_loss(model, eager_model_inputs, - eager_model_outputs, + outs, loss, loss_metrics = _model_loss(model, inputs, targets, + sample_weights=sample_weights, training=training) if loss is None: raise ValueError('The model cannot be run ' @@ -211,62 +217,61 @@ def _process_single_batch(eager_model_inputs, eager_model_outputs, model, return outs, loss, loss_metrics -def train_on_batch(model, ins): +def train_on_batch(model, inputs, targets, sample_weights=None): """Calculates the loss and gradient updates for one input batch. Arguments: - model: Given model on which loss and gradients are calculated. - ins: Input and output batch numpy arrays. + model: Model whose loss has to be calculated. + inputs: Input batch data. + targets: Target batch data. + sample_weights: Sample weight batch data. Returns: total loss and the loss associated with each output. 
""" - ins_batch_converted = [] - for ib in ins: - if ib is not None: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) + inputs = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs] + targets = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets] + sample_weights = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None for val in sample_weights] outs, loss, _ = _process_single_batch( - eager_model_inputs, eager_model_outputs, model, training=True) + model, inputs, targets, sample_weights=sample_weights, training=True) if not isinstance(outs, list): outs = [outs] _, metrics_results = _eager_metrics_fn( - model, outs, eager_model_outputs) + model, outs, targets) if not isinstance(loss, list): loss = [loss] return loss + metrics_results -def test_on_batch(model, ins): +def test_on_batch(model, inputs, targets, sample_weights=None): """Calculates the loss for one input batch. Arguments: - model: Given model on which loss is calculated. - ins: Input and output batch numpy arrays. + model: Model whose loss has to be calculated. + inputs: Input batch data. + targets: Target batch data. + sample_weights: Sample weight batch data. Returns: total loss, loss and metrics associated with each output. 
""" - ins_batch_converted = [] - for ib in ins: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) + inputs = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs] + targets = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets] + sample_weights = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None for val in sample_weights] outs, loss, loss_metrics = _process_single_batch( - eager_model_inputs, eager_model_outputs, model, training=False) + model, inputs, targets, sample_weights=sample_weights, training=False) if not isinstance(outs, list): outs = [outs] metric_names, metrics_results = _eager_metrics_fn( - model, outs, eager_model_outputs) + model, outs, targets) model.metrics_names.append(metric_names) if not isinstance(loss, list): loss = [loss] @@ -275,32 +280,35 @@ def test_on_batch(model, ins): def fit_loop( model, - ins, - out_labels=None, + inputs, + targets, + sample_weights=None, + val_inputs=None, + val_targets=None, + val_sample_weights=None, batch_size=None, epochs=100, verbose=1, callbacks=None, - val_ins=None, shuffle=True, callback_metrics=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None): - """Abstract fit function for `f(ins)`. - - Assume that f returns a list, labeled by out_labels. + """Abstract fit function for eager execution. Arguments: model: Instance of the model that is being executed in Eager mode. - ins: List of tensors to be fed to `f` - out_labels: List of strings, display names of - the outputs of `f` + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. + val_inputs: Input data for validation. 
+ val_targets: Target data for validation. + val_sample_weights: Sample weight data for validation. batch_size: Integer batch size or None if unknown. epochs: Number of times to iterate over the data verbose: Verbosity mode, 0, 1 or 2 callbacks: List of callbacks to be called during training - val_ins: List of tensors to be fed to `val_f` shuffle: Whether to shuffle the data at the beginning of each epoch callback_metrics: List of strings, the display names of the metrics passed to the callbacks. They should be the @@ -324,20 +332,35 @@ def fit_loop( K.set_learning_phase(True) do_validation = False - if val_ins: + if val_inputs: do_validation = True - if (verbose and ins and hasattr(ins[0], 'shape') and - hasattr(val_ins[0], 'shape')): + if (verbose and inputs and hasattr(inputs[0], 'shape') and + hasattr(val_inputs[0], 'shape')): print('Train on %d samples, validate on %d samples' % - (ins[0].shape[0], val_ins[0].shape[0])) + (inputs[0].shape[0], val_inputs[0].shape[0])) if validation_steps: if steps_per_epoch is None: raise ValueError('Can only use `validation_steps` when doing step-wise ' 'training, i.e. 
`steps_per_epoch` must be set.') do_validation = True - num_train_samples = model._check_num_samples( - ins, batch_size, steps_per_epoch, 'steps_per_epoch') + out_labels = model.metrics_names + if do_validation: + callback_metrics = copy.copy(out_labels) + [ + 'val_' + n for n in out_labels + ] + else: + callback_metrics = copy.copy(out_labels) + + if sample_weights: + feed_data = inputs + targets + sample_weights + else: + feed_data = inputs + targets + num_train_samples = training_utils.check_num_samples( + feed_data, + batch_size=batch_size, + steps=steps_per_epoch, + steps_name='steps_per_epoch') if num_train_samples is not None: index_array = np.arange(num_train_samples) @@ -351,7 +374,6 @@ def fit_loop( count_mode = 'samples' callbacks += [cbks.ProgbarLogger(count_mode)] callbacks = cbks.CallbackList(callbacks) - out_labels = out_labels or [] # it's possible to callback a different model than self # (used by Sequential models) @@ -374,7 +396,12 @@ def fit_loop( callbacks.on_train_begin() callback_model.stop_training = False for cbk in callbacks: - cbk.validation_data = val_ins + if not val_inputs: + cbk.validation_data = [] + elif val_sample_weights: + cbk.validation_data = val_inputs + val_targets + val_sample_weights + else: + cbk.validation_data = val_inputs + val_targets for epoch in range(initial_epoch, epochs): callbacks.on_epoch_begin(epoch) @@ -389,11 +416,12 @@ def fit_loop( for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] try: - if isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + inputs_batch = slice_arrays(inputs, batch_ids) + targets_batch = slice_arrays(targets, batch_ids) + if sample_weights: + sample_weights_batch = slice_arrays(sample_weights, batch_ids) else: - ins_batch = slice_arrays(ins, batch_ids) + sample_weights_batch = None except TypeError: raise TypeError('TypeError while preparing batch. 
' 'If using HDF5 input data, ' @@ -404,21 +432,22 @@ def fit_loop( callbacks.on_batch_begin(batch_index, batch_logs) - ins_batch_converted = [] - for ib in ins_batch: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) - - outs, loss, loss_metrics = _process_single_batch(eager_model_inputs, - eager_model_outputs, - model, - training=True) + inputs_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] + targets_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets_batch] + if sample_weights: + sample_weights_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None + for val in sample_weights_batch] + + outs, loss, loss_metrics = _process_single_batch( + model, + inputs_batch, + targets_batch, + sample_weights=sample_weights_batch, + training=True) if not isinstance(outs, list): outs = [outs] @@ -426,8 +455,8 @@ def fit_loop( for l, o in zip(out_labels, outs): batch_logs[l] = o # Required for Eager mode - metrics_names, metrics_results = _eager_metrics_fn(model, outs, - eager_model_outputs) + metrics_names, metrics_results = _eager_metrics_fn( + model, outs, targets_batch) batch_logs['loss'] = tensor_util.constant_value(K.mean(loss)) # TODO(anjalisridhar): Move this to compile to avoid duplicate code. @@ -461,7 +490,10 @@ def fit_loop( if batch_index == len(batches) - 1: # Last batch. if do_validation: val_outs = test_loop( - model, val_ins, batch_size=batch_size, verbose=0) + model, val_inputs, val_targets, + sample_weights=val_sample_weights, + batch_size=batch_size, + verbose=0) if not isinstance(val_outs, list): val_outs = [val_outs] # Same labels assumed. 
@@ -474,12 +506,18 @@ def fit_loop( return model.history -def test_loop(model, ins, batch_size=None, verbose=0, steps=None): +def test_loop(model, inputs, targets, + sample_weights=None, + batch_size=None, + verbose=0, + steps=None): """Abstract method to loop over some data in batches. Arguments: model: Model instance that is being evaluated in Eager mode. - ins: list of tensors to be fed to `f`. + inputs: List of input arrays. + targets: List of target arrays. + sample_weights: Optional list of sample weight arrays. batch_size: integer batch size or `None`. verbose: verbosity mode. steps: Total number of steps (batches of samples) @@ -493,7 +531,11 @@ def test_loop(model, ins, batch_size=None, verbose=0, steps=None): the display labels for the scalar outputs. """ K.set_learning_phase(False) - num_samples = model._check_num_samples(ins, batch_size, steps, 'steps') + feed_data = inputs + targets + if sample_weights: + feed_data += sample_weights + num_samples = training_utils.check_num_samples( + feed_data, batch_size=batch_size, steps=steps, steps_name='steps') outs = [] if verbose == 1: progbar = Progbar(target=num_samples) @@ -501,29 +543,30 @@ def test_loop(model, ins, batch_size=None, verbose=0, steps=None): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): - # Do not slice the training phase flag. 
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] + inputs_batch = slice_arrays(inputs, batch_ids) + targets_batch = slice_arrays(targets, batch_ids) + if sample_weights: + sample_weights_batch = slice_arrays(sample_weights, batch_ids) else: - ins_batch = slice_arrays(ins, batch_ids) - - ins_batch_converted = [] - for ib in ins_batch: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) - - eager_model_inputs = [] - eager_model_outputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) - - for i in range(len(model.inputs), len(ins_batch_converted)): - eager_model_outputs.append(ins_batch_converted[i]) - - loss_outs, loss, loss_metrics = _model_loss(model, eager_model_inputs, - eager_model_outputs, - training=False) - _, metrics_results = _eager_metrics_fn(model, loss_outs, - eager_model_outputs) + sample_weights_batch = None + + inputs_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] + targets_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets_batch] + if sample_weights: + sample_weights_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) + if val is not None else None + for val in sample_weights_batch] + + loss_outs, loss, loss_metrics = _model_loss( + model, + inputs_batch, + targets_batch, + sample_weights=sample_weights_batch, + training=False) + _, metrics_results = _eager_metrics_fn(model, loss_outs, targets_batch) batch_outs = [] for _, v in zip(model.metrics_names, [K.mean(loss)] + loss_metrics + metrics_results): @@ -549,12 +592,15 @@ def test_loop(model, ins, batch_size=None, verbose=0, steps=None): return outs -def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): +def predict_loop(model, inputs, + batch_size=32, + verbose=0, + steps=None): """Abstract method to loop over some data in batches. Arguments: model: - ins: list of tensors to be fed to `f`. + inputs: List of input arrays. 
batch_size: integer batch size. verbose: verbosity mode. steps: Total number of steps (batches of samples) @@ -567,7 +613,8 @@ def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): (if the model has multiple outputs). """ K.set_learning_phase(False) - num_samples = model._check_num_samples(ins, batch_size, steps, 'steps') + num_samples = training_utils.check_num_samples( + inputs, batch_size, steps, 'steps') if verbose == 1: if steps is not None: progbar = Progbar(target=steps) @@ -579,30 +626,21 @@ def predict_loop(model, ins, batch_size=32, verbose=0, steps=None): index_array = np.arange(num_samples) for batch_index, (batch_start, batch_end) in enumerate(batches): batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - - ins_batch_converted = [] - for ib in ins_batch: - ins_batch_converted.append(ops.convert_to_tensor(ib, dtype=K.floatx())) + inputs_batch = slice_arrays(inputs, batch_ids) - eager_model_inputs = [] - for i in range(len(model.inputs)): - eager_model_inputs.append(ins_batch_converted[i]) + inputs_batch = [ + ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] - if len(eager_model_inputs) == 1: + if len(inputs_batch) == 1: if model._expects_training_arg: - batch_outs = model.call(eager_model_inputs[0], training=False) + batch_outs = model.call(inputs_batch[0], training=False) else: - batch_outs = model.call(eager_model_inputs[0]) + batch_outs = model.call(inputs_batch[0]) else: if model._expects_training_arg: - batch_outs = model.call(eager_model_inputs, training=False) + batch_outs = model.call(inputs_batch, training=False) else: - batch_outs = model.call(eager_model_inputs) + batch_outs = model.call(inputs_batch) if not isinstance(batch_outs, list): batch_outs = [batch_outs] diff --git 
a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py index 550b86a71d..8848b393d5 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager_test.py @@ -309,6 +309,229 @@ class TrainingTest(test.TestCase): optimizer='rms') +class LossWeightingTest(test.TestCase): + + def test_class_weights(self): + num_classes = 5 + batch_size = 5 + weighted_class = 3 + train_samples = 300 + test_samples = 300 + input_dim = 5 + + model = keras.models.Sequential() + model.add(keras.layers.Dense(10, input_shape=(input_dim,))) + model.add(keras.layers.Activation('relu')) + model.add(keras.layers.Dense(num_classes)) + model.add(keras.layers.Activation('softmax')) + model.compile(loss='categorical_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + + np.random.seed(1337) + (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + int_y_test = y_test.copy() + int_y_train = y_train.copy() + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + test_ids = np.where(int_y_test == np.array(weighted_class))[0] + + class_weight = dict([(i, 1.) for i in range(num_classes)]) + class_weight[weighted_class] = 4. + + sample_weight = np.ones((y_train.shape[0])) + sample_weight[int_y_train == weighted_class] = 4. 
+ + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + class_weight=class_weight, + validation_data=(x_train, y_train, sample_weight)) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + class_weight=class_weight) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + class_weight=class_weight, + validation_split=0.1) + + model.train_on_batch( + x_train[:batch_size], y_train[:batch_size], class_weight=class_weight) + ref_score = model.evaluate(x_test, y_test, verbose=0) + score = model.evaluate( + x_test[test_ids, :], y_test[test_ids, :], verbose=0) + self.assertLess(score, ref_score) + + def test_sample_weights(self): + num_classes = 5 + batch_size = 5 + weighted_class = 3 + train_samples = 300 + test_samples = 300 + input_dim = 5 + + model = keras.models.Sequential() + model.add(keras.layers.Dense(10, input_shape=(input_dim,))) + model.add(keras.layers.Activation('relu')) + model.add(keras.layers.Dense(num_classes)) + model.add(keras.layers.Activation('softmax')) + model.compile(loss='categorical_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + + np.random.seed(43) + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + int_y_train = y_train.copy() + y_train = keras.utils.to_categorical(y_train, num_classes) + + class_weight = dict([(i, 1.) for i in range(num_classes)]) + class_weight[weighted_class] = 4. + + sample_weight = np.ones((y_train.shape[0])) + sample_weight[int_y_train == weighted_class] = 4. 
+ + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + sample_weight=sample_weight) + model.fit( + x_train, + y_train, + batch_size=batch_size, + epochs=2, + verbose=0, + sample_weight=sample_weight, + validation_split=0.1) + model.train_on_batch( + x_train[:batch_size], + y_train[:batch_size], + sample_weight=sample_weight[:batch_size]) + model.test_on_batch( + x_train[:batch_size], + y_train[:batch_size], + sample_weight=sample_weight[:batch_size]) + + def test_temporal_sample_weights(self): + num_classes = 5 + weighted_class = 3 + train_samples = 1000 + test_samples = 1000 + input_dim = 5 + timesteps = 3 + + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(num_classes), + input_shape=(timesteps, input_dim))) + model.add(keras.layers.Activation('softmax')) + + np.random.seed(1337) + (_, y_train), _ = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + int_y_train = y_train.copy() + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + + class_weight = dict([(i, 1.) for i in range(num_classes)]) + class_weight[weighted_class] = 2. + + sample_weight = np.ones((y_train.shape[0])) + sample_weight[int_y_train == weighted_class] = 2. 
+ with self.assertRaises(ValueError): + model.compile( + loss='binary_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001), + sample_weight_mode='temporal') + + def test_class_weight_invalid_use_case(self): + num_classes = 5 + train_samples = 1000 + test_samples = 1000 + input_dim = 5 + timesteps = 3 + + model = keras.models.Sequential() + model.add( + keras.layers.TimeDistributed( + keras.layers.Dense(num_classes), + input_shape=(timesteps, input_dim))) + model.add(keras.layers.Activation('softmax')) + model.compile( + loss='binary_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001)) + + (x_train, y_train), _ = testing_utils.get_test_data( + train_samples=train_samples, + test_samples=test_samples, + input_shape=(input_dim,), + num_classes=num_classes) + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + class_weight = dict([(i, 1.) for i in range(num_classes)]) + + del class_weight[1] + with self.assertRaises(ValueError): + model.fit(x_train, y_train, + epochs=0, verbose=0, class_weight=class_weight) + + with self.assertRaises(ValueError): + model.compile( + loss='binary_crossentropy', + optimizer=RMSPropOptimizer(learning_rate=0.001), + sample_weight_mode=[]) + + # Build multi-output model + x = keras.Input((3,)) + y1 = keras.layers.Dense(4, name='1')(x) + y2 = keras.layers.Dense(4, name='2')(x) + model = keras.models.Model(x, [y1, y2]) + model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse') + x_np = np.random.random((10, 3)) + y_np = np.random.random((10, 4)) + w_np = np.random.random((10,)) + # This will work + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': w_np}) + # These will not + with self.assertRaises(ValueError): + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=[w_np]) + with self.assertRaises(TypeError): + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight=w_np) + with self.assertRaises(ValueError): + bad_w_np = 
np.random.random((11,)) + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) + with self.assertRaises(ValueError): + bad_w_np = np.random.random((10, 2)) + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) + with self.assertRaises(ValueError): + bad_w_np = np.random.random((10, 2, 2)) + model.fit(x_np, [y_np, y_np], epochs=1, sample_weight={'1': bad_w_np}) + + if __name__ == '__main__': # Bazel sets these environment variables to very long paths. # Tempfile uses them to create long paths, and in turn multiprocessing diff --git a/tensorflow/python/keras/_impl/keras/engine/training_generator.py b/tensorflow/python/keras/_impl/keras/engine/training_generator.py new file mode 100644 index 0000000000..4af62c85d5 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/training_generator.py @@ -0,0 +1,439 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Part of the Keras training engine related to Python generators of array data. 
+""" +# pylint: disable=protected-access +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import callbacks as cbks +from tensorflow.python.keras._impl.keras.utils.data_utils import GeneratorEnqueuer +from tensorflow.python.keras._impl.keras.utils.data_utils import OrderedEnqueuer +from tensorflow.python.keras._impl.keras.utils.data_utils import Sequence +from tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar +from tensorflow.python.platform import tf_logging as logging + + +def fit_generator(model, + generator, + steps_per_epoch=None, + epochs=1, + verbose=1, + callbacks=None, + validation_data=None, + validation_steps=None, + class_weight=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + shuffle=True, + initial_epoch=0): + """See docstring for `Model.fit_generator`.""" + wait_time = 0.01 # in seconds + epoch = initial_epoch + + do_validation = bool(validation_data) + model._make_train_function() + if do_validation: + model._make_test_function() + + is_sequence = isinstance(generator, Sequence) + if not is_sequence and use_multiprocessing and workers > 1: + logging.warning( + UserWarning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) + if steps_per_epoch is None: + if is_sequence: + steps_per_epoch = len(generator) + else: + raise ValueError('`steps_per_epoch=None` is only valid for a' + ' generator based on the `keras.utils.Sequence`' + ' class. 
Please specify `steps_per_epoch` or use' + ' the `keras.utils.Sequence` class.') + + # python 2 has 'next', 3 has '__next__' + # avoid any explicit version checks + val_gen = ( + hasattr(validation_data, 'next') or + hasattr(validation_data, '__next__') or + isinstance(validation_data, Sequence)) + if (val_gen and not isinstance(validation_data, Sequence) and + not validation_steps): + raise ValueError('`validation_steps=None` is only valid for a' + ' generator based on the `keras.utils.Sequence`' + ' class. Please specify `validation_steps` or use' + ' the `keras.utils.Sequence` class.') + + # Prepare display labels. + out_labels = model.metrics_names + callback_metrics = out_labels + ['val_%s' % n for n in out_labels] + + # prepare callbacks + model.history = cbks.History() + callbacks = [cbks.BaseLogger()] + (callbacks or []) + [model.history] + if verbose: + callbacks += [cbks.ProgbarLogger(count_mode='steps')] + callbacks = cbks.CallbackList(callbacks) + + # it's possible to callback a different model than self: + if hasattr(model, 'callback_model') and model.callback_model: + callback_model = model.callback_model + else: + callback_model = model + callbacks.set_model(callback_model) + callbacks.set_params({ + 'epochs': epochs, + 'steps': steps_per_epoch, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': callback_metrics, + }) + callbacks.on_train_begin() + + enqueuer = None + val_enqueuer = None + + try: + if do_validation: + if val_gen: + if workers > 0: + if isinstance(validation_data, Sequence): + val_enqueuer = OrderedEnqueuer( + validation_data, use_multiprocessing=use_multiprocessing) + if validation_steps is None: + validation_steps = len(validation_data) + else: + val_enqueuer = GeneratorEnqueuer( + validation_data, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + val_enqueuer.start(workers=workers, max_queue_size=max_queue_size) + validation_generator = val_enqueuer.get() + else: + validation_generator = 
validation_data + else: + if len(validation_data) == 2: + val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence + val_sample_weight = None + elif len(validation_data) == 3: + val_x, val_y, val_sample_weight = validation_data # pylint: disable=unpacking-non-sequence + else: + raise ValueError( + '`validation_data` should be a tuple ' + '`(val_x, val_y, val_sample_weight)` ' + 'or `(val_x, val_y)`. Found: ' + str(validation_data)) + val_x, val_y, val_sample_weights = model._standardize_user_data( + val_x, val_y, val_sample_weight) + val_data = val_x + val_y + val_sample_weights + if model.uses_learning_phase and not isinstance( + K.learning_phase(), int): + val_data += [0] + for cbk in callbacks: + cbk.validation_data = val_data + + if workers > 0: + if is_sequence: + enqueuer = OrderedEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + shuffle=shuffle) + else: + enqueuer = GeneratorEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + output_generator = enqueuer.get() + else: + output_generator = generator + + callback_model.stop_training = False + # Construct epoch logs. + epoch_logs = {} + while epoch < epochs: + callbacks.on_epoch_begin(epoch) + steps_done = 0 + batch_index = 0 + while steps_done < steps_per_epoch: + generator_output = next(output_generator) + + if not hasattr(generator_output, '__len__'): + raise ValueError('Output of generator should be ' + 'a tuple `(x, y, sample_weight)` ' + 'or `(x, y)`. Found: ' + str(generator_output)) + + if len(generator_output) == 2: + x, y = generator_output + sample_weight = None + elif len(generator_output) == 3: + x, y, sample_weight = generator_output + else: + raise ValueError('Output of generator should be ' + 'a tuple `(x, y, sample_weight)` ' + 'or `(x, y)`. 
Found: ' + str(generator_output)) + # build batch logs + batch_logs = {} + if isinstance(x, list): + batch_size = x[0].shape[0] + elif isinstance(x, dict): + batch_size = list(x.values())[0].shape[0] + else: + batch_size = x.shape[0] + batch_logs['batch'] = batch_index + batch_logs['size'] = batch_size + callbacks.on_batch_begin(batch_index, batch_logs) + + outs = model.train_on_batch( + x, y, sample_weight=sample_weight, class_weight=class_weight) + + if not isinstance(outs, list): + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(batch_index, batch_logs) + + batch_index += 1 + steps_done += 1 + + # Epoch finished. + if steps_done >= steps_per_epoch and do_validation: + if val_gen: + val_outs = evaluate_generator( + model, validation_generator, validation_steps, workers=0) + else: + # No need for try/except because + # data has already been validated. + val_outs = model.evaluate( + val_x, + val_y, + batch_size=batch_size, + sample_weight=val_sample_weights, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. 
+ for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + + if callback_model.stop_training: + break + + callbacks.on_epoch_end(epoch, epoch_logs) + epoch += 1 + if callback_model.stop_training: + break + + finally: + try: + if enqueuer is not None: + enqueuer.stop() + finally: + if val_enqueuer is not None: + val_enqueuer.stop() + + callbacks.on_train_end() + return model.history + + +def evaluate_generator(model, + generator, + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False): + """See docstring for `Model.evaluate_generator`.""" + model._make_test_function() + + steps_done = 0 + wait_time = 0.01 + all_outs = [] + batch_sizes = [] + is_sequence = isinstance(generator, Sequence) + if not is_sequence and use_multiprocessing and workers > 1: + logging.warning( + UserWarning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) + if steps is None: + if is_sequence: + steps = len(generator) + else: + raise ValueError('`steps=None` is only valid for a generator' + ' based on the `keras.utils.Sequence` class.' + ' Please specify `steps` or use the' + ' `keras.utils.Sequence` class.') + enqueuer = None + + try: + if workers > 0: + if is_sequence: + enqueuer = OrderedEnqueuer( + generator, use_multiprocessing=use_multiprocessing) + else: + enqueuer = GeneratorEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + output_generator = enqueuer.get() + else: + output_generator = generator + + while steps_done < steps: + generator_output = next(output_generator) + if not hasattr(generator_output, '__len__'): + raise ValueError('Output of generator should be a tuple ' + '(x, y, sample_weight) ' + 'or (x, y). 
Found: ' + str(generator_output)) + if len(generator_output) == 2: + x, y = generator_output + sample_weight = None + elif len(generator_output) == 3: + x, y, sample_weight = generator_output + else: + raise ValueError('Output of generator should be a tuple ' + '(x, y, sample_weight) ' + 'or (x, y). Found: ' + str(generator_output)) + outs = model.test_on_batch(x, y, sample_weight=sample_weight) + + if isinstance(x, list): + batch_size = x[0].shape[0] + elif isinstance(x, dict): + batch_size = list(x.values())[0].shape[0] + else: + batch_size = x.shape[0] + if batch_size == 0: + raise ValueError('Received an empty batch. ' + 'Batches should at least contain one item.') + all_outs.append(outs) + + steps_done += 1 + batch_sizes.append(batch_size) + + finally: + if enqueuer is not None: + enqueuer.stop() + + if not isinstance(outs, list): + return np.average(np.asarray(all_outs), weights=batch_sizes) + else: + averages = [] + for i in range(len(outs)): + averages.append( + np.average([out[i] for out in all_outs], weights=batch_sizes)) + return averages + + +def predict_generator(model, + generator, + steps=None, + max_queue_size=10, + workers=1, + use_multiprocessing=False, + verbose=0): + """See docstring for `Model.predict_generator`.""" + model._make_predict_function() + + steps_done = 0 + wait_time = 0.01 + all_outs = [] + is_sequence = isinstance(generator, Sequence) + if not is_sequence and use_multiprocessing and workers > 1: + logging.warning( + UserWarning('Using a generator with `use_multiprocessing=True`' + ' and multiple workers may duplicate your data.' + ' Please consider using the`keras.utils.Sequence' + ' class.')) + if steps is None: + if is_sequence: + steps = len(generator) + else: + raise ValueError('`steps=None` is only valid for a generator' + ' based on the `keras.utils.Sequence` class.' 
+ ' Please specify `steps` or use the' + ' `keras.utils.Sequence` class.') + enqueuer = None + + try: + if workers > 0: + if is_sequence: + enqueuer = OrderedEnqueuer( + generator, use_multiprocessing=use_multiprocessing) + else: + enqueuer = GeneratorEnqueuer( + generator, + use_multiprocessing=use_multiprocessing, + wait_time=wait_time) + enqueuer.start(workers=workers, max_queue_size=max_queue_size) + output_generator = enqueuer.get() + else: + output_generator = generator + + if verbose == 1: + progbar = Progbar(target=steps) + + while steps_done < steps: + generator_output = next(output_generator) + if isinstance(generator_output, tuple): + # Compatibility with the generators + # used for training. + if len(generator_output) == 2: + x, _ = generator_output + elif len(generator_output) == 3: + x, _, _ = generator_output + else: + raise ValueError('Output of generator should be ' + 'a tuple `(x, y, sample_weight)` ' + 'or `(x, y)`. Found: ' + str(generator_output)) + else: + # Assumes a generator that only + # yields inputs (not targets and sample weights). 
+ x = generator_output + + outs = model.predict_on_batch(x) + if not isinstance(outs, list): + outs = [outs] + + if not all_outs: + for out in outs: + all_outs.append([]) + + for i, out in enumerate(outs): + all_outs[i].append(out) + steps_done += 1 + if verbose == 1: + progbar.update(steps_done) + + finally: + if enqueuer is not None: + enqueuer.stop() + + if len(all_outs) == 1: + if steps_done == 1: + return all_outs[0][0] + else: + return np.concatenate(all_outs[0]) + if steps_done == 1: + return [out[0] for out in all_outs] + else: + return [np.concatenate(out) for out in all_outs] diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index 6ca5941e9a..38ba0f0eae 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -25,7 +25,7 @@ import numpy as np from tensorflow.python.keras._impl import keras from tensorflow.python.keras._impl.keras import testing_utils -from tensorflow.python.keras._impl.keras.engine.training import _weighted_masked_objective +from tensorflow.python.keras._impl.keras.engine.training_utils import weighted_masked_objective from tensorflow.python.keras._impl.keras.utils.generic_utils import slice_arrays from tensorflow.python.platform import test @@ -705,7 +705,7 @@ class LossMaskingTest(test.TestCase): def test_loss_masking(self): with self.test_session(): - weighted_loss = _weighted_masked_objective(keras.losses.get('mae')) + weighted_loss = weighted_masked_objective(keras.losses.get('mae')) shape = (3, 4, 2) x = np.arange(24).reshape(shape) y = 2 * x @@ -1037,16 +1037,16 @@ class TestGeneratorMethods(test.TestCase): class TestTrainingUtils(test.TestCase): def test_check_array_lengths(self): - keras.engine.training._check_array_lengths(None, None, None) + keras.engine.training_utils.check_array_lengths(None, None, None) a_np = np.random.random((4, 3, 3)) - 
keras.engine.training._check_array_lengths(a_np, a_np, a_np) - keras.engine.training._check_array_lengths( + keras.engine.training_utils.check_array_lengths(a_np, a_np, a_np) + keras.engine.training_utils.check_array_lengths( [a_np, a_np], [a_np, a_np], [a_np, a_np]) - keras.engine.training._check_array_lengths([None], [None], [None]) + keras.engine.training_utils.check_array_lengths([None], [None], [None]) b_np = np.random.random((3, 4)) with self.assertRaises(ValueError): - keras.engine.training._check_array_lengths([a_np], [b_np], None) + keras.engine.training_utils.check_array_lengths([a_np], [b_np], None) def test_slice_arrays(self): input_a = np.random.random((10, 3)) diff --git a/tensorflow/python/keras/_impl/keras/engine/training_utils.py b/tensorflow/python/keras/_impl/keras/engine/training_utils.py new file mode 100644 index 0000000000..105638ce10 --- /dev/null +++ b/tensorflow/python/keras/_impl/keras/engine/training_utils.py @@ -0,0 +1,534 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Training-related utilities. 
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import copy + +import numpy as np + +from tensorflow.python.framework import tensor_util +from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import losses + + +def check_num_samples(ins, + batch_size=None, + steps=None, + steps_name='steps'): + """Determine the number of samples provided for training and evaluation. + + The number of samples is not defined when running with `steps`, + in which case the number of samples is set to `None`. + + Arguments: + ins: List of tensors to be fed to the Keras function. + batch_size: Integer batch size or `None` if not defined. + steps: Total number of steps (batches of samples) + before declaring `_predict_loop` finished. + Ignored with the default value of `None`. + steps_name: The public API's parameter name for `steps`. + + Raises: + ValueError: when `steps` is `None` and the attribute `ins.shape` + does not exist. Also raises ValueError when `steps` is not `None` + and `batch_size` is not `None` because they are mutually + exclusive. + + Returns: + When steps is `None`, returns the number of samples to be + processed based on the size of the first dimension of the + first input numpy array. When steps is not `None` and + `batch_size` is `None`, returns `None`. + + Raises: + ValueError: In case of invalid arguments. 
+ """ + if steps is not None: + num_samples = None + if batch_size is not None: + raise ValueError( + 'If ' + steps_name + ' is set, the `batch_size` must be None.') + elif ins and hasattr(ins[0], 'shape'): + num_samples = ins[0].shape[0] + else: + raise ValueError( + 'Either the input data should have ' + 'a defined shape, or ' + steps_name + ' should be specified.') + return num_samples + + +def standardize_input_data(data, + names, + shapes=None, + check_batch_axis=True, + exception_prefix=''): + """Normalizes inputs and targets provided by users. + + Users may pass data as a list of arrays, dictionary of arrays, + or as a single array. We normalize this to an ordered list of + arrays (same order as `names`), while checking that the provided + arrays have shapes that match the network's expectations. + + Arguments: + data: User-provided input data (polymorphic). + names: List of expected array names. + shapes: Optional list of expected array shapes. + check_batch_axis: Boolean; whether to check that + the batch axis of the arrays matches the expected + value found in `shapes`. + exception_prefix: String prefix used for exception formatting. + + Returns: + List of standardized input arrays (one array per model input). + + Raises: + ValueError: in case of improperly formatted user-provided data. + """ + if not names: + if data is not None and hasattr(data, '__len__') and len(data): + raise ValueError('Error when checking model ' + exception_prefix + ': ' + 'expected no data, but got:', data) + return [] + if data is None: + return [None for _ in range(len(names))] + + if isinstance(data, dict): + try: + data = [ + data[x].values + if data[x].__class__.__name__ == 'DataFrame' else data[x] + for x in names + ] + except KeyError as e: + raise ValueError('No data provided for "' + e.args[0] + '". 
Need data ' + 'for each key in: ' + str(names)) + elif isinstance(data, list): + if isinstance(data[0], list): + data = [np.asarray(d) for d in data] + elif len(names) == 1 and isinstance(data[0], (float, int)): + data = [np.asarray(data)] + else: + data = [ + x.values if x.__class__.__name__ == 'DataFrame' else x for x in data + ] + else: + data = data.values if data.__class__.__name__ == 'DataFrame' else data + data = [data] + data = [ + np.expand_dims(x, 1) if x is not None and x.ndim == 1 else x for x in data + ] + + if len(data) != len(names): + if data and hasattr(data[0], 'shape'): + raise ValueError('Error when checking model ' + exception_prefix + + ': the list of Numpy arrays that you are passing to ' + 'your model is not the size the model expected. ' + 'Expected to see ' + str(len(names)) + ' array(s), ' + 'but instead got the following list of ' + + str(len(data)) + ' arrays: ' + str(data)[:200] + '...') + elif len(names) > 1: + raise ValueError( + 'Error when checking model ' + exception_prefix + + ': you are passing a list as input to your model, ' + 'but the model expects a list of ' + str(len(names)) + + ' Numpy arrays instead. The list you passed was: ' + str(data)[:200]) + elif len(data) == 1 and not hasattr(data[0], 'shape'): + raise TypeError('Error when checking model ' + exception_prefix + + ': data should be a Numpy array, or list/dict of ' + 'Numpy arrays. Found: ' + str(data)[:200] + '...') + elif len(names) == 1: + data = [np.asarray(data)] + + # Check shapes compatibility. 
+ if shapes: + for i in range(len(names)): + if shapes[i] is not None: + data_shape = data[i].shape + shape = shapes[i] + if data[i].ndim != len(shape): + raise ValueError('Error when checking ' + exception_prefix + + ': expected ' + names[i] + ' to have ' + + str(len(shape)) + ' dimensions, but got array ' + 'with shape ' + str(data_shape)) + if not check_batch_axis: + data_shape = data_shape[1:] + shape = shape[1:] + for dim, ref_dim in zip(data_shape, shape): + if ref_dim != dim and ref_dim: + raise ValueError( + 'Error when checking ' + exception_prefix + ': expected ' + + names[i] + ' to have shape ' + str(shape) + + ' but got array with shape ' + str(data_shape)) + return data + + +def standardize_sample_or_class_weights(x_weight, output_names, weight_type): + """Maps `sample_weight` or `class_weight` to model outputs. + + Arguments: + x_weight: User-provided `sample_weight` or `class_weight` argument. + output_names: List of output names (strings) in the model. + weight_type: A string used purely for exception printing. + + Returns: + A list of `sample_weight` or `class_weight` where there are exactly + one element per model output. + + Raises: + ValueError: In case of invalid user-provided argument. + """ + if x_weight is None or len(x_weight) == 0: # pylint: disable=g-explicit-length-test + return [None for _ in output_names] + if len(output_names) == 1: + if isinstance(x_weight, list) and len(x_weight) == 1: + return x_weight + if isinstance(x_weight, dict) and output_names[0] in x_weight: + return [x_weight[output_names[0]]] + else: + return [x_weight] + if isinstance(x_weight, list): + if len(x_weight) != len(output_names): + raise ValueError('Provided `' + weight_type + '` was a list of ' + + str(len(x_weight)) + ' elements, but the model has ' + + str(len(output_names)) + ' outputs. 
' + 'You should provide one `' + weight_type + '`' + 'array per model output.') + return x_weight + if isinstance(x_weight, dict): + x_weights = [] + for name in output_names: + x_weights.append(x_weight.get(name)) + return x_weights + else: + raise TypeError( + 'The model has multiple outputs, so `' + weight_type + '` ' + 'should be either a list or a dict. ' + 'Provided `' + weight_type + '` type not understood: ' + str(x_weight)) + + +def standardize_class_weights(class_weight, output_names): + return standardize_sample_or_class_weights(class_weight, output_names, + 'class_weight') + + +def standardize_sample_weights(sample_weight, output_names): + return standardize_sample_or_class_weights(sample_weight, output_names, + 'sample_weight') + + +def check_array_lengths(inputs, targets, weights=None): + """Does user input validation for numpy arrays. + + Arguments: + inputs: list of Numpy arrays of inputs. + targets: list of Numpy arrays of targets. + weights: list of Numpy arrays of sample weights. + + Raises: + ValueError: in case of incorrectly formatted data. + """ + + def set_of_lengths(x): + # return a set with the variation between + # different shapes, with None => 0 + if x is None: + return {} + else: + return set([y.shape[0] for y in x if y is not None]) + + set_x = set_of_lengths(inputs) + set_y = set_of_lengths(targets) + set_w = set_of_lengths(weights) + if len(set_x) > 1: + raise ValueError('All input arrays (x) should have ' + 'the same number of samples. Got array shapes: ' + + str([x.shape for x in inputs])) + if len(set_y) > 1: + raise ValueError('All target arrays (y) should have ' + 'the same number of samples. Got array shapes: ' + + str([y.shape for y in targets])) + if set_x and set_y and list(set_x)[0] != list(set_y)[0]: + raise ValueError('Input arrays should have ' + 'the same number of samples as target arrays. 
' + 'Found ' + str(list(set_x)[0]) + ' input samples ' + 'and ' + str(list(set_y)[0]) + ' target samples.') + if len(set_w) > 1: + raise ValueError('All sample_weight arrays should have ' + 'the same number of samples. Got array shapes: ' + + str([w.shape for w in weights])) + if set_y and set_w and list(set_y)[0] != list(set_w)[0]: + raise ValueError('Sample_weight arrays should have ' + 'the same number of samples as target arrays. Got ' + + str(list(set_y)[0]) + ' input samples and ' + + str(list(set_w)[0]) + ' target samples.') + + +def check_loss_and_target_compatibility(targets, loss_fns, output_shapes): + """Does validation on the compatibility of targets and loss functions. + + This helps prevent users from using loss functions incorrectly. This check + is purely for UX purposes. + + Arguments: + targets: list of Numpy arrays of targets. + loss_fns: list of loss functions. + output_shapes: list of shapes of model outputs. + + Raises: + ValueError: if a loss function or target array + is incompatible with an output. + """ + key_losses = { + losses.mean_squared_error, losses.binary_crossentropy, + losses.categorical_crossentropy + } + for y, loss, shape in zip(targets, loss_fns, output_shapes): + if y is None or loss is None or tensor_util.is_tensor(y): + continue + if loss is losses.categorical_crossentropy: + if y.shape[-1] == 1: + raise ValueError('You are passing a target array of shape ' + str( + y.shape) + ' while using as loss `categorical_crossentropy`. ' + '`categorical_crossentropy` expects ' + 'targets to be binary matrices (1s and 0s) ' + 'of shape (samples, classes). 
' + 'If your targets are integer classes, ' + 'you can convert them to the expected format via:\n' + '```\n' + 'from keras.utils import to_categorical\n' + 'y_binary = to_categorical(y_int)\n' + '```\n' + '\n' + 'Alternatively, you can use the loss function ' + '`sparse_categorical_crossentropy` instead, ' + 'which does expect integer targets.') + if loss in key_losses: + for target_dim, out_dim in zip(y.shape[1:], shape[1:]): + if out_dim is not None and target_dim != out_dim: + raise ValueError('A target array with shape ' + str(y.shape) + + ' was passed for an output of shape ' + str(shape) + + ' while using as loss `' + loss.__name__ + '`. ' + 'This loss expects ' + 'targets to have the same shape ' + 'as the output.') + + +def collect_metrics(metrics, output_names): + """Maps metric functions to model outputs. + + Arguments: + metrics: a list or dict of metric functions. + output_names: a list of the names (strings) of model outputs. + + Returns: + A list (one entry per model output) of lists of metric functions. + For instance, if the model has 2 outputs, and for the first output + we want to compute "binary_accuracy" and "binary_crossentropy", + and just "binary_accuracy" for the second output, + the list would look like: + `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]` + + Raises: + TypeError: if an incorrect type is passed for the `metrics` argument. + """ + if not metrics: + return [[] for _ in output_names] + if isinstance(metrics, list): + # we then apply all metrics to all outputs. + return [copy.copy(metrics) for _ in output_names] + elif isinstance(metrics, dict): + nested_metrics = [] + for name in output_names: + output_metrics = metrics.get(name, []) + if not isinstance(output_metrics, list): + output_metrics = [output_metrics] + nested_metrics.append(output_metrics) + return nested_metrics + else: + raise TypeError('Type of `metrics` argument not understood. 
' + 'Expected a list or dictionary, found: ' + str(metrics)) + + +def batch_shuffle(index_array, batch_size): + """Shuffles an array in a batch-wise fashion. + + Useful for shuffling HDF5 arrays + (where one cannot access arbitrary indices). + + Arguments: + index_array: array of indices to be shuffled. + batch_size: integer. + + Returns: + The `index_array` array, shuffled in a batch-wise fashion. + """ + batch_count = int(len(index_array) / batch_size) + # to reshape we need to be cleanly divisible by batch size + # we stash extra items and reappend them after shuffling + last_batch = index_array[batch_count * batch_size:] + index_array = index_array[:batch_count * batch_size] + index_array = index_array.reshape((batch_count, batch_size)) + np.random.shuffle(index_array) + index_array = index_array.flatten() + return np.append(index_array, last_batch) + + +def weighted_masked_objective(fn): + """Adds support for masking and sample-weighting to an objective function. + + It transforms an objective function `fn(y_true, y_pred)` + into a sample-weighted, cost-masked objective function + `fn(y_true, y_pred, weights, mask)`. + + Arguments: + fn: The objective function to wrap, + with signature `fn(y_true, y_pred)`. + + Returns: + A function with signature `fn(y_true, y_pred, weights, mask)`. + """ + if fn is None: + return None + + def weighted(y_true, y_pred, weights, mask=None): + """Wrapper function. + + Arguments: + y_true: `y_true` argument of `fn`. + y_pred: `y_pred` argument of `fn`. + weights: Weights tensor. + mask: Mask tensor. + + Returns: + Scalar tensor. + """ + # score_array has ndim >= 2 + score_array = fn(y_true, y_pred) + if mask is not None: + # Cast the mask to floatX to avoid float64 upcasting in theano + mask = K.cast(mask, K.floatx()) + # mask should have the same shape as score_array + score_array *= mask + # the loss per batch should be proportional + # to the number of unmasked samples. 
+ score_array /= K.mean(mask) + + # apply sample weighting + if weights is not None: + # reduce score_array to same ndim as weight array + ndim = K.ndim(score_array) + weight_ndim = K.ndim(weights) + score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim))) + score_array *= weights + score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx())) + return K.mean(score_array) + + return weighted + + +def standardize_weights(y, + sample_weight=None, + class_weight=None, + sample_weight_mode=None): + """Performs sample weight validation and standardization. + + Everything gets normalized to a single sample-wise (or timestep-wise) + weight array. + + Arguments: + y: Numpy array of model targets to be weighted. + sample_weight: User-provided `sample_weight` argument. + class_weight: User-provided `class_weight` argument. + sample_weight_mode: One of `None` or `"temporal"`. + `"temporal"` indicated that we expect 2D weight data + that will be applied to the last 2 dimensions of + the targets (i.e. we are weighting timesteps, not samples). + + Returns: + A numpy array of target weights, one entry per sample to weight. + + Raises: + ValueError: In case of invalid user-provided arguments. + """ + if sample_weight_mode is not None: + if sample_weight_mode != 'temporal': + raise ValueError('"sample_weight_mode ' + 'should be None or "temporal". ' + 'Found: ' + str(sample_weight_mode)) + if len(y.shape) < 3: + raise ValueError('Found a sample_weight array for ' + 'an input with shape ' + str(y.shape) + '. ' + 'Timestep-wise sample weighting (use of ' + 'sample_weight_mode="temporal") is restricted to ' + 'outputs that are at least 3D, i.e. that have ' + 'a time dimension.') + if sample_weight is not None and len(sample_weight.shape) != 2: + raise ValueError('Found a sample_weight array with shape ' + + str(sample_weight.shape) + '. 
' + 'In order to use timestep-wise sample weighting, ' + 'you should pass a 2D sample_weight array.') + else: + if sample_weight is not None and len(sample_weight.shape) != 1: + raise ValueError('Found a sample_weight array with shape ' + + str(sample_weight.shape) + '. ' + 'In order to use timestep-wise sample weights, ' + 'you should specify ' + 'sample_weight_mode="temporal" ' + 'in compile(). If you just mean to use ' + 'sample-wise weights, make sure your ' + 'sample_weight array is 1D.') + + if sample_weight is not None: + if len(sample_weight.shape) > len(y.shape): + raise ValueError( + 'Found a sample_weight with shape' + str(sample_weight.shape) + '.' + 'Expected sample_weight with rank ' + 'less than or equal to ' + str(len(y.shape))) + + if y.shape[:sample_weight.ndim] != sample_weight.shape: + raise ValueError( + 'Found a sample_weight array with shape ' + str(sample_weight.shape) + + ' for an input with shape ' + str(y.shape) + '. ' + 'sample_weight cannot be broadcast.') + return sample_weight + elif isinstance(class_weight, dict): + if len(y.shape) > 2: + raise ValueError('`class_weight` not supported for ' + '3+ dimensional targets.') + if y.shape[1] > 1: + y_classes = np.argmax(y, axis=1) + elif y.shape[1] == 1: + y_classes = np.reshape(y, y.shape[0]) + else: + y_classes = y + + weights = np.asarray( + [class_weight[cls] for cls in y_classes if cls in class_weight]) + + if len(weights) != len(y_classes): + # subtract the sets to pick all missing classes + existing_classes = set(y_classes) + existing_class_weight = set(class_weight.keys()) + raise ValueError('`class_weight` must contain all classes in the data.' + ' The classes %s exist in the data but not in ' + '`class_weight`.' 
% + (existing_classes - existing_class_weight)) + return weights + else: + return None diff --git a/tensorflow/python/keras/_impl/keras/utils/__init__.py b/tensorflow/python/keras/_impl/keras/utils/__init__.py index 370ae0dd0f..0c9f19a0c8 100644 --- a/tensorflow/python/keras/_impl/keras/utils/__init__.py +++ b/tensorflow/python/keras/_impl/keras/utils/__init__.py @@ -31,8 +31,8 @@ from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_ke from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary +from tensorflow.python.keras._impl.keras.utils.multi_gpu_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.np_utils import normalize from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical -from tensorflow.python.keras._impl.keras.utils.training_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.vis_utils import plot_model diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils.py b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py similarity index 100% rename from tensorflow/python/keras/_impl/keras/utils/training_utils.py rename to tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py diff --git a/tensorflow/python/keras/_impl/keras/utils/training_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py similarity index 100% rename from tensorflow/python/keras/_impl/keras/utils/training_utils_test.py rename to tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py diff --git a/tensorflow/python/keras/utils/__init__.py b/tensorflow/python/keras/utils/__init__.py index 91cc860727..2f74cf031d 100644 --- a/tensorflow/python/keras/utils/__init__.py +++ b/tensorflow/python/keras/utils/__init__.py @@ -30,9 +30,9 @@ from 
tensorflow.python.keras._impl.keras.utils.generic_utils import Progbar from tensorflow.python.keras._impl.keras.utils.generic_utils import serialize_keras_object from tensorflow.python.keras._impl.keras.utils.io_utils import HDF5Matrix from tensorflow.python.keras._impl.keras.utils.layer_utils import convert_all_kernels_in_model +from tensorflow.python.keras._impl.keras.utils.multi_gpu_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.np_utils import normalize from tensorflow.python.keras._impl.keras.utils.np_utils import to_categorical -from tensorflow.python.keras._impl.keras.utils.training_utils import multi_gpu_model from tensorflow.python.keras._impl.keras.utils.vis_utils import plot_model del absolute_import -- GitLab From 4d631ce22f2902ed11b5e56a6241983dfa5d3eed Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 1 Mar 2018 12:04:59 -0800 Subject: [PATCH 296/884] TFE: Cache `TensorShape` object for `EagerTensor`'s, for performance. PiperOrigin-RevId: 187512946 --- tensorflow/python/eager/pywrap_tensor.cc | 25 ++++++++++++++++++++++++ tensorflow/python/framework/ops.py | 6 +++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index d3aaede749..8338bc4343 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -186,6 +186,10 @@ typedef struct EagerTensor { // This stores `_keras_mask` object and is set by Tensorflow layers. PyObject* keras_mask; + // This stores `_tensor_shape`, a cached `TensorShape` object, and is set the + // first time that `_EagerTensorBase`'s `shape` property is called. + PyObject* tensor_shape; + // We store a status object here as an optimization to avoid allocating a new // Status objects on different functions that operate on EagerTensor and need // to use a TF_Status object. 
However note that accesses to `status` are not @@ -201,6 +205,8 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) { self->handle_data = Py_None; Py_INCREF(Py_None); self->keras_mask = Py_None; + Py_INCREF(Py_None); + self->tensor_shape = Py_None; self->status = TF_NewStatus(); PyObject* value; PyObject* context = nullptr; @@ -333,6 +339,7 @@ void EagerTensor_dealloc(EagerTensor* self) { TF_DeleteStatus(self->status); Py_DECREF(self->handle_data); Py_DECREF(self->keras_mask); + Py_DECREF(self->tensor_shape); TFE_DeleteTensorHandle(self->handle); self->handle = nullptr; // We have the global interpreter lock, so use this chance to perform delayed @@ -420,6 +427,19 @@ static int EagerTensor_setkeras_mask(EagerTensor* self, PyObject* value, self->keras_mask = value; return 0; } + +static PyObject* EagerTensor_tensor_shape(EagerTensor* self, void* unused) { + Py_INCREF(self->tensor_shape); + return self->tensor_shape; +} + +static int EagerTensor_settensor_shape(EagerTensor* self, PyObject* value, + void* unused) { + Py_DECREF(self->tensor_shape); + Py_INCREF(value); + self->tensor_shape = value; + return 0; +} // Function `_copy_to_device`. 
static PyObject* EagerTensor_copy_to_device(EagerTensor* self, PyObject* args, PyObject* kwds) { @@ -484,6 +504,9 @@ static PyGetSetDef EagerTensor_getseters[] = { {const_cast("_keras_mask"), (getter)EagerTensor_keras_mask, (setter)EagerTensor_setkeras_mask, const_cast("_keras_mask"), nullptr}, + {const_cast("_tensor_shape"), (getter)EagerTensor_tensor_shape, + (setter)EagerTensor_settensor_shape, const_cast("_tensor_shape"), + nullptr}, {nullptr} /* Sentinel */ }; @@ -599,6 +622,8 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) { t->handle_data = Py_None; Py_INCREF(Py_None); t->keras_mask = Py_None; + Py_INCREF(Py_None); + t->tensor_shape = Py_None; t->handle = handle; t->status = TF_NewStatus(); } diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 735ba316d0..0a85b153de 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -782,7 +782,11 @@ class _EagerTensorBase(Tensor): @property def shape(self): - return tensor_shape.TensorShape(self._shape_tuple()) + if self._tensor_shape is None: # pylint: disable=access-member-before-definition + # `_tensor_shape` is declared and defined in the definition of + # `EagerTensor`, in C. + self._tensor_shape = tensor_shape.TensorShape(self._shape_tuple()) + return self._tensor_shape def get_shape(self): """Alias of Tensor.shape.""" -- GitLab From c953be2e880b3f751e014f947c2d054e4a22c3e2 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 1 Mar 2018 12:23:37 -0800 Subject: [PATCH 297/884] Remove underscore prefix from the following HIDDEN ops: add_sparse_to_tensors_map, add_many_sparse_to_tensors_map and take_many_sparse_from_tensors_map. 
PiperOrigin-RevId: 187515638 --- tensorflow/python/framework/python_op_gen.cc | 4 +--- tensorflow/python/ops/sparse_ops.py | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/framework/python_op_gen.cc b/tensorflow/python/framework/python_op_gen.cc index 4813458f07..64d214a07f 100644 --- a/tensorflow/python/framework/python_op_gen.cc +++ b/tensorflow/python/framework/python_op_gen.cc @@ -100,10 +100,8 @@ bool IsOpWithUnderscorePrefix(const string& s) { "fused_batch_norm", "histogram_fixed_width", "stack", "batch_norm_with_global_normalization", // TODO(annarev): replace these ops in the next change. - "add_sparse_to_tensors_map", "add_many_sparse_to_tensors_map", "broadcast_gradient_args", "concat", "enter", "histogram_summary", - "ref_enter", "ref_identity", "scalar_summary", - "take_many_sparse_from_tensors_map"}); + "ref_enter", "ref_identity", "scalar_summary"}); return kUnderscoreOps->count(s) > 0; } diff --git a/tensorflow/python/ops/sparse_ops.py b/tensorflow/python/ops/sparse_ops.py index a01bba632f..c580052c32 100644 --- a/tensorflow/python/ops/sparse_ops.py +++ b/tensorflow/python/ops/sparse_ops.py @@ -2046,7 +2046,7 @@ def _add_sparse_to_tensors_map(sp_input, """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._add_sparse_to_tensors_map( + return gen_sparse_ops.add_sparse_to_tensors_map( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -2086,7 +2086,7 @@ def _add_many_sparse_to_tensors_map(sp_input, """ sp_input = _convert_to_sparse_tensor(sp_input) - return gen_sparse_ops._add_many_sparse_to_tensors_map( + return gen_sparse_ops.add_many_sparse_to_tensors_map( sp_input.indices, sp_input.values, sp_input.dense_shape, @@ -2167,7 +2167,7 @@ def _take_many_sparse_from_tensors_map(sparse_map_op, with ops.colocate_with(sparse_map_op): shared_name = sparse_map_op.get_attr("shared_name") or sparse_map_op.name output_indices, output_values, output_shape = ( - 
gen_sparse_ops._take_many_sparse_from_tensors_map( + gen_sparse_ops.take_many_sparse_from_tensors_map( sparse_handles, dtype=sparse_map_op.get_attr("T"), container=sparse_map_op.get_attr("container"), -- GitLab From 1df40b152216bde47dd9ac1fa65bec57434920e1 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 1 Mar 2018 12:56:05 -0800 Subject: [PATCH 298/884] [XLA] Fully qualify xla::MakeUnique uses in shape_tree.h. No functional changes. PiperOrigin-RevId: 187520283 --- tensorflow/compiler/xla/shape_tree.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/compiler/xla/shape_tree.h b/tensorflow/compiler/xla/shape_tree.h index 280f02e886..ffaa40c2d6 100644 --- a/tensorflow/compiler/xla/shape_tree.h +++ b/tensorflow/compiler/xla/shape_tree.h @@ -53,7 +53,7 @@ struct ShapeTreeNode { ShapeTreeNode(const ShapeTreeNode& other) : data(other.data), children(other.children.size()) { for (size_t i = 0; i < children.size(); ++i) { - children[i] = MakeUnique(*other.children[i]); + children[i] = ::xla::MakeUnique(*other.children[i]); } } @@ -62,7 +62,7 @@ struct ShapeTreeNode { data = other.data; children.resize(other.children.size()); for (size_t i = 0; i < children.size(); ++i) { - children[i] = MakeUnique(*other.children[i]); + children[i] = ::xla::MakeUnique(*other.children[i]); } } return *this; @@ -445,7 +445,7 @@ class ShapeTreeIterator : public std::iterator(index, node_->data); + current_ = ::xla::MakeUnique(index, node_->data); return *current_; } @@ -492,7 +492,7 @@ void ShapeTree::InitChildren(const Shape& shape, Node* node) { template ShapeTree::ShapeTree(Shape shape) : root_(), - shape_storage_(MakeUnique(std::move(shape))), + shape_storage_(::xla::MakeUnique(std::move(shape))), shape_(shape_storage_.get()) { // The shape_ field is just used to hold the structure of the shape. // It should not be relied upon to store layout information. 
@@ -508,7 +508,7 @@ ShapeTree::ShapeTree(const Shape* shape) : root_(), shape_(shape) { template ShapeTree::ShapeTree(Shape shape, const T& init_value) : root_(init_value), - shape_storage_(MakeUnique(std::move(shape))), + shape_storage_(::xla::MakeUnique(std::move(shape))), shape_(shape_storage_.get()) { // The shape_ field is just used to hold the structure of the shape. // It should not be relied upon to store layout information. -- GitLab From deef58ba3913c4ab9ca93876cd30744db00c4a6a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 13:00:40 -0800 Subject: [PATCH 299/884] Cast sequence_length to an integer. PiperOrigin-RevId: 187520920 --- .../feature_column/sequence_feature_column.py | 2 +- .../sequence_feature_column_test.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index e99033bbec..e446043bdd 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -295,7 +295,7 @@ def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): row_ids = sp_tensor.indices[:, 0] column_ids = sp_tensor.indices[:, 1] column_ids += array_ops.ones_like(column_ids) - seq_length = ( + seq_length = math_ops.to_int64( math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) # If the last n rows do not have ids, seq_length will have shape # [batch_size - n]. Pad the remaining values with zeros. 
diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 8c37ccf11b..105213680e 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -221,8 +221,9 @@ class SequenceCategoricalColumnWithIdentityTest(test.TestCase): sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) + sequence_length = sess.run(sequence_length) + self.assertAllEqual(expected_sequence_length, sequence_length) + self.assertEqual(np.int64, sequence_length.dtype) def test_sequence_length_with_zeros(self): column = sfc.sequence_categorical_column_with_identity( @@ -311,8 +312,9 @@ class SequenceEmbeddingColumnTest(test.TestCase): _LazyBuilder({'aaa': sparse_input})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) + sequence_length = sess.run(sequence_length) + self.assertAllEqual(expected_sequence_length, sequence_length) + self.assertEqual(np.int64, sequence_length.dtype) def test_sequence_length_with_empty_rows(self): """Tests _sequence_length when some examples do not have ids.""" @@ -423,8 +425,9 @@ class SequenceNumericColumnTest(test.TestCase): _LazyBuilder({'aaa': sparse_input})) with monitored_session.MonitoredSession() as sess: - self.assertAllEqual( - expected_sequence_length, sequence_length.eval(session=sess)) + sequence_length = sess.run(sequence_length) + self.assertAllEqual(expected_sequence_length, sequence_length) + self.assertEqual(np.int64, sequence_length.dtype) def test_sequence_length_with_shape(self): """Tests _sequence_length with shape !=(1,).""" -- GitLab 
From 16478853c73d9e6dfab26e73e99d931f4c74043c Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Thu, 1 Mar 2018 13:04:44 -0800 Subject: [PATCH 300/884] Fix parameter name mismatches in declarations/definitions. Reported by clang-tidy PiperOrigin-RevId: 187521627 --- .../xla/client/compile_only_client.cc | 13 +++++------ .../xla/client/computation_builder.cc | 23 ++++++++++--------- .../compiler/xla/client/computation_builder.h | 2 +- tensorflow/compiler/xla/client/local_client.h | 2 +- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/client/compile_only_client.cc b/tensorflow/compiler/xla/client/compile_only_client.cc index c7e2c4367b..59662c95ac 100644 --- a/tensorflow/compiler/xla/client/compile_only_client.cc +++ b/tensorflow/compiler/xla/client/compile_only_client.cc @@ -39,16 +39,15 @@ CompileOnlyClient::CompileAheadOfTime( return compiler_service_->CompileAheadOfTime(service_instances, options); } -int64 CompileOnlyClient::PointerSizeForTriple( - tensorflow::StringPiece target_triple) { - llvm::Triple triple(llvm::Triple::normalize( - llvm::StringRef(target_triple.data(), target_triple.size()))); - if (triple.isArch64Bit()) { +int64 CompileOnlyClient::PointerSizeForTriple(tensorflow::StringPiece triple) { + llvm::Triple llvm_triple( + llvm::Triple::normalize(llvm::StringRef(triple.data(), triple.size()))); + if (llvm_triple.isArch64Bit()) { return 8; - } else if (triple.isArch32Bit()) { + } else if (llvm_triple.isArch32Bit()) { return 4; } else { - CHECK(triple.isArch16Bit()); + CHECK(llvm_triple.isArch16Bit()); return 2; } } diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 2a6e02649d..4afef6e448 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -408,7 +408,7 @@ ComputationDataHandle ComputationBuilder::Reshape( ComputationDataHandle ComputationBuilder::Collapse( 
const ComputationDataHandle& operand, - tensorflow::gtl::ArraySlice dims_to_collapse) { + tensorflow::gtl::ArraySlice dimensions) { if (!first_error_.ok()) { return ComputationDataHandle(); } @@ -416,8 +416,8 @@ ComputationDataHandle ComputationBuilder::Collapse( // Don't support out-of-order collapse here. // Checks that the collapsed dimensions are in order and consecutive. for (tensorflow::gtl::ArraySlice::size_type i = 1; - i < dims_to_collapse.size(); ++i) { - if (dims_to_collapse[i] - 1 != dims_to_collapse[i - 1]) { + i < dimensions.size(); ++i) { + if (dimensions[i] - 1 != dimensions[i - 1]) { NoteError(InvalidArgument( "Collapsed dimensions are not in order and consecutive.")); return ComputationDataHandle(); @@ -434,9 +434,9 @@ ComputationDataHandle ComputationBuilder::Collapse( VLOG(3) << "original shape: " << ShapeUtil::HumanString(*original_shape); VLOG(3) << "dims to collapse: " - << tensorflow::str_util::Join(dims_to_collapse, ","); + << tensorflow::str_util::Join(dimensions, ","); - if (dims_to_collapse.size() <= 1) { + if (dimensions.size() <= 1) { // Not collapsing anything, trivially we can return the operand versus // enqueueing a trivial reshape. 
return operand; @@ -444,7 +444,7 @@ ComputationDataHandle ComputationBuilder::Collapse( std::vector new_sizes; for (int i = 0; i < ShapeUtil::Rank(*original_shape); ++i) { - if (i <= dims_to_collapse.front() || i > dims_to_collapse.back()) { + if (i <= dimensions.front() || i > dimensions.back()) { new_sizes.push_back(original_shape->dimensions(i)); } else { new_sizes.back() *= original_shape->dimensions(i); @@ -753,13 +753,13 @@ ComputationDataHandle ComputationBuilder::Infeed(const Shape& shape, } void ComputationBuilder::Outfeed(const ComputationDataHandle& operand, - const Shape& shape, + const Shape& shape_with_layout, const string& outfeed_config) { OpRequest op_request; OutfeedRequest* request = op_request.mutable_outfeed_request(); request->set_outfeed_config(outfeed_config); *request->mutable_operand() = operand; - *request->mutable_shape() = shape; + *request->mutable_shape() = shape_with_layout; RunOpAndNoteError(&op_request); } @@ -1382,15 +1382,16 @@ ComputationDataHandle ComputationBuilder::BatchNormInference( ComputationDataHandle ComputationBuilder::BatchNormGrad( const ComputationDataHandle& operand, const ComputationDataHandle& scale, - const ComputationDataHandle& mean, const ComputationDataHandle& var, + const ComputationDataHandle& batch_mean, + const ComputationDataHandle& batch_var, const ComputationDataHandle& grad_output, float epsilon, int64 feature_index) { OpRequest op_request; BatchNormGradRequest* request = op_request.mutable_batch_norm_grad_request(); *request->mutable_operand() = operand; *request->mutable_scale() = scale; - *request->mutable_mean() = mean; - *request->mutable_variance() = var; + *request->mutable_mean() = batch_mean; + *request->mutable_variance() = batch_var; *request->mutable_grad_output() = grad_output; request->set_epsilon(epsilon); request->set_feature_index(feature_index); diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index 
377b671639..e085fcb3b1 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -872,7 +872,7 @@ class ComputationBuilder { Window* window); // Internal helper method that does the building for an arbitrary unary op. - ComputationDataHandle UnaryOp(UnaryOperation binop, + ComputationDataHandle UnaryOp(UnaryOperation unop, const ComputationDataHandle& operand); // Internal helper method that does the building for an arbitrary binary op. diff --git a/tensorflow/compiler/xla/client/local_client.h b/tensorflow/compiler/xla/client/local_client.h index b52a30f5a0..de0ed13c43 100644 --- a/tensorflow/compiler/xla/client/local_client.h +++ b/tensorflow/compiler/xla/client/local_client.h @@ -69,7 +69,7 @@ class LocalExecutable { // of the computation. tensorflow::Status ValidateExecutionOptions( const tensorflow::gtl::ArraySlice arguments, - const ExecutableRunOptions& options, const Backend& backend); + const ExecutableRunOptions& run_options, const Backend& backend); // Records the computation in a SessionModule proto with the arguments used to // invoke it, and the result. Enabled by flag: --tla_dump_executions_to. -- GitLab From 8307faacb96808eae1550ed879fa9a85cf76d897 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 13:09:46 -0800 Subject: [PATCH 301/884] Add support for keyword args for dynamically converted functions. 
PiperOrigin-RevId: 187522324 --- tensorflow/contrib/py2tf/converters/call_trees.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/py2tf/converters/call_trees.py b/tensorflow/contrib/py2tf/converters/call_trees.py index f18f9f6086..ca8726f916 100644 --- a/tensorflow/contrib/py2tf/converters/call_trees.py +++ b/tensorflow/contrib/py2tf/converters/call_trees.py @@ -185,7 +185,7 @@ class CallTreeTransformer(transformer.Base): """ return templates.replace(template, func=node.func, original_args=node.args) - def _converted_call(self, node): + def _insert_dynamic_conversion(self, node): """Inlines a dynamic conversion for a dynamic function.""" # TODO(mdan): Pass information on the statically compiled functions. # Having access to the statically compiled functions can help avoid @@ -208,7 +208,10 @@ class CallTreeTransformer(transformer.Base): """ call_expr = templates.replace( template, func=node.func, original_args=node.args) - return call_expr[0].value + new_call = call_expr[0].value + # TODO(mdan): Improve the template mechanism to better support this. + new_call.keywords = node.keywords + return new_call # pylint:disable=invalid-name @@ -251,7 +254,7 @@ class CallTreeTransformer(transformer.Base): raise NotImplementedError('py_func with return values') else: if self.context.recursive: - node = self._converted_call(node) + node = self._insert_dynamic_conversion(node) else: # Unresolved functions are allowed in non-recursive mode. 
pass -- GitLab From 0abc4c9ecae912676f6070ca4b76b35c80351c26 Mon Sep 17 00:00:00 2001 From: Fred Reiss Date: Thu, 1 Mar 2018 13:25:21 -0800 Subject: [PATCH 302/884] Clean up output formatting of saved_model_cli.py (#17235) --- .../docs_src/programmers_guide/saved_model.md | 60 ++++---- tensorflow/python/tools/saved_model_cli.py | 68 +++++---- .../python/tools/saved_model_cli_test.py | 141 +++++++++--------- 3 files changed, 142 insertions(+), 127 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index f18d50b282..c54c278584 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -697,15 +697,15 @@ executing the computation graph later. For example: $ saved_model_cli show --dir \ /tmp/saved_model_dir --tag_set serve --signature_def serving_default The given SavedModel SignatureDef contains the following input(s): -inputs['x'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x:0 + inputs['x'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x:0 The given SavedModel SignatureDef contains the following output(s): -outputs['y'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 + outputs['y'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 Method name is: tensorflow/serving/predict ``` @@ -717,32 +717,32 @@ $ saved_model_cli show --dir /tmp/saved_model_dir --all MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: signature_def['classify_x2_to_y3']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x2:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['scores'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y3:0 -Method name is: tensorflow/serving/classify + The given SavedModel SignatureDef contains the following input(s): + 
inputs['inputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x2:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['scores'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y3:0 + Method name is: tensorflow/serving/classify ... signature_def['serving_default']: -The given SavedModel SignatureDef contains the following input(s): -inputs['x'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['y'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: tensorflow/serving/predict + The given SavedModel SignatureDef contains the following input(s): + inputs['x'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['y'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/predict ``` diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index 33f6debbcb..b0e9e3e5ed 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -115,7 +115,7 @@ def _get_outputs_tensor_info_from_meta_graph_def(meta_graph_def, signature_def_key).outputs -def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key): +def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key, indent=0): """Prints input and output TensorInfos. Prints the details of input and output TensorInfos for the SignatureDef mapped @@ -126,6 +126,7 @@ def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key): tag_set: Group of tag(s) of the MetaGraphDef, in string format, separated by ','. For tag-set contains multiple tags, all tags must be passed in. signature_def_key: A SignatureDef key string. + indent: How far (in increments of 2 spaces) to indent each line of output. 
""" meta_graph_def = saved_model_utils.get_meta_graph_def(saved_model_dir, tag_set) @@ -134,29 +135,39 @@ def _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key): outputs_tensor_info = _get_outputs_tensor_info_from_meta_graph_def( meta_graph_def, signature_def_key) - print('The given SavedModel SignatureDef contains the following input(s):') + indent_str = " " * indent + def in_print(s): + print(indent_str + s) + + in_print('The given SavedModel SignatureDef contains the following input(s):') for input_key, input_tensor in sorted(inputs_tensor_info.items()): - print('inputs[\'%s\'] tensor_info:' % input_key) - _print_tensor_info(input_tensor) + in_print(' inputs[\'%s\'] tensor_info:' % input_key) + _print_tensor_info(input_tensor, indent+1) - print('The given SavedModel SignatureDef contains the following output(s):') + in_print('The given SavedModel SignatureDef contains the following ' + 'output(s):') for output_key, output_tensor in sorted(outputs_tensor_info.items()): - print('outputs[\'%s\'] tensor_info:' % output_key) - _print_tensor_info(output_tensor) + in_print(' outputs[\'%s\'] tensor_info:' % output_key) + _print_tensor_info(output_tensor, indent+1) - print('Method name is: %s' % - meta_graph_def.signature_def[signature_def_key].method_name) + in_print('Method name is: %s' % + meta_graph_def.signature_def[signature_def_key].method_name) -def _print_tensor_info(tensor_info): +def _print_tensor_info(tensor_info, indent=0): """Prints details of the given tensor_info. Args: tensor_info: TensorInfo object to be printed. + indent: How far (in increments of 2 spaces) to indent each line output """ - print(' dtype: ' + - {value: key - for (key, value) in types_pb2.DataType.items()}[tensor_info.dtype]) + indent_str = " " * indent + def in_print(s): + print(indent_str + s) + + in_print(' dtype: ' + + {value: key + for (key, value) in types_pb2.DataType.items()}[tensor_info.dtype]) # Display shape as tuple. 
if tensor_info.tensor_shape.unknown_rank: shape = 'unknown_rank' @@ -164,8 +175,8 @@ def _print_tensor_info(tensor_info): dims = [str(dim.size) for dim in tensor_info.tensor_shape.dim] shape = ', '.join(dims) shape = '(' + shape + ')' - print(' shape: ' + shape) - print(' name: ' + tensor_info.name) + in_print(' shape: ' + shape) + in_print(' name: ' + tensor_info.name) def _show_all(saved_model_dir): @@ -186,7 +197,8 @@ def _show_all(saved_model_dir): signature_def_map = get_signature_def_map(saved_model_dir, tag_set) for signature_def_key in sorted(signature_def_map.keys()): print('\nsignature_def[\'' + signature_def_key + '\']:') - _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key) + _show_inputs_outputs(saved_model_dir, tag_set, signature_def_key, + indent=1) def get_meta_graph_def(saved_model_dir, tag_set): @@ -614,19 +626,19 @@ def create_parser(): show_msg = ( 'Usage examples:\n' 'To show all tag-sets in a SavedModel:\n' - '$saved_model_cli show --dir /tmp/saved_model\n' + '$saved_model_cli show --dir /tmp/saved_model\n\n' 'To show all available SignatureDef keys in a ' 'MetaGraphDef specified by its tag-set:\n' - '$saved_model_cli show --dir /tmp/saved_model --tag_set serve\n' + '$saved_model_cli show --dir /tmp/saved_model --tag_set serve\n\n' 'For a MetaGraphDef with multiple tags in the tag-set, all tags must be ' 'passed in, separated by \';\':\n' '$saved_model_cli show --dir /tmp/saved_model --tag_set serve,gpu\n\n' 'To show all inputs and outputs TensorInfo for a specific' ' SignatureDef specified by the SignatureDef key in a' ' MetaGraph.\n' - '$saved_model_cli show --dir /tmp/saved_model --tag_set serve ' - '--signature_def serving_default\n\n' - 'To show all available information in the SavedModel\n:' + '$saved_model_cli show --dir /tmp/saved_model --tag_set serve' + ' --signature_def serving_default\n\n' + 'To show all available information in the SavedModel:\n' '$saved_model_cli show --dir /tmp/saved_model --all') parser_show = 
subparsers.add_parser( 'show', @@ -658,12 +670,14 @@ def create_parser(): run_msg = ('Usage example:\n' 'To run input tensors from files through a MetaGraphDef and save' ' the output tensors to files:\n' - '$saved_model_cli show --dir /tmp/saved_model --tag_set serve ' - '--signature_def serving_default ' - '--inputs input1_key=/tmp/124.npz[x],input2_key=/tmp/123.npy ' - '--input_exprs \'input3_key=np.ones(2)\' --input_examples ' - '\'input4_key=[{"id":[26],"weights":[0.5, 0.5]}]\' ' - '--outdir=/out\n\n' + '$saved_model_cli show --dir /tmp/saved_model --tag_set serve \\\n' + ' --signature_def serving_default \\\n' + ' --inputs input1_key=/tmp/124.npz[x],input2_key=/tmp/123.npy ' + '\\\n' + ' --input_exprs \'input3_key=np.ones(2)\' \\\n' + ' --input_examples ' + '\'input4_key=[{"id":[26],"weights":[0.5, 0.5]}]\' \\\n' + ' --outdir=/out\n\n' 'For more information about input file format, please see:\n' 'https://www.tensorflow.org/programmers_guide/saved_model_cli\n') parser_run = subparsers.add_parser( diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index d6cbc49ba1..f99c844845 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -61,83 +61,84 @@ class SavedModelCLITestCase(test.TestCase): exp_out = """MetaGraphDef with tag-set: 'serve' contains the following SignatureDefs: signature_def['classify_x2_to_y3']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x2:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['scores'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y3:0 -Method name is: tensorflow/serving/classify + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x2:0 + The given SavedModel SignatureDef contains the 
following output(s): + outputs['scores'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y3:0 + Method name is: tensorflow/serving/classify signature_def['classify_x_to_y']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_STRING - shape: unknown_rank - name: tf_example:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['scores'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: tensorflow/serving/classify + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_STRING + shape: unknown_rank + name: tf_example:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['scores'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/classify signature_def['regress_x2_to_y3']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x2:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['outputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y3:0 -Method name is: tensorflow/serving/regress + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x2:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['outputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y3:0 + Method name is: tensorflow/serving/regress signature_def['regress_x_to_y']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_STRING - shape: unknown_rank - name: tf_example:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['outputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: 
tensorflow/serving/regress + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_STRING + shape: unknown_rank + name: tf_example:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['outputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/regress signature_def['regress_x_to_y2']: -The given SavedModel SignatureDef contains the following input(s): -inputs['inputs'] tensor_info: - dtype: DT_STRING - shape: unknown_rank - name: tf_example:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['outputs'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y2:0 -Method name is: tensorflow/serving/regress + The given SavedModel SignatureDef contains the following input(s): + inputs['inputs'] tensor_info: + dtype: DT_STRING + shape: unknown_rank + name: tf_example:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['outputs'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y2:0 + Method name is: tensorflow/serving/regress signature_def['serving_default']: -The given SavedModel SignatureDef contains the following input(s): -inputs['x'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: x:0 -The given SavedModel SignatureDef contains the following output(s): -outputs['y'] tensor_info: - dtype: DT_FLOAT - shape: (-1, 1) - name: y:0 -Method name is: tensorflow/serving/predict""" + The given SavedModel SignatureDef contains the following input(s): + inputs['x'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: x:0 + The given SavedModel SignatureDef contains the following output(s): + outputs['y'] tensor_info: + dtype: DT_FLOAT + shape: (-1, 1) + name: y:0 + Method name is: tensorflow/serving/predict""" # pylint: enable=line-too-long + self.maxDiff = None # Produce a useful error msg if the comparison fails self.assertMultiLineEqual(output, exp_out) 
self.assertEqual(err.getvalue().strip(), '') @@ -193,11 +194,11 @@ Method name is: tensorflow/serving/predict""" output = out.getvalue().strip() expected_output = ( 'The given SavedModel SignatureDef contains the following input(s):\n' - 'inputs[\'x\'] tensor_info:\n' - ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: x:0\n' + ' inputs[\'x\'] tensor_info:\n' + ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: x:0\n' 'The given SavedModel SignatureDef contains the following output(s):\n' - 'outputs[\'y\'] tensor_info:\n' - ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: y:0\n' + ' outputs[\'y\'] tensor_info:\n' + ' dtype: DT_FLOAT\n shape: (-1, 1)\n name: y:0\n' 'Method name is: tensorflow/serving/predict') self.assertEqual(output, expected_output) self.assertEqual(err.getvalue().strip(), '') -- GitLab From eec6cbd4a60c8525d6601ceebf50511cefa50ec1 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Thu, 1 Mar 2018 13:37:16 -0800 Subject: [PATCH 303/884] Fix TensorRT build. PiperOrigin-RevId: 187526192 --- tensorflow/contrib/tensorrt/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 65a0e903a7..3b7b68f61b 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -107,6 +107,7 @@ tf_cuda_library( tf_gen_op_wrapper_py( name = "trt_engine_op", + gen_locally = True, deps = [ ":trt_engine_op_op_lib", ":trt_logging", -- GitLab From 80710d5c53a8b2896a57dbe026d7f742e71fc03b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 13:43:03 -0800 Subject: [PATCH 304/884] Optimize training with feature selection by avoiding any computations on the features that are not selected once we have reached our target number of features. 
PiperOrigin-RevId: 187526964 --- .../boosted_trees/kernels/model_ops.cc | 57 +++ .../boosted_trees/kernels/training_ops.cc | 28 +- .../contrib/boosted_trees/ops/model_ops.cc | 27 ++ .../python/kernel_tests/model_ops_test.py | 16 + .../python/kernel_tests/training_ops_test.py | 190 +-------- .../boosted_trees/python/ops/model_ops.py | 1 + .../python/training/functions/gbdt_batch.py | 34 +- .../training/functions/gbdt_batch_test.py | 376 ++++++++++++++++++ 8 files changed, 517 insertions(+), 212 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc index 754b7bc327..3bf33186ec 100644 --- a/tensorflow/contrib/boosted_trees/kernels/model_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/model_ops.cc @@ -137,6 +137,61 @@ class TreeEnsembleDeserializeOp : public OpKernel { } }; +class TreeEnsembleUsedHandlersOp : public OpKernel { + public: + explicit TreeEnsembleUsedHandlersOp(OpKernelConstruction* context) + : OpKernel(context) { + OP_REQUIRES_OK(context, + context->GetAttr("num_all_handlers", &num_handlers_)); + } + + void Compute(OpKernelContext* context) override { + boosted_trees::models::DecisionTreeEnsembleResource* ensemble_resource; + + OP_REQUIRES_OK(context, LookupResource(context, HandleFromInput(context, 0), + &ensemble_resource)); + tf_shared_lock l(*ensemble_resource->get_mutex()); + core::ScopedUnref unref_me(ensemble_resource); + + // Get the stamp token. + const Tensor* stamp_token_t; + OP_REQUIRES_OK(context, context->input("stamp_token", &stamp_token_t)); + int64 stamp_token = stamp_token_t->scalar()(); + + // Only the Chief should run this Op and it is guaranteed to be in + // a consistent state so the stamps must always match. 
+ CHECK(ensemble_resource->is_stamp_valid(stamp_token)); + + Tensor* output_used_handlers_t = nullptr; + OP_REQUIRES_OK( + context, context->allocate_output("used_handlers_mask", {num_handlers_}, + &output_used_handlers_t)); + auto output_used_handlers = output_used_handlers_t->vec(); + + Tensor* output_num_used_handlers_t = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output("num_used_handlers", {}, + &output_num_used_handlers_t)); + int handler_idx = 0; + std::vector used_handlers = ensemble_resource->GetUsedHandlers(); + output_num_used_handlers_t->scalar()() = used_handlers.size(); + for (int64 i = 0; i < num_handlers_; ++i) { + if (handler_idx >= used_handlers.size() || + used_handlers[handler_idx] > i) { + output_used_handlers(i) = false; + } else { + OP_REQUIRES(context, used_handlers[handler_idx] == i, + errors::InvalidArgument("Handler IDs should be sorted.")); + ++handler_idx; + output_used_handlers(i) = true; + } + } + } + + private: + int64 num_handlers_; +}; + REGISTER_RESOURCE_HANDLE_KERNEL(DecisionTreeEnsembleResource); REGISTER_KERNEL_BUILDER( @@ -155,5 +210,7 @@ REGISTER_KERNEL_BUILDER(Name("TreeEnsembleSerialize").Device(DEVICE_CPU), REGISTER_KERNEL_BUILDER(Name("TreeEnsembleDeserialize").Device(DEVICE_CPU), TreeEnsembleDeserializeOp); +REGISTER_KERNEL_BUILDER(Name("TreeEnsembleUsedHandlers").Device(DEVICE_CPU), + TreeEnsembleUsedHandlersOp); } // namespace boosted_trees } // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc index 7f8dea1d3c..1bfeed3066 100644 --- a/tensorflow/contrib/boosted_trees/kernels/training_ops.cc +++ b/tensorflow/contrib/boosted_trees/kernels/training_ops.cc @@ -361,27 +361,10 @@ class GrowTreeEnsembleOp : public OpKernel { // Increment attempt stats. 
ensemble_resource->IncrementAttempts(); - // In case we want to do feature selection and we have reached the limit, - // build a list of handlers used so far to avoid adding new features. - std::vector allowed_handlers; - if (learner_config_.constraints().max_number_of_unique_feature_columns() > - 0) { - allowed_handlers = ensemble_resource->GetUsedHandlers(); - // TODO(soroush): We can disable handlers that are not going to be used to - // avoid unnecessary computations. - if (allowed_handlers.size() < - learner_config_.constraints() - .max_number_of_unique_feature_columns()) { - // We have not reached the limit yet. Empty the list of allow features - // which means we can keep adding new features. - allowed_handlers.clear(); - } - } - // Find best splits for each active partition. std::map best_splits; - FindBestSplitsPerPartition(context, allowed_handlers, partition_ids_list, - gains_list, splits_list, &best_splits); + FindBestSplitsPerPartition(context, partition_ids_list, gains_list, + splits_list, &best_splits); // No-op if no new splits can be considered. if (best_splits.empty()) { @@ -422,19 +405,12 @@ class GrowTreeEnsembleOp : public OpKernel { // and finds the best split for each partition. void FindBestSplitsPerPartition( OpKernelContext* const context, - const std::vector& allowed_handlers, // Empty means all handlers. const OpInputList& partition_ids_list, const OpInputList& gains_list, const OpInputList& splits_list, std::map* best_splits) { // Find best split per partition going through every feature candidate. // TODO(salehay): Is this worth parallelizing? 
for (int64 handler_id = 0; handler_id < num_handlers_; ++handler_id) { - if (!allowed_handlers.empty()) { - if (!std::binary_search(allowed_handlers.begin(), - allowed_handlers.end(), handler_id)) { - continue; - } - } const auto& partition_ids = partition_ids_list[handler_id].vec(); const auto& gains = gains_list[handler_id].vec(); const auto& splits = splits_list[handler_id].vec(); diff --git a/tensorflow/contrib/boosted_trees/ops/model_ops.cc b/tensorflow/contrib/boosted_trees/ops/model_ops.cc index 0786c41664..9d6343c7e8 100644 --- a/tensorflow/contrib/boosted_trees/ops/model_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/model_ops.cc @@ -110,5 +110,32 @@ stamp_token: Token to use as the new value of the resource stamp. tree_ensemble_config: Serialized proto of the ensemble. )doc"); +REGISTER_OP("TreeEnsembleUsedHandlers") + .Attr("num_all_handlers: int >= 0") + .Input("tree_ensemble_handle: resource") + .Input("stamp_token: int64") + .Output("num_used_handlers: int64") + .Output("used_handlers_mask: bool") + .SetShapeFn([](shape_inference::InferenceContext* c) { + shape_inference::ShapeHandle unused_input; + TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused_input)); + c->set_output(0, c->Scalar()); + int num_all_handlers; + c->GetAttr("num_all_handlers", &num_all_handlers).IgnoreError(); + c->set_output(1, {c->Vector(num_all_handlers)}); + + return Status::OK(); + }) + .Doc(R"doc( +Returns the mask of used handlers along with the number of non-zero elements in +this mask. Used in feature selection. + +tree_ensemble_handle: Handle to the tree ensemble. +stamp_token: Token to use as the new value of the resource stamp. +num_used_handlers: number of feature column handlers used in the model. +used_handlers_mask: A boolean vector of showing which handlers are used in the + model. 
+)doc"); + } // namespace boosted_trees } // namespace tensorflow diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py index 27c288bbf7..63b9c5fddf 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/model_ops_test.py @@ -310,6 +310,22 @@ class ModelOpsTest(test_util.TensorFlowTestCase): # The third tree was added after the save. self.assertAllClose(result.eval(), [[-1.1], [-1.1]]) + def testUsedHandlers(self): + with self.test_session(): + tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() + tree_ensemble_config.growing_metadata.used_handler_ids.append(1) + tree_ensemble_config.growing_metadata.used_handler_ids.append(5) + stamp_token = 3 + tree_ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=stamp_token, + tree_ensemble_config=tree_ensemble_config.SerializeToString(), + name="create_tree") + resources.initialize_resources(resources.shared_resources()).run() + result = model_ops.tree_ensemble_used_handlers( + tree_ensemble_handle, stamp_token, num_all_handlers=6) + self.assertAllEqual([0, 1, 0, 0, 0, 1], result.used_handlers_mask.eval()) + self.assertEqual(2, result.num_used_handlers.eval()) + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py index 8ca1aabaca..3e524efbea 100644 --- a/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py +++ b/tensorflow/contrib/boosted_trees/python/kernel_tests/training_ops_test.py @@ -1588,7 +1588,7 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): self.assertEqual( 2, tree_ensemble_config.tree_metadata[2].num_tree_weight_updates) - def testGrowExistingEnsembleTreeWithFeatureSelectionCanStillGrow(self): + def 
testGrowExistingEnsembleTreeWithFeatureSelectionUsedHandlers(self): """Test growing a tree with feature selection.""" with self.test_session() as session: # Create existing ensemble with one root split and one bias tree. @@ -1649,7 +1649,6 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): num_trees_attempted: 2 num_layers_attempted: 2 used_handler_ids: 2 - used_handler_ids: 5 } """, tree_ensemble_config) tree_ensemble_handle = model_ops.tree_ensemble_variable( @@ -1668,183 +1667,8 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): min_node_weight=0, pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE) - # There are 2 handler_ids in used_handler_ids already but one of them - # is handler 2, so we can still grow trees. - learner_config.constraints.max_number_of_unique_feature_columns = 2 - learner_config = learner_config.SerializeToString() - # Prepare handler inputs. - handler1_partitions = np.array([0], dtype=np.int32) - handler1_gains = np.array([7.62], dtype=np.float32) - handler1_split = [_gen_dense_split_info(5, 0.52, -4.375, 7.143)] - handler2_partitions = np.array([0], dtype=np.int32) - handler2_gains = np.array([0.63], dtype=np.float32) - handler2_split = [_gen_dense_split_info(2, 0.23, -0.6, 0.24)] - handler3_partitions = np.array([0], dtype=np.int32) - handler3_gains = np.array([7.62], dtype=np.float32) - handler3_split = [_gen_categorical_split_info(8, 7, -4.375, 7.143)] - - # Grow tree ensemble. - grow_op = training_ops.grow_tree_ensemble( - tree_ensemble_handle, - stamp_token=0, - next_stamp_token=1, - learning_rate=1, - partition_ids=[ - handler1_partitions, handler2_partitions, handler3_partitions - ], - gains=[handler1_gains, handler2_gains, handler3_gains], - splits=[handler1_split, handler2_split, handler3_split], - learner_config=learner_config, - dropout_seed=123, - center_bias=True) - session.run(grow_op) - - # Expect a new tree to be added with the split from handler 1. 
- _, serialized = session.run( - model_ops.tree_ensemble_serialize(tree_ensemble_handle)) - tree_ensemble_config.ParseFromString(serialized) - self.assertEqual(3, len(tree_ensemble_config.trees)) - self.assertEqual( - 2, len(tree_ensemble_config.growing_metadata.used_handler_ids)) - - def testGrowExistingEnsembleTreeWithFeatureSelectionEmptyEnsemble(self): - """Test growing a tree with feature selection with empty ensemble.""" - with self.test_session() as session: - # Create existing ensemble with one root split and one bias tree. - tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() - tree_ensemble_handle = model_ops.tree_ensemble_variable( - stamp_token=0, - tree_ensemble_config=tree_ensemble_config.SerializeToString(), - name="tree_ensemble") - resources.initialize_resources(resources.shared_resources()).run() - - # Prepare learner config. - learner_config = _gen_learner_config( - num_classes=2, - l1_reg=0, - l2_reg=0, - tree_complexity=0, - max_depth=1, - min_node_weight=0, - pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, - growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE) - learner_config.constraints.max_number_of_unique_feature_columns = 2 - learner_config = learner_config.SerializeToString() - # Prepare handler inputs. - handler1_partitions = np.array([0], dtype=np.int32) - handler1_gains = np.array([7.62], dtype=np.float32) - handler1_split = [_gen_dense_split_info(5, 0.52, -4.375, 7.143)] - handler2_partitions = np.array([0], dtype=np.int32) - handler2_gains = np.array([0.63], dtype=np.float32) - handler2_split = [_gen_dense_split_info(2, 0.23, -0.6, 0.24)] - handler3_partitions = np.array([0], dtype=np.int32) - handler3_gains = np.array([7.62], dtype=np.float32) - handler3_split = [_gen_categorical_split_info(8, 7, -4.375, 7.143)] - - # Grow tree ensemble. 
- grow_op = training_ops.grow_tree_ensemble( - tree_ensemble_handle, - stamp_token=0, - next_stamp_token=1, - learning_rate=1, - partition_ids=[ - handler1_partitions, handler2_partitions, handler3_partitions - ], - gains=[handler1_gains, handler2_gains, handler3_gains], - splits=[handler1_split, handler2_split, handler3_split], - learner_config=learner_config, - dropout_seed=123, - center_bias=True) - session.run(grow_op) - - _, serialized = session.run( - model_ops.tree_ensemble_serialize(tree_ensemble_handle)) - tree_ensemble_config.ParseFromString(serialized) - self.assertEqual(1, len(tree_ensemble_config.trees)) - self.assertEqual( - 1, len(tree_ensemble_config.growing_metadata.used_handler_ids)) - - def testGrowExistingEnsembleTreeWithFeatureSelectionCantGrow(self): - """Test growing a tree with feature selection with empty ensemble.""" - with self.test_session() as session: - # Create existing ensemble with one root split and one bias tree. - tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() - text_format.Merge(""" - trees { - nodes { - leaf { - vector { - value: -0.32 - value: 0.28 - } - } - } - } - trees { - nodes { - categorical_id_binary_split { - feature_column: 3 - feature_id: 7 - left_id: 1 - right_id: 2 - } - node_metadata { - gain: 1.3 - } - } - nodes { - leaf { - sparse_vector { - index: 0 - value: 2.3 - } - } - } - nodes { - leaf { - sparse_vector { - index: 0 - value: -0.9 - } - } - } - } - tree_weights: 0.7 - tree_weights: 1 - tree_metadata { - num_tree_weight_updates: 1 - num_layers_grown: 1 - is_finalized: true - } - tree_metadata { - num_tree_weight_updates: 5 - num_layers_grown: 1 - is_finalized: true - } - growing_metadata { - num_trees_attempted: 2 - num_layers_attempted: 2 - used_handler_ids: 4 - used_handler_ids: 5 - } - """, tree_ensemble_config) - tree_ensemble_handle = model_ops.tree_ensemble_variable( - stamp_token=0, - tree_ensemble_config=tree_ensemble_config.SerializeToString(), - name="tree_ensemble") - 
resources.initialize_resources(resources.shared_resources()).run() - # Prepare learner config. - learner_config = _gen_learner_config( - num_classes=2, - l1_reg=0, - l2_reg=0, - tree_complexity=0, - max_depth=1, - min_node_weight=0, - pruning_mode=learner_pb2.LearnerConfig.PRE_PRUNE, - growing_mode=learner_pb2.LearnerConfig.WHOLE_TREE) - learner_config.constraints.max_number_of_unique_feature_columns = 2 + learner_config.constraints.max_number_of_unique_feature_columns = 3 learner_config = learner_config.SerializeToString() # Prepare handler inputs. handler1_partitions = np.array([0], dtype=np.int32) @@ -1876,12 +1700,10 @@ class GrowTreeEnsembleOpTest(test_util.TensorFlowTestCase): _, serialized = session.run( model_ops.tree_ensemble_serialize(tree_ensemble_handle)) tree_ensemble_config.ParseFromString(serialized) - # We can't grow a tree since we have reached the limit of 2 unique - # features [4, 5] and the only available splits are from - # handlers [0, 1, 2]. - self.assertEqual(2, len(tree_ensemble_config.trees)) - self.assertEqual( - 2, len(tree_ensemble_config.growing_metadata.used_handler_ids)) + self.assertEqual(3, len(tree_ensemble_config.trees)) + # 2 was already used. handler 0 is being added in this tree. 
+ self.assertAllEqual( + [0, 2], tree_ensemble_config.growing_metadata.used_handler_ids) if __name__ == "__main__": diff --git a/tensorflow/contrib/boosted_trees/python/ops/model_ops.py b/tensorflow/contrib/boosted_trees/python/ops/model_ops.py index 7a5f509047..25b2c9e2fd 100644 --- a/tensorflow/contrib/boosted_trees/python/ops/model_ops.py +++ b/tensorflow/contrib/boosted_trees/python/ops/model_ops.py @@ -25,6 +25,7 @@ from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensem from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensemble_serialize # pylint: disable=unused-import from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensemble_stamp_token +from tensorflow.contrib.boosted_trees.python.ops.gen_model_ops import tree_ensemble_used_handlers # pylint: enable=unused-import from tensorflow.python.framework import ops diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index f0b66dcbbe..233e21f1cf 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -57,6 +57,8 @@ PREDICTIONS = "predictions" PARTITION_IDS = "partition_ids" NUM_LAYERS_ATTEMPTED = "num_layers" NUM_TREES_ATTEMPTED = "num_trees" +NUM_USED_HANDLERS = "num_used_handlers" +USED_HANDLERS_MASK = "used_handlers_mask" _FEATURE_NAME_TEMPLATE = "%s_%d" @@ -70,7 +72,8 @@ def _get_column_by_index(tensor, indices): return array_ops.reshape(array_ops.gather(p_flat, i_flat), [shape[0], -1]) -def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats): +def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats, + used_handlers): """Returns predictions for the given logits and n_classes. 
Args: @@ -79,6 +82,8 @@ def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats): that contains predictions when no dropout was applied. partition_ids: A rank 1 `Tensor` with shape [batch_size]. ensemble_stats: A TreeEnsembleStatsOp result tuple. + used_handlers: A TreeEnsembleUsedHandlerOp result tuple of an int and a + boolean mask.. Returns: A dict of predictions. @@ -89,6 +94,8 @@ def _make_predictions_dict(stamp, logits, partition_ids, ensemble_stats): result[PARTITION_IDS] = partition_ids result[NUM_LAYERS_ATTEMPTED] = ensemble_stats.attempted_layers result[NUM_TREES_ATTEMPTED] = ensemble_stats.attempted_trees + result[NUM_USED_HANDLERS] = used_handlers.num_used_handlers + result[USED_HANDLERS_MASK] = used_handlers.used_handlers_mask return result @@ -361,6 +368,13 @@ class GradientBoostedDecisionTreeModel(object): """ ensemble_stats = training_ops.tree_ensemble_stats(ensemble_handle, ensemble_stamp) + num_handlers = ( + len(self._dense_floats) + len(self._sparse_float_shapes) + + len(self._sparse_int_shapes)) + # Used during feature selection. + used_handlers = model_ops.tree_ensemble_used_handlers( + ensemble_handle, ensemble_stamp, num_all_handlers=num_handlers) + # We don't need dropout info - we can always restore it based on the # seed. apply_dropout, seed = _dropout_params(mode, ensemble_stats) @@ -395,7 +409,7 @@ class GradientBoostedDecisionTreeModel(object): use_locking=True) return _make_predictions_dict(ensemble_stamp, predictions, partition_ids, - ensemble_stats) + ensemble_stats, used_handlers) def predict(self, mode): """Returns predictions given the features and mode. 
@@ -716,6 +730,22 @@ class GradientBoostedDecisionTreeModel(object): else: active_handlers = array_ops.ones([len(handlers), 2], dtype=dtypes.bool) + if self._learner_config.constraints.max_number_of_unique_feature_columns: + target = ( + self._learner_config.constraints.max_number_of_unique_feature_columns) + + def _feature_selection_active_handlers(): + # The active list for current and the next iteration. + used_handlers = array_ops.reshape(predictions_dict[USED_HANDLERS_MASK], + [-1, 1]) + used_handlers = array_ops.concat([used_handlers, used_handlers], axis=1) + return math_ops.logical_and(used_handlers, active_handlers) + + active_handlers = ( + control_flow_ops.cond(predictions_dict[NUM_USED_HANDLERS] >= target, + _feature_selection_active_handlers, + lambda: active_handlers)) + # Prepare empty gradients and hessians when handlers are not ready. empty_hess_shape = [1] + hessian_shape.as_list() empty_grad_shape = [1] + gradient_shape.as_list() diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py index dba51d4f52..6411f57a54 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch_test.py @@ -47,6 +47,38 @@ def _squared_loss(label, unused_weights, predictions): return loss +def _append_to_leaf(leaf, c_id, w): + """Helper method for building tree leaves. + + Appends weight contributions for the given class index to a leaf node. + + Args: + leaf: leaf node to append to. + c_id: class Id for the weight update. + w: weight contribution value. + """ + leaf.sparse_vector.index.append(c_id) + leaf.sparse_vector.value.append(w) + + +def _set_float_split(split, feat_col, thresh, l_id, r_id): + """Helper method for building tree float splits. + + Sets split feature column, threshold and children. + + Args: + split: split node to update. 
+ feat_col: feature column for the split. + thresh: threshold to split on forming rule x <= thresh. + l_id: left child Id. + r_id: right child Id. + """ + split.feature_column = feat_col + split.threshold = thresh + split.left_id = l_id + split.right_id = r_id + + class GbdtTest(test_util.TensorFlowTestCase): def setUp(self): @@ -917,6 +949,350 @@ class GbdtTest(test_util.TensorFlowTestCase): output.trees[0].nodes[2].leaf.sparse_vector.value[0], atol=1e-4, rtol=1e-4) + def testTrainFnChiefFeatureSelectionReachedLimitNoGoodSplit(self): + """Tests the train function running on chief with feature selection.""" + with self.test_session() as sess: + ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, tree_ensemble_config="", name="tree_ensemble") + learner_config = learner_pb2.LearnerConfig() + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + learner_config.num_classes = 2 + learner_config.regularization.l1 = 0 + learner_config.regularization.l2 = 0 + learner_config.constraints.max_tree_depth = 1 + learner_config.constraints.max_number_of_unique_feature_columns = 1 + learner_config.constraints.min_node_weight = 0 + features = {} + features["dense_float_0"] = array_ops.ones([4, 1], dtypes.float32) + # Feature 1 is predictive but it won't be used because we have reached the + # limit of num_used_handlers >= max_number_of_unique_feature_columns + features["dense_float_1"] = array_ops.constant([0, 0, 1, 1], + dtypes.float32) + + gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( + is_chief=True, + num_ps_replicas=0, + center_bias=False, + ensemble_handle=ensemble_handle, + examples_per_layer=1, + learner_config=learner_config, + logits_dimension=1, + features=features) + + predictions = array_ops.constant( + [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) + partition_ids = array_ops.zeros([4], dtypes.int32) + ensemble_stamp = variables.Variable( + initial_value=0, + name="ensemble_stamp", + trainable=False, + dtype=dtypes.int64) + + 
predictions_dict = { + "predictions": + predictions, + "predictions_no_dropout": + predictions, + "partition_ids": + partition_ids, + "ensemble_stamp": + ensemble_stamp, + "num_trees": + 12, + "num_used_handlers": + array_ops.constant(1, dtype=dtypes.int64), + "used_handlers_mask": + array_ops.constant([True, False], dtype=dtypes.bool), + } + + labels = array_ops.constant([0, 0, 1, 1], dtypes.float32) + weights = array_ops.ones([4, 1], dtypes.float32) + # Create train op. + train_op = gbdt_model.train( + loss=math_ops.reduce_mean( + _squared_loss(labels, weights, predictions)), + predictions_dict=predictions_dict, + labels=labels) + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + + # On first run, expect no splits to be chosen because the quantile + # buckets will not be ready. + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 0) + self.assertEquals(len(output.tree_weights), 0) + self.assertEquals(stamp_token.eval(), 1) + + # Update the stamp to be able to run a second time. + sess.run([ensemble_stamp.assign_add(1)]) + + # On second run, expect a trivial split to be chosen to basically + # predict the average. 
+ train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 1) + self.assertAllClose(output.tree_weights, [0.1]) + self.assertEquals(stamp_token.eval(), 2) + expected_tree = """ + nodes { + dense_float_binary_split { + feature_column: 0 + threshold: 1.0 + left_id: 1 + right_id: 2 + } + node_metadata { + gain: 0 + } + } + nodes { + leaf { + vector { + value: -0.25 + } + } + } + nodes { + leaf { + vector { + value: 0.0 + } + } + }""" + self.assertProtoEquals(expected_tree, output.trees[0]) + + def testTrainFnChiefFeatureSelectionWithGoodSplits(self): + """Tests the train function running on chief with feature selection.""" + with self.test_session() as sess: + ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, tree_ensemble_config="", name="tree_ensemble") + learner_config = learner_pb2.LearnerConfig() + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + learner_config.num_classes = 2 + learner_config.regularization.l1 = 0 + learner_config.regularization.l2 = 0 + learner_config.constraints.max_tree_depth = 1 + learner_config.constraints.max_number_of_unique_feature_columns = 1 + learner_config.constraints.min_node_weight = 0 + features = {} + features["dense_float_0"] = array_ops.ones([4, 1], dtypes.float32) + # Feature 1 is predictive and is in our selected features so it will be + # used even when we're at the limit. 
+ features["dense_float_1"] = array_ops.constant([0, 0, 1, 1], + dtypes.float32) + + gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( + is_chief=True, + num_ps_replicas=0, + center_bias=False, + ensemble_handle=ensemble_handle, + examples_per_layer=1, + learner_config=learner_config, + logits_dimension=1, + features=features) + + predictions = array_ops.constant( + [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) + partition_ids = array_ops.zeros([4], dtypes.int32) + ensemble_stamp = variables.Variable( + initial_value=0, + name="ensemble_stamp", + trainable=False, + dtype=dtypes.int64) + + predictions_dict = { + "predictions": + predictions, + "predictions_no_dropout": + predictions, + "partition_ids": + partition_ids, + "ensemble_stamp": + ensemble_stamp, + "num_trees": + 12, + "num_used_handlers": + array_ops.constant(1, dtype=dtypes.int64), + "used_handlers_mask": + array_ops.constant([False, True], dtype=dtypes.bool), + } + + labels = array_ops.constant([0, 0, 1, 1], dtypes.float32) + weights = array_ops.ones([4, 1], dtypes.float32) + # Create train op. + train_op = gbdt_model.train( + loss=math_ops.reduce_mean( + _squared_loss(labels, weights, predictions)), + predictions_dict=predictions_dict, + labels=labels) + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + + # On first run, expect no splits to be chosen because the quantile + # buckets will not be ready. + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 0) + self.assertEquals(len(output.tree_weights), 0) + self.assertEquals(stamp_token.eval(), 1) + + # Update the stamp to be able to run a second time. 
+ sess.run([ensemble_stamp.assign_add(1)]) + + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + + self.assertEquals(len(output.trees), 1) + self.assertAllClose(output.tree_weights, [0.1]) + self.assertEquals(stamp_token.eval(), 2) + expected_tree = """ + nodes { + dense_float_binary_split { + feature_column: 1 + left_id: 1 + right_id: 2 + } + node_metadata { + gain: 0.5 + } + } + nodes { + leaf { + vector { + value: 0.0 + } + } + } + nodes { + leaf { + vector { + value: -0.5 + } + } + }""" + self.assertProtoEquals(expected_tree, output.trees[0]) + + def testTrainFnChiefFeatureSelectionReachedLimitIncrementAttemptedLayer(self): + """Tests the train function running on chief with feature selection.""" + with self.test_session() as sess: + tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig() + tree = tree_ensemble_config.trees.add() + + _set_float_split(tree.nodes.add() + .sparse_float_binary_split_default_right.split, 2, 4.0, + 1, 2) + _append_to_leaf(tree.nodes.add().leaf, 0, 0.5) + _append_to_leaf(tree.nodes.add().leaf, 1, 1.2) + tree_ensemble_config.tree_weights.append(1.0) + metadata = tree_ensemble_config.tree_metadata.add() + metadata.is_finalized = False + metadata.num_layers_grown = 1 + tree_ensemble_config = tree_ensemble_config.SerializeToString() + ensemble_handle = model_ops.tree_ensemble_variable( + stamp_token=0, tree_ensemble_config=tree_ensemble_config, + name="tree_ensemble") + learner_config = learner_pb2.LearnerConfig() + learner_config.learning_rate_tuner.fixed.learning_rate = 0.1 + learner_config.num_classes = 2 + learner_config.regularization.l1 = 0 + learner_config.regularization.l2 = 0 + learner_config.constraints.max_tree_depth = 1 + learner_config.constraints.max_number_of_unique_feature_columns = 1 + learner_config.constraints.min_node_weight = 0 + features = {} + # Both 
features will be disabled since the feature selection limit is + # already reached. + features["dense_float_0"] = array_ops.ones([4, 1], dtypes.float32) + features["dense_float_1"] = array_ops.constant([0, 0, 1, 1], + dtypes.float32) + + gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel( + is_chief=True, + num_ps_replicas=0, + center_bias=False, + ensemble_handle=ensemble_handle, + examples_per_layer=1, + learner_config=learner_config, + logits_dimension=1, + features=features) + + predictions = array_ops.constant( + [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32) + partition_ids = array_ops.zeros([4], dtypes.int32) + ensemble_stamp = variables.Variable( + initial_value=0, + name="ensemble_stamp", + trainable=False, + dtype=dtypes.int64) + + predictions_dict = { + "predictions": + predictions, + "predictions_no_dropout": + predictions, + "partition_ids": + partition_ids, + "ensemble_stamp": + ensemble_stamp, + "num_trees": + 12, + # We have somehow reached our limit 1. Both of the handlers will be + # disabled. + "num_used_handlers": + array_ops.constant(1, dtype=dtypes.int64), + "used_handlers_mask": + array_ops.constant([False, False], dtype=dtypes.bool), + } + + labels = array_ops.constant([0, 0, 1, 1], dtypes.float32) + weights = array_ops.ones([4, 1], dtypes.float32) + # Create train op. + train_op = gbdt_model.train( + loss=math_ops.reduce_mean( + _squared_loss(labels, weights, predictions)), + predictions_dict=predictions_dict, + labels=labels) + variables.global_variables_initializer().run() + resources.initialize_resources(resources.shared_resources()).run() + + # On first run, expect no splits to be chosen because the quantile + # buckets will not be ready. 
+ train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + self.assertEquals(len(output.trees), 1) + self.assertEquals(output.growing_metadata.num_layers_attempted, 1) + self.assertEquals(stamp_token.eval(), 1) + + # Update the stamp to be able to run a second time. + sess.run([ensemble_stamp.assign_add(1)]) + + train_op.run() + stamp_token, serialized = model_ops.tree_ensemble_serialize( + ensemble_handle) + output = tree_config_pb2.DecisionTreeEnsembleConfig() + output.ParseFromString(serialized.eval()) + # Make sure the trees are not modified, but the num_layers_attempted is + # incremented so that eventually the training stops. + self.assertEquals(len(output.trees), 1) + self.assertEquals(len(output.trees[0].nodes), 3) + + self.assertEquals(output.growing_metadata.num_layers_attempted, 2) if __name__ == "__main__": googletest.main() -- GitLab From f8f4a6e26cc1108495c0b9a55d9a7d6e7005c2b5 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Thu, 1 Mar 2018 14:15:20 -0800 Subject: [PATCH 305/884] Internal change. 
PiperOrigin-RevId: 187532378 --- tensorflow/c/c_test_util.cc | 31 +++++++++++++++++-- tensorflow/c/c_test_util.h | 9 ++++++ .../common_runtime/graph_execution_state.cc | 4 +++ 3 files changed, 41 insertions(+), 3 deletions(-) diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc index 3db2852ce6..53346a8cdf 100644 --- a/tensorflow/c/c_test_util.cc +++ b/tensorflow/c/c_test_util.cc @@ -34,6 +34,10 @@ static void DoubleDeallocator(void* data, size_t, void* arg) { delete[] static_cast(data); } +static void FloatDeallocator(void* data, size_t, void* arg) { + delete[] static_cast(data); +} + TF_Tensor* Int8Tensor(const int64_t* dims, int num_dims, const char* values) { int64_t num_values = 1; for (int i = 0; i < num_dims; ++i) { @@ -78,13 +82,21 @@ TF_Tensor* DoubleTensor(double v) { &DoubleDeallocator, nullptr); } +TF_Tensor* FloatTensor(float v) { + const int num_bytes = sizeof(float); + float* values = new float[1]; + values[0] = v; + return TF_NewTensor(TF_FLOAT, nullptr, 0, values, num_bytes, + &FloatDeallocator, nullptr); +} + // All the *Helper methods are used as a workaround for the restrictions that // one cannot call ASSERT_* methods in non-void-returning functions (when // exceptions are disabled during compilation) void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name, - TF_Operation** op) { + TF_DataType dtype, TF_Operation** op) { TF_OperationDescription* desc = TF_NewOperation(graph, "Placeholder", name); - TF_SetAttrType(desc, "dtype", TF_INT32); + TF_SetAttrType(desc, "dtype", dtype); *op = TF_FinishOperation(desc, s); ASSERT_EQ(TF_OK, TF_GetCode(s)) << TF_Message(s); ASSERT_NE(*op, nullptr); @@ -92,7 +104,14 @@ void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name, TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name) { TF_Operation* op; - PlaceholderHelper(graph, s, name, &op); + PlaceholderHelper(graph, s, name, TF_INT32, &op); + return op; +} + +TF_Operation* 
PlaceholderFloat(TF_Graph* graph, TF_Status* s, + const char* name) { + TF_Operation* op; + PlaceholderHelper(graph, s, name, TF_FLOAT, &op); return op; } @@ -126,6 +145,12 @@ TF_Operation* ScalarConst(double v, TF_Graph* graph, TF_Status* s, return Const(tensor.get(), graph, s, name); } +TF_Operation* ScalarConst(float v, TF_Graph* graph, TF_Status* s, + const char* name) { + unique_tensor_ptr tensor(FloatTensor(v), TF_DeleteTensor); + return Const(tensor.get(), graph, s, name); +} + void AddOpHelper(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name, TF_Operation** op, bool check) { diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index 2a70177c72..8cf060f73f 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -44,8 +44,14 @@ TF_Tensor* Int32Tensor(int32_t v); TF_Tensor* DoubleTensor(double v); +TF_Tensor* FloatTensor(float v); + +// TODO(hongm): Change Placeholder() to take in a TF_DataType parameter, and +// unify with PlaceholderFloat. 
TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name = "feed"); +TF_Operation* PlaceholderFloat(TF_Graph* graph, TF_Status* s, + const char* name = "feed"); TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s, const char* name = "const"); @@ -56,6 +62,9 @@ TF_Operation* ScalarConst(int32_t v, TF_Graph* graph, TF_Status* s, TF_Operation* ScalarConst(double v, TF_Graph* graph, TF_Status* s, const char* name = "scalar"); +TF_Operation* ScalarConst(float v, TF_Graph* graph, TF_Status* s, + const char* name = "scalar"); + TF_Operation* Add(TF_Operation* l, TF_Operation* r, TF_Graph* graph, TF_Status* s, const char* name = "add"); diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index 33a5d60eb7..785ec3d227 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -73,6 +73,10 @@ GraphExecutionState::~GraphExecutionState() { /* static */ Status GraphExecutionState::MakeForBaseGraph( GraphDef* graph_def, const GraphExecutionStateOptions& options, std::unique_ptr* out_state) { +#ifndef __ANDROID__ + VLOG(1) << "Graph proto is " << graph_def->DebugString(); +#endif // __ANDROID__ + std::unique_ptr ret( new GraphExecutionState(graph_def, options)); -- GitLab From 3973e772ed84db08cb86b1086558223af29fd64a Mon Sep 17 00:00:00 2001 From: Rui Zhao Date: Thu, 1 Mar 2018 14:15:23 -0800 Subject: [PATCH 306/884] Sampling group embeddings for each child. 
PiperOrigin-RevId: 187532388 --- .../grappler/hierarchical_controller.py | 41 ++++++++++++++----- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/grappler/hierarchical_controller.py b/tensorflow/python/grappler/hierarchical_controller.py index b06fb3c6d0..c0866c1069 100644 --- a/tensorflow/python/grappler/hierarchical_controller.py +++ b/tensorflow/python/grappler/hierarchical_controller.py @@ -258,9 +258,11 @@ class HierarchicalController(Controller): "attn_w_2", [self.hparams.hidden_size, self.hparams.hidden_size]) variable_scope.get_variable("attn_v", [self.hparams.hidden_size, 1]) seq2seq_input_layer = array_ops.placeholder_with_default( - array_ops.zeros([1, self.num_groups, self.group_emb_size], + array_ops.zeros([self.hparams.num_children, + self.num_groups, + self.group_emb_size], dtypes.float32), - shape=(1, self.num_groups, self.group_emb_size)) + shape=(self.hparams.num_children, self.num_groups, self.group_emb_size)) self.seq2seq_input_layer = seq2seq_input_layer def compute_reward(self, run_time): @@ -585,12 +587,29 @@ class HierarchicalController(Controller): """Approximating the blocks of a TF graph from a graph_def. Args: - grouping_actions: grouping predictions + grouping_actions: grouping predictions. verbose: print stuffs. Returns: groups: list of groups. """ + groups = [ + self._create_group_embeddings(grouping_actions, i, verbose) for + i in range(self.hparams.num_children) + ] + return np.stack(groups, axis=0) + + def _create_group_embeddings(self, grouping_actions, child_id, verbose=False): + """Approximating the blocks of a TF graph from a graph_def for each child. + + Args: + grouping_actions: grouping predictions. + child_id: child_id for the group. + verbose: print stuffs. + + Returns: + groups: group embedding for the child_id. 
+ """ if verbose: print("Processing input_graph") @@ -599,13 +618,13 @@ class HierarchicalController(Controller): dag_matrix = np.zeros([self.num_groups, self.num_groups], dtype=np.float32) for op in self.important_ops: topo_op_index = self.name_to_topo_order_index[op.name] - # TODO(agoldie) child_id - group_index = grouping_actions[0][topo_op_index] + group_index = grouping_actions[child_id][topo_op_index] for output_op in self.get_node_fanout(op): if output_op.name not in self.important_op_names: continue - output_group_index = grouping_actions[0][self.name_to_topo_order_index[ - output_op.name]] + output_group_index = ( + grouping_actions[child_id][self.name_to_topo_order_index[ + output_op.name]]) dag_matrix[group_index, output_group_index] += 1.0 num_connections = np.sum(dag_matrix) num_intra_group_connections = dag_matrix.trace() @@ -648,7 +667,8 @@ class HierarchicalController(Controller): ], dtype=np.float32) for op_index, op in enumerate(self.important_ops): - group_index = grouping_actions[0][self.name_to_topo_order_index[op.name]] + group_index = grouping_actions[child_id][ + self.name_to_topo_order_index[op.name]] type_name = str(op.op) type_index = self.type_dict[type_name] group_embedding[group_index, type_index] += 1 @@ -675,7 +695,7 @@ class HierarchicalController(Controller): shape=[num_children, self.num_groups], trainable=False) - x = array_ops.tile(self.seq2seq_input_layer, [num_children, 1, 1]) + x = self.seq2seq_input_layer last_c, last_h, attn_mem = self.encode(x) actions, log_probs = {}, {} actions["sample"], log_probs["sample"] = ( @@ -988,8 +1008,7 @@ class HierarchicalController(Controller): def generate_placement(self, grouping, sess): controller_ops = self.ops["controller"] feed_seq2seq_input_dict = {} - feed_seq2seq_input_dict[self.seq2seq_input_layer] = np.expand_dims( - grouping, axis=0) + feed_seq2seq_input_dict[self.seq2seq_input_layer] = grouping sess.run( controller_ops["y_preds"]["sample"], feed_dict=feed_seq2seq_input_dict) -- 
GitLab From 759da7754a708f1f64e4b4b2e17cd4d8c42e3ed3 Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 1 Mar 2018 14:26:07 -0800 Subject: [PATCH 307/884] Set more generated ops to 'hidden'. These ops have not been hidden before but instead have corresponding definitions in Python files. We don't want tf_export decorators for the generated ops since corresponding Python ops have tf_export decorators instead. PiperOrigin-RevId: 187534113 --- tensorflow/core/api_def/python_api/api_def_Angle.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Cast.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Gather.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Imag.pbtxt | 4 ++++ .../api_def/python_api/api_def_IsVariableInitialized.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ParseSingleExample.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Real.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_ReverseSequence.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Shape.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Size.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SparseSegmentMean.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt | 4 ++++ .../core/api_def/python_api/api_def_SparseSegmentSum.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt | 4 ++++ 
tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt | 4 ++++ tensorflow/core/api_def/python_api/api_def_Where.pbtxt | 4 ++++ 25 files changed, 100 insertions(+) create mode 100644 tensorflow/core/api_def/python_api/api_def_Angle.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Cast.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Gather.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Imag.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Real.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Shape.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Size.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt create mode 
100644 tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_Where.pbtxt diff --git a/tensorflow/core/api_def/python_api/api_def_Angle.pbtxt b/tensorflow/core/api_def/python_api/api_def_Angle.pbtxt new file mode 100644 index 0000000000..771e861fd1 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Angle.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Angle" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt b/tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt new file mode 100644 index 0000000000..551b51db26 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Bincount.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Bincount" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Cast.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cast.pbtxt new file mode 100644 index 0000000000..428aa62c46 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Cast.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Cast" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt new file mode 100644 index 0000000000..8f5e2f061b --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Cumprod.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Cumprod" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt b/tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt new file mode 100644 index 0000000000..715f26fcac --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Cumsum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Cumsum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt b/tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt new file mode 100644 index 0000000000..fd0766b365 --- /dev/null +++ 
b/tensorflow/core/api_def/python_api/api_def_DepthToSpace.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "DepthToSpace" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Gather.pbtxt b/tensorflow/core/api_def/python_api/api_def_Gather.pbtxt new file mode 100644 index 0000000000..5f956930e0 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Gather.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Gather" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Imag.pbtxt b/tensorflow/core/api_def/python_api/api_def_Imag.pbtxt new file mode 100644 index 0000000000..5632fd4365 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Imag.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Imag" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt b/tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt new file mode 100644 index 0000000000..6a7b078909 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_IsVariableInitialized.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "IsVariableInitialized" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt b/tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt new file mode 100644 index 0000000000..9b65433580 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Multinomial.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Multinomial" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt b/tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt new file mode 100644 index 0000000000..c058e5b1ab --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_OnesLike.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "OnesLike" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt b/tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt 
new file mode 100644 index 0000000000..4193bdd091 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ParseSingleExample.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ParseSingleExample" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt b/tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt new file mode 100644 index 0000000000..40673234ed --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_QuantizeV2.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "QuantizeV2" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Real.pbtxt b/tensorflow/core/api_def/python_api/api_def_Real.pbtxt new file mode 100644 index 0000000000..52a9089f4a --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Real.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Real" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt b/tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt new file mode 100644 index 0000000000..0fde5942ab --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ReduceJoin.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ReduceJoin" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt b/tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt new file mode 100644 index 0000000000..f3fc2578df --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_ReverseSequence.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "ReverseSequence" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Shape.pbtxt b/tensorflow/core/api_def/python_api/api_def_Shape.pbtxt new file mode 100644 index 0000000000..bd7b5ad36c --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Shape.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Shape" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Size.pbtxt 
b/tensorflow/core/api_def/python_api/api_def_Size.pbtxt new file mode 100644 index 0000000000..7f76173a5d --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Size.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Size" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt b/tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt new file mode 100644 index 0000000000..d56a7384eb --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SpaceToDepth.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SpaceToDepth" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt new file mode 100644 index 0000000000..f12c2e2073 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentMean.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentMean" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt new file mode 100644 index 0000000000..7daaa81482 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSqrtN.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSqrtN" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt new file mode 100644 index 0000000000..e7028efce2 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_SparseSegmentSum.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "SparseSegmentSum" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt b/tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt new file mode 100644 index 0000000000..a55fa98877 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_StridedSlice.pbtxt @@ -0,0 +1,4 @@ +op { 
+ graph_op_name: "StridedSlice" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt b/tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt new file mode 100644 index 0000000000..e22b6a040e --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Transpose.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Transpose" + visibility: HIDDEN +} diff --git a/tensorflow/core/api_def/python_api/api_def_Where.pbtxt b/tensorflow/core/api_def/python_api/api_def_Where.pbtxt new file mode 100644 index 0000000000..d4dd25a206 --- /dev/null +++ b/tensorflow/core/api_def/python_api/api_def_Where.pbtxt @@ -0,0 +1,4 @@ +op { + graph_op_name: "Where" + visibility: HIDDEN +} -- GitLab From a8bcf9c5b2ea7c88c3034d1b4c5d62c209a6b431 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 14:35:44 -0800 Subject: [PATCH 308/884] Expose native inference latency via TFlite interpreter. PiperOrigin-RevId: 187535695 --- .../main/java/org/tensorflow/lite/Interpreter.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index 9286814b74..b071cda5df 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -167,6 +167,19 @@ public final class Interpreter implements AutoCloseable { return wrapper.getOutputIndex(opName); } + + /** + * Returns native inference timing. + *

IllegalArgumentException will be thrown if the model is not initialized by the + * {@link Interpreter}. + */ + public Long getLastNativeInferenceDurationNanoseconds() { + if (wrapper == null) { + throw new IllegalStateException("The interpreter has already been closed."); + } + return wrapper.getLastNativeInferenceDurationNanoseconds(); + } + /** Release resources associated with the {@code Interpreter}. */ @Override public void close() { -- GitLab From 8b10f9c7a0a67282061275302b00c254b609b7f6 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 1 Mar 2018 14:49:49 -0800 Subject: [PATCH 309/884] EagerTensor.device reflects the op's device and not the tensor's memory space. This matches graph mode's behavior. PiperOrigin-RevId: 187537818 --- tensorflow/c/eager/c_api.cc | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 252ceab54a..4b619dc4e1 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -180,12 +180,10 @@ int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, } const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) { - // TODO(apassos) this will be potentially incorrect in the distributed case as - // our local device will have a name which depends on the ClusterSpec and - // hence will require the context to resolve. status->status = tensorflow::Status::OK(); - return (h->d == nullptr) ? "/job:localhost/replica:0/task:0/device:CPU:0" - : h->d->name().c_str(); + return (h->op_device == nullptr) + ? 
"/job:localhost/replica:0/task:0/device:CPU:0" + : h->op_device->name().c_str(); } TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { -- GitLab From 4d1a2894b7faa7d9576e82e291758c0da0616b47 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 1 Mar 2018 15:09:23 -0800 Subject: [PATCH 310/884] Added support for optimization of functions with fixed input/output types PiperOrigin-RevId: 187540982 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/function_optimizer.cc | 35 +++++--- .../optimizers/function_optimizer_test.cc | 87 +++++++++++++++++++ .../grappler/optimizers/meta_optimizer.cc | 2 +- 4 files changed, 111 insertions(+), 14 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index b8995ef365..037438ee75 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -144,6 +144,7 @@ cc_library( "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:op_types", + "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/utils:functions", ], ) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index ba8a76ad5f..a5cf00c155 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/framework/versions.pb.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/functions.h" namespace tensorflow { @@ -53,13 +54,17 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, AttrValue::ListValue* type_list = (*func_inputs->mutable_attr())["T"].mutable_list(); for (const OpDef::ArgDef& arg : func.signature().input_arg()) { - auto it = attr.find(arg.type_attr()); - if (it == attr.end()) { - return errors::InvalidArgument("Invalid input argument ", arg.name(), - " for function ", node.op(), - " instantiated by ", node.name()); + if (arg.type() != DT_INVALID) { + type_list->add_type(arg.type()); + } else { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid input argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); } - type_list->add_type(it->second.type()); } for (NodeDef& func_body_node : *item->graph.mutable_node()) { @@ -75,7 +80,7 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, } else { // Update the input names. 
for (string& input : *func_body_node.mutable_input()) { - input = strings::StrCat(node.name(), "/", input); + input = AddPrefixToNodeName(input, node.name()); } } @@ -98,13 +103,17 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, func_outputs->set_device(node.device()); type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); for (const OpDef::ArgDef& arg : func.signature().output_arg()) { - auto it = attr.find(arg.type_attr()); - if (it == attr.end()) { - return errors::InvalidArgument("Invalid output argument ", arg.name(), - " for function ", node.op(), - " instantiated by ", node.name()); + if (arg.type() != DT_INVALID) { + type_list->add_type(arg.type()); + } else { + auto it = attr.find(arg.type_attr()); + if (it == attr.end()) { + return errors::InvalidArgument("Invalid output argument ", arg.name(), + " for function ", node.op(), + " instantiated by ", node.name()); + } + type_list->add_type(it->second.type()); } - type_list->add_type(it->second.type()); func_outputs->add_input(strings::StrCat(node.name(), "/", arg.name())); } diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 76a5c08d35..fd61c067ed 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -100,6 +100,93 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } +TEST_F(FunctionOptimizerTest, FixedTypeFunction) { + // Create and instantiate a version of the XTimesTwo function that only + // accepts floats as inputs. 
+ const Tensor kTwo = test::AsScalar(2.0f); + FunctionDef x_times_two = FunctionDefHelper::Define( + // Name + "XTimesTwo", + // Args + {"x: float"}, + // Return values + {"y: float"}, + // Attr def + {}, + // Nodes + { + {{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_FLOAT}}}, + {{"y"}, "Mul", {"x", "two"}, {{"T", DT_FLOAT}}}, + }); + + constexpr char device[] = "/device:CPU:0"; + GrapplerItem item; + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "XTimesTwo", {"x"}, {}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + x_times_two, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int count = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "y/inlined_inputs") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("x", node.input(0)); + } else if (node.name() == "y/x") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/inlined_inputs:0", node.input(0)); + } else if (node.name() == "y/two") { + count++; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(device, node.device()); + } else if (node.name() == "y/y") { + count++; + EXPECT_EQ("Mul", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("y/x", node.input(0)); + EXPECT_EQ("y/two:0", node.input(1)); + } else if (node.name() == "y") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/y", node.input(0)); + } else if (node.name() == "z") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + 
EXPECT_EQ("y", node.input(0)); + } + } + EXPECT_EQ(6, count); + + item.fetch = {"z"}; + Tensor pi(DT_FLOAT, {}); + pi.flat()(0) = 3.14f; + item.feed.emplace_back("x", pi); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index b674ee1553..72d7b94dc8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -136,7 +136,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } } else { const std::set available_optimizers = { - "pruning", "constfold", "layout", "memory", + "pruning", "function", "constfold", "layout", "memory", "autoparallel", "arithmetic", "dependency", "loop"}; std::vector custom_optimizer_names; for (const auto& optimizer_name : cfg_.optimizers()) { -- GitLab From ac79486324bda04cc2f3b75e9590935dfe1ef826 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 1 Mar 2018 15:36:19 -0800 Subject: [PATCH 311/884] Checkpointable: Make Model Checkpointable-compatible Has Models track Checkpointable dependencies with __setattr__. Switches subclassed Models to creating ResourceVariables by default, which removes one source of eager/graph differences. tfe.Network was doing this by default. This is necessary for eager/graph agnostic code since tapes currently only work with ResourceVariables. It's not quite trivial to fix that, and ResourceVariables by default in more places is a Good Thing anyway. (Not that we shouldn't also fix the tape code.) 
PiperOrigin-RevId: 187544850 --- tensorflow/contrib/eager/python/BUILD | 1 + .../eager/python/checkpointable_utils_test.py | 154 ++++++++---------- .../keras/_impl/keras/engine/network.py | 8 + .../keras/_impl/keras/engine/training.py | 9 + 4 files changed, 86 insertions(+), 86 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index a26ec8513f..8c4b0827fd 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -262,6 +262,7 @@ py_test( "//tensorflow/python:variables", "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", + "//tensorflow/python/keras", "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 7367f1b71c..9424de0835 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -22,7 +22,6 @@ import os import six from tensorflow.contrib.eager.python import checkpointable_utils -from tensorflow.contrib.eager.python import network as network_lib from tensorflow.python.client import session as session_lib from tensorflow.python.eager import context from tensorflow.python.eager import test @@ -30,7 +29,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import test_util -from tensorflow.python.layers import base +from tensorflow.python.keras._impl.keras.engine import training from tensorflow.python.layers import core from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops @@ -42,21 +41,6 @@ from tensorflow.python.training import saver as core_saver from tensorflow.python.training import training_util -# pylint: disable=not-callable -class CheckpointableNetwork(network_lib.Network, 
checkpointable.Checkpointable): - - def __setattr__(self, name, value): - if isinstance(value, base.Layer): - self.track_layer(value, name=name) - # Checkpointable is next in the method resolution order, so this will catch - # Checkpointable objects which aren't Layers. - super(CheckpointableNetwork, self).__setattr__(name, value) - - def track_layer(self, layer, name): - self._track_checkpointable(layer, name=name) - return super(CheckpointableNetwork, self).track_layer(layer) - - class NonLayerCheckpointable(checkpointable.Checkpointable): def __init__(self): @@ -65,19 +49,20 @@ class NonLayerCheckpointable(checkpointable.Checkpointable): self, name="a_variable", shape=[]) -class MyNetwork(CheckpointableNetwork): - """A concrete Network for testing.""" +# pylint: disable=not-callable +class MyModel(training.Model): + """A concrete Model for testing.""" def __init__(self): - super(MyNetwork, self).__init__() + super(MyModel, self).__init__() self._named_dense = core.Dense(1, use_bias=True) - self._via_track_layer = self.track_layer( - core.Dense(1, use_bias=False), name="via_track_layer") + self._second = core.Dense(1, use_bias=False) # We can still track Checkpointables which aren't Layers. self._non_layer = NonLayerCheckpointable() def call(self, values): - return self._via_track_layer(self._named_dense(values)) + ret = self._second(self._named_dense(values)) + return ret class InterfaceTests(test.TestCase): @@ -171,26 +156,26 @@ class CheckpointingTests(test.TestCase): @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testNamingWithOptimizer(self): input_value = constant_op.constant([[3.]]) - network = MyNetwork() - # A nuisance Network using the same optimizer. Its slot variables should not + model = MyModel() + # A nuisance Model using the same optimizer. Its slot variables should not # go in the checkpoint, since it is never depended on. 
- other_network = MyNetwork() + other_model = MyModel() optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, optimizer_step=optimizer_step) + optimizer=optimizer, model=model, optimizer_step=optimizer_step) if context.in_eager_mode(): optimizer.minimize( - lambda: network(input_value), + lambda: model(input_value), global_step=optimizer_step) optimizer.minimize( - lambda: other_network(input_value), + lambda: other_model(input_value), global_step=optimizer_step) else: train_op = optimizer.minimize( - network(input_value), global_step=optimizer_step) + model(input_value), global_step=optimizer_step) optimizer.minimize( - other_network(input_value), + other_model(input_value), global_step=optimizer_step) self.evaluate(checkpointable_utils.gather_initializers( root_checkpointable)) @@ -200,24 +185,21 @@ class CheckpointingTests(test.TestCase): expected_checkpoint_names = ( # Created in the root node, so no prefix. "optimizer_step", - # No name provided to track_checkpointable(), so the position is used - # instead (one-based). 
- "network/via_track_layer/kernel", - # track_checkpointable() with a name provided, so that's used - "network/_named_dense/kernel", - "network/_named_dense/bias", - # non-Layer dependency of the network - "network/_non_layer/a_variable", + "model/_second/kernel", + "model/_named_dense/kernel", + "model/_named_dense/bias", + # non-Layer dependency of the model + "model/_non_layer/a_variable", # The optimizer creates two non-slot variables "optimizer/beta1_power", "optimizer/beta2_power", # Slot variables - "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/m", - "network/via_track_layer/kernel/.OPTIMIZER_SLOT/optimizer/v", - "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", - "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", - "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", - "network/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_second/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/v", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/m", + "model/_named_dense/bias/.OPTIMIZER_SLOT/optimizer/v", ) suffix = "/.ATTRIBUTES/VARIABLE_VALUE" expected_checkpoint_names = [ @@ -229,11 +211,11 @@ class CheckpointingTests(test.TestCase): "global_step:0", named_variables["optimizer_step" + suffix].name) self.assertEqual( - "my_network/dense_1/kernel:0", - named_variables["network/via_track_layer/kernel" + suffix].name) + "my_model/dense_1/kernel:0", + named_variables["model/_second/kernel" + suffix].name) self.assertEqual( - "my_network/dense/kernel:0", - named_variables["network/_named_dense/kernel" + suffix].name) + "my_model/dense/kernel:0", + named_variables["model/_named_dense/kernel" + suffix].name) self.assertEqual( "beta1_power:0", named_variables["optimizer/beta1_power" + suffix].name) @@ -251,80 +233,80 @@ class CheckpointingTests(test.TestCase): 
serialized_graph.nodes[optimizer_node.children[0].node_id] .attributes[0].full_name) self.assertEqual( - "my_network/dense/kernel", + "my_model/dense/kernel", serialized_graph.nodes[optimizer_node.slot_variables[0] .original_variable_node_id] .attributes[0].full_name) # We strip off the :0 suffix, as variable.name-based saving does. self.assertEqual( - "my_network/dense/kernel/Adam", + "my_model/dense/kernel/Adam", serialized_graph.nodes[optimizer_node.slot_variables[0] .slot_variable_node_id] .attributes[0].full_name) self.assertEqual( - "my_network/dense/kernel/Adam:0", + "my_model/dense/kernel/Adam:0", optimizer.get_slot( - var=named_variables["network/_named_dense/kernel" + suffix], + var=named_variables["model/_named_dense/kernel" + suffix], name="m").name) self.assertEqual( - "network/_named_dense/kernel" + suffix, + "model/_named_dense/kernel" + suffix, serialized_graph.nodes[ optimizer_node.slot_variables[0] .original_variable_node_id].attributes[0].checkpoint_key) self.assertEqual("m", optimizer_node.slot_variables[0].slot_name) self.assertEqual( - "network/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, + "model/_named_dense/kernel/.OPTIMIZER_SLOT/optimizer/m" + suffix, serialized_graph.nodes[ optimizer_node.slot_variables[0] .slot_variable_node_id].attributes[0].checkpoint_key) @test_util.run_in_graph_and_eager_modes() def testSaveRestore(self): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root_checkpointable = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network) + optimizer=optimizer, model=model) input_value = constant_op.constant([[3.]]) if context.in_eager_mode(): optimizer.minimize( - lambda: network(input_value)) + lambda: model(input_value)) else: - train_op = optimizer.minimize(network(input_value)) + train_op = optimizer.minimize(model(input_value)) # TODO(allenl): Make initialization more pleasant when graph building. 
root_checkpointable.save_counter # pylint: disable=pointless-statement self.evaluate(checkpointable_utils.gather_initializers( root_checkpointable)) self.evaluate(train_op) prefix = os.path.join(self.get_temp_dir(), "ckpt") - self.evaluate(state_ops.assign(network._named_dense.variables[1], [42.])) - m_bias_slot = optimizer.get_slot(network._named_dense.variables[1], "m") + self.evaluate(state_ops.assign(model._named_dense.variables[1], [42.])) + m_bias_slot = optimizer.get_slot(model._named_dense.variables[1], "m") self.evaluate(state_ops.assign(m_bias_slot, [1.5])) save_path = root_checkpointable.save(file_prefix=prefix) - self.evaluate(state_ops.assign(network._named_dense.variables[1], [43.])) + self.evaluate(state_ops.assign(model._named_dense.variables[1], [43.])) self.evaluate(state_ops.assign(root_checkpointable.save_counter, 3)) optimizer_variables = self.evaluate(optimizer.variables()) self.evaluate(state_ops.assign(m_bias_slot, [-2.])) # Immediate restoration status = root_checkpointable.restore(save_path=save_path).assert_consumed() status.run_restore_ops() - self.assertAllEqual([42.], self.evaluate(network._named_dense.variables[1])) + self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter)) self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) if context.in_graph_mode(): return # Restore-on-create is only supported when executing eagerly - on_create_network = MyNetwork() + on_create_model = MyModel() on_create_optimizer = adam.AdamOptimizer(0.001) on_create_root = checkpointable_utils.Checkpoint( - optimizer=on_create_optimizer, network=on_create_network) + optimizer=on_create_optimizer, model=on_create_model) # Deferred restoration status = on_create_root.restore(save_path=save_path) - on_create_network(constant_op.constant([[3.]])) # create variables + on_create_model(constant_op.constant([[3.]])) # create variables self.assertAllEqual(1, 
self.evaluate(on_create_root.save_counter)) self.assertAllEqual([42.], self.evaluate( - on_create_network._named_dense.variables[1])) + on_create_model._named_dense.variables[1])) on_create_m_bias_slot = on_create_optimizer.get_slot( - on_create_network._named_dense.variables[1], "m") + on_create_model._named_dense.variables[1], "m") # Optimizer slot variables are created when the original variable is # restored. self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot)) @@ -344,17 +326,17 @@ class CheckpointingTests(test.TestCase): checkpoint_directory = self.get_temp_dir() checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, + optimizer=optimizer, model=model, optimizer_step=training_util.get_or_create_global_step()) root.restore(core_saver.latest_checkpoint(checkpoint_directory)) for _ in range(num_training_steps): # TODO(allenl): Use a Dataset and serialize/checkpoint it. 
input_value = constant_op.constant([[3.]]) optimizer.minimize( - lambda: network(input_value), # pylint: disable=cell-var-from-loop + lambda: model(input_value), # pylint: disable=cell-var-from-loop global_step=root.optimizer_step) root.save(file_prefix=checkpoint_prefix) self.assertEqual((training_continuation + 1) * num_training_steps, @@ -368,14 +350,14 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): with ops.Graph().as_default(): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, + optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) input_value = constant_op.constant([[3.]]) train_op = optimizer.minimize( - network(input_value), + model(input_value), global_step=root.global_step) checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) with self.test_session(graph=ops.get_default_graph()) as session: @@ -405,17 +387,17 @@ class CheckpointingTests(test.TestCase): for training_continuation in range(3): with ops.Graph().as_default(), self.test_session( graph=ops.get_default_graph()): - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, + optimizer=optimizer, model=model, global_step=training_util.get_or_create_global_step()) checkpoint_path = core_saver.latest_checkpoint(checkpoint_directory) status = root.restore(save_path=checkpoint_path) input_value = constant_op.constant([[3.]]) train_fn = functools.partial( optimizer.minimize, - functools.partial(network, input_value), + functools.partial(model, input_value), global_step=root.global_step) if context.in_graph_mode(): train_fn = functools.partial(self.evaluate, train_fn()) @@ -877,41 +859,41 @@ class CheckpointCompatibilityTests(test.TestCase): def 
_initialized_model(self): input_value = constant_op.constant([[3.]]) - network = MyNetwork() + model = MyModel() optimizer = adam.AdamOptimizer(0.001) optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( - optimizer=optimizer, network=network, optimizer_step=optimizer_step) + optimizer=optimizer, model=model, optimizer_step=optimizer_step) train_op = optimizer.minimize( - functools.partial(network, input_value), + functools.partial(model, input_value), global_step=optimizer_step) self.evaluate(checkpointable_utils.gather_initializers( root_checkpointable)) self.evaluate(train_op) # A regular variable, a slot variable, and a non-slot Optimizer variable # with known values to check when loading. - self.evaluate(network._named_dense.bias.assign([1.])) + self.evaluate(model._named_dense.bias.assign([1.])) self.evaluate(optimizer.get_slot( - var=network._named_dense.bias, name="m").assign([2.])) + var=model._named_dense.bias, name="m").assign([2.])) beta1_power, _ = optimizer._get_beta_accumulators() self.evaluate(beta1_power.assign(3.)) return root_checkpointable def _set_sentinels(self, root_checkpointable): - self.evaluate(root_checkpointable.network._named_dense.bias.assign([101.])) + self.evaluate(root_checkpointable.model._named_dense.bias.assign([101.])) self.evaluate( root_checkpointable.optimizer.get_slot( - var=root_checkpointable.network._named_dense.bias, name="m") + var=root_checkpointable.model._named_dense.bias, name="m") .assign([102.])) beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() self.evaluate(beta1_power.assign(103.)) def _check_sentinels(self, root_checkpointable): self.assertAllEqual( - [1.], self.evaluate(root_checkpointable.network._named_dense.bias)) + [1.], self.evaluate(root_checkpointable.model._named_dense.bias)) self.assertAllEqual([2.], self.evaluate( root_checkpointable.optimizer.get_slot( - var=root_checkpointable.network._named_dense.bias, name="m"))) 
+ var=root_checkpointable.model._named_dense.bias, name="m"))) beta1_power, _ = root_checkpointable.optimizer._get_beta_accumulators() self.assertAllEqual(3., self.evaluate(beta1_power)) diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 453cc8f8b7..e47bba9267 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -38,6 +38,7 @@ from tensorflow.python.keras._impl.keras.utils.layer_utils import print_summary from tensorflow.python.layers import base as tf_base_layers from tensorflow.python.layers import utils as tf_layers_util from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect @@ -302,6 +303,13 @@ class Network(base_layer.Layer): if not is_graph_network: if value not in self._layers: self._layers.append(value) + if isinstance(value, checkpointable.CheckpointableBase): + # Layer (and therefore Network/Model) inherit from CheckpointableBase + # rather than Checkpointable, which means there is no Checkpointable + # __setattr__ override (it would be a performance issue for functional + # layers). Therefore Model tracks Checkpointable objects itself. 
+ self._track_checkpointable( + checkpointable=value, name=name, overwrite=True) super(Network, self).__setattr__(name, value) def add_variable(self, name, shape, dtype=None, initializer=None, diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 2d040e7c0f..81ab77094e 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -879,6 +879,15 @@ class Model(Network): else: self._symbolic_set_inputs(inputs, training=training) + def _set_scope(self, scope=None): + """Modify the Layer scope creation logic to create ResourceVariables.""" + super(Model, self)._set_scope(scope=scope) + # Subclassed Models create ResourceVariables by default. This makes it + # easier to use Models in an eager/graph agnostic way (since eager execution + # always uses ResourceVariables). + if not self._is_graph_network: + self._scope.set_use_resource(True) + def _eager_set_inputs(self, inputs): """Set model's input and output specs based on the input data received. -- GitLab From 45daab910a3c730380594317749d911db5e933e6 Mon Sep 17 00:00:00 2001 From: Xiaoqiang Zheng Date: Thu, 1 Mar 2018 15:41:11 -0800 Subject: [PATCH 312/884] An fp16 implementation for ReluGrad. On V100 with Cuda 9, it reduces the average ReluGrad kernel time in Resnet50 from 249.44 us to 175.60 us, a 42% speedup. On Titan-X Pascal with Cuda 9, it reduces the average ReluGrad kernel time in Resnet50 from 747.98 us to 509.37 us, a 46.8% improvement. 
PiperOrigin-RevId: 187545504 --- tensorflow/core/kernels/relu_op_gpu.cu.cc | 93 ++++++++++++++++++- tensorflow/core/util/cuda_kernel_helper.h | 5 + .../python/kernel_tests/relu_op_test.py | 31 +++++++ 3 files changed, 127 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/relu_op_gpu.cu.cc b/tensorflow/core/kernels/relu_op_gpu.cu.cc index ec09d8dfea..6e46c979f3 100644 --- a/tensorflow/core/kernels/relu_op_gpu.cu.cc +++ b/tensorflow/core/kernels/relu_op_gpu.cu.cc @@ -19,15 +19,104 @@ limitations under the License. #include -#include "tensorflow/core/kernels/relu_op_functor.h" - +#include "third_party/eigen3/Eigen/Core" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/kernels/relu_op_functor.h" +#include "tensorflow/core/util/cuda_kernel_helper.h" +#include "tensorflow/core/util/cuda_launch_config.h" namespace tensorflow { typedef Eigen::GpuDevice GPUDevice; +namespace functor { +#ifdef TF_HAS_CUDA_FP16 + +// This kernel computes ReluGrad by processing one half2, two fp16, at a time. +// It effectively does: backprop = (feature > 0) ? gradient : 0 +// It also tries to use native half2 primitives as much as possible. +__global__ void ReluGradHalfKernel(const Eigen::half* gradient, + const Eigen::half* feature, + Eigen::half* backprop, int32 count) { + int32 half2_count = count >> 1; + int32 index = blockIdx.x * blockDim.x + threadIdx.x; + const int32 total_device_threads = gridDim.x * blockDim.x; + + while (index < half2_count) { + // The fast branch. + // One half2, two fp16, is fetched and processed at a time. + half2 gradient_h2 = reinterpret_cast(gradient)[index]; + half2 feature_h2 = reinterpret_cast(feature)[index]; + half2* p_backprop_h2 = reinterpret_cast(backprop) + index; + +#if __CUDA_ARCH__ >= 530 + // Fast path, when half2 primitives are available. 
+ const half2 kZeroH2 = __float2half2_rn(0.f); + // mask = (feature > 0) + half2 mask_h2 = __hgt2(feature_h2, kZeroH2); + // backprop = mask * gradient + half2 backprop_h2 = __hmul2(mask_h2, gradient_h2); +#else + // Fall back: convert half2 to float2 for processing. + float2 feature_f2 = __half22float2(feature_h2); + float2 gradient_f2 = __half22float2(gradient_h2); + float2 backprop_f2 = make_float2((feature_f2.x > 0) ? gradient_f2.x : 0, + (feature_f2.y > 0) ? gradient_f2.y : 0); + // Convert back to half2. + half2 backprop_h2 = __float22half2_rn(backprop_f2); +#endif + + // Write back the result. + *p_backprop_h2 = backprop_h2; + + index += total_device_threads; + } + + if ((count & 0x1) == 1 && index == half2_count) { + // If the total number of the elements is odd, process the last element. + Eigen::half grad_h = gradient[count - 1]; + Eigen::half feature_h = feature[count - 1]; + + float grad_f = static_cast(grad_h); + float feature_f = static_cast(feature_h); + float backprop_f = (feature_f > 0) ? grad_f : 0; + + Eigen::half backprop_h(backprop_f); + backprop[count - 1] = backprop_h; + } +} + +template +struct ReluGrad { + // Computes ReluGrad backprop. + // + // gradient: gradient backpropagated to the Relu op. + // feature: either the inputs that were passed to the Relu, or its outputs + // (using either one yields the same result here). + // backprop: gradient to backpropagate to the Relu inputs. + void operator()(const Device& d, + typename TTypes::ConstTensor gradient, + typename TTypes::ConstTensor feature, + typename TTypes::Tensor backprop) { + // NOTE: When the activation is exactly zero, we do not propagate the + // associated gradient value. This allows the output of the Relu to be used, + // as well as its input. 
+ int32 count = gradient.size(); + if (count == 0) return; + int32 half2_count = Eigen::divup(count, 2); + const int32 kThreadInBlock = 512; + CudaLaunchConfig config = GetCudaLaunchConfigFixedBlockSize( + half2_count, d, ReluGradHalfKernel, 0, kThreadInBlock); + ReluGradHalfKernel<<>>(gradient.data(), feature.data(), + backprop.data(), count); + } +}; + +#endif // TF_HAS_CUDA_FP16 +} // namespace functor + // Definition of the GPU implementations declared in relu_op.cc. #define DEFINE_GPU_KERNELS(T) \ template struct functor::Relu; \ diff --git a/tensorflow/core/util/cuda_kernel_helper.h b/tensorflow/core/util/cuda_kernel_helper.h index 18a4c008f1..01a5b6828a 100644 --- a/tensorflow/core/util/cuda_kernel_helper.h +++ b/tensorflow/core/util/cuda_kernel_helper.h @@ -21,6 +21,11 @@ limitations under the License. #include "tensorflow/core/util/cuda_device_functions.h" #include "tensorflow/core/util/cuda_launch_config.h" +#if CUDA_VERSION >= 7050 +#include "cuda/include/cuda_fp16.h" +#define TF_HAS_CUDA_FP16 +#endif + // Deprecated, use 'for(int i : CudaGridRangeX(n))' instead. 
#define CUDA_1D_KERNEL_LOOP(i, n) \ for (int i : ::tensorflow::CudaGridRangeX(n)) diff --git a/tensorflow/python/kernel_tests/relu_op_test.py b/tensorflow/python/kernel_tests/relu_op_test.py index 6b4091ae5d..25e947f09e 100644 --- a/tensorflow/python/kernel_tests/relu_op_test.py +++ b/tensorflow/python/kernel_tests/relu_op_test.py @@ -19,12 +19,14 @@ from __future__ import division from __future__ import print_function import numpy as np +from six.moves import xrange # pylint: disable=redefined-builtin from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables @@ -87,6 +89,35 @@ class ReluTest(test.TestCase): print("relu (float32) gradient err = ", err) self.assertLess(err, 1e-4) + # The gradient for fp16 is inaccurate due to the low-precision. + # Instead of relying on compute_gradient_error, we compare the fp16 analytical + # gradient against their fp32 counterpart. + def testGradientFloat16(self): + with self.test_session(use_gpu=True) as sess: + # Randomly construct a 1D shape from [1, 40) + shape = random_ops.random_uniform( + [1], minval=1, maxval=40, dtype=dtypes.int32) + + # Construct the fp32 graph and its gradient. + x = random_ops.random_uniform(shape, minval=-1, maxval=1, name="x") + y1 = nn_ops.relu(x, name="relu_fp32") + l1 = nn_ops.l2_loss(y1) + dx_f32 = gradients_impl.gradients(l1, x) + + # Construct the fp16 graph and its gradient. + # It starts with the same x, in fp32. But before it reaches Relu, it is + # cast into fp16. So during backprop, the gradient computation is in fp16. 
+ x2 = math_ops.cast(x, dtype=dtypes.float16, name="cast") + y2 = nn_ops.relu(x2, name="relu_fp16") + l2 = nn_ops.l2_loss(y2) + dx_f16 = gradients_impl.gradients(l2, x) + + # Repeat the experiment for 100 times. All tensor shapes and its tensor + # values are randomly generated for each run. + for _ in xrange(100): + dx_f32_v, dx_f16_v = sess.run([dx_f32, dx_f16]) + self.assertAllClose(dx_f32_v, dx_f16_v, atol=3e-4) + def testGradientFloat64(self): with self.test_session(): x = constant_op.constant( -- GitLab From 80ebc380ec8dacdf900cc66c6590054e26b6dade Mon Sep 17 00:00:00 2001 From: Anna R Date: Thu, 1 Mar 2018 15:47:28 -0800 Subject: [PATCH 313/884] Fix batch_norm_benchmark. PiperOrigin-RevId: 187546384 --- tensorflow/python/ops/batch_norm_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/batch_norm_benchmark.py b/tensorflow/python/ops/batch_norm_benchmark.py index 4f65e3771c..5d68b47aea 100644 --- a/tensorflow/python/ops/batch_norm_benchmark.py +++ b/tensorflow/python/ops/batch_norm_benchmark.py @@ -41,7 +41,7 @@ def batch_norm_op(tensor, mean, variance, beta, gamma, scale): # _batch_norm_with_global_normalization is deprecated in v9 ops.get_default_graph().graph_def_versions.producer = 8 # pylint: disable=protected-access - return gen_nn_ops.batch_norm_with_global_normalization( + return gen_nn_ops._batch_norm_with_global_normalization( tensor, mean, variance, beta, gamma, 0.001, scale) # pylint: enable=protected-access -- GitLab From 6db78cd5266dc761c4f90a80d7555c6c33fc453a Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 1 Mar 2018 16:00:17 -0800 Subject: [PATCH 314/884] [ClusterFLR] Prolong the lifetime of the RunGraphRequest until the call has completed. Some WorkerService implementations rely on the request object remaining live until the callback is called. 
PiperOrigin-RevId: 187548140 --- .../cluster_function_library_runtime.cc | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc index 3a8d591236..0c5c4d59ed 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc @@ -175,32 +175,33 @@ void ClusterFunctionLibraryRuntime::Run( return; } - RunGraphRequest req; - req.set_session_handle(worker_session_->session_name); - req.set_graph_handle(function_data->graph_handle); + RunGraphRequest* req = new RunGraphRequest; + req->set_session_handle(worker_session_->session_name); + req->set_graph_handle(function_data->graph_handle); // Borrowed from master_session.cc const uint64 step_id = (random::New64() & ((1uLL << 56) - 1)) | (1uLL << 56); - req.set_step_id(step_id); + req->set_step_id(step_id); int i = 0; for (const auto& send_key : function_data->send_keys) { - NamedTensorProto* send = req.add_send(); + NamedTensorProto* send = req->add_send(); send->set_name(send_key); args[i].AsProtoTensorContent(send->mutable_tensor()); i++; } const std::vector& recv_keys = function_data->recv_keys; for (const auto& recv_key : recv_keys) { - req.add_recv_key(recv_key); + req->add_recv_key(recv_key); } RunGraphResponse* resp = new RunGraphResponse(); CallOptions* call_options = new CallOptions(); wi->RunGraphAsync( - call_options, &req, resp, - [call_options, resp, rets, recv_keys, done](const Status& status) { + call_options, req, resp, + [call_options, req, resp, rets, recv_keys, done](const Status& status) { if (!status.ok()) { done(status); delete call_options; + delete req; delete resp; return; } @@ -212,25 +213,28 @@ void ClusterFunctionLibraryRuntime::Run( for (const auto& recv_key : recv_keys) { TensorProto* tp = mapped_recvs[recv_key]; if (tp == 
nullptr) { + done(errors::Internal("Could not find key: ", recv_key)); delete call_options; + delete req; delete resp; - done(errors::Internal("Could not find key: ", recv_key)); return; } Tensor t; if (t.FromProto(*tp)) { rets->push_back(t); } else { - delete call_options; - delete resp; done(errors::Internal("Could not convert tensor proto: ", tp->DebugString())); + delete call_options; + delete req; + delete resp; return; } } + done(status); delete call_options; + delete req; delete resp; - done(status); }); } -- GitLab From c4a50c5897170edf3055afcce25c981ee331de07 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 16:06:22 -0800 Subject: [PATCH 315/884] Do not crash if we failed to get the field name. PiperOrigin-RevId: 187549153 --- tensorflow/contrib/lite/java/proguard.flags | 3 +++ .../lite/java/src/main/native/nativeinterpreterwrapper_jni.cc | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/lite/java/proguard.flags diff --git a/tensorflow/contrib/lite/java/proguard.flags b/tensorflow/contrib/lite/java/proguard.flags new file mode 100644 index 0000000000..8ee3d7e7ae --- /dev/null +++ b/tensorflow/contrib/lite/java/proguard.flags @@ -0,0 +1,3 @@ +-keepclassmembers class org.tensorflow.lite.NativeInterpreterWrapper { + private long inferenceDurationNanoseconds; +} \ No newline at end of file diff --git a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc index 47bf4c9c9d..475b467fac 100644 --- a/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc +++ b/tensorflow/contrib/lite/java/src/main/native/nativeinterpreterwrapper_jni.cc @@ -447,7 +447,9 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_run( jclass wrapper_clazz = env->GetObjectClass(wrapper); jfieldID fid = env->GetFieldID(wrapper_clazz, "inferenceDurationNanoseconds", "J"); - if (fid != 0) { + if 
(env->ExceptionCheck()) { + env->ExceptionClear(); + } else if (fid != nullptr) { env->SetLongField( wrapper, fid, ::tflite::timespec_diff_nanoseconds(&beforeInference, &afterInference)); -- GitLab From 980028f59f96c7e60688fef9106df2d043e02629 Mon Sep 17 00:00:00 2001 From: Karmel Allison Date: Thu, 1 Mar 2018 16:33:26 -0800 Subject: [PATCH 316/884] Adds a TensorServingInputReceiver that allows export_savedmodel to pass raw tensors to model functions. Addresses #11674. PiperOrigin-RevId: 187552824 --- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/estimator/estimator_test.py | 55 ++++++++++++++++ tensorflow/python/estimator/export/export.py | 56 +++++++++++++++++ .../python/estimator/export/export_lib.py | 2 + .../python/estimator/export/export_test.py | 62 +++++++++++++++++++ ...xport.-tensor-serving-input-receiver.pbtxt | 27 ++++++++ .../golden/tensorflow.estimator.export.pbtxt | 4 ++ 7 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 1167b3834e..1a2b33721a 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -570,7 +570,7 @@ class Estimator(object): export_dir_base: A string containing a directory in which to create timestamped subdirectories containing exported SavedModels. serving_input_receiver_fn: A function that takes no argument and - returns a `ServingInputReceiver`. + returns a `ServingInputReceiver` or `TensorServingInputReceiver`. assets_extra: A dict specifying how to populate the assets.extra directory within the exported SavedModel, or `None` if no extra assets are needed. as_text: whether to write the SavedModel proto in text format. 
diff --git a/tensorflow/python/estimator/estimator_test.py b/tensorflow/python/estimator/estimator_test.py index 7a0745b1d0..ac0ff41dd2 100644 --- a/tensorflow/python/estimator/estimator_test.py +++ b/tensorflow/python/estimator/estimator_test.py @@ -48,6 +48,7 @@ from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import lookup_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_lib from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops @@ -1936,6 +1937,60 @@ class EstimatorExportTest(test.TestCase): # cleanup gfile.DeleteRecursively(tmpdir) + def test_export_savedmodel_tensor_features(self): + """Test that models accepting a single raw Tensor can be exported. + + See https://github.com/tensorflow/tensorflow/issues/11674 + + If the model_fn and receiver_fn accept raw tensors rather than dictionaries + as input, export_savedmodel should be okay with that, too. + + """ + + tmpdir = tempfile.mkdtemp() + + def _input_fn_tensor_features(): + t = array_ops.constant([1, 2, 3], dtype=dtypes.float32, shape=[1, 3]) + return (t, None) + + def _model_fn_tensor_features(features, labels, mode): + _ = labels + prediction = math_ops.matmul(features, features, transpose_b=True) + + return model_fn_lib.EstimatorSpec( + mode, + predictions=prediction, + loss=constant_op.constant(1.), + train_op=state_ops.assign_add(training.get_global_step(), 1), + export_outputs={ + 'test': export_output.PredictOutput({'prediction': prediction}) + }) + + def _serving_input_receiver_fn(): + feat = array_ops.placeholder(dtype=dtypes.float32) + return export.TensorServingInputReceiver( + features=feat, receiver_tensors=feat) + + est = estimator.Estimator(model_fn=_model_fn_tensor_features) + est.train(input_fn=_input_fn_tensor_features, steps=1) + + # Perform the export. 
+ export_dir_base = os.path.join( + compat.as_bytes(tmpdir), compat.as_bytes('export')) + export_dir = est.export_savedmodel( + export_dir_base, _serving_input_receiver_fn) + + # Restore, to validate that the export was well-formed. + with ops.Graph().as_default() as graph: + with session.Session(graph=graph) as sess: + loader.load(sess, [tag_constants.SERVING], export_dir) + graph_ops = [x.name.lower() for x in graph.get_operations()] + self.assertTrue('const' in graph_ops) + self.assertTrue('matmul' in graph_ops) + + # Clean up. + gfile.DeleteRecursively(tmpdir) + def test_scaffold_is_used_for_saver(self): tmpdir = tempfile.mkdtemp() diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index 83251c79fc..f240e11478 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -120,6 +120,62 @@ class ServingInputReceiver(collections.namedtuple( receiver_tensors_alternatives=receiver_tensors_alternatives) +@tf_export('estimator.export.TensorServingInputReceiver') +class TensorServingInputReceiver(collections.namedtuple( + 'TensorServingInputReceiver', + ['features', 'receiver_tensors', 'receiver_tensors_alternatives'])): + """A return type for a serving_input_receiver_fn. + + This is for use with models that expect a single `Tensor` or `SparseTensor` + as an input feature, as opposed to a dict of features. + + The normal `ServingInputReceiver` always returns a feature dict, even if it + contains only one entry, and so can be used only with models that accept such + a dict. For models that accept only a single raw feature, the + `serving_input_receiver_fn` provided to `Estimator.export_savedmodel()` should + return this `TensorServingInputReceiver` instead. 
See: + https://github.com/tensorflow/tensorflow/issues/11674 + + Note that the receiver_tensors and receiver_tensor_alternatives arguments + will be automatically converted to the dict representation in either case, + because the SavedModel format requires each input `Tensor` to have a name + (provided by the dict key). + + The expected return values are: + features: A single `Tensor` or `SparseTensor`, representing the feature + to be passed to the model. + receiver_tensors: a `Tensor`, or a dict of string to `Tensor`, specifying + input nodes where this receiver expects to be fed by default. Typically, + this is a single placeholder expecting serialized `tf.Example` protos. + receiver_tensors_alternatives: a dict of string to additional + groups of receiver tensors, each of which may be a `Tensor` or a dict of + string to `Tensor`. These named receiver tensor alternatives generate + additional serving signatures, which may be used to feed inputs at + different points within the input receiver subgraph. A typical usage is + to allow feeding raw feature `Tensor`s *downstream* of the + tf.parse_example() op. Defaults to None. 
+ """ + + def __new__(cls, features, receiver_tensors, + receiver_tensors_alternatives=None): + if features is None: + raise ValueError('features must be defined.') + if not (isinstance(features, ops.Tensor) + or isinstance(features, sparse_tensor.SparseTensor)): + raise ValueError('feature must be a Tensor or SparseTensor.') + + receiver = ServingInputReceiver( + features=features, + receiver_tensors=receiver_tensors, + receiver_tensors_alternatives=receiver_tensors_alternatives) + + return super(TensorServingInputReceiver, cls).__new__( + cls, + features=receiver.features[_SINGLE_FEATURE_DEFAULT_NAME], + receiver_tensors=receiver.receiver_tensors, + receiver_tensors_alternatives=receiver.receiver_tensors_alternatives) + + @tf_export('estimator.export.build_parsing_serving_input_receiver_fn') def build_parsing_serving_input_receiver_fn(feature_spec, default_batch_size=None): diff --git a/tensorflow/python/estimator/export/export_lib.py b/tensorflow/python/estimator/export/export_lib.py index 99cd81d678..226fc97fd3 100644 --- a/tensorflow/python/estimator/export/export_lib.py +++ b/tensorflow/python/estimator/export/export_lib.py @@ -22,6 +22,7 @@ from __future__ import print_function from tensorflow.python.estimator.export.export import build_parsing_serving_input_receiver_fn from tensorflow.python.estimator.export.export import build_raw_serving_input_receiver_fn from tensorflow.python.estimator.export.export import ServingInputReceiver +from tensorflow.python.estimator.export.export import TensorServingInputReceiver from tensorflow.python.estimator.export.export_output import ClassificationOutput from tensorflow.python.estimator.export.export_output import ExportOutput from tensorflow.python.estimator.export.export_output import PredictOutput @@ -34,6 +35,7 @@ _allowed_symbols = [ 'build_parsing_serving_input_receiver_fn', 'build_raw_serving_input_receiver_fn', 'ServingInputReceiver', + 'TensorServingInputReceiver', 'ClassificationOutput', 'ExportOutput', 
'PredictOutput', diff --git a/tensorflow/python/estimator/export/export_test.py b/tensorflow/python/estimator/export/export_test.py index 8442bf04ac..eb9688bc97 100644 --- a/tensorflow/python/estimator/export/export_test.py +++ b/tensorflow/python/estimator/export/export_test.py @@ -385,5 +385,67 @@ class ExportTest(test_util.TensorFlowTestCase): self.assertTrue(int(time_2) < int(time_3)) +class TensorServingReceiverTest(test_util.TensorFlowTestCase): + + def test_tensor_serving_input_receiver_constructor(self): + features = constant_op.constant([0]) + receiver_tensors = { + "example0": array_ops.placeholder(dtypes.string, name="example0"), + u"example1": array_ops.placeholder(dtypes.string, name="example1"), + } + r = export.TensorServingInputReceiver(features, receiver_tensors) + self.assertTrue(isinstance(r.features, ops.Tensor)) + self.assertTrue(isinstance(r.receiver_tensors, dict)) + + def test_tensor_serving_input_receiver_sparse(self): + features = sparse_tensor.SparseTensor( + indices=[[0, 0]], values=[1], dense_shape=[1, 1]) + receiver_tensors = { + "example0": array_ops.placeholder(dtypes.string, name="example0"), + u"example1": array_ops.placeholder(dtypes.string, name="example1"), + } + r = export.TensorServingInputReceiver(features, receiver_tensors) + self.assertTrue(isinstance(r.features, sparse_tensor.SparseTensor)) + self.assertTrue(isinstance(r.receiver_tensors, dict)) + + def test_serving_input_receiver_features_invalid(self): + receiver_tensors = { + "example0": array_ops.placeholder(dtypes.string, name="example0"), + u"example1": array_ops.placeholder(dtypes.string, name="example1"), + } + + with self.assertRaisesRegexp(ValueError, "features must be defined"): + export.TensorServingInputReceiver( + features=None, + receiver_tensors=receiver_tensors) + + with self.assertRaisesRegexp(ValueError, "feature must be a Tensor"): + export.TensorServingInputReceiver( + features={"1": constant_op.constant([1])}, + receiver_tensors=receiver_tensors) + + 
def test_serving_input_receiver_receiver_tensors_invalid(self): + features = constant_op.constant([0]) + + with self.assertRaisesRegexp( + ValueError, "receiver_tensors must be defined"): + export.TensorServingInputReceiver( + features=features, + receiver_tensors=None) + + with self.assertRaisesRegexp( + ValueError, "receiver_tensors keys must be strings"): + export.TensorServingInputReceiver( + features=features, + receiver_tensors={ + 1: array_ops.placeholder(dtypes.string, name="example0")}) + + with self.assertRaisesRegexp( + ValueError, "receiver_tensor example1 must be a Tensor"): + export.TensorServingInputReceiver( + features=features, + receiver_tensors={"example1": [1]}) + + if __name__ == "__main__": test.main() diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt new file mode 100644 index 0000000000..4fe92643bf --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.estimator.export.-tensor-serving-input-receiver.pbtxt @@ -0,0 +1,27 @@ +path: "tensorflow.estimator.export.TensorServingInputReceiver" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "features" + mtype: "" + } + member { + name: "receiver_tensors" + mtype: "" + } + member { + name: "receiver_tensors_alternatives" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt index 4d0dddb3bc..bd72f6cd79 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.export.pbtxt @@ -20,6 +20,10 @@ tf_module { name: "ServingInputReceiver" mtype: "" } + member { + name: "TensorServingInputReceiver" + mtype: "" + } member_method { 
name: "build_parsing_serving_input_receiver_fn" argspec: "args=[\'feature_spec\', \'default_batch_size\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 72b3a5cd8d787bcdab40a94de4788e7e555c76da Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 1 Mar 2018 16:52:07 -0800 Subject: [PATCH 317/884] Expose Checkpointable symbols in tf.contrib.eager/tfe - tfe.Checkpoint Utility for grouping Checkpointable objects into training checkpoints, has save/restore methods which call CheckpointableSaver. - tfe.Checkpointable For user-defined Checkpointable objects. - tfe.CheckpointableSaver More control over saving/restoring than tfe.Checkpoint. Only tfe.Checkpoint is required to switch examples over, so I can leave the others out if there are objections. PiperOrigin-RevId: 187555472 --- tensorflow/contrib/cmake/python_modules.txt | 1 + tensorflow/contrib/cmake/python_protos.txt | 1 + tensorflow/contrib/eager/python/BUILD | 1 + tensorflow/contrib/eager/python/tfe.py | 7 +++++++ 4 files changed, 10 insertions(+) diff --git a/tensorflow/contrib/cmake/python_modules.txt b/tensorflow/contrib/cmake/python_modules.txt index bfe53c01b3..0d2a6a23db 100644 --- a/tensorflow/contrib/cmake/python_modules.txt +++ b/tensorflow/contrib/cmake/python_modules.txt @@ -165,6 +165,7 @@ tensorflow/contrib/distributions/python tensorflow/contrib/distributions/python/ops tensorflow/contrib/distributions/python/ops/bijectors tensorflow/contrib/eager +tensorflow/contrib/eager/proto tensorflow/contrib/eager/python tensorflow/contrib/estimator tensorflow/contrib/estimator/python diff --git a/tensorflow/contrib/cmake/python_protos.txt b/tensorflow/contrib/cmake/python_protos.txt index 8a9c406d8b..c03c0c80fe 100644 --- a/tensorflow/contrib/cmake/python_protos.txt +++ b/tensorflow/contrib/cmake/python_protos.txt @@ -4,6 +4,7 @@ tensorflow/python tensorflow/contrib/boosted_trees/proto tensorflow/contrib/cloud/kernels tensorflow/contrib/decision_trees/proto 
+tensorflow/contrib/eager/proto tensorflow/contrib/gdr tensorflow/contrib/lite/toco tensorflow/contrib/mpi diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 8c4b0827fd..e8c514c114 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -11,6 +11,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//visibility:public"], deps = [ + ":checkpointable_utils", ":datasets", ":metrics", ":network", diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index d32bebf90c..fce7a60853 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -56,6 +56,10 @@ To use, at program startup, call `tfe.enable_eager_execution()`. @@save_network_checkpoint @@restore_network_checkpoint +@@Checkpoint +@@Checkpointable +@@CheckpointableSaver + @@in_eager_mode @@in_graph_mode @@ -74,6 +78,8 @@ from __future__ import print_function # pylint:disable=g-bad-import-order,g-import-not-at-top,unused-import # from tensorflow.contrib.eager.python import metrics +from tensorflow.contrib.eager.python.checkpointable_utils import CheckpointableSaver +from tensorflow.contrib.eager.python.checkpointable_utils import Checkpoint from tensorflow.contrib.eager.python.datasets import Iterator from tensorflow.contrib.eager.python.network import Network from tensorflow.contrib.eager.python.network import Sequential @@ -105,6 +111,7 @@ from tensorflow.python.ops.resource_variable_ops import ResourceVariable as Vari from tensorflow.python.ops.variable_scope import EagerVariableStore from tensorflow.python.ops import script_ops from tensorflow.python.ops import template +from tensorflow.python.training.checkpointable import Checkpointable from tensorflow.python.util.all_util import remove_undocumented py_func = script_ops.eager_py_func -- GitLab From 39ca1b1d77242b2a614d091ce79a765fd2c376c0 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 1 
Mar 2018 17:03:56 -0800 Subject: [PATCH 318/884] Make segmentation option configurable --- tensorflow/contrib/tensorrt/convert/convert_graph.cc | 7 +++---- tensorflow/contrib/tensorrt/convert/convert_graph.h | 2 +- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 4 +++- tensorflow/contrib/tensorrt/python/trt_convert.py | 5 +++-- tensorflow/contrib/tensorrt/trt_conversion.i | 7 ++++--- 5 files changed, 14 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 23ebaf35ba..638fdebcac 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -322,7 +322,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode = 0) { + int precision_mode = 0, int minimum_segment_size = 3) { // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; @@ -357,7 +357,6 @@ tensorflow::Status ConvertGraphDefToTensorRT( // AJ refactoring shape inference through grappler/GraphProperties. tensorflow::grappler::GraphProperties static_graph_properties(item); TF_RETURN_IF_ERROR(static_graph_properties.InferStatically(false)); - // Build full graph tensorflow::FunctionLibraryDefinition flib(tensorflow::OpRegistry::Global(), gdef.library()); @@ -374,7 +373,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // TODO(sami): this should be passed as a knob!!!! 
- segment_options.minimum_segment_size = 2; + segment_options.minimum_segment_size = minimum_segment_size; tensorflow::tensorrt::segment::SegmentNodesVector segments; TF_RETURN_IF_ERROR(tensorrt::segment::SegmentGraph( gdef, IsTensorRTCandidate, segment_options, &segments)); @@ -410,7 +409,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( if (status != tensorflow::Status::OK()) { LOG(WARNING) << "subgraph conversion error for subgraph_index:" << count << " due to: \n" - << status.ToString() << "SKIPPING......"; + << status.ToString() << " SKIPPING......"; } count++; } diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 846d7f2721..5d5301393c 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -38,7 +38,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode); + int precision_mode,int minimum_segment_size); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index d9377ba597..ec3dee40d7 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -69,8 +69,9 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, case tensorflow::DataType::DT_HALF: *trt_dtype = nvinfer1::DataType::kHALF; break; + default: - return tensorflow::errors::InvalidArgument("Unsupported data type"); + return tensorflow::errors::InvalidArgument("Unsupported data type "+tensorflow::DataTypeString(tf_dtype)); } return tensorflow::Status::OK(); } @@ -2536,6 +2537,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( shape_inference_node_name = 
s.output_edge_map->at(tensor_name).second; shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; } + if(shape_inference_output_idx<0)continue; VLOG(2) << "shapeinference name: " << shape_inference_node_name << " idx: " << shape_inference_output_idx; diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 94afb75897..071f09d37b 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -41,7 +41,8 @@ def create_inference_graph(input_graph_def, outputs, max_batch_size=1, max_workspace_size_bytes=2 << 20, - precision_mode="FP32"): + precision_mode="FP32", + minimum_segment_size=3): """Python wrapper for the TRT transormation. @@ -98,7 +99,7 @@ def create_inference_graph(input_graph_def, # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes,mode) + max_workspace_size_bytes,mode,minimum_segment_size) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 0ae3c91a63..28334e26a9 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -73,7 +73,8 @@ std::pair trt_convert( std::vector output_names, size_t max_batch_size, size_t max_workspace_size_bytes, - int precision_mode + int precision_mode, + int minimum_segment_size // Unfortunately we can't use TF_Status here since it // is in c/c_api and brings in a lot of other libraries // which in turn declare ops. 
These ops are included @@ -105,7 +106,7 @@ std::pair trt_convert( tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT( graph_def, output_names, max_batch_size, max_workspace_size_bytes, - &outGraph, precision_mode); + &outGraph, precision_mode,minimum_segment_size); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; @@ -179,7 +180,7 @@ std::pair trt_convert(string graph_def_string, std::vector output_names, size_t max_batch_size, size_t max_workspace_size_bytes, - int precision_mode); + int precision_mode, int minimum_segment_size); %unignoreall -- GitLab From 700c406bc5c9182b91cf32873e8ae0d81e084114 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 1 Mar 2018 17:00:46 -0800 Subject: [PATCH 319/884] Include the response upon any error. PiperOrigin-RevId: 187556563 --- .../core/platform/cloud/curl_http_request.cc | 56 ++++++++++++------- .../platform/cloud/curl_http_request_test.cc | 7 ++- 2 files changed, 40 insertions(+), 23 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 4b5f6974c1..80ad1cf0b8 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -399,6 +399,24 @@ size_t CurlHttpRequest::HeaderCallback(const void* ptr, size_t size, return size * nmemb; } +// This is pulled out as a separate function so that it's only computed when +// an error occurs. 
+string response_to_error_message(uint64 response_code, StringPiece response, + size_t response_to_error_limit, + CURLcode curl_result, + StringPiece error_buffer) { + string error_message = strings::StrCat( + "Error executing an HTTP request (HTTP response code ", response_code, + ", error code ", curl_result, ", error message '", error_buffer, "')"); + if (!response.empty()) { + return strings::StrCat( + error_message, ", response '", + response.substr(0, std::min(response.size(), response_to_error_limit)), + "'"); + } + return error_message; +} + Status CurlHttpRequest::Send() { CheckNotSent(); CHECK(is_uri_set_) << "URI has not been set."; @@ -430,13 +448,7 @@ Status CurlHttpRequest::Send() { libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, &response_code_); - const auto& error_message = strings::StrCat( - "Error executing an HTTP request (HTTP response code ", response_code_, - ", error code ", curl_result, ", error message '", error_buffer, "')"); - Status result; - StringPiece response = GetResponse(); - string extended_error_message; switch (response_code_) { // The group of response codes indicating that the request achieved // the expected goal. @@ -447,7 +459,9 @@ Status CurlHttpRequest::Send() { if (curl_result != CURLE_OK) { // This means the server executed the request successfully, but then // something went wrong during the transmission of the response. - result = errors::Unavailable(error_message); + result = errors::Unavailable(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, + curl_result, error_buffer)); } else { result = Status::OK(); } @@ -463,27 +477,25 @@ Status CurlHttpRequest::Send() { // INVALID_ARGUMENT indicates a problem with how the request is constructed. 
case 400: // Bad Request case 411: // Length Required - result = errors::InvalidArgument(error_message); + result = errors::InvalidArgument(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // PERMISSION_DENIED indicates an authentication or an authorization issue. case 401: // Unauthorized case 403: // Forbidden - if (!response.empty()) { - extended_error_message = strings::StrCat( - error_message, ", response ", - response.substr( - 0, std::min(response.size(), response_to_error_limit_))); - result = errors::PermissionDenied(extended_error_message); - } else { - result = errors::PermissionDenied(error_message); - } + result = errors::PermissionDenied(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // NOT_FOUND indicates that the requested resource does not exist. case 404: // Not found case 410: // Gone - result = errors::NotFound(error_message); + result = errors::NotFound(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // FAILED_PRECONDITION indicates that the request failed because some @@ -495,7 +507,9 @@ Status CurlHttpRequest::Send() { case 307: // Temporary Redirect case 412: // Precondition Failed case 413: // Payload Too Large - result = errors::FailedPrecondition(error_message); + result = errors::FailedPrecondition(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; // UNAVAILABLE indicates a problem that can go away if the request @@ -511,7 +525,9 @@ Status CurlHttpRequest::Send() { case 502: // Bad Gateway case 503: // Service Unavailable default: // All other HTTP response codes also should be retried. 
- result = errors::Unavailable(error_message); + result = errors::Unavailable(response_to_error_message( + response_code_, GetResponse(), response_to_error_limit_, curl_result, + error_buffer)); break; } if (!result.ok()) { diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 86d26a0287..94af121768 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -378,7 +378,7 @@ TEST(CurlHttpRequestTest, GetRequest_503) { EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( "Error executing an HTTP request (HTTP response code 503, " - "error code 23, error message '')", + "error code 23, error message ''), response 'get response'", status.error_message()); EXPECT_EQ(503, http_request.GetResponseCode()); } @@ -397,7 +397,8 @@ TEST(CurlHttpRequestTest, GetRequest_HttpCode0) { EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( "Error executing an HTTP request (HTTP response code 0, " - "error code 28, error message 'Operation timed out')", + "error code 28, error message 'Operation timed out'), " + "response 'get response'", status.error_message()); EXPECT_EQ(0, http_request.GetResponseCode()); } @@ -629,7 +630,7 @@ TEST(CurlHttpRequestTest, ProgressIsStuck) { EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( "Error executing an HTTP request (HTTP response code 200, " - "error code 42, error message '')", + "error code 42, error message ''), response 'test'", status.error_message()); } -- GitLab From 64bd36057449dd01d6944b8d31a53b1301923f2c Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 1 Mar 2018 17:07:20 -0800 Subject: [PATCH 320/884] Improve the error message when failing to write events. 
The current error message looks like: "Failed to sync 10 to " PiperOrigin-RevId: 187557623 --- tensorflow/core/util/events_writer.cc | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/util/events_writer.cc b/tensorflow/core/util/events_writer.cc index 49507616ed..c50e329bda 100644 --- a/tensorflow/core/util/events_writer.cc +++ b/tensorflow/core/util/events_writer.cc @@ -122,9 +122,11 @@ Status EventsWriter::Flush() { CHECK(recordio_file_ != nullptr) << "Unexpected NULL file"; TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_writer_->Flush(), "Failed to flush ", - num_outstanding_events_, " to ", filename_); + num_outstanding_events_, " events to ", + filename_); TF_RETURN_WITH_CONTEXT_IF_ERROR(recordio_file_->Sync(), "Failed to sync ", - num_outstanding_events_, " to ", filename_); + num_outstanding_events_, " events to ", + filename_); // The FileStillExists() condition is necessary because // recordio_writer_->Sync() can return OK even if the underlying @@ -135,7 +137,8 @@ Status EventsWriter::Flush() { // disappearing file, in case for some file system File::Exists() is // false after File::Open() but before File::Sync(). 
TF_RETURN_WITH_CONTEXT_IF_ERROR(FileStillExists(), "Failed to flush ", - num_outstanding_events_, " to ", filename_); + num_outstanding_events_, " events to ", + filename_); VLOG(1) << "Wrote " << num_outstanding_events_ << " events to disk."; num_outstanding_events_ = 0; return Status::OK(); -- GitLab From 16f1eea1cdfdb7facdac8ac2ccab3ee80af41409 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 1 Mar 2018 17:20:54 -0800 Subject: [PATCH 321/884] Scaffolding for int8 calibration in TF-TRT (#17309) * Scaffolding for int8 calibration * Add ops/trt_calib_op.cc * Rename files and replace std::string with string * Line lengths, variable names, conditionals in BUILD * mode variable renaming * More fixes for review * Run clang-format * Fix the build failure and replace the macro with a function * Add TODO(aaroey) for future PRs * Fix namespace for internal build * Fix mismatched argument name and unused includes to make internal build happy * Fix order of dependencies in BUILD file * Remove dangling #undef --- tensorflow/contrib/tensorrt/BUILD | 44 +++++- .../contrib/tensorrt/kernels/trt_calib_op.cc | 129 ++++++++++++++++++ .../contrib/tensorrt/kernels/trt_calib_op.h | 52 +++++++ .../contrib/tensorrt/ops/trt_calib_op.cc | 37 +++++ .../tensorrt/resources/trt_int8_calibrator.cc | 119 ++++++++++++++++ .../tensorrt/resources/trt_int8_calibrator.h | 65 +++++++++ .../resources/trt_resource_manager.cc | 39 ++++++ .../tensorrt/resources/trt_resource_manager.h | 49 +++++++ .../tensorrt/resources/trt_resources.h | 95 +++++++++++++ 9 files changed, 625 insertions(+), 4 deletions(-) create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.h create mode 100644 tensorflow/contrib/tensorrt/ops/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h create mode 100644
tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_resource_manager.h create mode 100644 tensorflow/contrib/tensorrt/resources/trt_resources.h diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 65a0e903a7..9909fcaca2 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -47,7 +47,10 @@ tf_cuda_cc_test( tf_custom_op_library( name = "python/ops/_trt_engine_op.so", - srcs = ["ops/trt_engine_op.cc"], + srcs = [ + "ops/trt_calib_op.cc", + "ops/trt_engine_op.cc", + ], deps = [ ":trt_engine_op_kernel", ":trt_shape_function", @@ -71,11 +74,18 @@ tf_cuda_library( cc_library( name = "trt_engine_op_kernel", - srcs = ["kernels/trt_engine_op.cc"], - hdrs = ["kernels/trt_engine_op.h"], + srcs = [ + "kernels/trt_calib_op.cc", + "kernels/trt_engine_op.cc", + ], + hdrs = [ + "kernels/trt_calib_op.h", + "kernels/trt_engine_op.h", + ], copts = tf_copts(), deps = [ ":trt_logging", + ":trt_resources", "//tensorflow/core:gpu_headers_lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:stream_executor_headers_lib", @@ -87,7 +97,10 @@ cc_library( ) tf_gen_op_libs( - op_lib_names = ["trt_engine_op"], + op_lib_names = [ + "trt_engine_op", + "trt_calib_op", + ], deps = if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]), @@ -108,6 +121,7 @@ tf_cuda_library( tf_gen_op_wrapper_py( name = "trt_engine_op", deps = [ + ":trt_calib_op_op_lib", ":trt_engine_op_op_lib", ":trt_logging", ":trt_shape_function", @@ -171,6 +185,27 @@ tf_py_wrap_cc( ], ) +tf_cuda_library( + name = "trt_resources", + srcs = [ + "resources/trt_int8_calibrator.cc", + "resources/trt_resource_manager.cc", + ], + hdrs = [ + "resources/trt_int8_calibrator.h", + "resources/trt_resource_manager.h", + "resources/trt_resources.h", + ], + deps = [ + ":trt_logging", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + 
"//tensorflow/core:lib_proto_parsing", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + # Library for the node-level conversion portion of TensorRT operation creation tf_cuda_library( name = "trt_conversion", @@ -185,6 +220,7 @@ tf_cuda_library( deps = [ ":segment", ":trt_logging", + ":trt_resources", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core:framework", diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc new file mode 100644 index 0000000000..1dcb87e768 --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -0,0 +1,129 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/kernels/trt_calib_op.h" +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("segment_nodes", &segment_nodes_)); + OP_REQUIRES_OK(context, context->GetAttr("input_names", &input_names_)); + OP_REQUIRES_OK(context, context->GetAttr("resource_name", &resource_name_)); +}; + +#define TYPECASE(dt, X, Y) \ + case dt: { \ + return (void*)X->flat::Type>().data(); \ + } + +void* GetTensorAddress(const Tensor* tensor_ptr) { + auto tensor_type = tensor_ptr->dtype(); + switch (tensor_type) { + TYPECASE(tensorflow::DT_FLOAT, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_HALF, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_INT8, tensor_ptr, dest_ptr); + default: { + LOG(FATAL) << "Unsupported Data type " + << tensorflow::DataTypeString(tensor_type); + return nullptr; + } + } +} + +void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { + // TODO(aaroey): make sure ctx->resource_mgr() is used in future PR. 
+ auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); + auto res_mgr = trt_rm->getManager("TRTCalibOps"); + tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; + auto status = res_mgr->Lookup(resource_name_, resource_name_, &calib_res); + + if (!status.ok()) { + ctx->SetStatus(status); + return; + } + int num_inputs = ctx->num_inputs(); + // first run instantiate calibrator + if (calib_res->calibrator_ == nullptr) { + dev_tensors_.resize(num_inputs); + int batch_size = ctx->input(0).dim_size(0); + VLOG(1) << " Constructing calibrator"; + for (int i = 0; i < num_inputs; i++) { + // allocate workspace on device for inputs + const tensorflow::Tensor& t = ctx->input(i); + OP_REQUIRES_OK(ctx, + ctx->allocate_persistent(t.dtype(), t.shape(), + &dev_tensors_.at(i), nullptr)); + const auto device_tensor = dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), device_tensor->TotalBytes()); + void* device_address = GetTensorAddress(device_tensor); + device_buffers_.emplace(input_names_.at(i), + std::pair( + device_address, device_tensor->TotalBytes())); + } + + calib_res->calibrator_ = + new TRTInt8Calibrator(device_buffers_, batch_size, resource_name_); + string label(resource_name_); + calib_res->thr_ = new std::thread([calib_res, label]() { + VLOG(1) << "Starting calibration thread, Calibration Resource @ " + << calib_res; + calib_res->builder_->setInt8Calibrator(calib_res->calibrator_); + calib_res->builder_->setInt8Mode(true); + calib_res->engine_ = calib_res->builder_->buildCudaEngine( + *calib_res->network_); // will loop until we terminate calibrator + VLOG(1) << "Calibration loop terminated " << label; + }); + VLOG(1) << "initialized calibrator resource"; + } // calibrator initialized + + // Pass input data to calibrator + std::unordered_map input_data; + for (int i = 0; i < num_inputs; i++) { + const Tensor& t = ctx->input(i); + void* data_address = GetTensorAddress(&t); + const auto device_tensor = 
dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), + device_tensor->TotalBytes()); // use the tensor so FW keeps it + input_data.emplace(input_names_.at(i), data_address); + ctx->set_output(i, t); + } + VLOG(2) << "Filled map for sending"; + calib_res->calibrator_->setBatch(input_data); + VLOG(2) << "Passed calibration data"; + // TODO(aaroey): make sure we wait for the completion of calibration on the + // last batch in future PR. +}; + +#undef TYPECASE + +REGISTER_KERNEL_BUILDER(Name("TRTCalibOp").Device(DEVICE_GPU), TRTCalibOp); + +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h new file mode 100644 index 0000000000..23df9db32f --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h @@ -0,0 +1,52 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H +#define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H + +#include +#include +#include +#include +#include +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +namespace tensorflow { +namespace tensorrt { +// TODO(sami): Convert this to async kernel! +class TRTCalibOp : public OpKernel { + public: + explicit TRTCalibOp(OpKernelConstruction* context); + + void Compute(OpKernelContext* context) override; + + private: + string resource_name_; + std::vector segment_nodes_; + std::vector input_names_; + std::vector shapes_; + std::unordered_map> device_buffers_; + std::vector dev_tensors_; +}; +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H diff --git a/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc new file mode 100644 index 0000000000..4835e50650 --- /dev/null +++ b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +namespace tensorflow { + +REGISTER_OP("TRTCalibOp") + .Attr("segment_nodes: list(string)") // names of the ops in segment + .Attr("segment_output_names: list(string)") // names of the output ops in + // segment + .Attr("input_names: list(string)") // names of the inputs for + // passing into tensorrt + .Attr("resource_name: string") + .Attr("InT: list({int8, float16, float32})") + .Input("in_tensor: InT") + .Output("out_tensor: InT") + .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) { + for (int i = 0; i < c->num_inputs(); i++) { + c->set_output(i, c->input(i)); + } + return Status::OK(); + }); + +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc new file mode 100644 index 0000000000..3d5cc76c42 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -0,0 +1,119 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" + +#include +#include +#include + +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" + +namespace tensorflow { +namespace tensorrt { + +// set the batch size before constructing the thread to execute engine +int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } + +TRTInt8Calibrator::TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engine_name) + : batch_size_(batch_size), + done_(false), + dev_buffers_(dev_buffers), + calib_running_(false), + engine_name_(engine_name) {} + +bool TRTInt8Calibrator::setBatch( + const std::unordered_map& data) { + // TODO(aaroey): make sure that in future PR: + // 1. the mutex_lock is outside of the loop + // 2. wait() is used instead of wait_for() + // 3. done_ is to be protected by the mutex + // 4. the first batch is not missed + if (done_) return false; + while (calib_running_.load( + std::memory_order_acquire)) { // wait while calibration is running + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } + VLOG(1) << "Set Batch Waiting finished"; + for (const auto it : data) { + auto devptr = dev_buffers_.find(it.first); + if (devptr == dev_buffers_.end()) { + LOG(FATAL) << "FATAL " << engine_name_ << " input name '" << it.first + << "' does not match with the buffer names"; + } + const auto& d = devptr->second; + + // TODO(aaroey): we should not use sync copy on default stream. Make sure + // stream->ThenMemcpy() is used in future PRs. 
+ auto status = + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); + if (status != cudaSuccess) { + LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first + << "' failed with " << status; + } + } + calib_running_.store(true, std::memory_order_release); // release builder + cond_.notify_all(); + return true; +} + +bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, + int num_bindings) { + calib_running_.store(false, std::memory_order_release); // wait for new batch + cond_.notify_all(); + while (!calib_running_.load( + std::memory_order_acquire)) { // wait until new batch arrives + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } + if (done_) { + return false; + } + + for (int i = 0; i < num_bindings; i++) { + auto it = dev_buffers_.find(names[i]); + if (it == dev_buffers_.end()) { + LOG(FATAL) << "Calibration engine asked for unknown tensor name '" + << names[i] << "' at position " << i; + } + + bindings[i] = it->second.first; + } + return true; +} + +const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { + return nullptr; +} + +void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, + std::size_t length) {} +TRTInt8Calibrator::~TRTInt8Calibrator() { + VLOG(1) << "Destroying calibrator for " << engine_name_; +} + +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h new file mode 100644 index 0000000000..8830f7efe7 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ + +#include +#include +#include +#include +#include "tensorflow/core/platform/mutex.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" +namespace tensorflow { +namespace tensorrt { +// This class provides a 1 element queue to match TF's push model to +// TRT's pull model for calibration. When TRT implements a means for +// push calibration, this class should be updated accordingly. + +struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { + public: + TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engine_name); + int getBatchSize() const override; + bool getBatch(void* bindings[], const char* names[], + int num_bindings) override; + bool setBatch(const std::unordered_map& data); + void setDone() { done_ = true; } + const void* readCalibrationCache(std::size_t& length) override; + void writeCalibrationCache(const void* ptr, std::size_t length) override; + ~TRTInt8Calibrator(); + + private: + const int batch_size_; + tensorflow::mutex cond_mtx_; // mutex for condition_variable + tensorflow::condition_variable cond_; // condition variable to implement + // producer-consumer queue for + // calibration + bool done_; + const std::unordered_map> + dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with + // buffer names + std::atomic_bool calib_running_; + string
engine_name_; +}; +} // namespace tensorrt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc new file mode 100644 index 0000000000..e663eed4dd --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc @@ -0,0 +1,39 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace tensorrt { + +std::shared_ptr +tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) { + // mutex is held for lookup only. Most instantiations where mutex will be held + // longer will be during op creation and should be ok. 
+ tensorflow::mutex_lock lock(map_mutex_); + auto s = managers_.find(op_name); + if (s == managers_.end()) { + auto it = managers_.emplace( + op_name, std::make_shared(op_name)); + VLOG(1) << "Returning a new manager " << op_name; + return it.first->second; + } + VLOG(1) << "Returning old manager " << op_name; + return s->second; +} + +} // namespace tensorrt +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h new file mode 100644 index 0000000000..5f8ad491d3 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ +#include + +#include +#include +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +namespace tensorrt { + +class TRTResourceManager { + TRTResourceManager() = default; + + public: + static std::shared_ptr instance() { + static std::shared_ptr instance_( + new TRTResourceManager); + return instance_; + } + // returns a manager for given op; if it doesn't exist it creates one + std::shared_ptr getManager(const string& op_name); + + private: + std::unordered_map> + managers_; + tensorflow::mutex map_mutex_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h new file mode 100644 index 0000000000..3c85968ae7 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -0,0 +1,95 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ + +#include +#include +#include +#include +#include +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/core/framework/resource_mgr.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { +class TRTCalibrationResource : public tensorflow::ResourceBase { + public: + TRTCalibrationResource() + : calibrator_(nullptr), + builder_(nullptr), + network_(nullptr), + engine_(nullptr), + logger_(nullptr), + thr_(nullptr) {} + string DebugString() override { + std::stringstream oss; + oss << " Calibrator = " << std::hex << calibrator_ << std::dec << std::endl + << " Builder = " << std::hex << builder_ << std::dec << std::endl + << " Network = " << std::hex << network_ << std::dec << std::endl + << " Engine = " << std::hex << engine_ << std::dec << std::endl + << " Logger = " << std::hex << logger_ << std::dec << std::endl + << " Thread = " << std::hex << thr_ << std::dec << std::endl; + return oss.str(); + } + ~TRTCalibrationResource() { + VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); + } + TRTInt8Calibrator* calibrator_; + nvinfer1::IBuilder* builder_; + nvinfer1::INetworkDefinition* network_; + nvinfer1::ICudaEngine* engine_; + tensorflow::tensorrt::Logger* logger_; + // TODO(sami): Use threadpool threads! 
+ std::thread* thr_; +}; + +class TRTWeightStore : public tensorflow::ResourceBase { + public: + TRTWeightStore() {} + std::list> store_; + string DebugString() override { + std::stringstream oss; + size_t lenBytes = 0; + for (const auto& v : store_) { + lenBytes += v.size() * sizeof(uint8_t); + } + oss << " Number of entries = " << store_.size() << std::endl + << " Total number of bytes = " + << store_.size() * sizeof(std::vector) + lenBytes << std::endl; + return oss.str(); + } + virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } +}; + +class TRTEngineResource : public tensorflow::ResourceBase { + public: + TRTEngineResource() : runtime_(nullptr), ctx_(nullptr){}; + string DebugString() override { return string(""); } + nvinfer1::IRuntime* runtime_; + nvinfer1::IExecutionContext* ctx_; +}; + +} // namespace tensorrt +} // namespace tensorflow +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ +#endif +#endif -- GitLab From 0770b3f963405974692bf0908fcb7db8df81d3f6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 17:28:48 -0800 Subject: [PATCH 322/884] Implement partial constant propagation through IdentityN. PiperOrigin-RevId: 187560028 --- .../grappler/optimizers/constant_folding.cc | 51 ++++++++++++++++++ .../optimizers/constant_folding_test.cc | 53 +++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 32c8a9b2f5..77804142e6 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1843,6 +1843,57 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, continue; } + // Partial constant propagation through IdentityN.
+ if (IsIdentityN(*node) && NumNonControlInputs(*node) > 0) { + const std::set& tmp = node_map_->GetOutputs(node->name()); + const std::vector consumers(tmp.begin(), tmp.end()); + for (int input_idx = 0; input_idx < node->input_size(); ++input_idx) { + const string& input = node->input(input_idx); + if (IsControlInput(input)) { + break; + } + const NodeDef* input_node = node_map_->GetNode(NodeName(input)); + if (input_node == nullptr) { + LOG(ERROR) << "Bad input: " << input; + break; + } + // Forward constant inputs to outputs and add a control dependency on + // the IdentityN node. + if (IsReallyConstant(*input_node)) { + // Update each consumer. + for (NodeDef* consumer : consumers) { + bool add_dep = false; + for (int consumer_input_idx = 0; + consumer_input_idx < consumer->input_size(); + ++consumer_input_idx) { + const string& consumer_input = + consumer->input(consumer_input_idx); + if (IsControlInput(consumer_input)) { + break; + } + int output_idx; + const string input_node_name = + ParseNodeName(consumer_input, &output_idx); + if (input_node_name == node->name() && output_idx == input_idx) { + consumer->set_input(consumer_input_idx, input); + // We will keep the input from IdentityN through a control + // dependency, so we only need to add the consumer as an output + // for the constant input node. + node_map_->AddOutput(NodeName(input), consumer->name()); + add_dep = true; + } + } + if (add_dep) { + consumer->add_input(AsControlDependency(node->name())); + } + } + } + } + for (NodeDef* consumer : consumers) { + DedupControlInputs(consumer); + } + } + // Partial constant folding for associative operators: // Split AddN/AccumulateNV2 to enable partial // folding of ops when more than one but not all inputs are constant.
diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 3149e1d53e..29dc93c257 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -1646,6 +1646,59 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { } } +TEST_F(ConstantFoldingTest, IdenticalN) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output x = ops::Placeholder(scope.WithOpName("x"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({}))); + Output c1 = ops::Const(scope.WithOpName("c1"), 1.0f, {2, 2}); + Output c2 = ops::Const(scope.WithOpName("c2"), 2.0f, {2, 2}); + auto id_n = ops::IdentityN(scope.WithOpName("id_n"), {c1, x, c2}); + auto id0 = ops::Identity(scope.WithOpName("id0"), id_n[1]); + auto id1 = ops::Identity(scope.WithOpName("id1"), id_n[0]); + auto add0 = ops::Add(scope.WithOpName("add0"), id_n[0], id_n[1]); + auto add1 = ops::Add(scope.WithOpName("add1"), id_n[0], id_n[2]); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + item.fetch.push_back("id0"); + item.fetch.push_back("id1"); + item.fetch.push_back("add0"); + item.fetch.push_back("add1"); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + + TF_EXPECT_OK(status); + EXPECT_EQ(8, output.node_size()); + // id_n should remain unchanged. + EXPECT_EQ("id_n", output.node(3).name()); + EXPECT_EQ(3, output.node(3).input_size()); + EXPECT_EQ("c1", output.node(3).input(0)); + EXPECT_EQ("x", output.node(3).input(1)); + EXPECT_EQ("c2", output.node(3).input(2)); + // id0 is unchanged. + EXPECT_EQ("id0", output.node(4).name()); + EXPECT_EQ(1, output.node(4).input_size()); + // id1 should have the constant input forwarded to it, + // and a control dependency from id_n. 
+ EXPECT_EQ("id1", output.node(5).name()); + EXPECT_EQ(2, output.node(5).input_size()); + EXPECT_EQ("c1", output.node(5).input(0)); + EXPECT_EQ("^id_n", output.node(5).input(1)); + + EXPECT_EQ("add0", output.node(6).name()); + EXPECT_EQ(2, output.node(6).input_size()); + EXPECT_EQ("c1", output.node(6).input(0)); + EXPECT_EQ("id_n:1", output.node(6).input(1)); + + EXPECT_EQ("add1", output.node(7).name()); + EXPECT_EQ(3, output.node(7).input_size()); + EXPECT_EQ("c1", output.node(7).input(0)); + EXPECT_EQ("c2", output.node(7).input(1)); + EXPECT_EQ("^id_n", output.node(7).input(2)); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 8a591af6854ee1b010d82d262072b5d3b2cdf7cc Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 1 Mar 2018 17:37:49 -0800 Subject: [PATCH 323/884] Checkpointable: Make Templates Checkpointable Uses a variable_creator_scope to hook all variable creation within the Template. For variables without a more deeply nested Template parent, it adds a dependency directly. For variables with a Template parent, it adds a dependency on the sub-Template instead. The variable scope prefix for the Template itself is stripped. However, any variable_scopes inside the Template (such as those for Layers) will be included in the dependency names. So within Templates we essentially have name-based saving (with the exception of dependencies between Templates themselves, which use the object-based dependency mechanism). This isn't ideal, but will hopefully allow migration toward object oriented dependencies more smoothly. Throws an error on object-based save() if the dependencies don't match between Checkpointable and .variables. Includes a semi-related usability fix for the Checkpoint utility; mostly in unit tests, restore() is not called before save(), which when graph building was leading to the save counter not being initialized. Fixes that. 
PiperOrigin-RevId: 187560911 --- .../eager/python/checkpointable_utils.py | 11 +- .../eager/python/checkpointable_utils_test.py | 80 ++++++++++++ .../python/kernel_tests/template_test.py | 4 + tensorflow/python/ops/template.py | 117 +++++++++++++++++- 4 files changed, 207 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index ed431e02ea..89cd543f77 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -843,10 +843,17 @@ class Checkpoint(core_checkpointable.Checkpointable): def save(self, file_prefix, session=None): """Save a checkpoint. Wraps `tfe.CheckpointableSaver.save`.""" - assign_op = self.save_counter.assign_add(1) - if context.in_graph_mode(): + in_graph_mode = context.in_graph_mode() + if in_graph_mode: if session is None: session = ops.get_default_session() + if self._save_counter is None: + # When graph building, if this is a new save counter variable then it + # needs to be initialized before assign_add. This is only an issue if + # restore() has not been called first. 
+ session.run(self.save_counter.initializer) + assign_op = self.save_counter.assign_add(1) + if in_graph_mode: session.run(assign_op) return self._saver.save( file_prefix=file_prefix, diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 9424de0835..c9db2bcafc 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -34,6 +34,7 @@ from tensorflow.python.layers import core from tensorflow.python.ops import init_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops +from tensorflow.python.ops import template from tensorflow.python.ops import variable_scope from tensorflow.python.training import adam from tensorflow.python.training import checkpointable @@ -855,6 +856,85 @@ class CheckpointingTests(test.TestCase): self.assertAllEqual(3., self.evaluate(beta1_power)) +class TemplateTests(test.TestCase): + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def test_checkpointable_save_restore(self): + + def _templated(): + v = variable_scope.get_variable( + "v", shape=[1], initializer=init_ops.zeros_initializer()) + v2 = variable_scope.get_variable( + "v2", shape=[1], initializer=init_ops.zeros_initializer()) + return v, v + 1., v2 + + save_template = template.make_template("s1", _templated) + save_root = checkpointable_utils.Checkpoint(my_template=save_template) + v1_save, _, v2_save = save_template() + self.evaluate(v1_save.assign([12.])) + self.evaluate(v2_save.assign([14.])) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = save_root.save(checkpoint_prefix) + + load_template = template.make_template("s2", _templated) + load_root = checkpointable_utils.Checkpoint(my_template=load_template) + status = load_root.restore(save_path) + var, var_plus_one, 
var2 = load_template() + self.assertEqual(2, len(load_template._checkpoint_dependencies)) + self.assertEqual("v", load_template._checkpoint_dependencies[0].name) + self.assertEqual("v2", load_template._checkpoint_dependencies[1].name) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([12.], self.evaluate(var)) + self.assertAllEqual([13.], self.evaluate(var_plus_one)) + self.assertAllEqual([14.], self.evaluate(var2)) + + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) + def test_checkpointable_save_restore_nested(self): + + def _inner_template(): + v = variable_scope.get_variable( + "v", shape=[1], initializer=init_ops.zeros_initializer()) + return v + + def _outer_template(): + first_inner = template.make_template("i1", _inner_template) + second_inner = template.make_template("i2", _inner_template) + v1 = first_inner() + v2 = second_inner() + v3 = second_inner() + return (first_inner, second_inner), (v1, v2, v3) + + with variable_scope.variable_scope("ignored"): + save_template = template.make_template("s1", _outer_template) + save_root = checkpointable_utils.Checkpoint(my_template=save_template) + (inner_template_one, inner_template_two), _ = save_template() + self.evaluate(inner_template_one.variables[0].assign([20.])) + self.evaluate(inner_template_two.variables[0].assign([25.])) + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + save_path = save_root.save(checkpoint_prefix) + + load_template = template.make_template("s2", _outer_template) + load_root = checkpointable_utils.Checkpoint(my_template=load_template) + status = load_root.restore(save_path) + (inner_template_one, inner_template_two), (v1, v2, v3) = load_template() + outer_template_dependencies = load_root.my_template._checkpoint_dependencies + self.assertEqual(2, len(outer_template_dependencies)) + self.assertEqual("i1", outer_template_dependencies[0].name) + self.assertIs(inner_template_one, 
outer_template_dependencies[0].ref) + self.assertEqual("i2", outer_template_dependencies[1].name) + self.assertIs(inner_template_two, outer_template_dependencies[1].ref) + self.assertEqual(1, len(inner_template_one._checkpoint_dependencies)) + self.assertEqual("v", inner_template_one._checkpoint_dependencies[0].name) + self.assertEqual(1, len(inner_template_two._checkpoint_dependencies)) + self.assertEqual("v", inner_template_two._checkpoint_dependencies[0].name) + status.assert_consumed().run_restore_ops() + self.assertAllEqual([20.], self.evaluate(v1)) + self.assertAllEqual([25.], self.evaluate(v2)) + self.assertAllEqual([25.], self.evaluate(v3)) + + class CheckpointCompatibilityTests(test.TestCase): def _initialized_model(self): diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py index a519b69b22..c42ae5a77d 100644 --- a/tensorflow/python/kernel_tests/template_test.py +++ b/tensorflow/python/kernel_tests/template_test.py @@ -356,6 +356,10 @@ class TemplateTest(test.TestCase): self.assertEqual("s1_1/nested/dummy:0", v5.name) self.assertEqual("s1_1/nested_1/dummy:0", v6.name) + self.assertEqual(2, len(tmpl1._checkpoint_dependencies)) + self.assertEqual("nested", tmpl1._checkpoint_dependencies[0].name) + self.assertEqual("nested_1", tmpl1._checkpoint_dependencies[1].name) + @test_util.run_in_graph_and_eager_modes() def test_nested_templates_with_defun(self): diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py index 424582b348..70e8040512 100644 --- a/tensorflow/python/ops/template.py +++ b/tensorflow/python/ops/template.py @@ -26,6 +26,7 @@ from tensorflow.python.eager import function from tensorflow.python.framework import ops from tensorflow.python.ops import variable_scope from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import tf_contextlib from tensorflow.python.util import 
tf_decorator from tensorflow.python.util.deprecation import deprecated @@ -230,7 +231,7 @@ def _skip_common_stack_elements(stacktrace, base_case): return stacktrace[-1:] -class Template(object): +class Template(checkpointable.CheckpointableBase): """Wrap a function to aid in variable sharing. Templates are functions that create variables the first time they are called @@ -294,12 +295,115 @@ class Template(object): # which is not the same as whether the scope has been created. self._variables_created = False + @property + def _checkpoint_dependencies(self): + """Sanity checking for object-based saving. + + Does not override Checkpointable dependency tracking, but checks that + variables accessible through Checkpointable dependencies on other `Template` + objects include all of the variable_scope-filtered `Template.variables`. + + Returns: + A list of checkpointable.CheckpointableReference objects. + Raises: + ValueError: If this object is not compatible with object-based saving. + """ + dependencies = super(Template, self)._checkpoint_dependencies + dependency_variables = [] + for _, dependency in dependencies: + if isinstance(dependency, Template): + dependency_variables.extend(dependency.variables) + else: + dependency_variables.append(dependency) + dependency_variables = set(dependency_variables) + not_included_variables = [] + for expected_variable in sorted(self.variables, key=lambda v: v.name): + if expected_variable not in dependency_variables: + not_included_variables.append(expected_variable) + if not_included_variables: + # Trying to save a Template which improperly tracks its variables. + raise ValueError( + ("The Template '%s' references variables which are not included via " + "object-based dependency tracking. 
Most likely a custom " + "getter/creator was registered which does not call Template's " + "custom variable creator (which is responsible for tracking " + "dependencies).\n\nExpected these variables to be dependencies: %s") + % (self, not_included_variables)) + return dependencies + + def _checkpointable_custom_creator(self, next_creator, name, initial_value, + checkpointable_parent=None, **kwargs): + """A variable creation hook which adds Checkpointable dependencies. + + Set during the `Template`'s first wrapped function execution. Ensures that + (a) `Template` objects depend on `Template`s created inside them which + create variables, and (b) that any variables not in a more deeply nested + `Template` are added as dependencies directly. + + The `checkpointable_parent` argument is passed between `Template` custom + creators but ignored when the variable object itself is created. This + argument indicates (if not `None`) that a more deeply nested `Template` has + already added the variable as a dependency, and that parent `Template`s + should add a dependency on that `Template` rather than on the variable + directly. + + Args: + next_creator: See `variable_scope.variable_creator_scope`; the next + creator in the chain. + name: The (full, scope-influenced) name of the variable. The scope name + for the Template itself is stripped for the purposes of object-based + dependency tracking, but scopes within Templates are respected. + initial_value: See `variable_scope.variable_creator_scope`. Taken + explicitly so the argument can be re-named and used with + `Checkpointable._add_variable_with_custom_getter`. + checkpointable_parent: If not None, a more deeply nested Template object + to add a dependency on (rather than depending on the variable directly). + **kwargs: Passed through to the next creator. + Returns: + The output of `next_creator`: the fetched/created variable object. 
+ """ + def _call_next_creator_renaming_initializer(initializer, **inner_kwargs): + inner_kwargs.pop("name") # Ignored; this is the scope-stripped name which + # we don't want to propagate. + return next_creator( + initial_value=initializer, + name=name, + **inner_kwargs) + if name.startswith(self._variable_scope.name): + scope_stripped_name = name[len(self._variable_scope.name) + 1:] + if not checkpointable_parent: + return self._add_variable_with_custom_getter( + initializer=initial_value, + name=scope_stripped_name, + getter=_call_next_creator_renaming_initializer, + # Disable error checking for Checkpointable. Exceptions are instead + # raised if necessary when the object-based saver tries to + # save/restore the object. + overwrite=True, + checkpointable_parent=self, + **kwargs) + else: + self._track_checkpointable( + checkpointable_parent, + name=checkpointable_parent._variable_scope.name[ # pylint: disable=protected-access + len(self._variable_scope.name) + 1:], + overwrite=True) + return next_creator(name=name, initial_value=initial_value, + checkpointable_parent=self, **kwargs) + def _call_func(self, args, kwargs): try: vars_at_start = len(ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)) trainable_at_start = len( ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) - result = self._func(*args, **kwargs) + if self._variables_created: + result = self._func(*args, **kwargs) + else: + # The first time we run, restore variables if necessary (via + # Checkpointable). 
+ with variable_scope.variable_creator_scope( + self._checkpointable_custom_creator): + result = self._func(*args, **kwargs) if self._variables_created: # Variables were previously created, implying this is not the first @@ -563,7 +667,14 @@ class EagerTemplate(Template): try: vars_at_start = self._template_store.variables() trainable_at_start = self._template_store.trainable_variables() - result = self._func(*args, **kwargs) + if self._variables_created: + result = self._func(*args, **kwargs) + else: + # The first time we run, restore variables if necessary (via + # Checkpointable). + with variable_scope.variable_creator_scope( + self._checkpointable_custom_creator): + result = self._func(*args, **kwargs) if self._variables_created: # Variables were previously created, implying this is not the first -- GitLab From 4669767c4c6d830c2234c3ba15944a362b08fa14 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Thu, 1 Mar 2018 17:41:41 -0800 Subject: [PATCH 324/884] Add util which creates Python callable with tf.Variables explicitly as arguments. 
PiperOrigin-RevId: 187561302 --- tensorflow/contrib/bayesflow/BUILD | 17 -- tensorflow/contrib/bayesflow/__init__.py | 2 - .../kernel_tests/variable_utils_test.py | 135 --------------- .../bayesflow/python/ops/variable_utils.py | 29 ---- .../python/ops/variable_utils_impl.py | 157 ------------------ 5 files changed, 340 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/variable_utils.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 270c309ec3..3592cff90b 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -251,23 +251,6 @@ cuda_py_test( tags = ["notsan"], ) -cuda_py_test( - name = "variable_utils_test", - size = "small", - srcs = ["python/kernel_tests/variable_utils_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - ], -) - cuda_py_test( name = "variational_sgd_optimizer_test", size = "small", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index 528c4fbacd..c411026346 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -30,7 +30,6 @@ from tensorflow.contrib.bayesflow.python.ops import mcmc_diagnostics from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo from tensorflow.contrib.bayesflow.python.ops import optimizers -from tensorflow.contrib.bayesflow.python.ops import variable_utils # pylint: 
enable=unused-import,line-too-long from tensorflow.python.util.all_util import remove_undocumented @@ -49,7 +48,6 @@ _allowed_symbols = [ 'optimizers', 'special_math', 'stochastic_variables', - 'variable_utils', 'variational_inference', ] diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py deleted file mode 100644 index f978cf8641..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/variable_utils_test.py +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for utility functions related to managing `tf.Variable`s.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import variable_utils - -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import ops -from tensorflow.python.ops import variable_scope as varscope_ops -from tensorflow.python.ops import variables as variables_ops -from tensorflow.python.platform import test - - -def test_fn(x): - x = ops.convert_to_tensor(x, name="x") - dtype = x.dtype.as_numpy_dtype - s = x.shape.as_list() - z = varscope_ops.get_variable( - name="z", - dtype=dtype, - initializer=np.arange(np.prod(s)).reshape(s).astype(dtype)) - y = varscope_ops.get_variable( - name="y", - dtype=dtype, - initializer=np.arange(np.prod(s)).reshape(s).astype(dtype)**2) - return x + y + z - - -class _WrapCallableTest(object): - - def testDefaultArgsWorkCorrectly(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - wrapped_fn, vars_args = variable_utils.externalize_variables_as_args( - test_fn, [x]) - - varscope_ops.get_variable_scope().reuse_variables() - - result = wrapped_fn(self.dtype(2), [3, 4, 5], 0.5) - - y_actual = varscope_ops.get_variable("y", dtype=self.dtype) - z_actual = varscope_ops.get_variable("z", dtype=self.dtype) - - variables_ops.global_variables_initializer().run() - result_ = result.eval() - - self.assertEqual(self.dtype, result_.dtype) - self.assertAllEqual([5.5, 6.5, 7.5], result_) - self.assertAllEqual([y_actual, z_actual], vars_args) - - def testNonDefaultArgsWorkCorrectly(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - - _ = test_fn(self.dtype([0., 0.])) # Needed to create vars. 
- varscope_ops.get_variable_scope().reuse_variables() - - y_actual = varscope_ops.get_variable("y", dtype=self.dtype) - - wrapped_fn, vars_args = variable_utils.externalize_variables_as_args( - test_fn, [x], possible_ancestor_vars=[y_actual]) - - result = wrapped_fn(self.dtype([2, 3]), 0.5) # x, y - - variables_ops.global_variables_initializer().run() - result_ = result.eval() - - self.assertEqual(self.dtype, result_.dtype) - self.assertAllEqual([2.5, 4.5], result_) - self.assertAllEqual([y_actual], vars_args) - - def testWarnings(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - wrapped_fn, _ = variable_utils.externalize_variables_as_args( - test_fn, [x], possible_ancestor_vars=[]) - varscope_ops.get_variable_scope().reuse_variables() - with warnings.catch_warnings(record=True) as w: - wrapped_fn(self.dtype(2)) - w = sorted(w, key=lambda w: str(w.message)) - self.assertEqual(2, len(w)) - self.assertRegexpMatches( - str(w[0].message), - r"Variable .* 'y:0' .* not found in bypass dict.") - self.assertRegexpMatches( - str(w[1].message), - r"Variable .* 'z:0' .* not found in bypass dict.") - - def testExceptions(self): - with self.test_session(): - x = constant_op.constant(self.dtype([0.1, 0.2])) - wrapped_fn, _ = variable_utils.externalize_variables_as_args( - test_fn, - [x], - possible_ancestor_vars=[], - assert_variable_override=True) - varscope_ops.get_variable_scope().reuse_variables() - with self.assertRaisesRegexp(ValueError, r"not found"): - wrapped_fn(self.dtype(2)) - - -class WrapCallableTest16(test.TestCase, _WrapCallableTest): - dtype = np.float16 - - -class WrapCallableTest32(test.TestCase, _WrapCallableTest): - dtype = np.float32 - - -class WrapCallableTest64(test.TestCase, _WrapCallableTest): - dtype = np.float64 - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/variable_utils.py b/tensorflow/contrib/bayesflow/python/ops/variable_utils.py deleted file mode 100644 
index eadf6f4d5f..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/variable_utils.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions related to managing `tf.Variable`s.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -from tensorflow.contrib.bayesflow.python.ops.variable_utils_impl import * # pylint: disable=wildcard-import,unused-wildcard-import,g-importing-member -from tensorflow.python.util import all_util - -_allowed_symbols = [ - "externalize_variables_as_args", -] - -all_util.remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py b/tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py deleted file mode 100644 index ca3d75b5bf..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/variable_utils_impl.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utility functions related to managing `tf.Variable`s. - -@@externalize_variables_as_args -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings - -from tensorflow.python.framework import ops -from tensorflow.python.ops import gradients_impl as gradients_ops -from tensorflow.python.ops import variable_scope as varscope_ops -from tensorflow.python.ops import variables as variables_ops - -__all__ = [ - "externalize_variables_as_args", -] - - -# Cause all warnings to always be triggered. -# Not having this means subsequent calls wont trigger the warning. -warnings.simplefilter("always") - - -def externalize_variables_as_args(fn, - fn_args=(), - ancestor_variables=None, - possible_ancestor_vars=None, - assert_variable_override=False, - name=None): - """"Converts variables within a callable into explicit args. - - Makes a new callable from `fn` which has arguments `list(fn_args) + - list(ancestor_variables)`. If `ancestor_variables` is not specified, it is - inferred by checking which of `possible_ancestor_vars` actually influences the - return value of `fn` (concretely, gradient of `fn(*fn_args)` is not `None`). - By default `possible_ancestor_vars` is `tf.trainable_variables() + - tf.get_collection(tf.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)`. 
- - #### Examples: - - ```python - num_samples = 2 - num_dims = 1 - dtype = np.float32 - - def foo(x): - x = tf.convert_to_tensor(x, dtype=dtype, name="x") - s = x.shape.as_list() - y = tf.get_variable( - name="y", - dtype=dtype, - initializer=np.arange(np.prod(s)).reshape(s).astype(dtype)) - return x + y - - x = tf.constant(dtype([0.1, 0.2])) - - wrapped_foo, discovered_ancestor_variables = ( - externalize_variables_as_args(foo, [x])) - - new_x = dtype([[1.], [2.]]) - new_y = dtype([[3.], [4.]]) - new_result = wrapped_foo(new_x, new_y) - # ==> [[4.], [6.]] - - discovered_ancestor_variables == [tf.get_variable("y", dtype)] - # ==> [True] - ``` - - Args: - fn: Python callable which returns a `Tensor` and accepts `*fn_args`. - fn_args: Python list of args to `fn`. Represents dummy arguments passed to - `fn` to trace its execution; actual values are unimportant. These args are - only used to construct the output of `fn` and to resolve the ancestor - `tf.Variable`s. - Default value: `()` (i.e., `fn` takes no args). - ancestor_variables: Python list of `tf.Variable`s. When `None` the list is - expanded to non-`None` gradients of `fn(*fn_args)`. By directly providing - the `ancestor_variables` the internal call to `fn` is avoided. - Default value: `None` (i.e., `tf.Variable` dependencies are discovered). - possible_ancestor_vars: Python list of possible `tf.Variable`s which might - be a dependency of computing `fn(*fn_args)`. - Default value: `None` (i.e., expanded as described above). - assert_variable_override: Python `bool` indicating that not finding a - `tf.Variable` in the override list is an exception. - Default value: `False` (i.e., missing a `Variable` triggers a `warning`). - name: Python `str` name prefixed to Ops created by this function. - Default value: `None` (i.e., "externalize_variables_as_args"). - - Returns: - wrapped_fn: Python callable taking arguments like - `*(list(fn_args) + discovered_ancestor_variables)`. 
- discovered_ancestor_variables: Python list of `tf.Variable`s known to be a - dependency of `fn(*fn_args)`. - - Raises: - ValueError: if `assert_variable_override` is `True` and `Variable` is - requested but not overridden. - """ - def _make_bypassing_custom_getter_fn(new_var_dict): - """Return dict value rather than what would otherwise be dict key.""" - def _custom_getter(getter, *args, **kwargs): - v = getter(*args, **kwargs) - new_v = new_var_dict.get(v, None) - if new_v is None: - msg = "Variable \"{}\" not found in bypass dict.".format(v) - if assert_variable_override: - raise ValueError(msg) - warnings.warn(msg) - return v - return new_v - return _custom_getter - - with ops.name_scope(name, "externalize_variables_as_args"): - if ancestor_variables is not None and not ancestor_variables: - return fn, () - if ancestor_variables is None: - y = fn(*fn_args) # Side-effect: adds trainable vars. - if possible_ancestor_vars is None: - possible_ancestor_vars = ( - variables_ops.trainable_variables() + - ops.get_collection(ops.GraphKeys.TRAINABLE_RESOURCE_VARIABLES)) - # TODO(b/72873296): Add a dedicated op for identifying ancestors. - ancestors = [v for g, v - in zip(gradients_ops.gradients(y, possible_ancestor_vars), - possible_ancestor_vars) - if g is not None] - ancestor_variables = sorted(ancestors, key=lambda v: v.name) - n = len(fn_args) - def _fn(*args): - with ops.name_scope("wrapped_fn"): - vars_dict = dict( - (k, ops.convert_to_tensor( - v, dtype=k.dtype.base_dtype, name=k.op.name)) - for k, v in zip(ancestor_variables, args[n:])) - with varscope_ops.variable_scope( - varscope_ops.get_variable_scope(), - reuse=True, - custom_getter=_make_bypassing_custom_getter_fn(vars_dict)): - return fn(*args[:n]) - return _fn, ancestor_variables -- GitLab From e927be3872e00c9b0e5e9aa64e6aae90c4ae1315 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Thu, 1 Mar 2018 17:53:49 -0800 Subject: [PATCH 325/884] Improve CURL error reporting and handling in the GCS filesystem. 
- The main semantics change is that we return immediately if curl_easy_perform doesn't return CURLE_OK. The CURL documentation indicates that it's not ok to fetch info if the curl call failed: https://curl.haxx.se/libcurl/c/curl_easy_getinfo.html - LOG errors where we can't return a status. Otherwise return with a status immediately. PiperOrigin-RevId: 187562481 --- .../core/platform/cloud/curl_http_request.cc | 190 ++++++++++++------ .../core/platform/cloud/curl_http_request.h | 44 +++- .../platform/cloud/curl_http_request_test.cc | 18 +- 3 files changed, 176 insertions(+), 76 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 80ad1cf0b8..9bc06d56ae 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/macros.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/version.h" @@ -129,20 +130,34 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env) // default in //third_party:curl.BUILD and can be customized via an // environment variable. - libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput); - libcurl_->curl_easy_setopt( - curl_, CURLOPT_USERAGENT, - strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str()); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput), + "Setting verbose output"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt( + curl_, CURLOPT_USERAGENT, + strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str()), + "Setting user agent"); // Do not use signals for timeouts - does not work in multi-threaded programs. 
- libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L); - libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, - CURL_HTTP_VERSION_2_0); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L), + "Disabling signals"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, + CURL_HTTP_VERSION_2_0), + "Setting HTTP version"); // Set up the progress meter. - libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL); - libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this); - libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION, - &CurlHttpRequest::ProgressCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL), + "Disabling progress meter"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this), + "Setting custom pointer to the progress callback"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION, + &CurlHttpRequest::ProgressCallback), + "Setting the progress callback"); // If response buffer is not set, libcurl will print results to stdout, // so we always set it. 
@@ -175,13 +190,17 @@ void CurlHttpRequest::SetUri(const string& uri) { CheckNotSent(); is_uri_set_ = true; uri_ = uri; - libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str()); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str()), + "Setting URL"); } void CurlHttpRequest::SetRange(uint64 start, uint64 end) { CheckNotSent(); - libcurl_->curl_easy_setopt(curl_, CURLOPT_RANGE, - strings::StrCat(start, "-", end).c_str()); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_RANGE, + strings::StrCat(start, "-", end).c_str()), + "Setting range"); } void CurlHttpRequest::AddHeader(const string& name, const string& value) { @@ -210,7 +229,9 @@ void CurlHttpRequest::SetDeleteRequest() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; - libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE"), + "Setting delete request"); } Status CurlHttpRequest::SetPutFromFile(const string& body_filepath, @@ -232,9 +253,12 @@ Status CurlHttpRequest::SetPutFromFile(const string& body_filepath, curl_headers_ = libcurl_->curl_slist_append( curl_headers_, strings::StrCat("Content-Length: ", size).c_str()); - libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(put_body_)); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting PUT request"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(put_body_)), + "Setting read data"); // Using the default CURLOPT_READFUNCTION, which is doing an fread() on the // FILE * userdata set with CURLOPT_READDATA. 
return Status::OK(); @@ -244,13 +268,18 @@ void CurlHttpRequest::SetPutEmptyBody() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; - libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting put request"); curl_headers_ = libcurl_->curl_slist_append(curl_headers_, "Content-Length: 0"); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this)), + "Setting read data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback), + "Setting read callback"); } void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) { @@ -259,11 +288,17 @@ void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) { is_method_set_ = true; curl_headers_ = libcurl_->curl_slist_append( curl_headers_, strings::StrCat("Content-Length: ", size).c_str()); - libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), + "Setting POST request"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this)), + "Setting read data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback), + "Setting read callback"); post_body_buffer_ = StringPiece(buffer, size); } @@ -271,13 +306,19 @@ void CurlHttpRequest::SetPostEmptyBody() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; - 
libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), + "Setting POST request"); curl_headers_ = libcurl_->curl_slist_append(curl_headers_, "Content-Length: 0"); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this)), + "Setting read data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback), + "Setting read callback"); } void CurlHttpRequest::SetResultBuffer(std::vector* out_buffer) { @@ -287,10 +328,14 @@ void CurlHttpRequest::SetResultBuffer(std::vector* out_buffer) { out_buffer->clear(); response_buffer_ = out_buffer; - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, - &CurlHttpRequest::WriteCallback); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, + reinterpret_cast(this)), + "Setting write data"); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, + &CurlHttpRequest::WriteCallback), + "Setting write callback"); } void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) { @@ -299,10 +344,14 @@ void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) { direct_response_ = DirectResponseState{buffer, size, 0}; - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, - &CurlHttpRequest::WriteCallbackDirect); + TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, + reinterpret_cast(this)), + "Setting write data"); + 
TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, + &CurlHttpRequest::WriteCallbackDirect), + "Setting write callback"); } bool CurlHttpRequest::IsDirectResponse() const { @@ -424,29 +473,50 @@ Status CurlHttpRequest::Send() { is_sent_ = true; if (curl_headers_) { - libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_), + "Setting HTTP header"); } if (resolve_list_) { - libcurl_->curl_easy_setopt(curl_, CURLOPT_RESOLVE, resolve_list_); - } - libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERDATA, - reinterpret_cast(this)); - libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION, - &CurlHttpRequest::HeaderCallback); - - libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_); - libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT, - connect_timeout_secs_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_RESOLVE, resolve_list_), + "Setting custom resolves"); + } + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERDATA, + reinterpret_cast(this)), + "Setting header data"); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION, + &CurlHttpRequest::HeaderCallback), + "Setting header function"); + + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_), + "Setting request timeout"); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT, + connect_timeout_secs_), + "Setting connection timeout"); char error_buffer[CURL_ERROR_SIZE] = {0}; - libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer), + "Setting error buffer"); - const auto 
curl_result = libcurl_->curl_easy_perform(curl_); + const CURLcode curl_result = libcurl_->curl_easy_perform(curl_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + curl_result, "Performing request. Detailed error: ", error_buffer); double written_size = 0; - libcurl_->curl_easy_getinfo(curl_, CURLINFO_SIZE_DOWNLOAD, &written_size); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_getinfo(curl_, CURLINFO_SIZE_DOWNLOAD, &written_size), + "Fetching written size"); - libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, &response_code_); + TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( + libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, + &response_code_), + "Fetching response code"); Status result; switch (response_code_) { @@ -616,4 +686,12 @@ int CurlHttpRequest::ProgressCallback(void* this_object, curl_off_t dltotal, return 0; } +Status CURLcodeToStatus(CURLcode code) { + // Return Unavailable to retry by default. We probably should distinguish + // between permanent or temporary failures. 
+ return errors::Unavailable("Error executing an HTTP request (error code ", + code, ", error message '", + curl_easy_strerror(code), "')"); +} + } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h index cfa26f2b79..c9f60cb5fc 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.h +++ b/tensorflow/core/platform/cloud/curl_http_request.h @@ -229,26 +229,28 @@ class LibCurl { virtual CURL* curl_easy_init() = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - uint64 param) = 0; + uint64 param) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - const char* param) = 0; + const char* param) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - void* param) = 0; - virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, - size_t (*param)(void*, size_t, size_t, - FILE*)) = 0; + void* param) TF_MUST_USE_RESULT = 0; + virtual CURLcode curl_easy_setopt( + CURL* curl, CURLoption option, + size_t (*param)(void*, size_t, size_t, FILE*)) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt(CURL* curl, CURLoption option, size_t (*param)(const void*, size_t, size_t, - void*)) = 0; + void*)) + TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_setopt( CURL* curl, CURLoption option, int (*param)(void* clientp, curl_off_t dltotal, curl_off_t dlnow, - curl_off_t ultotal, curl_off_t ulnow)) = 0; - virtual CURLcode curl_easy_perform(CURL* curl) = 0; + curl_off_t ultotal, + curl_off_t ulnow)) TF_MUST_USE_RESULT = 0; + virtual CURLcode curl_easy_perform(CURL* curl) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_getinfo(CURL* curl, CURLINFO info, - uint64* value) = 0; + uint64* value) TF_MUST_USE_RESULT = 0; virtual CURLcode curl_easy_getinfo(CURL* curl, CURLINFO info, - double* value) = 0; + double* value) TF_MUST_USE_RESULT = 0; virtual void curl_easy_cleanup(CURL* curl) = 0; 
virtual curl_slist* curl_slist_append(curl_slist* list, const char* str) = 0; virtual void curl_slist_free_all(curl_slist* list) = 0; @@ -258,6 +260,26 @@ class LibCurl { virtual const char* curl_easy_strerror(CURLcode errornum) = 0; }; +Status CURLcodeToStatus(CURLcode code); + +#define TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR(_code, ...) \ + do { \ + if (_code != CURLE_OK) { \ + ::tensorflow::Status _status = ::tensorflow::CURLcodeToStatus(_code); \ + ::tensorflow::errors::AppendToMessage(&_status, __VA_ARGS__); \ + return _status; \ + } \ + } while (0) + +#define TF_CURL_LOG_WITH_CONTEXT_IF_ERROR(_code, ...) \ + do { \ + if (_code != CURLE_OK) { \ + ::tensorflow::Status _status = ::tensorflow::CURLcodeToStatus(_code); \ + ::tensorflow::errors::AppendToMessage(&_status, __VA_ARGS__); \ + LOG(ERROR) << "curl error: " << _status.error_message(); \ + } \ + } while (0) + } // namespace tensorflow #endif // TENSORFLOW_CORE_PLATFORM_CLOUD_CURL_HTTP_REQUEST_H_ diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 94af121768..4cded9b81b 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -346,7 +346,6 @@ TEST(CurlHttpRequestTest, GetRequest_Empty) { TEST(CurlHttpRequestTest, GetRequest_RangeOutOfBound) { FakeLibCurl libcurl("get response", 416); - libcurl.curl_easy_perform_result_ = CURLE_WRITE_ERROR; CurlHttpRequest http_request(&libcurl); std::vector scratch; @@ -377,10 +376,10 @@ TEST(CurlHttpRequestTest, GetRequest_503) { const auto& status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (HTTP response code 503, " - "error code 23, error message ''), response 'get response'", + "Error executing an HTTP request (error code 23, error message 'Failed " + "writing received data to disk/application')\n\tPerforming request. 
" + "Detailed error: ", status.error_message()); - EXPECT_EQ(503, http_request.GetResponseCode()); } TEST(CurlHttpRequestTest, GetRequest_HttpCode0) { @@ -396,9 +395,9 @@ TEST(CurlHttpRequestTest, GetRequest_HttpCode0) { const auto& status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (HTTP response code 0, " - "error code 28, error message 'Operation timed out'), " - "response 'get response'", + "Error executing an HTTP request (error code 28, error message 'Timeout " + "was reached')\n\tPerforming request. Detailed error: Operation timed " + "out", status.error_message()); EXPECT_EQ(0, http_request.GetResponseCode()); } @@ -629,8 +628,9 @@ TEST(CurlHttpRequestTest, ProgressIsStuck) { auto status = http_request.Send(); EXPECT_EQ(error::UNAVAILABLE, status.code()); EXPECT_EQ( - "Error executing an HTTP request (HTTP response code 200, " - "error code 42, error message ''), response 'test'", + "Error executing an HTTP request (error code 42, error message " + "'Operation was aborted by an application callback')\n\tPerforming " + "request. Detailed error: ", status.error_message()); } -- GitLab From 80a647612e2cc0b98f763ffca1f7f35df7d27805 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 17:58:07 -0800 Subject: [PATCH 326/884] Allow replacing attributes in templates. 
PiperOrigin-RevId: 187562864 --- tensorflow/contrib/py2tf/pyct/templates.py | 11 +++++++++++ .../contrib/py2tf/pyct/templates_test.py | 19 ++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/py2tf/pyct/templates.py index 6ee6c0c5ce..7021e2ba93 100644 --- a/tensorflow/contrib/py2tf/pyct/templates.py +++ b/tensorflow/contrib/py2tf/pyct/templates.py @@ -79,6 +79,17 @@ class ReplaceTransformer(gast.NodeTransformer): else: raise ValueError('unexpected node type "%s"' % node) + def visit_Attribute(self, node): + node = self.generic_visit(node) + if node.attr not in self.replacements: + return node + repl = self.replacements[node.attr] + if not isinstance(repl, gast.Name): + raise ValueError( + 'An attribute can only be replaced by a Name node. Found: %s' % repl) + node.attr = repl.id + return node + def visit_Name(self, node): if node.id not in self.replacements: return node diff --git a/tensorflow/contrib/py2tf/pyct/templates_test.py b/tensorflow/contrib/py2tf/pyct/templates_test.py index 8ccfde8573..0d1c1c5d9e 100644 --- a/tensorflow/contrib/py2tf/pyct/templates_test.py +++ b/tensorflow/contrib/py2tf/pyct/templates_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import imp + import gast from tensorflow.contrib.py2tf.pyct import compiler @@ -62,7 +64,7 @@ class TemplatesTest(test.TestCase): result, _ = compiler.ast_to_object(node) self.assertEquals(7, result.test_fn(2)) - def test_code_block(self): + def test_replace_code_block(self): template = """ def test_fn(a): block @@ -79,6 +81,21 @@ class TemplatesTest(test.TestCase): result, _ = compiler.ast_to_object(node) self.assertEquals(3, result.test_fn(1)) + def test_replace_attribute(self): + template = """ + def test_fn(a): + return a.foo + """ + + node = templates.replace(template, foo='b')[0] + result, _ = compiler.ast_to_object(node) + 
mod = imp.new_module('test') + mod.b = 3 + self.assertEquals(3, result.test_fn(mod)) + + with self.assertRaises(ValueError): + templates.replace(template, foo=1) + if __name__ == '__main__': test.main() -- GitLab From 6d1309419497d52ef9a28df927a0b214cde9507c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 1 Mar 2018 18:03:19 -0800 Subject: [PATCH 327/884] Grappler: Change memory optimizer recomputation name prefix into a regexp. This allows us to match any node names, especially those under different scopes. This still performs a prefix regexp match, so it is basically backwards compatible. PiperOrigin-RevId: 187563544 --- tensorflow/core/BUILD | 1 + tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/memory_optimizer.cc | 20 ++++++++----- .../grappler/optimizers/memory_optimizer.h | 10 +++---- .../grappler/optimizers/meta_optimizer.cc | 4 +-- .../core/protobuf/rewriter_config.proto | 16 +++++----- .../python/grappler/memory_optimizer_test.py | 29 ++++++++++++++++++- 7 files changed, 56 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 3271825251..96e30ca3c0 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2231,6 +2231,7 @@ cc_library( ], visibility = [ "//tensorflow/compiler:__subpackages__", + "//tensorflow/core/grappler:__subpackages__", "//tensorflow/core/profiler:__subpackages__", ], deps = [":lib_internal"], diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 037438ee75..0a4330b524 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -363,6 +363,7 @@ cc_library( ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", + "//tensorflow/core:regexp_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc 
b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 694139fa50..d73050ac4d 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/grappler/utils/traversal.h" +#include "tensorflow/core/platform/regexp.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -413,7 +414,7 @@ void RecomputeSubgraph( } void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix, + const string& recomputation_targets_name_regexp, GraphDef* graph, const GrapplerItem& item) { if (optimization_level != RewriterConfig::RECOMPUTATION_HEURISTICS && optimization_level != RewriterConfig::HEURISTICS && @@ -437,16 +438,19 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, for (const auto& feed : item.feed) { feeds.insert(NodeName(feed.first)); } + RE2 recomputation_targets_re(recomputation_targets_name_regexp); std::function is_target = - [&recomputation_targets_name_prefix](const NodeDef& node) { - // Nodes whose inputs we may want to recompute. Typically targets will - // be gradients (recomputation_targets_name_prefix="gradients/"), - // although the prefix is configurable since gradients may be created - // in a name scope. + [&recomputation_targets_re](const NodeDef& node) { + // Nodes whose inputs we may want to recompute. This does a prefix + // regexp match, and typically one sets regexp="gradients/" meaning + // it will match all node names with scope beginning with "gradients/". + // If used within scopes, one may want to set regexp="(.+/)?gradients/". 
// TODO(allenl): Use a static schedule // (grappler::EstimateEarliestExecutionTimes) to recompute only nodes // whose outputs will sit around for a while. - return node.name().find(recomputation_targets_name_prefix) == 0; + bool match = recomputation_targets_re.Match( + node.name(), 0, node.name().size(), RE2::ANCHOR_START, nullptr, 0); + return match; }; if (optimization_level == RewriterConfig::RECOMPUTATION_HEURISTICS || @@ -1225,7 +1229,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = item.graph; RecomputationRewritingPass(optimization_level_, - recomputation_targets_name_prefix_, + recomputation_targets_name_regexp_, optimized_graph, item); GrapplerItem optimized_item(item, std::move(*optimized_graph)); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index c3dd0c45c6..62ab969848 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -27,14 +27,14 @@ class MemoryOptimizer : public GraphOptimizer { public: // optimization_level: Controls the level of autonomy for the memory // optimizer. See RewriterConfig::memory_optimization. - // recomputation_targets_name_prefix: Name prefix for potential outputs of + // recomputation_targets_name_regxp: Name regxp for potential outputs of // recomputations. See - // RewriterConfig::memory_optimizer_target_node_name_prefix. + // RewriterConfig::memory_optimizer_target_node_name_regxp. 
explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix = "gradients/") + const string& recomputation_targets_name_regexp = "gradients/") : optimization_level_(optimization_level), - recomputation_targets_name_prefix_(recomputation_targets_name_prefix) {} + recomputation_targets_name_regexp_(recomputation_targets_name_regexp) {} ~MemoryOptimizer() override {} string name() const override { return "memory_optimizer"; }; @@ -47,7 +47,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; - string recomputation_targets_name_prefix_; + string recomputation_targets_name_regexp_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 72d7b94dc8..979f3e7161 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -119,7 +119,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, std::unique_ptr(new LayoutOptimizer())); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_prefix().empty()) { + if (cfg_.memory_optimizer_target_node_name_regexp().empty()) { optimizers.push_back(std::unique_ptr( // Use the default target node name prefix "gradients/" new MemoryOptimizer(cfg_.memory_optimization()))); @@ -127,7 +127,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back( std::unique_ptr(new MemoryOptimizer( cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_prefix()))); + cfg_.memory_optimizer_target_node_name_regexp()))); } } if (cfg_.auto_parallel().enable()) { diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 9ebf217811..63303fa968 100644 --- 
a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -78,16 +78,14 @@ message RewriterConfig { // effect on manually requested memory optimization passes in the optimizers // field. MemOptType memory_optimization = 4; - // The prefix for nodes which are valid outputs of recomputations. Inputs to - // nodes with this name prefix may be recomputed (subject either to manual + // A regexp for node names which are valid outputs of recomputations. Inputs + // to nodes that match this regexp may be recomputed (subject either to manual // annotation of those input nodes or to manual annotation and heuristics - // depending on memory_optimization), but the prefixed nodes themselves will - // not be recomputed. Typically this will be "gradients/", indicating that - // activations from the forward pass of a graph may be recomputed as inputs to - // gradients, but may be adjusted if gradients are inside a name scope or if - // inputs to non-gradients should be recomputed. Defaults to "gradients/" if - // empty or not set. - string memory_optimizer_target_node_name_prefix = 6; + // depending on memory_optimization), but the nodes themselves will not be + // recomputed. This is a prefix match, meaning it matches any node name that + // contains a prefix that matches this regexp. Defaults to "gradients/" if + // not provided, but can be changed if used within scopes. + string memory_optimizer_target_node_name_regexp = 6; // Configures AutoParallel optimization passes either through the // meta-optimizer or when manually specified through the optimizers field. 
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 948911f099..58d3c1e85f 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -162,7 +162,34 @@ class MemoryOptimizerRecomputeTest(test.TestCase): arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_prefix='optimizer/gradients/'), + memory_optimizer_target_node_name_regexp='optimizer/gradients/'), + original_metagraph) + self.assertGreater( + len(rewritten_graph_def.node), + len(original_metagraph.graph_def.node)) + self.assertEqual( + 0, + len([node for node in original_metagraph.graph_def.node + if 'Recomputed/' in node.name])) + self.assertEqual( + 20, # Two per layer + len([node for node in rewritten_graph_def.node + if 'Recomputed/' in node.name])) + + def testRewritingNameScopedGradientNamesRegexp(self): + """Tests that rewriting occurs with non-standard gradient names.""" + (original_metagraph, _, _, _) = self._GetMetaGraph( + optimizer_scope_name='foo/bar') + rewritten_graph_def = tf_optimizer.OptimizeGraph( + rewriter_config_pb2.RewriterConfig( + disable_model_pruning=True, + constant_folding=rewriter_config_pb2.RewriterConfig.OFF, + dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, + layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, + arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, + memory_optimization=rewriter_config_pb2.RewriterConfig. 
+ RECOMPUTATION_HEURISTICS, + memory_optimizer_target_node_name_regexp='(.+/)gradients/'), original_metagraph) self.assertGreater( len(rewritten_graph_def.node), -- GitLab From bf1abe945330dffe3f93b81344185f629bef023f Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 1 Mar 2018 18:49:05 -0800 Subject: [PATCH 328/884] [XLA] For graphviz graph dumps that are colored by sharding, choose the fill color for fusion nodes according to the sharding color rather than always choosing grey. PiperOrigin-RevId: 187567679 --- .../compiler/xla/service/hlo_graph_dumper.cc | 104 ++++++++++-------- 1 file changed, 61 insertions(+), 43 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc index 99c4932a38..1dc72355cf 100644 --- a/tensorflow/compiler/xla/service/hlo_graph_dumper.cc +++ b/tensorflow/compiler/xla/service/hlo_graph_dumper.cc @@ -157,52 +157,60 @@ enum ColorScheme { kDashedBorder, }; +// Graphviz attributes/colors that make up a color scheme. 
+struct NodeColors { + const char* style; + const char* fill_color; + const char* stroke_color; + const char* font_color; +}; + +NodeColors NodeColorsForScheme(ColorScheme color) { + switch (color) { + case kBlue: + return NodeColors{"filled", "#bbdefb", "#8aacc8", "black"}; + case kBrown: + return NodeColors{"filled", "#bcaaa4", "#8c7b75", "black"}; + case kDarkBlue: + return NodeColors{"filled", "#1565c0", "#003c8f", "white"}; + case kDarkGreen: + return NodeColors{"filled", "#2e7d32", "#005005", "white"}; + case kDarkRed: + return NodeColors{"filled", "#b71c1c", "#7f0000", "white"}; + case kGray: + return NodeColors{"filled", "#cfd8dc", "#9ea7aa", "black"}; + case kGreen: + return NodeColors{"filled", "#c8e6c9", "#97b498", "black"}; + case kOrange: + return NodeColors{"filled", "#ffe0b2", "#cbae82", "black"}; + case kPurple: + return NodeColors{"filled", "#e1bee7", "#af8eb5", "black"}; + case kRed: + return NodeColors{"filled", "#ffcdd2", "#cb9ca1", "black"}; + case kWhite: + return NodeColors{"filled", "white", "black", "black"}; + case kYellow: + return NodeColors{"filled", "#fff9c4", "#cbc693", "black"}; + case kDashedBorder: + // "filled,dashed" looks the same as "dashed", since we have a white + // background. But we use "filled,dashed" so that when you hover over + // any part of the node (not just the text inside the node), our css + // :hover rule is triggered. + return NodeColors{"filled,dashed", "white", "#757575", "#757575"}; + } +} + // Given a ColorScheme, returns an attribute string for a node of that color. // Sets the node's style and fill/stroke/text colors. // // Colors are from https://material.io/color. 
string NodeColorAttributes(ColorScheme color) { - using std::make_tuple; - - const char *style, *fill_color, *stroke_color, *font_color; - std::tie(style, fill_color, stroke_color, font_color) = [color] { - switch (color) { - case kBlue: - return make_tuple("filled", "#bbdefb", "#8aacc8", "black"); - case kBrown: - return make_tuple("filled", "#bcaaa4", "#8c7b75", "black"); - case kDarkBlue: - return make_tuple("filled", "#1565c0", "#003c8f", "white"); - case kDarkGreen: - return make_tuple("filled", "#2e7d32", "#005005", "white"); - case kDarkRed: - return make_tuple("filled", "#b71c1c", "#7f0000", "white"); - case kGray: - return make_tuple("filled", "#cfd8dc", "#9ea7aa", "black"); - case kGreen: - return make_tuple("filled", "#c8e6c9", "#97b498", "black"); - case kOrange: - return make_tuple("filled", "#ffe0b2", "#cbae82", "black"); - case kPurple: - return make_tuple("filled", "#e1bee7", "#af8eb5", "black"); - case kRed: - return make_tuple("filled", "#ffcdd2", "#cb9ca1", "black"); - case kWhite: - return make_tuple("filled", "white", "black", "black"); - case kYellow: - return make_tuple("filled", "#fff9c4", "#cbc693", "black"); - case kDashedBorder: - // "filled,dashed" looks the same as "dashed", since we have a white - // background. But we use "filled,dashed" so that when you hover over - // any part of the node (not just the text inside the node), our css - // :hover rule is triggered. - return make_tuple("filled,dashed", "white", "#757575", "#757575"); - } - }(); + NodeColors node_colors = NodeColorsForScheme(color); return Printf( - R"(style="%s", fontcolor="%s", color="%s", fillcolor="%s")", style, - font_color, stroke_color, fill_color); + R"(style="%s", fontcolor="%s", color="%s", fillcolor="%s")", + node_colors.style, node_colors.font_color, node_colors.stroke_color, + node_colors.fill_color); } // Replaces <> with <>, so that this string is safe(er) for use in a @@ -604,11 +612,21 @@ tooltip = " "; StrAppend(&subcomp_label, "
", extra_info); } - // Subcomputation's fill/stroke color is light/dark red/gray, depending on - // whether or not the subcomputation's fusion node is highlighted. bool highlight = filter_.Highlight(parent_instr); - const char* fillcolor = highlight ? "#ffcdd2" : "#f5f5f5"; - const char* strokecolor = highlight ? "#b71c1c" : "#c2c2c2"; + const char* fillcolor; + const char* strokecolor; + if (debug_options_.xla_hlo_graph_sharding_color() && !highlight) { + // Use the sharding color, if the node isn't highlighted. + NodeColors node_colors = + NodeColorsForScheme(GetInstructionColor(parent_instr)); + fillcolor = node_colors.fill_color; + strokecolor = node_colors.stroke_color; + } else { + // Subcomputation's fill/stroke color is light/dark red/gray, depending on + // whether or not the subcomputation's fusion node is highlighted. + fillcolor = highlight ? "#ffcdd2" : "#f5f5f5"; + strokecolor = highlight ? "#b71c1c" : "#c2c2c2"; + } style = Printf(R"(style="rounded,filled,bold"; fillcolor="%s"; color="%s;")", fillcolor, strokecolor); -- GitLab From d3907d2fbec6f26d11a9e1b3df928f262903b510 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 1 Mar 2018 19:06:52 -0800 Subject: [PATCH 329/884] Update testing script and README.md --- tensorflow/contrib/tensorrt/README.md | 23 ++----- .../contrib/tensorrt/convert/convert_nodes.cc | 5 +- .../contrib/tensorrt/test/test_tftrt.py | 60 ++++++++++++++++--- 3 files changed, 60 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/tensorrt/README.md b/tensorflow/contrib/tensorrt/README.md index dfcce0fd00..461e627e99 100644 --- a/tensorflow/contrib/tensorrt/README.md +++ b/tensorflow/contrib/tensorrt/README.md @@ -2,7 +2,8 @@ Using TensorRT in TensorFlow ============================ This module provides necessary bindings and introduces TRT_engine_op -operator that wraps a subgraph in TensorRT. +operator that wraps a subgraph in TensorRT. This is still a work in progress +but should be useable with most common graphs. 
Compilation ----------- @@ -15,26 +16,10 @@ configure script should find the necessary components from the system automatically. If installed from tar packages, user has to set path to location where the library is installed during configuration. - -``` +```shell bazel build --config=cuda --config=opt //tensorflow/tools/pip_package:build_pip_package bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/ ``` After the installation of tensorflow package, TensorRT transformation -will be available. An example use is shown below. - -```python -import tensorflow as tf -import tensorflow.contrib.tensorrt as trt -#... create and train or load model -gdef = sess.graph.as_graph_def() -trt_gdef = trt.create_inference_graph( - gdef, #original graph_def - ["output"], #name of output node(s) - max_batch_size, #maximum batch size to run the inference - max_workspace_size_bytes) # max memory for TensorRT to use -tf.reset_default_graph() -tf.import_graph_def(graph_def=trt_gdef) -#...... run inference -``` +will be available. 
An example use can be found in test/test_tftrt.py directory diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index ec3dee40d7..f1925d364b 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -71,7 +71,8 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, break; default: - return tensorflow::errors::InvalidArgument("Unsupported data type "+tensorflow::DataTypeString(tf_dtype)); + return tensorflow::errors::InvalidArgument( + "Unsupported data type " + tensorflow::DataTypeString(tf_dtype)); } return tensorflow::Status::OK(); } @@ -2537,7 +2538,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( shape_inference_node_name = s.output_edge_map->at(tensor_name).second; shape_inference_output_idx = s.output_edge_map->at(tensor_name).first; } - if(shape_inference_output_idx<0)continue; + if (shape_inference_output_idx < 0) continue; VLOG(2) << "shapeinference name: " << shape_inference_node_name << " idx: " << shape_inference_output_idx; diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 18dba94acb..9e4077eca0 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -44,12 +44,11 @@ def get_simple_graph_def(): dtype=dtypes.float32, shape=(None, 24, 24, 2), name="input") e = cop.constant( [[[[1., 0.5, 4., 6., 0.5, 1.], [1., 0.5, 1., 1., 0.5, 1.]]]], - name="weights", - dtype=dtypes.float32) + name="weights", dtype=dtypes.float32) conv = nn.conv2d( input=a, filter=e, strides=[1, 2, 2, 1], padding="SAME", name="conv") - b = cop.constant( - [4., 1.5, 2., 3., 5., 7.], name="bias", dtype=dtypes.float32) + b = cop.constant([4., 1.5, 2., 3., 5., 7.], name="bias", + dtype=dtypes.float32) t = nn.bias_add(conv, b, name="biasAdd") relu = nn.relu(t, "relu") idty = aops.identity(relu, "ID") @@ 
-60,6 +59,7 @@ def get_simple_graph_def(): def run_graph(gdef, dumm_inp): + """Run given graphdef once""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() @@ -73,16 +73,62 @@ def run_graph(gdef, dumm_inp): val = sess.run(out, {inp: dumm_inp}) return val +# Use real data that is representative of the inference dataset +# for calibration. For this test script it is random data + + +def run_calibration(gdef, dumm_inp): + """Run given calibration graph multiple times""" + gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) + ops.reset_default_graph() + g = ops.Graph() + with g.as_default(): + inp, out = importer.import_graph_def( + graph_def=gdef, return_elements=["input", "output"]) + inp = inp.outputs[0] + out = out.outputs[0] + with csess.Session( + config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: + for _ in range(30): + val = sess.run(out, {inp: dumm_inp}) + return val + if "__main__" in __name__: inp_dims = (100, 24, 24, 2) dummy_input = np.random.random_sample(inp_dims) - gdef = get_simple_graph_def() + orig_graph = get_simple_graph_def() # use a frozen graph for inference # Get optimized graph - trt_graph = trt.create_inference_graph(gdef, ["output"], inp_dims[0]) - o1 = run_graph(gdef, dummy_input) + trt_graph = trt.create_inference_graph(input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP32", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + o1 = run_graph(orig_graph, dummy_input) o2 = run_graph(trt_graph, dummy_input) o3 = run_graph(trt_graph, dummy_input) assert np.array_equal(o1, o2) assert np.array_equal(o3, o2) # sanity check + fp16_graph = trt.create_inference_graph(input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="FP16", # 
TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + int8_calib_gdef = trt.create_inference_graph(input_graph_def=orig_graph, + outputs=["output"], + max_batch_size=inp_dims[0], + max_workspace_size_bytes=1 << 25, + precision_mode="INt8", # TRT Engine precision "FP32","FP16" or "INT8" + minimum_segment_size=2 # minimum number of nodes in an engine + ) + o4 = run_graph(fp16_graph, dummy_input) + _ = run_calibration(int8_calib_gdef, dummy_input) + int8_graph = trt.calib_graph_to_infer_graph(int8_calib_gdef) + o5 = run_graph(int8_graph, dummy_input) + assert np.allclose(o1, o4) + assert np.allclose(o1, o5) print("Pass") -- GitLab From 4735af25c0edfdc012d16a09377161b48839d858 Mon Sep 17 00:00:00 2001 From: Brett Koonce Date: Thu, 1 Mar 2018 21:00:45 -0800 Subject: [PATCH 330/884] minor spelling tweaks for contrib/verbs docs --- tensorflow/contrib/verbs/README.md | 2 +- tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index 58fed4e5cb..4b6104a8b4 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -93,7 +93,7 @@ When the receiver receives the RDMA write, it will locate the relevant **RdmaTen 1. When the sender receives a tensor request, the source tensor may or may not be ready yet. The situation is handled through a process of tag matching: * If the request arrives before the tensor is ready, then a callback is put in a local table, and will be invoked once the tensor arrives. - * If the tensor is ready before the request arives, than the tensor is put in a local table. When the request arrives, it will invoke the callback immediately. + * If the tensor is ready before the request arrives, then the tensor is put in a local table. When the request arrives, it will invoke the callback immediately. 
In code it is done by calling **RecvLocalAsync()**, which receives the tensor's key, step-id, and the callback. 2. When the callback is invoked, the relevant tensor is removed from the tag matching table. In the case where we need to send the tensor's meta-data, the **RdmaTensorResponse** will store a copy of the tensor until the re-request arrives. 3. The sending of protocol messages (**RDMA_MESSAGE_TENSOR_REQUEST**, **RDMA_MESSAGE_META_DATA_RESPONSE** and **RDMA_MESSAGE_TENSOR_RE_REQUEST**) is done by the class **RdmaMessageBuffer**. All messages are sent using RDMA writes from/to fixed messages buffers. This implies that we cannot send on a specific channel more than one message at a time. In order to synchronize the messages, the **RdmaMessageBuffer** holds the a local and remote buffer statuses which can be either busy or idle. When a write is issued, both statuses will be changed to busy. When the write-complete event is received, the local status is changed to idle. When the write is received on the remote side, the remote side will parse the message, and return an ACK back to the sending side on which the sending side will update the remote status to idle. When both the local and remote statuses are idle, the next message can be sent. diff --git a/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md b/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md index 956b8f2147..da6fdd48e1 100644 --- a/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md +++ b/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md @@ -64,7 +64,7 @@ The protocol messages themselves will remain mostly unchanged at the first stage * type - The message type. * request_index - Request index. * is_dead/data_type/tensor_shape/tensor_bytes - The up-to-date meta-data. -* **RDMA_MESSAGE_BUFFER_RESPONSE** - (receiver ==> sender) Tensor re-requset after meta-data update and reallocation of result/proxy tensors. 
+* **RDMA_MESSAGE_BUFFER_RESPONSE** - (receiver ==> sender) Tensor re-request after meta-data update and reallocation of result/proxy tensors. * type - The message type. * name (name_size) - Name of the requested tensor. * step_id - Step ID. -- GitLab From 9ae264f93e0f0048f2078588a5dfe6371acabb8b Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Thu, 1 Mar 2018 21:39:22 -0800 Subject: [PATCH 331/884] Merging upstream --- tensorflow/contrib/tensorrt/BUILD | 258 ++++++++---------- .../contrib/tensorrt/convert/convert_graph.h | 2 +- .../contrib/tensorrt/convert/convert_nodes.cc | 116 ++++---- .../contrib/tensorrt/kernels/trt_calib_op.cc | 9 +- .../tensorrt/resources/TRTInt8Calibrator.cc | 174 ------------ .../tensorrt/resources/TRTInt8Calibrator.h | 52 ---- .../tensorrt/resources/TRTResourceManager.cc | 33 --- .../tensorrt/resources/TRTResourceManager.h | 47 ---- .../contrib/tensorrt/resources/TRTResources.h | 91 ------ .../tensorrt/resources/trt_int8_calibrator.cc | 42 +-- .../tensorrt/resources/trt_int8_calibrator.h | 8 +- 11 files changed, 205 insertions(+), 627 deletions(-) delete mode 100644 tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc delete mode 100644 tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h delete mode 100644 tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc delete mode 100644 tensorflow/contrib/tensorrt/resources/TRTResourceManager.h delete mode 100644 tensorflow/contrib/tensorrt/resources/TRTResources.h diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 1010a8988d..79ed24b570 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -3,46 +3,46 @@ # and provide TensorRT operators and converter package. # APIs are meant to change over time. 
-package(default_visibility=["//tensorflow:__subpackages__"]) +package(default_visibility = ["//tensorflow:__subpackages__"]) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) load( - "//tensorflow:tensorflow.bzl", - "tf_cc_test", - "tf_copts", - "tf_cuda_library", - "tf_custom_op_library", - "tf_custom_op_library_additional_deps", - "tf_gen_op_libs", - "tf_gen_op_wrapper_py", + "//tensorflow:tensorflow.bzl", + "tf_cc_test", + "tf_copts", + "tf_cuda_library", + "tf_custom_op_library", + "tf_custom_op_library_additional_deps", + "tf_gen_op_libs", + "tf_gen_op_wrapper_py", ) load("//tensorflow:tensorflow.bzl", "tf_cuda_cc_test") load("//tensorflow:tensorflow.bzl", "tf_custom_op_py_library") load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") load( - "@local_config_tensorrt//:build_defs.bzl", - "if_tensorrt", + "@local_config_tensorrt//:build_defs.bzl", + "if_tensorrt", ) tf_cuda_cc_test( - name="tensorrt_test_cc", - size="small", - srcs=["tensorrt_test.cc"], - tags=[ - "manual", - "notap", - ], - deps=[ - "//tensorflow/core:lib", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ] + if_tensorrt([ - "@local_config_cuda//cuda:cuda_headers", - "@local_config_tensorrt//:nv_infer", - ]), + name = "tensorrt_test_cc", + size = "small", + srcs = ["tensorrt_test.cc"], + tags = [ + "manual", + "notap", + ], + deps = [ + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ] + if_tensorrt([ + "@local_config_cuda//cuda:cuda_headers", + "@local_config_tensorrt//:nv_infer", + ]), ) tf_custom_op_library( @@ -61,15 +61,15 @@ tf_custom_op_library( ) tf_cuda_library( - name="trt_shape_function", - srcs=["shape_fn/trt_shfn.cc"], - hdrs=["shape_fn/trt_shfn.h"], - visibility=["//visibility:public"], - deps=[ - ":trt_logging", - ] + if_tensorrt([ - "@local_config_tensorrt//:nv_infer", - ]) + tf_custom_op_library_additional_deps(), + name = "trt_shape_function", + srcs = ["shape_fn/trt_shfn.cc"], + hdrs = ["shape_fn/trt_shfn.h"], 
+ visibility = ["//visibility:public"], + deps = [ + ":trt_logging", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]) + tf_custom_op_library_additional_deps(), ) cc_library( @@ -83,6 +83,7 @@ cc_library( "kernels/trt_engine_op.h", ], copts = tf_copts(), + visibility = ["//visibility:public"], deps = [ ":trt_logging", ":trt_resources", @@ -92,7 +93,6 @@ cc_library( ] + if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]) + tf_custom_op_library_additional_deps(), - visibility = ["//visibility:public"], # TODO(laigd) alwayslink = 1, # buildozer: disable=alwayslink-with-hdrs ) @@ -108,15 +108,15 @@ tf_gen_op_libs( ) tf_cuda_library( - name="trt_logging", - srcs=["log/trt_logger.cc"], - hdrs=["log/trt_logger.h"], - visibility=["//visibility:public"], - deps=[ - "//tensorflow/core:lib_proto_parsing", - ] + if_tensorrt([ - "@local_config_tensorrt//:nv_infer", - ]), + name = "trt_logging", + srcs = ["log/trt_logger.cc"], + hdrs = ["log/trt_logger.h"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:lib_proto_parsing", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), ) tf_gen_op_wrapper_py( @@ -130,80 +130,60 @@ tf_gen_op_wrapper_py( ) tf_custom_op_py_library( - name="trt_engine_op_loader", - srcs=["python/ops/trt_engine_op.py"], - dso=[ + name = "trt_engine_op_loader", + srcs = ["python/ops/trt_engine_op.py"], + dso = [ ":python/ops/_trt_engine_op.so", - ] + if_tensorrt([ - "@local_config_tensorrt//:nv_infer", - ]), - srcs_version="PY2AND3", - deps=[ - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:resources", - ], + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), + srcs_version = "PY2AND3", + deps = [ + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:resources", + ], ) py_library( - name="init_py", - srcs=[ - "__init__.py", - "python/__init__.py", - ], - srcs_version="PY2AND3", - deps=[ - ":trt_convert_py", - ":trt_ops_py", - ], + name = 
"init_py", + srcs = [ + "__init__.py", + "python/__init__.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":trt_convert_py", + ":trt_ops_py", + ], ) py_library( - name="trt_ops_py", - srcs_version="PY2AND3", - deps=[ - ":trt_engine_op", - ":trt_engine_op_loader", - ], + name = "trt_ops_py", + srcs_version = "PY2AND3", + deps = [ + ":trt_engine_op", + ":trt_engine_op_loader", + ], ) py_library( - name="trt_convert_py", - srcs=["python/trt_convert.py"], - srcs_version="PY2AND3", - deps=[ - ":wrap_conversion", - ], + name = "trt_convert_py", + srcs = ["python/trt_convert.py"], + srcs_version = "PY2AND3", + deps = [ + ":wrap_conversion", + ], ) tf_py_wrap_cc( - name="wrap_conversion", - srcs=["trt_conversion.i"], - copts=tf_copts(), - deps=[ - ":trt_conversion", - "//tensorflow/core:framework_lite", - "//util/python:python_headers", - ], -) - -tf_cuda_library( - name="trt_resources", - srcs=[ - "resources/TRTInt8Calibrator.cc", - "resources/TRTResourceManager.cc", - ], - hdrs=[ - "resources/TRTInt8Calibrator.h", - "resources/TRTResourceManager.h", - "resources/TRTResources.h", - ], - deps=[ - "@local_config_tensorrt//:nv_infer", - "//tensorflow/core:framework_headers_lib", - "//tensorflow/core:framework_lite", - "//tensorflow/core:lib_proto_parsing", - - ], + name = "wrap_conversion", + srcs = ["trt_conversion.i"], + copts = tf_copts(), + deps = [ + ":trt_conversion", + "//tensorflow/core:framework_lite", + "//util/python:python_headers", + ], ) tf_cuda_library( @@ -262,43 +242,43 @@ tf_cuda_library( # Library for the segmenting portion of TensorRT operation creation cc_library( - name="segment", - srcs=["segment/segment.cc"], - hdrs=[ - "segment/segment.h", - "segment/union_find.h", - ], - linkstatic=1, - deps=[ - "//tensorflow/core:graph", - "//tensorflow/core:lib_proto_parsing", - "//tensorflow/core:protos_all_cc", - "@protobuf_archive//:protobuf_headers", - ], + name = "segment", + srcs = ["segment/segment.cc"], + hdrs = [ + "segment/segment.h", + 
"segment/union_find.h", + ], + linkstatic = 1, + deps = [ + "//tensorflow/core:graph", + "//tensorflow/core:lib_proto_parsing", + "//tensorflow/core:protos_all_cc", + "@protobuf_archive//:protobuf_headers", + ], ) tf_cc_test( - name="segment_test", - size="small", - srcs=["segment/segment_test.cc"], - deps=[ - ":segment", - "//tensorflow/c:c_api", - "//tensorflow/core:lib", - "//tensorflow/core:protos_all_cc", - "//tensorflow/core:test", - "//tensorflow/core:test_main", - ], + name = "segment_test", + size = "small", + srcs = ["segment/segment_test.cc"], + deps = [ + ":segment", + "//tensorflow/c:c_api", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + ], ) filegroup( - name="all_files", - srcs=glob( - ["**/*"], - exclude=[ - "**/METADATA", - "**/OWNERS", - ], - ), - visibility=["//tensorflow:__subpackages__"], + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], ) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 5d5301393c..905824cdc8 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -38,7 +38,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode,int minimum_segment_size); + int precision_mode, int minimum_segment_size); } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index f1925d364b..1bd60c650e 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ 
b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -25,8 +25,8 @@ limitations under the License. #include #include "tensorflow/contrib/tensorrt/log/trt_logger.h" -#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" -#include "tensorflow/contrib/tensorrt/resources/TRTResources.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/node_def_builder.h" @@ -319,7 +319,7 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, } template -void reorder2(nvinfer1::DimsHW shape, T const* idata, nvinfer1::DimsHW istrides, +void Reorder2(nvinfer1::DimsHW shape, T const* idata, nvinfer1::DimsHW istrides, T* odata, nvinfer1::DimsHW ostrides) { for (int h = 0; h < shape.h(); ++h) { for (int w = 0; w < shape.w(); ++w) { @@ -330,8 +330,8 @@ void reorder2(nvinfer1::DimsHW shape, T const* idata, nvinfer1::DimsHW istrides, } // TODO(jie): fail to tensorflow!! 
-void reorder_ck_to_kc(TRT_ShapedWeights const& iweights, - TRT_ShapedWeights* oweights) { +void ReorderCKtoKC(TRT_ShapedWeights const& iweights, + TRT_ShapedWeights* oweights) { int c = iweights.shape_.d[0]; int k = iweights.shape_.d[1]; oweights->shape_.d[0] = k; @@ -340,14 +340,14 @@ void reorder_ck_to_kc(TRT_ShapedWeights const& iweights, nvinfer1::DimsHW ostrides = {c, 1}; switch (iweights.type_) { case tensorflow::DataType::DT_FLOAT: { - reorder2({k, c}, static_cast(iweights.GetValues()), + Reorder2({k, c}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), ostrides); break; } case tensorflow::DataType::DT_HALF: { - reorder2( + Reorder2( {k, c}, static_cast(iweights.GetValues()), istrides, static_cast(const_cast(oweights->GetValues())), @@ -427,7 +427,7 @@ class Converter { std::unordered_map op_registry_; nvinfer1::INetworkDefinition* trt_network_; std::list> temp_bufs_; - tensorflow::trt::TRTWeightStore* weight_store_; + tensorflow::tensorrt::TRTWeightStore* weight_store_; bool fp16_; void register_op_converters(); std::vector get_inputs( @@ -464,11 +464,11 @@ class Converter { public: explicit Converter(nvinfer1::INetworkDefinition* trt_network, - tensorflow::trt::TRTWeightStore* ws, bool fp16) + tensorflow::tensorrt::TRTWeightStore* ws, bool fp16) : trt_network_(trt_network), weight_store_(ws), fp16_(fp16) { this->register_op_converters(); } - tensorflow::trt::TRTWeightStore* weight_store() { return weight_store_; } + tensorflow::tensorrt::TRTWeightStore* weight_store() { return weight_store_; } TRT_ShapedWeights get_temp_weights(tensorflow::DataType type, nvinfer1::Dims shape) { TRT_ShapedWeights weights(type, nullptr, shape); @@ -813,12 +813,12 @@ tensorflow::Status ConstantFoldBinary( "Binary op implicit broadcast not supported: " + node_def.op()); // TODO(jie): constant fold should really fall back to TF. 
- int nb_dims = weights_input_l.shape_.nbDims; + int num_dims = weights_input_l.shape_.nbDims; nvinfer1::Dims output_shape; - output_shape.nbDims = nb_dims; - VLOG(2) << "nb_dims: " << nb_dims + output_shape.nbDims = num_dims; + VLOG(2) << "nb_dims: " << num_dims << ", the other: " << weights_input_r.shape_.nbDims; - for (int i = 0; i < nb_dims; i++) { + for (int i = 0; i < num_dims; i++) { if (weights_input_l.shape_.d[i] == weights_input_r.shape_.d[i]) { output_shape.d[i] = weights_input_l.shape_.d[i]; } else if (weights_input_l.shape_.d[i] == 1 || @@ -1950,27 +1950,6 @@ tensorflow::Status ConvertFusedBatchNorm( } } } - // if (scale_weights.type_ != tensorflow::DataType::DT_FLOAT || - // offset_weights.type_ != tensorflow::DataType::DT_FLOAT || - // mean_weights.type_ != tensorflow::DataType::DT_FLOAT || - // variance_weights.type_ != tensorflow::DataType::DT_FLOAT) { - // return tensorflow::errors::Unimplemented( - // "only float32 weights data type is supported, at " + - // node_def.name()); - // } - // for (size_t i = 0; i < nweight; ++i) { - // float scale = (static_cast(scale_weights.GetValues()))[i]; - // float offset = (static_cast(offset_weights.GetValues()))[i]; float mean = (static_cast(mean_weights.GetValues()))[i]; float variance = - // (static_cast(variance_weights.GetValues()))[i]; - // float& combined_scale_ref = const_cast( - // static_cast(combined_scale_weights.GetValues()))[i]; - // float& combined_offset_ref = const_cast( - // static_cast(combined_offset_weights.GetValues()))[i]; - // combined_scale_ref = scale / sqrtf(variance + epsilon); - // combined_offset_ref = offset - mean * combined_scale_ref; - // } nvinfer1::IScaleLayer* layer = ctx.network()->addScale( *const_cast(tensor), nvinfer1::ScaleMode::kCHANNEL, combined_offset_weights.GetWeightsForTRT(), @@ -1996,7 +1975,7 @@ tensorflow::Status ConvertMatMul(Converter& ctx, TRT_ShapedWeights weights_ck = inputs.at(1).weights(); TRT_ShapedWeights weights = 
ctx.get_temp_weights_like(weights_ck); - reorder_ck_to_kc(weights_ck, &weights); + ReorderCKtoKC(weights_ck, &weights); TRT_ShapedWeights biases(weights.type_); int noutput = weights.shape_.d[0]; @@ -2022,7 +2001,6 @@ tensorflow::Status ConvertReshape( nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // restore implicit batch dimension - int nbDims = dims.nbDims + 1; TRT_ShapedWeights shape = inputs.at(1).weights(); @@ -2171,32 +2149,32 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( for (auto& i : input_names) { VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); } - auto trt_rm = tensorflow::trt::TRTResourceManager::instance(); + auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); auto resmgr = trt_rm->getManager("TRTCalibOps"); - tensorflow::trt::TRTCalibrationResource* calibRes = nullptr; - auto status = resmgr->Lookup(res_name, res_name, &calibRes); - if (!status.ok() || !calibRes->calibrator) { + tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; + auto status = resmgr->Lookup(res_name, res_name, &calib_res); + if (!status.ok() || !calib_res->calibrator_) { return tensorflow::errors::FailedPrecondition( "You must run calibration" " and inference conversion in the same proces"); } - calibRes->calibrator->setDone(); - calibRes->thr->join(); - delete calibRes->thr; - if (!calibRes->engine) { + calib_res->calibrator_->setDone(); + calib_res->thr_->join(); + delete calib_res->thr_; + if (!calib_res->engine_) { LOG(FATAL) << "Calibration failed!, engine is nullptr"; } auto weight_rmgr = trt_rm->getManager("WeightStore"); - TF_CHECK_OK( - weight_rmgr->Delete(res_name, res_name)); - auto engine_plan = calibRes->engine->serialize(); - calibRes->engine->destroy(); - calibRes->network->destroy(); - calibRes->builder->destroy(); - calibRes->thr = nullptr; - calibRes->engine = nullptr; - calibRes->builder = nullptr; + TF_CHECK_OK(weight_rmgr->Delete( + res_name, res_name)); + auto 
engine_plan = calib_res->engine_->serialize(); + calib_res->engine_->destroy(); + calib_res->network_->destroy(); + calib_res->builder_->destroy(); + calib_res->thr_ = nullptr; + calib_res->engine_ = nullptr; + calib_res->builder_ = nullptr; tensorflow::NodeDefBuilder op_builder(engine_name, "TRTEngineOp"); std::vector income_edges; for (const auto in_edge : c_node->in_edges()) { @@ -2275,23 +2253,23 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op", static_id); static_id++; VLOG(2) << "BUILDING 2"; - auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); - auto op_res = new tensorflow::trt::TRTCalibrationResource(); + auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); VLOG(1) << "SAMI Creating calibresource " << calib_op_name << " @ " << op_res; TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); - op_res->logger = new tensorflow::tensorrt::Logger(); - op_res->builder = nvinfer1::createInferBuilder(*(op_res->logger)); + op_res->logger_ = new tensorflow::tensorrt::Logger(); + op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); - if (!op_res->builder) { + if (!op_res->builder_) { return tensorflow::errors::Internal( "failed to create TensorRT builder object"); } VLOG(2) << "BUILDING 3"; - op_res->network = op_res->builder->createNetwork(); - if (!op_res->network) { + op_res->network_ = op_res->builder_->createNetwork(); + if (!op_res->network_) { return tensorflow::errors::Internal( "failed to create TensorRT network object"); } @@ -2300,9 +2278,9 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // Build the network auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::trt::TRTWeightStore(); + auto ws = new 
tensorflow::tensorrt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network, ws, s.precision_mode == 1); + Converter converter(op_res->network_, ws, s.precision_mode == 1); VLOG(2) << "BUILDING 5"; std::vector input_names; @@ -2420,8 +2398,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { VLOG(2) << "finished output"; // Build the engine - op_res->builder->setMaxBatchSize(s.max_batch_size); - op_res->builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); + op_res->builder_->setMaxBatchSize(s.max_batch_size); + op_res->builder_->setMaxWorkspaceSize(s.max_workspace_size_bytes); // Build the TRT op // TODO(sami,ben,jie): proper naming! @@ -2505,9 +2483,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( string engine_name = tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op"); engine_name = tensorflow::strings::StrCat(engine_name, static_id++); - auto trt_rmgr = tensorflow::trt::TRTResourceManager::instance(); + auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); auto weight_rmgr = trt_rmgr->getManager("WeightStore"); - auto ws = new tensorflow::trt::TRTWeightStore(); + auto ws = new tensorflow::tensorrt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); // Build the network @@ -2680,8 +2658,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - weight_rmgr->Delete(engine_name, - engine_name); + weight_rmgr->Delete(engine_name, + engine_name); LOG(INFO) << "finished engine " << engine_name; // Build the TRT op diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index 1dcb87e768..b78ff18a8d 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -21,6 +21,7 @@ limitations 
under the License. #include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_types.h" #include "tensorflow/core/framework/types.h" +#include "tensorflow/core/platform/stream_executor.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT @@ -113,7 +114,13 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { ctx->set_output(i, t); } VLOG(2) << "Filled map for sending"; - calib_res->calibrator_->setBatch(input_data); + // copied from cuda_kernel_helper since it seems only valid in *.cu.cc files + const cudaStream_t* stream = CHECK_NOTNULL( + reinterpret_cast(ctx->op_device_context() + ->stream() + ->implementation() + ->CudaStreamMemberHack())); + calib_res->calibrator_->setBatch(input_data,*stream); VLOG(2) << "Passed calibration data"; // TODO(aaroey): make sure we wait for the completion of calibration on the // last batch in future PR. diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc deleted file mode 100644 index 57677a327d..0000000000 --- a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.cc +++ /dev/null @@ -1,174 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" - -#include -#include -#include -#include "cuda_runtime_api.h" - -#include "tensorflow/core/platform/logging.h" - -namespace tensorflow { -namespace trt { -// set the batch size before constructing the thread to execute engine -int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } - -TRTInt8Calibrator::TRTInt8Calibrator( - const std::unordered_map>& dev_buffers, - int batch_size, string engineName) - : batch_size_(batch_size), - done_(false), - dev_buffers_(dev_buffers), - calib_running_(false), - engine_name_(engineName) { - cudaPointerAttributes pa; - int devid = -1; - cudaGetDevice(&devid); - VLOG(0) << "Constructing calibrator with batch size " << batch_size - << " on device" << devid; - for (auto b : dev_buffers_) { - if (cudaPointerGetAttributes(&pa, b.second.first) == cudaSuccess) { - VLOG(1) << "CALIBRATOR " << engine_name_ << " Device buffer name " - << b.first << " size" << b.second.second << " @ " - << b.second.first << " onDevice " - << ((pa.memoryType == cudaMemoryTypeHost) ? 
"HOST" : "DEVICE"); - } else { - VLOG(1) << "CALIBRATOR " << engine_name_ << " Device buffer name " - << b.first << " size" << b.second.second << " @ " - << b.second.first; - } - } -} - -bool TRTInt8Calibrator::setBatch( - const std::unordered_map& data) { - VLOG(1) << "SAMI SAMI " << engine_name_ << " Waiting to set new batch"; - if (done_) return false; - while (calib_running_.load( - std::memory_order_acquire)) { // wait while calibration is running - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); - if (done_) return false; - } - VLOG(1) << "Set Batch Waiting finished"; - for (const auto it : data) { - auto devptr = dev_buffers_.find(it.first); - if (devptr == dev_buffers_.end()) { - LOG(FATAL) << "FATAL " << engine_name_ << " input name '" << it.first - << "' does not match with the buffer names"; - } - const auto& d = devptr->second; - if (VLOG_IS_ON(1)) { - cudaPointerAttributes pa; - VLOG(1) << "cuda memcopy " << engine_name_ << " buff name= " << it.first - << " dst= " << d.first << " size= " << d.second - << " inp= " << it.second; - if (cudaPointerGetAttributes(&pa, it.second) == cudaSuccess) { - VLOG(1) << "CALIBRATOR " << engine_name_ << " Device buffer name " - << it.first << " size" << d.second << " @ " << d.first - << " onDevice " - << ((pa.memoryType == cudaMemoryTypeHost) ? 
"HOST" : "DEVICE"); - } - } - - auto status = - cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); - if (status != cudaSuccess) { - LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first - << "' failed with " << status; - } - if (VLOG_IS_ON(1)) { - float f[4]; - f[0] = 3.; - f[1] = 0.14159; - f[2] = 3.; - f[3] = 0.14159; - status = - cudaMemcpy(f, d.first, sizeof(float) * 2, cudaMemcpyDeviceToHost); - if (status != cudaSuccess) { - VLOG(1) << "Memcopy failed!"; - } - status = cudaMemcpy(f + 2, it.second, sizeof(float) * 2, - cudaMemcpyDeviceToHost); - int devid = -1; - cudaGetDevice(&devid); - VLOG(1) << "SAMI ORDER SETTING " << engine_name_ - << " Data in perm storage [0]=" << f[0] << " [1]=" << f[1] - << " current device=" << devid << " data in tensor=" << f[2] - << " " << f[3]; - } - } - calib_running_.store(true, std::memory_order_release); // release builder - cond_.notify_all(); - return true; -} - -bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, - int nbBindings) { - calib_running_.store(false, std::memory_order_release); // wait for new batch - VLOG(1) << "SAMI SAMI Calibrator is waiting for new batch"; - cond_.notify_all(); - while (!calib_running_.load( - std::memory_order_acquire)) { // wait until new batch arrives - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); - if (done_) return false; - } - if (done_) { - return false; - } - - for (int i = 0; i < nbBindings; i++) { - auto it = dev_buffers_.find(names[i]); - if (it == dev_buffers_.end()) { - LOG(FATAL) << "Calibration engine asked for unknown tensor name '" - << names[i] << "' at position " << i; - } - - bindings[i] = it->second.first; - if (VLOG_IS_ON(1)) { - VLOG(1) << "Setting buffer " << i << " named=" << names[i] << " @ " - << it->second.first; - float f[2]; - f[0] = 3.; - f[1] = 0.14159; - auto status = - cudaMemcpy(f, bindings[i], sizeof(float) * 2, cudaMemcpyDeviceToHost); - if (status != 
cudaSuccess) { - VLOG(0) << "Memcopy failed!"; - } - int devid = -1; - cudaGetDevice(&devid); - VLOG(1) << "ORDER GETTING, " << engine_name_ - << " Data in perm storage [0]=" << f[0] << " [1]=" << f[1] - << " on device=" << devid - << " Succeed=" << (status == cudaSuccess ? "True" : "False"); - } - } - return true; -} -const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { - return nullptr; -} -void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, - std::size_t length) {} -TRTInt8Calibrator::~TRTInt8Calibrator() { - VLOG(1) << "Destroying calibrator for " << engine_name_; -} - -} // namespace trt -} // namespace tensorflow \ No newline at end of file diff --git a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h b/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h deleted file mode 100644 index 62c2bf99b6..0000000000 --- a/tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h +++ /dev/null @@ -1,52 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ -#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ - -#include -#include -#include -#include -#include "tensorflow/core/platform/mutex.h" -#include "tensorrt/include/NvInfer.h" -namespace tensorflow { -namespace trt { - -struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { - public: - TRTInt8Calibrator( - const std::unordered_map>& dev_buffers, - int batch_size, string engineName); - int getBatchSize() const; - bool getBatch(void* bindings[], const char* names[], int nbBindings) override; - bool setBatch(const std::unordered_map& data); - void setDone() { done_ = true; } - const void* readCalibrationCache(std::size_t& length) override; - void writeCalibrationCache(const void* ptr, std::size_t length) override; - ~TRTInt8Calibrator(); - - private: - int batch_size_; - tensorflow::mutex cond_mtx_; - tensorflow::condition_variable cond_; - bool done_; - const std::unordered_map> dev_buffers_; - std::atomic_bool calib_running_; - string engine_name_; -}; -} // namespace trt -} // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTINT8CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc deleted file mode 100644 index 3eea23b1b8..0000000000 --- a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.cc +++ /dev/null @@ -1,33 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/contrib/tensorrt/resources/TRTResourceManager.h" -#include "tensorflow/core/platform/default/logging.h" - -std::shared_ptr -tensorflow::trt::TRTResourceManager::getManager(const std::string& mgr_name) { - // mutex is held for lookup only. Most instantiations where mutex will be held - // longer will be during op creation and should be ok. - tensorflow::mutex_lock lock(map_mutex_); - auto s = managers_.find(mgr_name); - if (s == managers_.end()) { - auto it = managers_.emplace( - mgr_name, std::make_shared(mgr_name)); - VLOG(0) << "Returning a new manager " << mgr_name; - return it.first->second; - } - VLOG(1) << "Returning old manager " << mgr_name; - return s->second; -} diff --git a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h b/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h deleted file mode 100644 index d482c7d526..0000000000 --- a/tensorflow/contrib/tensorrt/resources/TRTResourceManager.h +++ /dev/null @@ -1,47 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCEMANAGER_H_ - -#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCE_TRTRESOURCEMANAGER_H_ -#include - -#include -#include -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorflow/core/platform/mutex.h" - -namespace tensorflow { -namespace trt { -class TRTResourceManager { - TRTResourceManager() = default; - - public: - static std::shared_ptr instance() { - static std::shared_ptr instance_( - new TRTResourceManager); - return instance_; - } - // returns a manager for given op, if it doesn't exists it creates one - std::shared_ptr getManager(const string& op_name); - - private: - std::unordered_map> - managers_; - tensorflow::mutex map_mutex_; -}; -} // namespace trt -} // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCEMANAGER_H_ diff --git a/tensorflow/contrib/tensorrt/resources/TRTResources.h b/tensorflow/contrib/tensorrt/resources/TRTResources.h deleted file mode 100644 index 20ccf0f9d4..0000000000 --- a/tensorflow/contrib/tensorrt/resources/TRTResources.h +++ /dev/null @@ -1,91 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ - -#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ - -#include -#include -#include -#include -#include -#include "tensorflow/contrib/tensorrt/log/trt_logger.h" -#include "tensorflow/contrib/tensorrt/resources/TRTInt8Calibrator.h" -#include "tensorflow/core/framework/resource_mgr.h" -#include "tensorrt/include/NvInfer.h" - -namespace tensorflow { -namespace trt { -struct TRTCalibrationResource : public tensorflow::ResourceBase { - TRTCalibrationResource() - : calibrator(nullptr), - builder(nullptr), - network(nullptr), - engine(nullptr), - logger(nullptr), - thr(nullptr) {} - string DebugString() override { - std::stringstream oss; -#define VALID_OR_NULL(ptr) \ - (!ptr ? "nullptr" : std::hex << (void)ptr << std::dec << std::endl) - oss << " Calibrator = " << std::hex << calibrator << std::dec << std::endl - << " Builder = " << std::hex << builder << std::dec << std::endl - << " Network = " << std::hex << network << std::dec << std::endl - << " Engine = " << std::hex << engine << std::dec << std::endl - << " Logger = " << std::hex << logger << std::dec << std::endl - << " Thread = " << std::hex << thr << std::dec << std::endl; - return oss.str(); -#undef VALID_OR_NULL - } - ~TRTCalibrationResource() { - VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); - } - TRTInt8Calibrator* calibrator; - nvinfer1::IBuilder* builder; - nvinfer1::INetworkDefinition* network; - nvinfer1::ICudaEngine* engine; - tensorflow::tensorrt::Logger* logger; - // TODO(sami): Use threadpool threads! 
- std::thread* thr; -}; - -struct TRTWeightStore : public tensorflow::ResourceBase { - TRTWeightStore() {} - std::list> store_; - string DebugString() override { - std::stringstream oss; - size_t lenBytes = 0; - for (const auto& v : store_) { - lenBytes += v.size() * sizeof(uint8_t); - } - oss << " Number of entries = " << store_.size() << std::endl - << " Total number of bytes = " - << store_.size() * sizeof(std::vector) + lenBytes << std::endl; - return oss.str(); - } - virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } -}; - -struct TRTEngineResource : public tensorflow::ResourceBase { - TRTEngineResource() : runtime(nullptr), ctx(nullptr){}; - string DebugString() override { return string(""); } - nvinfer1::IRuntime* runtime; - nvinfer1::IExecutionContext* ctx; -}; - -} // namespace trt -} // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCEMGR_TRTRESOURCES_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 3d5cc76c42..f15772058f 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -38,22 +38,24 @@ TRTInt8Calibrator::TRTInt8Calibrator( done_(false), dev_buffers_(dev_buffers), calib_running_(false), + batch_is_set_(false), engine_name_(engine_name) {} -bool TRTInt8Calibrator::setBatch( - const std::unordered_map& data) { +bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, + const cudaStream_t stream) { // TODO(aaroey): make sure that in future PR: // 1. the mutex_lock is outside of the loop // 2. wait() is used instead of wait_for() // 3. done_ is to be protected by the mutex // 4. 
the first batch is not missed if (done_) return false; - while (calib_running_.load( - std::memory_order_acquire)) { // wait while calibration is running - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); + tensorflow::mutex_lock l(cond_mtx_); + while ((calib_running_ || batch_is_set_) && + !done_) { // wait while calibration is running + cond_.wait(l); if (done_) return false; } + CHECK(!calib_running_ && !batch_is_set_); VLOG(1) << "Set Batch Waiting finished"; for (const auto it : data) { auto devptr = dev_buffers_.find(it.first); @@ -65,32 +67,32 @@ bool TRTInt8Calibrator::setBatch( // TODO(aaroey): we should not use sync copy on default stream. Make sure // stream->ThenMemcpy() is used in future PRs. - auto status = - cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); + auto status = cudaMemcpyAsync(d.first, it.second, d.second, + cudaMemcpyDeviceToDevice, stream); if (status != cudaSuccess) { LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first << "' failed with " << status; } } - calib_running_.store(true, std::memory_order_release); // release builder + cudaStreamSynchronize( + stream); // we have to wait for the stream before returning! 
+ batch_is_set_ = true; cond_.notify_all(); return true; } bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, int num_bindings) { - calib_running_.store(false, std::memory_order_release); // wait for new batch + tensorflow::mutex_lock l(cond_mtx_); + calib_running_ = false; cond_.notify_all(); - while (!calib_running_.load( - std::memory_order_acquire)) { // wait until new batch arrives - tensorflow::mutex_lock l(cond_mtx_); - cond_.wait_for(l, std::chrono::milliseconds(50)); - if (done_) return false; + while ((!batch_is_set_ && !done_)) { // wait until new batch arrives + cond_.wait(l); } if (done_) { return false; } - + CHECK(!calib_running_ && batch_is_set_); for (int i = 0; i < num_bindings; i++) { auto it = dev_buffers_.find(names[i]); if (it == dev_buffers_.end()) { @@ -100,13 +102,19 @@ bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, bindings[i] = it->second.first; } + batch_is_set_ = false; + calib_running_ = true; return true; } const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { return nullptr; } - +void TRTInt8Calibrator::setDone() { + tensorflow::mutex_lock l(cond_mtx_); + done_ = true; + cond_.notify_all(); +} void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, std::size_t length) {} TRTInt8Calibrator::~TRTInt8Calibrator() { diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index 8830f7efe7..cab9c7e43b 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -24,6 +24,7 @@ limitations under the License. 
#if GOOGLE_CUDA #if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { namespace tensorrt { @@ -39,8 +40,8 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { int getBatchSize() const override; bool getBatch(void* bindings[], const char* names[], int num_bindings) override; - bool setBatch(const std::unordered_map& data); - void setDone() { done_ = true; } + bool setBatch(const std::unordered_map& data,const cudaStream_t stream); + void setDone(); const void* readCalibrationCache(std::size_t& length) override; void writeCalibrationCache(const void* ptr, std::size_t length) override; ~TRTInt8Calibrator(); @@ -55,7 +56,8 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { const std::unordered_map> dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with // buffer names - std::atomic_bool calib_running_; + bool calib_running_; + bool batch_is_set_; string engine_name_; }; } // namespace tensorrt -- GitLab From 1401b731cc2df2ca48117216b5f91c9f2070ae3c Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 1 Mar 2018 22:25:41 -0800 Subject: [PATCH 332/884] Automated g4 rollback of changelist 187563544 PiperOrigin-RevId: 187582263 --- tensorflow/core/BUILD | 1 - tensorflow/core/grappler/optimizers/BUILD | 1 - .../grappler/optimizers/memory_optimizer.cc | 20 +++++-------- .../grappler/optimizers/memory_optimizer.h | 10 +++---- .../grappler/optimizers/meta_optimizer.cc | 4 +-- .../core/protobuf/rewriter_config.proto | 16 +++++----- .../python/grappler/memory_optimizer_test.py | 29 +------------------ 7 files changed, 25 insertions(+), 56 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 96e30ca3c0..3271825251 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2231,7 +2231,6 @@ cc_library( ], visibility = [ "//tensorflow/compiler:__subpackages__", - "//tensorflow/core/grappler:__subpackages__", 
"//tensorflow/core/profiler:__subpackages__", ], deps = [":lib_internal"], diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 0a4330b524..037438ee75 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -363,7 +363,6 @@ cc_library( ":graph_rewriter", ":static_schedule", "//tensorflow/core:framework", - "//tensorflow/core:regexp_internal", "//tensorflow/core:lib", "//tensorflow/core:protos_all_cc", "//tensorflow/core/grappler:graph_view", diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index d73050ac4d..694139fa50 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -36,7 +36,6 @@ limitations under the License. #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/grappler/utils/topological_sort.h" #include "tensorflow/core/grappler/utils/traversal.h" -#include "tensorflow/core/platform/regexp.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { @@ -414,7 +413,7 @@ void RecomputeSubgraph( } void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_regexp, + const string& recomputation_targets_name_prefix, GraphDef* graph, const GrapplerItem& item) { if (optimization_level != RewriterConfig::RECOMPUTATION_HEURISTICS && optimization_level != RewriterConfig::HEURISTICS && @@ -438,19 +437,16 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, for (const auto& feed : item.feed) { feeds.insert(NodeName(feed.first)); } - RE2 recomputation_targets_re(recomputation_targets_name_regexp); std::function is_target = - [&recomputation_targets_re](const NodeDef& node) { - // Nodes whose inputs we may want to recompute. 
This does a prefix - // regexp match, and typically one sets regexp="gradients/" meaning - // it will match all node names with scope beginning with "gradients/". - // If used within scopes, one may want to set regexp="(.+/)?gradients/". + [&recomputation_targets_name_prefix](const NodeDef& node) { + // Nodes whose inputs we may want to recompute. Typically targets will + // be gradients (recomputation_targets_name_prefix="gradients/"), + // although the prefix is configurable since gradients may be created + // in a name scope. // TODO(allenl): Use a static schedule // (grappler::EstimateEarliestExecutionTimes) to recompute only nodes // whose outputs will sit around for a while. - bool match = recomputation_targets_re.Match( - node.name(), 0, node.name().size(), RE2::ANCHOR_START, nullptr, 0); - return match; + return node.name().find(recomputation_targets_name_prefix) == 0; }; if (optimization_level == RewriterConfig::RECOMPUTATION_HEURISTICS || @@ -1229,7 +1225,7 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = item.graph; RecomputationRewritingPass(optimization_level_, - recomputation_targets_name_regexp_, + recomputation_targets_name_prefix_, optimized_graph, item); GrapplerItem optimized_item(item, std::move(*optimized_graph)); diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index 62ab969848..c3dd0c45c6 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -27,14 +27,14 @@ class MemoryOptimizer : public GraphOptimizer { public: // optimization_level: Controls the level of autonomy for the memory // optimizer. See RewriterConfig::memory_optimization. - // recomputation_targets_name_regxp: Name regxp for potential outputs of + // recomputation_targets_name_prefix: Name prefix for potential outputs of // recomputations. 
See - // RewriterConfig::memory_optimizer_target_node_name_regxp. + // RewriterConfig::memory_optimizer_target_node_name_prefix. explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_regexp = "gradients/") + const string& recomputation_targets_name_prefix = "gradients/") : optimization_level_(optimization_level), - recomputation_targets_name_regexp_(recomputation_targets_name_regexp) {} + recomputation_targets_name_prefix_(recomputation_targets_name_prefix) {} ~MemoryOptimizer() override {} string name() const override { return "memory_optimizer"; }; @@ -47,7 +47,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; - string recomputation_targets_name_regexp_; + string recomputation_targets_name_prefix_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 979f3e7161..72d7b94dc8 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -119,7 +119,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, std::unique_ptr(new LayoutOptimizer())); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_regexp().empty()) { + if (cfg_.memory_optimizer_target_node_name_prefix().empty()) { optimizers.push_back(std::unique_ptr( // Use the default target node name prefix "gradients/" new MemoryOptimizer(cfg_.memory_optimization()))); @@ -127,7 +127,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back( std::unique_ptr(new MemoryOptimizer( cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_regexp()))); + cfg_.memory_optimizer_target_node_name_prefix()))); } } if (cfg_.auto_parallel().enable()) { diff --git 
a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 63303fa968..9ebf217811 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -78,14 +78,16 @@ message RewriterConfig { // effect on manually requested memory optimization passes in the optimizers // field. MemOptType memory_optimization = 4; - // A regexp for node names which are valid outputs of recomputations. Inputs - // to nodes that match this regexp may be recomputed (subject either to manual + // The prefix for nodes which are valid outputs of recomputations. Inputs to + // nodes with this name prefix may be recomputed (subject either to manual // annotation of those input nodes or to manual annotation and heuristics - // depending on memory_optimization), but the nodes themselves will not be - // recomputed. This is a prefix match, meaning it matches any node name that - // contains a prefix that matches this regexp. Defaults to "gradients/" if - // not provided, but can be changed if used within scopes. - string memory_optimizer_target_node_name_regexp = 6; + // depending on memory_optimization), but the prefixed nodes themselves will + // not be recomputed. Typically this will be "gradients/", indicating that + // activations from the forward pass of a graph may be recomputed as inputs to + // gradients, but may be adjusted if gradients are inside a name scope or if + // inputs to non-gradients should be recomputed. Defaults to "gradients/" if + // empty or not set. + string memory_optimizer_target_node_name_prefix = 6; // Configures AutoParallel optimization passes either through the // meta-optimizer or when manually specified through the optimizers field. 
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 58d3c1e85f..948911f099 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -162,34 +162,7 @@ class MemoryOptimizerRecomputeTest(test.TestCase): arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_regexp='optimizer/gradients/'), - original_metagraph) - self.assertGreater( - len(rewritten_graph_def.node), - len(original_metagraph.graph_def.node)) - self.assertEqual( - 0, - len([node for node in original_metagraph.graph_def.node - if 'Recomputed/' in node.name])) - self.assertEqual( - 20, # Two per layer - len([node for node in rewritten_graph_def.node - if 'Recomputed/' in node.name])) - - def testRewritingNameScopedGradientNamesRegexp(self): - """Tests that rewriting occurs with non-standard gradient names.""" - (original_metagraph, _, _, _) = self._GetMetaGraph( - optimizer_scope_name='foo/bar') - rewritten_graph_def = tf_optimizer.OptimizeGraph( - rewriter_config_pb2.RewriterConfig( - disable_model_pruning=True, - constant_folding=rewriter_config_pb2.RewriterConfig.OFF, - dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, - layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, - arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, - memory_optimization=rewriter_config_pb2.RewriterConfig. - RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_regexp='(.+/)gradients/'), + memory_optimizer_target_node_name_prefix='optimizer/gradients/'), original_metagraph) self.assertGreater( len(rewritten_graph_def.node), -- GitLab From 974822bcde764eb6a0b1498a575fdde7001aae15 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 2 Mar 2018 01:17:19 -0800 Subject: [PATCH 333/884] [XLA:GPU] Extract multiplication of complex numbers into a helper function. Also add helper functions for getting the real and the imaginary part of a complex number. PiperOrigin-RevId: 187593341 --- .../compiler/xla/service/gpu/ir_emitter.cc | 65 +++++++++++-------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc index a3df67a873..1e0db2821a 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include "tensorflow/core/platform/logging.h" // IWYU pragma: no_include "llvm/IR/Intrinsics.gen.inc" @@ -438,6 +439,32 @@ Status IrEmitter::HandleSelect(HloInstruction* select) { return IrEmitter::DefaultAction(select); } +namespace { +llvm::Value* Real(llvm::Value* x, llvm::IRBuilder<>* ir_builder) { + return ir_builder->CreateExtractValue(x, {0}); +} + +llvm::Value* Imag(llvm::Value* x, llvm::IRBuilder<>* ir_builder) { + return ir_builder->CreateExtractValue(x, {1}); +} + +std::pair MultiplyComplex( + llvm::Value* lhs_value, llvm::Value* rhs_value, + llvm::IRBuilder<>* ir_builder) { + llvm::Value* lhs_real = Real(lhs_value, ir_builder); + llvm::Value* lhs_imag = Imag(lhs_value, ir_builder); + llvm::Value* rhs_real = Real(rhs_value, ir_builder); + llvm::Value* rhs_imag = Imag(rhs_value, ir_builder); + llvm::Value* real_result1 = ir_builder->CreateFMul(lhs_real, rhs_real); + llvm::Value* real_result2 = ir_builder->CreateFMul(lhs_imag, rhs_imag); + llvm::Value* real_result = ir_builder->CreateFSub(real_result1, real_result2); + llvm::Value* imag_result1 = ir_builder->CreateFMul(lhs_real, rhs_imag); + llvm::Value* imag_result2 = ir_builder->CreateFMul(lhs_imag, rhs_real); + llvm::Value* imag_result = ir_builder->CreateFAdd(imag_result1, 
imag_result2); + return {real_result, imag_result}; +} +} // namespace + Status IrEmitter::HandleDot(HloInstruction* dot) { auto lhs_instruction = dot->operand(0); auto rhs_instruction = dot->operand(1); @@ -456,21 +483,10 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { rhs_array.EmitReadArrayElement(/*index=*/{}, &ir_builder_); llvm::Value* result; if (ShapeUtil::ElementIsComplex(lhs_shape)) { - auto real = [&](llvm::Value* x) { - return ir_builder_.CreateExtractValue(x, {0}); - }; - auto imag = [&](llvm::Value* x) { - return ir_builder_.CreateExtractValue(x, {1}); - }; - llvm::Value* real_result = ir_builder_.CreateFSub( - ir_builder_.CreateFMul(real(lhs_value), real(rhs_value)), - ir_builder_.CreateFMul(imag(lhs_value), imag(rhs_value))); - llvm::Value* imag_result = ir_builder_.CreateFAdd( - ir_builder_.CreateFMul(real(lhs_value), imag(rhs_value)), - ir_builder_.CreateFMul(imag(lhs_value), real(rhs_value))); + auto value = MultiplyComplex(lhs_value, rhs_value, &ir_builder_); result = llvm::ConstantAggregateZero::get(lhs_array.GetElementLlvmType()); - result = ir_builder_.CreateInsertValue(result, real_result, {0}); - result = ir_builder_.CreateInsertValue(result, imag_result, {1}); + result = ir_builder_.CreateInsertValue(result, value.first, {0}); + result = ir_builder_.CreateInsertValue(result, value.second, {1}); } else { result = ir_builder_.CreateFMul(lhs_value, rhs_value); } @@ -548,20 +564,13 @@ Status IrEmitter::HandleDot(HloInstruction* dot) { llvm::Value* accum = ir_builder_.CreateLoad(accum_address); llvm::Value* updated_accum; if (ShapeUtil::ElementIsComplex(lhs_shape)) { -#define REAL(x) ir_builder_.CreateExtractValue(x, {0}) -#define IMAG(x) ir_builder_.CreateExtractValue(x, {1}) - llvm::Value* product_real = ir_builder_.CreateFSub( - ir_builder_.CreateFMul(REAL(lhs_element), REAL(rhs_element)), - ir_builder_.CreateFMul(IMAG(lhs_element), IMAG(rhs_element))); - llvm::Value* product_imag = ir_builder_.CreateFAdd( - 
ir_builder_.CreateFMul(REAL(lhs_element), IMAG(rhs_element)), - ir_builder_.CreateFMul(IMAG(lhs_element), REAL(rhs_element))); - updated_accum = ir_builder_.CreateInsertValue( - accum, ir_builder_.CreateFAdd(REAL(accum), product_real), {0}); - updated_accum = ir_builder_.CreateInsertValue( - updated_accum, ir_builder_.CreateFAdd(IMAG(accum), product_imag), {1}); -#undef IMAG -#undef REAL + auto value = MultiplyComplex(lhs_element, rhs_element, &ir_builder_); + llvm::Value* accum_real = Real(accum, &ir_builder_); + llvm::Value* real_sum = ir_builder_.CreateFAdd(accum_real, value.first); + updated_accum = ir_builder_.CreateInsertValue(accum, real_sum, {0}); + llvm::Value* accum_imag = Imag(accum, &ir_builder_); + llvm::Value* imag_sum = ir_builder_.CreateFAdd(accum_imag, value.second); + updated_accum = ir_builder_.CreateInsertValue(updated_accum, imag_sum, {1}); } else { llvm::Value* product = ir_builder_.CreateFMul(lhs_element, rhs_element); updated_accum = ir_builder_.CreateFAdd(accum, product); -- GitLab From 353dbff0cbabe8d8b38530b13669271b4d047c9b Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 2 Mar 2018 01:48:59 -0800 Subject: [PATCH 334/884] Java: Update to 1.6.0 PiperOrigin-RevId: 187595636 --- tensorflow/java/maven/libtensorflow/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni/pom.xml | 2 +- tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml | 2 +- tensorflow/java/maven/pom.xml | 2 +- tensorflow/java/maven/proto/pom.xml | 2 +- tensorflow/java/maven/tensorflow/pom.xml | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/java/maven/libtensorflow/pom.xml b/tensorflow/java/maven/libtensorflow/pom.xml index d35bb41112..1c84eae540 100644 --- a/tensorflow/java/maven/libtensorflow/pom.xml +++ b/tensorflow/java/maven/libtensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ libtensorflow diff --git a/tensorflow/java/maven/libtensorflow_jni/pom.xml b/tensorflow/java/maven/libtensorflow_jni/pom.xml 
index d9ba1bbbfb..cf1a7b6c9c 100644 --- a/tensorflow/java/maven/libtensorflow_jni/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ libtensorflow_jni diff --git a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml index f6f532c2c1..b202dcd5c7 100644 --- a/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml +++ b/tensorflow/java/maven/libtensorflow_jni_gpu/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ libtensorflow_jni_gpu diff --git a/tensorflow/java/maven/pom.xml b/tensorflow/java/maven/pom.xml index 0a6b3d23d7..606805ff33 100644 --- a/tensorflow/java/maven/pom.xml +++ b/tensorflow/java/maven/pom.xml @@ -6,7 +6,7 @@ 4.0.0 org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 pom https://www.tensorflow.org diff --git a/tensorflow/java/maven/proto/pom.xml b/tensorflow/java/maven/proto/pom.xml index 1d8e872373..c6bba4e536 100644 --- a/tensorflow/java/maven/proto/pom.xml +++ b/tensorflow/java/maven/proto/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ proto diff --git a/tensorflow/java/maven/tensorflow/pom.xml b/tensorflow/java/maven/tensorflow/pom.xml index 5c1b55085c..a22663f9f3 100644 --- a/tensorflow/java/maven/tensorflow/pom.xml +++ b/tensorflow/java/maven/tensorflow/pom.xml @@ -6,7 +6,7 @@ org.tensorflow parentpom - 1.6.0-rc1 + 1.6.0 ../ tensorflow -- GitLab From 2d3e25245ec4dc2b791212b65b17a7ff4051dfe3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 05:50:55 -0800 Subject: [PATCH 335/884] Add support to convert ResourceVariables of graphs into constants. This involves a change to the implementation of convert_variables_to_constants. 
PiperOrigin-RevId: 187610062 --- tensorflow/python/BUILD | 1 + .../python/framework/graph_util_impl.py | 18 ++- .../python/framework/graph_util_test.py | 106 ++++++++++-------- 3 files changed, 76 insertions(+), 49 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index b0cb48c80c..fbdf15a69f 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3654,6 +3654,7 @@ py_test( ":framework_for_generated_wrappers", ":math_ops", ":state_ops_gen", + ":variable_scope", ":variables", "//tensorflow/core:protos_all_py", ], diff --git a/tensorflow/python/framework/graph_util_impl.py b/tensorflow/python/framework/graph_util_impl.py index 5a543317e6..910364364c 100644 --- a/tensorflow/python/framework/graph_util_impl.py +++ b/tensorflow/python/framework/graph_util_impl.py @@ -235,7 +235,7 @@ def convert_variables_to_constants(sess, variable_names = [] variable_dict_names = [] for node in inference_graph.node: - if node.op in ["Variable", "VariableV2"]: + if node.op in ["Variable", "VariableV2", "VarHandleOp"]: variable_name = node.name if ((variable_names_whitelist is not None and variable_name not in variable_names_whitelist) or @@ -243,7 +243,10 @@ def convert_variables_to_constants(sess, variable_name in variable_names_blacklist)): continue variable_dict_names.append(variable_name) - variable_names.append(variable_name + ":0") + if node.op == "VarHandleOp": + variable_names.append(variable_name + "/Read/ReadVariableOp:0") + else: + variable_names.append(variable_name + ":0") if variable_names: returned_variables = sess.run(variable_names) else: @@ -266,6 +269,17 @@ def convert_variables_to_constants(sess, tensor=tensor_util.make_tensor_proto( data, dtype=dtype.type, shape=data.shape))) how_many_converted += 1 + elif input_node.op == "ReadVariableOp" and ( + input_node.input[0] in found_variables): + # The preceding branch converts all VarHandleOps of ResourceVariables to + # constants, so we need to convert the associated 
ReadVariableOps to + # Identity ops. + output_node.op = "Identity" + output_node.name = input_node.name + output_node.input.extend([input_node.input[0]]) + output_node.attr["T"].CopyFrom(input_node.attr["dtype"]) + if "_class" in input_node.attr: + output_node.attr["_class"].CopyFrom(input_node.attr["_class"]) else: output_node.CopyFrom(input_node) output_graph_def.node.extend([output_node]) diff --git a/tensorflow/python/framework/graph_util_test.py b/tensorflow/python/framework/graph_util_test.py index 1cdd738198..b618152b02 100644 --- a/tensorflow/python/framework/graph_util_test.py +++ b/tensorflow/python/framework/graph_util_test.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import tensor_util from tensorflow.python.ops import gen_state_ops from tensorflow.python.ops import math_ops # pylint: disable=unused-import from tensorflow.python.ops import math_ops as math_ops_lib +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -226,52 +227,62 @@ class DeviceFunctionsTest(test.TestCase): constant_graph_def.library) def testConvertVariablesToConsts(self): - with ops.Graph().as_default(): - variable_node = variables.Variable(1.0, name="variable_node") - _ = variables.Variable(1.0, name="unused_variable_node") - output_node = math_ops_lib.multiply( - variable_node, 2.0, name="output_node") - with session.Session() as sess: - init = variables.initialize_variables([variable_node]) - sess.run(init) - output = sess.run(output_node) - self.assertNear(2.0, output, 0.00001) - variable_graph_def = sess.graph.as_graph_def() - # First get the constant_graph_def when variable_names_whitelist is set, - # note that if variable_names_whitelist is not set an error will be - # thrown because unused_variable_node is not initialized. 
- constant_graph_def = graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node"], - variable_names_whitelist=set(["variable_node"])) + self._test_variable_to_const_conversion(use_resource=False) - # Then initialize the unused variable, and get another - # constant_graph_def when variable_names_whitelist is not set. - sess.run(variables.global_variables_initializer()) - constant_graph_def_without_variable_whitelist = ( - graph_util.convert_variables_to_constants(sess, variable_graph_def, - ["output_node"])) - - # The unused variable should be cleared so the two graphs should be - # equivalent. - self.assertEqual( - str(constant_graph_def), - str(constant_graph_def_without_variable_whitelist)) - - # Test variable name black list. This should result in the variable not - # being a const. - sess.run(variables.global_variables_initializer()) - constant_graph_def_with_blacklist = ( - graph_util.convert_variables_to_constants( - sess, - variable_graph_def, ["output_node"], - variable_names_blacklist=set(["variable_node"]))) - variable_node = None - for node in constant_graph_def_with_blacklist.node: - if node.name == "variable_node": - variable_node = node - self.assertIsNotNone(variable_node) - self.assertEqual(variable_node.op, "VariableV2") + def testConvertResourceVariablesToConsts(self): + self._test_variable_to_const_conversion(use_resource=True) + + def _test_variable_to_const_conversion(self, use_resource): + with ops.Graph().as_default(): + with variable_scope.variable_scope("", use_resource=use_resource): + variable_node = variable_scope.get_variable( + "variable_node", initializer=1.0) + another_variable = variable_scope.get_variable( + "unused_variable_node", initializer=1.0) + output_node = math_ops_lib.multiply( + variable_node, 2.0, name="output_node") + with session.Session() as sess: + sess.run(variable_node.initializer) + output = sess.run(output_node) + self.assertNear(2.0, output, 0.00001) + variable_graph_def = 
sess.graph.as_graph_def() + # First get the constant_graph_def when variable_names_whitelist is + # set, note that if variable_names_whitelist is not set an error will + # be thrown because unused_variable_node is not initialized. + constant_graph_def = graph_util.convert_variables_to_constants( + sess, + variable_graph_def, ["output_node"], + variable_names_whitelist=set(["variable_node"])) + + # Then initialize the unused variable, and get another + # constant_graph_def when variable_names_whitelist is not set. + sess.run(another_variable.initializer) + constant_graph_def_without_variable_whitelist = ( + graph_util.convert_variables_to_constants( + sess, variable_graph_def, ["output_node"])) + + # The unused variable should be cleared so the two graphs should be + # equivalent. + self.assertEqual( + str(constant_graph_def), + str(constant_graph_def_without_variable_whitelist)) + + # Test variable name black list. This should result in the variable + # not being a const. + constant_graph_def_with_blacklist = ( + graph_util.convert_variables_to_constants( + sess, + variable_graph_def, ["output_node"], + variable_names_blacklist=set(["variable_node"]))) + variable_node = None + for node in constant_graph_def_with_blacklist.node: + if node.name == "variable_node": + variable_node = node + self.assertIsNotNone(variable_node) + if use_resource: + self.assertEqual(variable_node.op, "VarHandleOp") + else: + self.assertEqual(variable_node.op, "VariableV2") # Now we make sure the variable is now a constant, and that the graph still # produces the expected result. 
@@ -279,8 +290,9 @@ class DeviceFunctionsTest(test.TestCase): _ = importer.import_graph_def(constant_graph_def, name="") self.assertEqual(4, len(constant_graph_def.node)) for node in constant_graph_def.node: - self.assertNotEqual("Variable", node.op) - self.assertNotEqual("VariableV2", node.op) + self.assertNotIn( + node.op, + ["Variable", "VariableV2", "VarHandleOp", "ReadVariableOp"]) with session.Session() as sess: output_node = sess.graph.get_tensor_by_name("output_node:0") output = sess.run(output_node) -- GitLab From 95be42c41c77aed8dd811398332687f45105c926 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 10:18:40 -0500 Subject: [PATCH 336/884] Remove underscore prefix from gen_array_ops._unique_with_counts --- tensorflow/python/ops/array_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index e537787398..e0bcac0641 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -1326,10 +1326,10 @@ def unique_with_counts(x, out_idx=dtypes.int32, name=None): # period (3 weeks) pass. # TODO(yongtang): The documentation should also # be updated when switch to v2. 
- return gen_array_ops._unique_with_counts(x, out_idx, name) + return gen_array_ops.unique_with_counts(x, out_idx, name) -unique_with_counts.__doc__ = gen_array_ops._unique_with_counts.__doc__ +unique_with_counts.__doc__ = gen_array_ops.unique_with_counts.__doc__ @tf_export("split") -- GitLab From 7b7ce88a073530dd3ea6ec5ee329fb45dd64b06b Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 10:32:27 -0500 Subject: [PATCH 337/884] Remove underscore prefix from gen_array_ops._unique_with_counts_v2 --- tensorflow/python/kernel_tests/unique_op_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/kernel_tests/unique_op_test.py b/tensorflow/python/kernel_tests/unique_op_test.py index 3c9650ef6e..bbc040dc13 100644 --- a/tensorflow/python/kernel_tests/unique_op_test.py +++ b/tensorflow/python/kernel_tests/unique_op_test.py @@ -137,10 +137,10 @@ class UniqueWithCountsTest(test.TestCase): for dtype in [np.int32, np.int64]: x = np.array([[1, 0, 0], [1, 0, 0], [2, 0, 0]]) with self.test_session() as sess: - y0, idx0, count0 = gen_array_ops._unique_with_counts_v2( + y0, idx0, count0 = gen_array_ops.unique_with_counts_v2( x, axis=np.array([0], dtype)) tf_y0, tf_idx0, tf_count0 = sess.run([y0, idx0, count0]) - y1, idx1, count1 = gen_array_ops._unique_with_counts_v2( + y1, idx1, count1 = gen_array_ops.unique_with_counts_v2( x, axis=np.array([1], dtype)) tf_y1, tf_idx1, tf_count1 = sess.run([y1, idx1, count1]) self.assertAllEqual(tf_y0, np.array([[1, 0, 0], [2, 0, 0]])) @@ -155,7 +155,7 @@ class UniqueWithCountsTest(test.TestCase): # by default, the axis will be wrapped to allow `axis=None`. 
x = np.random.randint(2, high=10, size=7000) with self.test_session() as sess: - y, idx, count = gen_array_ops._unique_with_counts_v2( + y, idx, count = gen_array_ops.unique_with_counts_v2( x, axis=np.array([], np.int32)) tf_y, tf_idx, tf_count = sess.run([y, idx, count]) -- GitLab From 60740a489475365815c50d5b0d3c352d420454ab Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 08:20:27 -0800 Subject: [PATCH 338/884] Eliminate the creation of unnecessary read ops when working with ResourceVariables. In particular: 1. Don't create additional read ops when creating a ResourceVariable from a VariableDef proto. 2. Expose the ability to assign a ResourceVariable without reading & returning the new value. 3. Colocating with a ResourceVariable's ".op" property eliminates the creation of additional read ops. 4. Savers can read a variable's value using the _graph_element property, since these reads don't need control dependencies. This makes the visualization of graphs on TensorBoard much nicer. 
PiperOrigin-RevId: 187622122 --- tensorflow/contrib/framework/BUILD | 1 + tensorflow/python/BUILD | 1 + .../python/framework/meta_graph_test.py | 14 ---- .../resource_variable_ops_test.py | 45 ++++++++++ .../python/ops/resource_variable_ops.py | 82 +++++++++++++++---- .../python/training/checkpoint_utils.py | 9 +- .../python/training/checkpoint_utils_test.py | 26 ++++++ tensorflow/python/training/saver.py | 10 ++- tensorflow/python/training/saver_test.py | 18 ++++ 9 files changed, 171 insertions(+), 35 deletions(-) diff --git a/tensorflow/contrib/framework/BUILD b/tensorflow/contrib/framework/BUILD index 50868c6d6c..ac043fda06 100644 --- a/tensorflow/contrib/framework/BUILD +++ b/tensorflow/contrib/framework/BUILD @@ -62,6 +62,7 @@ tf_custom_op_py_library( "//tensorflow/python:math_ops", "//tensorflow/python:platform", "//tensorflow/python:pywrap_tensorflow", + "//tensorflow/python:resource_variable_ops", "//tensorflow/python:script_ops", "//tensorflow/python:smart_cond", "//tensorflow/python:sparse_tensor", diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index fbdf15a69f..cb54cebf0f 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3954,6 +3954,7 @@ py_test( ":partitioned_variables", ":platform", ":pywrap_tensorflow", + ":resource_variable_ops", ":state_ops", ":training", ":variable_scope", diff --git a/tensorflow/python/framework/meta_graph_test.py b/tensorflow/python/framework/meta_graph_test.py index 19dcd6a1b3..21963d0bee 100644 --- a/tensorflow/python/framework/meta_graph_test.py +++ b/tensorflow/python/framework/meta_graph_test.py @@ -905,20 +905,6 @@ class ExportImportAcrossScopesTest(test.TestCase): with variable_scope.variable_scope("importA/keepA"): graph_fn(use_resource=use_resource) - if use_resource: - # Bringing in collections that contain ResourceVariables will adds ops - # to the graph the first time a variable is encountered, so mimic the - # same behavior. 
- seen_variables = set() - for collection_key in sorted([ - ops.GraphKeys.GLOBAL_VARIABLES, - ops.GraphKeys.TRAINABLE_VARIABLES, - ]): - for var in expected_graph.get_collection(collection_key): - if var not in seen_variables: - var._read_variable_op() - seen_variables.add(var) - result = meta_graph.export_scoped_meta_graph(graph=imported_graph)[0] expected = meta_graph.export_scoped_meta_graph(graph=expected_graph)[0] diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 8503f3e031..71699fe0ad 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -277,6 +277,20 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.evaluate(v.assign(2.0)) self.assertEqual(2.0, self.evaluate(v.value())) + # Tests for the 'read_value' argument: + assign_with_read = v.assign(3.0, read_value=True) + if context.in_graph_mode(): + self.assertEqual(3.0, assign_with_read.eval()) + else: + self.assertEqual(3.0, self.evaluate(assign_with_read)) + assign_without_read = v.assign(4.0, read_value=False) + if context.in_graph_mode(): + self.assertIsInstance(assign_without_read, ops.Operation) + else: + self.assertIsNone(assign_without_read) + self.evaluate(assign_without_read) + self.assertEqual(4.0, self.evaluate(v.value())) + @test_util.run_in_graph_and_eager_modes() def testLoad(self): v = resource_variable_ops.ResourceVariable(1.0, name="var0") @@ -329,6 +343,9 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): w = resource_variable_ops.ResourceVariable.from_proto(v.to_proto()) self.assertEquals(2, math_ops.add(w, 1).eval()) + self.assertEquals(v._handle, w._handle) + self.assertEquals(v._graph_element, w._graph_element) + @test_util.run_in_graph_and_eager_modes() def testAssignAddMethod(self): v = resource_variable_ops.ResourceVariable(1.0, name="var0") @@ -336,6 +353,20 @@ class 
ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.evaluate(v.assign_add(1.0)) self.assertEqual(2.0, self.evaluate(v.value())) + # Tests for the 'read_value' argument: + assign_with_read = v.assign_add(1.0, read_value=True) + if context.in_graph_mode(): + self.assertEqual(3.0, assign_with_read.eval()) + else: + self.assertEqual(3.0, self.evaluate(assign_with_read)) + assign_without_read = v.assign_add(1.0, read_value=False) + if context.in_graph_mode(): + self.assertIsInstance(assign_without_read, ops.Operation) + else: + self.assertIsNone(assign_without_read) + self.evaluate(assign_without_read) + self.assertEqual(4.0, self.evaluate(v.value())) + @test_util.run_in_graph_and_eager_modes() def testAssignSubMethod(self): v = resource_variable_ops.ResourceVariable(3.0, name="var0") @@ -343,6 +374,20 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.evaluate(v.assign_sub(1.0)) self.assertEqual(2.0, self.evaluate(v.value())) + # Tests for the 'read_value' argument: + assign_with_read = v.assign_sub(1.0, read_value=True) + if context.in_graph_mode(): + self.assertEqual(1.0, assign_with_read.eval()) + else: + self.assertEqual(1.0, self.evaluate(assign_with_read)) + assign_without_read = v.assign_sub(1.0, read_value=False) + if context.in_graph_mode(): + self.assertIsInstance(assign_without_read, ops.Operation) + else: + self.assertIsNone(assign_without_read) + self.evaluate(assign_without_read) + self.assertEqual(0.0, self.evaluate(v.value())) + @test_util.run_in_graph_and_eager_modes() def testDestroyResource(self): v = resource_variable_ops.ResourceVariable(3.0, name="var0") diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 2d6d0672e0..bf186f1734 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -534,7 +534,8 @@ class ResourceVariable(variables.Variable): self._save_slice_info = None self._caching_device = None 
self._dtype = dtypes.as_dtype(self._handle.op.get_attr("dtype")) - self._graph_element = self.value() + self._graph_element = g.get_tensor_by_name( + self._handle.op.name + "/Read/ReadVariableOp:0") self._constraint = None def __nonzero__(self): @@ -788,20 +789,52 @@ class ResourceVariable(variables.Variable): __array_priority__ = 100 - def assign_sub(self, delta, use_locking=None, name=None): + def assign_sub(self, delta, use_locking=None, name=None, read_value=True): + """Subtracts a value from this variable. + + Args: + delta: A `Tensor`. The value to subtract from this variable. + use_locking: If `True`, use locking during the operation. + name: The name to use for the operation. + read_value: A `bool`. Whether to read and return the new value of the + variable or not. + + Returns: + If `read_value` is `True`, this method will return the new value of the + variable after the assignment has completed. Otherwise, when in graph mode + it will return the `Operation` that does the assignment, and when in eager + mode it will return `None`. + """ # TODO(apassos): this here and below is not atomic. Consider making it # atomic if there's a way to do so without a performance cost for those who # don't need it. - return self._lazy_read(gen_resource_variable_ops.assign_sub_variable_op( - self.handle, - ops.convert_to_tensor(delta, dtype=self.dtype), - name=name)) + assign_sub_op = gen_resource_variable_ops.assign_sub_variable_op( + self.handle, ops.convert_to_tensor(delta, dtype=self.dtype), name=name) + if read_value: + return self._lazy_read(assign_sub_op) + return assign_sub_op + + def assign_add(self, delta, use_locking=None, name=None, read_value=True): + """Adds a value to this variable. + + Args: + delta: A `Tensor`. The value to add to this variable. + use_locking: If `True`, use locking during the operation. + name: The name to use for the operation. + read_value: A `bool`. Whether to read and return the new value of the + variable or not. 
- def assign_add(self, delta, use_locking=None, name=None): - return self._lazy_read(gen_resource_variable_ops.assign_add_variable_op( - self.handle, - ops.convert_to_tensor(delta, dtype=self.dtype), - name=name)) + Returns: + If `read_value` is `True`, this method will return the new value of the + variable after the assignment has completed. Otherwise, when in graph mode + it will return the `Operation` that does the assignment, and when in eager + mode it will return `None`. + """ + assign_add_op = gen_resource_variable_ops.assign_add_variable_op( + self.handle, ops.convert_to_tensor(delta, dtype=self.dtype), name=name) + if read_value: + return self._lazy_read(assign_add_op) + return assign_add_op def _lazy_read(self, op): if hasattr(self, "_trainable") and self._trainable: @@ -811,14 +844,29 @@ class ResourceVariable(variables.Variable): self._in_graph_mode, self._handle_deleter if not self._in_graph_mode else None, op) - def assign(self, value, use_locking=None, name=None): + def assign(self, value, use_locking=None, name=None, read_value=True): + """Assigns a new value to this variable. + + Args: + value: A `Tensor`. The new value for this variable. + use_locking: If `True`, use locking during the assignment. + name: The name to use for the assignment. + read_value: A `bool`. Whether to read and return the new value of the + variable or not. + + Returns: + If `read_value` is `True`, this method will return the new value of the + variable after the assignment has completed. Otherwise, when in graph mode + it will return the `Operation` that does the assignment, and when in eager + mode it will return `None`. 
+ """ value_tensor = ops.convert_to_tensor(value, dtype=self.dtype) self._shape.assert_is_compatible_with(value_tensor.shape) - return self._lazy_read( - gen_resource_variable_ops.assign_variable_op( - self.handle, - value_tensor, - name=name)) + assign_op = gen_resource_variable_ops.assign_variable_op( + self.handle, value_tensor, name=name) + if read_value: + return self._lazy_read(assign_op) + return assign_op def _strided_slice_assign(self, begin, end, strides, value, name, begin_mask, end_mask, ellipsis_mask, new_axis_mask, diff --git a/tensorflow/python/training/checkpoint_utils.py b/tensorflow/python/training/checkpoint_utils.py index 0af1cdecfa..52d092bc22 100644 --- a/tensorflow/python/training/checkpoint_utils.py +++ b/tensorflow/python/training/checkpoint_utils.py @@ -23,6 +23,7 @@ import six from tensorflow.python import pywrap_tensorflow from tensorflow.python.framework import ops from tensorflow.python.ops import io_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables @@ -289,10 +290,14 @@ def _set_checkpoint_initializer(variable, name: Name of the operation. 
""" base_type = variable.dtype.base_dtype - with ops.colocate_with(variable): + with ops.colocate_with(variable.op): restore_op = io_ops.restore_v2( ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0] - variable._initializer_op = state_ops.assign(variable, restore_op) # pylint:disable=protected-access + if isinstance(variable, resource_variable_ops.ResourceVariable): + init_op = variable.assign(restore_op, read_value=False) + else: + init_op = state_ops.assign(variable, restore_op) + variable._initializer_op = init_op # pylint:disable=protected-access restore_op.set_shape(variable.shape) variable._initial_value = restore_op # pylint:disable=protected-access diff --git a/tensorflow/python/training/checkpoint_utils_test.py b/tensorflow/python/training/checkpoint_utils_test.py index a461b24cbb..640bd665cb 100644 --- a/tensorflow/python/training/checkpoint_utils_test.py +++ b/tensorflow/python/training/checkpoint_utils_test.py @@ -26,6 +26,7 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.framework import ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -362,6 +363,31 @@ class CheckpointsTest(test.TestCase): checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"useful_scope": "some_scope/"}) + def testNoAdditionalReadOpsForResourceVariables(self): + checkpoint_dir = self.get_temp_dir() + with self.test_session() as session: + v1, _, _, _ = _create_checkpoints(session, checkpoint_dir) + + # New graph and session. 
+ with ops.Graph().as_default() as g: + with self.test_session(graph=g) as session: + my1 = resource_variable_ops.ResourceVariable([[0.0] * 10], name="my1") + + with ops.name_scope("init_from_checkpoint"): + checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"var1": my1}) + + # Basic sanity checks: + session.run(variables.global_variables_initializer()) + self.assertAllEqual(session.run(my1), v1) + + ops_in_init_from_checkpoint_scope = [ + op for op in g.get_operations() + if (op.name.startswith("init_from_checkpoint/") and + not op.name.startswith("init_from_checkpoint/checkpoint_initializer" + ) and op.type != "AssignVariableOp") + ] + self.assertEqual(ops_in_init_from_checkpoint_scope, []) + if __name__ == "__main__": test.main() diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index e8ea5abfbd..6c80562968 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -584,7 +584,10 @@ class BaseSaverBuilder(object): else: if context.in_graph_mode(): if convert_variable_to_tensor: - var = ops.internal_convert_to_tensor(var, as_ref=True) + if isinstance(var, resource_variable_ops.ResourceVariable): + var = var._graph_element # pylint: disable=protected-access + else: + var = ops.internal_convert_to_tensor(var, as_ref=True) if not BaseSaverBuilder._IsVariable(var): raise TypeError("Variable to save is not a Variable: %s" % var) if var.op.type == "ReadVariableOp": @@ -674,7 +677,10 @@ class BaseSaverBuilder(object): "mode is enabled, type: %s." 
% type(op)) saveable = BaseSaverBuilder.ResourceVariableSaveable(op, "", name) else: - variable = ops.internal_convert_to_tensor(op, as_ref=True) + if isinstance(op, resource_variable_ops.ResourceVariable): + variable = op._graph_element # pylint: disable=protected-access + else: + variable = ops.internal_convert_to_tensor(op, as_ref=True) if not BaseSaverBuilder._IsVariable(variable): raise TypeError("names_to_saveables must be a dict mapping string " "names to Tensors/Variables. Not a variable: %s" % diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index b758ceaab0..7947765449 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -262,6 +262,24 @@ class SaverTest(test.TestCase): save2.restore(sess, save_path) self.assertEquals(self.evaluate(v), [1]) + def testNoAdditionalOpsAddedBySaverForResourceVariablesOutsideSaveScope(self): + with ops_lib.Graph().as_default() as g: + v = resource_variable_ops.ResourceVariable(1.0, name="v") + with ops_lib.name_scope("saver1"): + saver_module.Saver() + with ops_lib.name_scope("saver2"): + saver_module.Saver({"name": v}) + ops_in_saver1_scope_but_not_save_scope = [ + op for op in g.get_operations() + if (op.name.startswith("saver1/") and + not op.name.startswith("saver1/save/"))] + self.assertEqual(ops_in_saver1_scope_but_not_save_scope, []) + ops_in_saver2_scope_but_not_save_scope = [ + op for op in g.get_operations() + if (op.name.startswith("saver2/") and + not op.name.startswith("saver2/save/"))] + self.assertEqual(ops_in_saver2_scope_but_not_save_scope, []) + def testSaveCopyRestoreWithSaveRelativePaths(self): """Save, copy checkpoint dir and restore from copied dir. 
-- GitLab From 84fe908258550e1ce27e8725de1e2af279479c9d Mon Sep 17 00:00:00 2001 From: Minmin Sun Date: Sat, 3 Mar 2018 00:26:31 +0800 Subject: [PATCH 339/884] =?UTF-8?q?Add=20LINM=20(Loop=20Invariant=20Node?= =?UTF-8?q?=20Motion)=20optimization=20pass=20in=20GraphOptim=E2=80=A6=20(?= =?UTF-8?q?#16306)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add Loop Invariant Node Motion optimization in grappler * linm: disable loop optimizations by default, remove includes not needed from loop_optimizer_test.cc * remove redundant lines after merging with master * LINM: a minor change in BUILD to fix gen_ci_sanity_out failure, and remove 'No newline at end of file' warning --- tensorflow/core/grappler/optimizers/BUILD | 2 + .../grappler/optimizers/loop_optimizer.cc | 382 +++++++++++++- .../core/grappler/optimizers/loop_optimizer.h | 26 + .../optimizers/loop_optimizer_test.cc | 489 +++++++++++++++++- .../grappler/optimizers/meta_optimizer.cc | 8 +- 5 files changed, 901 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index a52d1c8df2..0a72a68a66 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -480,6 +480,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ + ":constant_folding", ":graph_optimizer", "//tensorflow/core:framework", "//tensorflow/core:lib", @@ -489,6 +490,7 @@ cc_library( "//tensorflow/core/grappler:op_types", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/costs:graph_properties", + "//tensorflow/core/grappler/utils:frame", ], ) diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 102526e22f..0223930d74 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -15,23 +15,403 @@ limitations under the 
License. #include "tensorflow/core/grappler/optimizers/loop_optimizer.h" +#include +#include #include #include +#include +#include +#include "tensorflow/core/framework/attr_value.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/tensor_shape.pb.h" +#include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/tensor_coding.h" +#include "tensorflow/core/util/device_name_utils.h" +#include "tensorflow/core/util/saved_tensor_slice_util.h" + +using tensorflow::strings::StrCat; namespace tensorflow { namespace grappler { +Status LoopOptimizer::LINMHandleInvariantEnter(NodeDef* node, + const int num_outputs) { + auto consumers = node_map_->GetOutputs(node->name()); + std::vector enter_control_inputs; + string enter_input; + for (auto& input : node->input()) { + if (IsControlInput(input)) { + enter_control_inputs.push_back(input); + } else { + enter_input = input; + } + } + for (auto* consumer : consumers) { + if (invariant_nodes_.count(consumer)) { + for (int i = 0; i < consumer->input_size(); ++i) { + if (NodeName(consumer->input(i)) == node->name()) { + consumer->set_input(i, enter_input); + node_map_->AddOutput(NodeName(enter_input), consumer->name()); + node_map_->RemoveOutput(node->name(), consumer->name()); + } + } + for (auto& control_input : enter_control_inputs) { + consumer->add_input(control_input); + node_map_->AddOutput(NodeName(control_input), consumer->name()); + } + } + } + return Status::OK(); 
+} + +Status LoopOptimizer::LINMHandleConst(NodeDef* node, + const int num_outputs, const int frame_id) { + NodeDef* const_node; + if (num_outputs == 0) { + // all successor nodes are invariant + // Remove the control inputs from this frame to the const node, + // when moving it out of the frame (in parent frame) + const_node = node; + node_map_->RemoveInputs(node->name()); + node->clear_input(); + } else { + // some successor nodes are variant + // Have to keep the const node in the frame, + // so create a new one outside the frame (in parent frame) + const_node = optimized_graph_->add_node(); + const_node->set_name(AddPrefixToNodeName(node->name(), kLoopOptimizer)); + const_node->set_op("Const"); + const_node->set_device(node->device()); + *const_node->mutable_attr() = node->attr(); + node_map_->AddNode(const_node->name(), const_node); + auto consumers = node_map_->GetOutputs(node->name()); + for (auto* consumer : consumers) { + if (invariant_nodes_.count(consumer)) { + for (int i = 0; i < consumer->input_size(); ++i) { + if (NodeName(consumer->input(i)) == node->name()) { + if (IsControlInput(consumer->input(i))) { + *consumer->mutable_input(i) = AsControlDependency(*const_node); + } else { + *consumer->mutable_input(i) = const_node->name(); + } + node_map_->AddOutput(const_node->name(), consumer->name()); + node_map_->RemoveOutput(node->name(), consumer->name()); + } + } + } + } + } + // add a control input from the parent frame + auto parent_it = frame_parent_.find(frame_id); + if (parent_it != frame_parent_.end()) { + int parent_id = parent_it->second; + auto loop_cond_it = loop_cond_.find(parent_id); + if (loop_cond_it == loop_cond_.end()) { + return errors::InvalidArgument( + "Frame ", frame_id, " doesn't have a LoopCond node"); + } + auto& loop_cond_name = loop_cond_it->second->name(); + NodeDef* switch_node = nullptr; + for (auto* node : node_map_->GetOutputs(loop_cond_name)) { + if (node->op() == "Switch") { + switch_node = node; + break; + } + } + if 
(!switch_node) { + return errors::InvalidArgument( + "LoopCond node of Frame ", frame_id, + " doesn't connect to any Switch node"); + } + string switch_output = StrCat(switch_node->name(), ":1"); + const string ctrl_dep = ConstantFolding::AddControlDependency( + switch_output, optimized_graph_, node_map_.get()); + const_node->add_input(ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), const_node->name()); + } + return Status::OK(); +} + +Status LoopOptimizer::LINMHandleInvariantNode(NodeDef* node, + const int num_outputs, const int frame_id) { + // have to remove control inputs to the invariant node from the same frame + // when moving this node out of this frame + for (int i = 0; i < node->input_size(); ++i) { + if (IsControlInput(node->input(i))) { + node->mutable_input()->SwapElements(i, node->input_size() - 1); + node->mutable_input()->RemoveLast(); + } + } + if (num_outputs == 0) { + return Status::OK(); + } + + DataTypeVector input_types; + DataTypeVector output_types; + OpRegistryInterface* op_registry = OpRegistry::Global(); + const OpRegistrationData* op_reg_data = nullptr; + TF_RETURN_IF_ERROR( + op_registry->LookUp(node->op(), &op_reg_data)); + TF_RETURN_IF_ERROR( + InOutTypesForNode(*node, op_reg_data->op_def, + &input_types, &output_types)); + + auto consumers = node_map_->GetOutputs(node->name()); + string fname = invariant_enters_[frame_id][0]->attr().at("frame_name").s(); + int piterations = invariant_enters_[frame_id][0] + ->attr().at("parallel_iterations").i(); + for (auto* consumer : consumers) { + if (!invariant_nodes_.count(consumer)) { + for (int i = 0; i < consumer->input_size(); ++i) { + int port; + string node_name = ParseNodeName(consumer->input(i), &port); + if (node_name != node->name()) { + continue; + } + if (port < 0) { + return errors::InvalidArgument( + "Invariant node should not have control outputs " + "to variant node"); + } + DataType output_type = output_types[port]; + NodeDef* new_enter = optimized_graph_->add_node(); + 
new_enter->set_op("Enter"); + new_enter->set_device(node->device()); + new_enter->set_name(AddPrefixToNodeName( + StrCat(fname, "_enter_", new_enter_id_++), kLoopOptimizer)); + AttrValue data_type; + data_type.set_type(output_type); + new_enter->mutable_attr()->insert({"T", data_type}); + AttrValue frame_name; + frame_name.set_s(fname); + new_enter->mutable_attr()->insert({"frame_name", frame_name}); + AttrValue is_const; + is_const.set_b(true); + new_enter->mutable_attr()->insert({"is_constant", is_const}); + AttrValue parallel_iterations; + parallel_iterations.set_i(piterations); + new_enter->mutable_attr()->insert( + {"parallel_iterations", parallel_iterations}); + new_enter->add_input(consumer->input(i)); + *consumer->mutable_input(i) = new_enter->name(); + node_map_->AddNode(new_enter->name(), new_enter); + node_map_->AddOutput(node->name(), new_enter->name()); + node_map_->AddOutput(new_enter->name(), consumer->name()); + } + } + } + return Status::OK(); +} + +Status LoopOptimizer::MoveInvariantNodes(const int frame_id) { + for (auto iter = invariant_nodes_.begin(); + iter != invariant_nodes_.end(); ++iter) { + auto* invariant_node = iter->first; + const int num_outputs = iter->second; + if (IsEnter(*invariant_node)) { + TF_RETURN_IF_ERROR( + LINMHandleInvariantEnter(invariant_node, num_outputs)); + } else if (IsConstant(*invariant_node)) { + TF_RETURN_IF_ERROR( + LINMHandleConst(invariant_node, num_outputs, frame_id)); + } else { + TF_RETURN_IF_ERROR( + LINMHandleInvariantNode(invariant_node, num_outputs, frame_id)); + } + } + return Status::OK(); +} + +Status LoopOptimizer::RevertInvariantNodes() { + std::deque reverted_nodes; + for (auto iter=invariant_nodes_.begin(); iter != invariant_nodes_.end();) { + bool erased = false; + const auto* node = iter->first; + if (!IsConstant(*node) && !IsEnter(*node) && iter->second > 0) { + auto& consumers = node_map_->GetOutputs(node->name()); + for (auto* consumer : consumers) { + if (!invariant_nodes_.count(consumer)) 
{ + for (const auto& input : consumer->input()) { + if (IsControlInput(input) && NodeName(input) == node->name()) { + reverted_nodes.push_back(node); + invariant_nodes_.erase(iter++); + erased = true; + break; + } + } + if (erased) break; + } + } + } + if (!erased) ++iter; + } + while (!reverted_nodes.empty()) { + const auto* node = reverted_nodes.front(); + reverted_nodes.pop_front(); + std::set producers; + for (const auto& input : node->input()) { + auto* producer = node_map_->GetNode(input); + auto iter = invariant_nodes_.find(producer); + if (iter != invariant_nodes_.end()) { + if (IsControlInput(input) && + !IsConstant(*producer) && !IsEnter(*producer)) { + reverted_nodes.push_back(producer); + invariant_nodes_.erase(iter); + } else { + producers.insert(producer); + } + } + } + for (auto* producer : producers) { + auto iter = invariant_nodes_.find(producer); + if (iter != invariant_nodes_.end()) { + ++iter->second; + } + } + for (auto* consumer : node_map_->GetOutputs(node->name())) { + auto iter = invariant_nodes_.find(consumer); + if (iter != invariant_nodes_.end()) { + reverted_nodes.push_back(consumer); + invariant_nodes_.erase(iter); + } + } + } + return Status::OK(); +} + +Status LoopOptimizer::FindInvariantNodes(NodeDef* node) { + auto consumers = node_map_->GetOutputs(node->name()); + invariant_nodes_.insert(std::make_pair(node, consumers.size())); + for (auto* consumer : consumers) { + if (invariant_nodes_.count(consumer) || + ModifiesFrameInfo(*consumer)) { + continue; + } + bool is_invariant = true; + for (const auto& input : consumer->input()) { + if (!IsControlInput(input)) { + const auto& name = NodeName(input); + auto* producer = node_map_->GetNode(name); + if (!invariant_nodes_.count(producer)) { + if (IsConstant(*producer)) { + invariant_nodes_.insert( + std::make_pair(producer, node_map_->GetOutputs(name).size())); + } else { + is_invariant = false; + break; + } + } + } + } + if (is_invariant) { + std::set producers; + for (const auto& input 
: consumer->input()) { + auto* producer = node_map_->GetNode(input); + producers.insert(producer); + } + for (auto* producer : producers) { + auto iter = invariant_nodes_.find(producer); + if (iter != invariant_nodes_.end()) { + --iter->second; + } + } + TF_RETURN_IF_ERROR(FindInvariantNodes(consumer)); + } + } + return Status::OK(); +} + +Status LoopOptimizer::LoopInvariantNodeMotion() { + std::deque worklist; + for (auto iter = frame_map_.begin(); iter != frame_map_.end(); ++iter) { + auto* node = iter->first; + auto& frame_ids = iter->second; + if (frame_ids.size() >= 3) { + for (unsigned int i = 1; i < frame_ids.size() - 1; ++i) { + frame_parent_[frame_ids[i]] = frame_ids[i - 1]; + frame_children_[frame_ids[i]].insert(frame_ids[i + 1]); + } + } + if (frame_ids.size() >= 2) { + frame_children_[frame_ids[0]].insert(frame_ids[1]); + frame_parent_[frame_ids.back()] = frame_ids[frame_ids.size() - 2]; + } + if (frame_ids.size() >= 1) { + frame_children_.insert(std::make_pair(frame_ids.back(), empty_set_)); + if (node->op() == "LoopCond") { + if (loop_cond_.count(frame_ids.back())) { + return errors::InvalidArgument( + "Loop ", frame_ids.back(), + " has more than one LoopCond node: ", node->name(), " and ", + loop_cond_[frame_ids.back()]->name()); + } + loop_cond_[frame_ids.back()] = node; + } + if (IsEnter(*node) && node->attr().at("is_constant").b()) { + invariant_enters_[frame_ids.back()].push_back( + const_cast(node)); + } + } + } + + for (auto it = frame_children_.begin(); it != frame_children_.end(); ++it) { + if (it->second.size() == 0) { + worklist.push_back(it->first); + } + } + + while (!worklist.empty()) { + int frame_id = worklist.front(); + new_enter_id_ = 0; + worklist.pop_front(); + auto parent_it = frame_parent_.find(frame_id); + if (parent_it != frame_parent_.end()) { + int parent_id = parent_it->second; + frame_children_[parent_id].erase(frame_id); + if (frame_children_[parent_id].size() == 0) { + worklist.push_back(parent_id); + } + } + + if 
(invariant_enters_[frame_id].empty()) { + continue; + } + invariant_nodes_.clear(); + for (auto* enter : invariant_enters_[frame_id]) { + TF_RETURN_IF_ERROR(FindInvariantNodes(enter)); + } + + // revert invariant nodes that have control outputs to variant nodes + TF_RETURN_IF_ERROR(RevertInvariantNodes()); + + TF_RETURN_IF_ERROR(MoveInvariantNodes(frame_id)); + } + return Status::OK(); +} + Status LoopOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { - *optimized_graph = item.graph; + optimized_graph_ = optimized_graph; + *optimized_graph_ = item.graph; + + // Set up helper data structures. + node_map_.reset(new NodeMap(optimized_graph_)); + int num_frames; + TF_RETURN_IF_ERROR(IdentifyFramesWithNodeMap(*optimized_graph_, *node_map_, + &frame_map_, &num_frames)); + + TF_RETURN_IF_ERROR(LoopInvariantNodeMotion()); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.h b/tensorflow/core/grappler/optimizers/loop_optimizer.h index 106d4628ae..b5944cd30b 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.h +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.h @@ -17,13 +17,17 @@ limitations under the License. 
#define TENSORFLOW_CORE_GRAPPLER_OPTIMIZERS_LOOP_OPTIMIZER_H_ #include +#include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/optimizers/graph_optimizer.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/frame.h" #include "tensorflow/core/protobuf/rewriter_config.pb.h" namespace tensorflow { namespace grappler { +constexpr char kLoopOptimizer[] = "LoopOptimizer"; + class LoopOptimizer : public GraphOptimizer { public: LoopOptimizer() : opt_level_(RewriterConfig::ON) {} @@ -40,7 +44,29 @@ class LoopOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: + Status LoopInvariantNodeMotion(); + Status FindInvariantNodes(NodeDef* node); + Status RevertInvariantNodes(); + Status MoveInvariantNodes(const int fname); + Status LINMHandleInvariantNode(NodeDef* node, const int num_outputs, + const int frame_id); + Status LINMHandleConst(NodeDef* node, const int num_outputs, + const int frame_id); + Status LINMHandleInvariantEnter(NodeDef* node, const int num_outputs); + + std::map invariant_nodes_; + std::set empty_set_; + std::map> frame_children_; + std::map frame_parent_; + std::map loop_cond_; + std::map> invariant_enters_; + int new_enter_id_; RewriterConfig::Toggle opt_level_; + + std::unique_ptr node_map_; + FrameMap frame_map_; + std::unique_ptr graph_properties_; + GraphDef* optimized_graph_; // Not owned. 
}; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index c09434f609..cc0432c3ed 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -26,7 +26,494 @@ namespace tensorflow { namespace grappler { namespace { -class LoopOptimizerTest : public ::testing::Test {}; +class LoopOptimizerTest : public ::testing::Test { + protected: + static NodeDef CreateNode(const string& name, + const std::vector& inputs) { + return CreateNode(name, "Identity", "", false, 0, inputs); + } + static NodeDef CreateNode(const string& name, const string& op, + const std::vector& inputs) { + return CreateNode(name, op, "", false, 0, inputs); + } + static NodeDef CreateNode(const string& name, const string& op, + const string& frame, + const bool is_constant, + const int piterations, + const std::vector& inputs) { + NodeDef node; + node.set_name(name); + if (!op.empty()) { + node.set_op(op); + } + if (!frame.empty()) { + AttrValue frame_name; + frame_name.set_s(frame); + node.mutable_attr()->insert({"frame_name", frame_name}); + } + if (op == "Enter") { + AttrValue is_const; + is_const.set_b(is_constant); + node.mutable_attr()->insert({"is_constant", is_const}); + AttrValue parallel_iterations; + parallel_iterations.set_i(piterations); + node.mutable_attr()->insert( + {"parallel_iterations", parallel_iterations}); + } + AttrValue type; + type.set_type(DT_FLOAT); + node.mutable_attr()->insert({"T", type}); + for (const string& input : inputs) { + node.add_input(input); + } + return node; + } +}; + +TEST_F(LoopOptimizerTest, Basic) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + 
*graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"VariantAdd", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"VariantAdd"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd")).back(), 0); +} + +TEST_F(LoopOptimizerTest, Const) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", 
"while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode("Const", "Const", {"^Identity"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "Const"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"VariantAdd", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"VariantAdd"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const")).size(), 0); +} + +TEST_F(LoopOptimizerTest, ControlOutput) { + 
GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode( + "Less", "Less", {"VariantAdd", "less/y", "^InvariantAdd"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"VariantAdd"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); +} + +TEST_F(LoopOptimizerTest, NestedLoop1) { + GraphDef graph; + *graph.add_node() = 
CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"VariantAdd"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", 
"NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).back(), 0); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd")).size(), 0); +} + +TEST_F(LoopOptimizerTest, NestedLoop2) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = 
CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"InvariantAdd"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", {"InvariantEnter2", "InvariantEnter2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", "NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + 
EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("VariantAdd2")).back(), 1); +} + +TEST_F(LoopOptimizerTest, NestedLoopConst1) { + GraphDef graph; + *graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"VariantAdd"}); + *graph.add_node() = CreateNode("Const2", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", 
{"InvariantEnter2", "Const2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", {"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", "NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).back(), 1); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).back(), 0); +} + +TEST_F(LoopOptimizerTest, NestedLoopConst2) { + GraphDef graph; + 
*graph.add_node() = CreateNode("0", {}); + *graph.add_node() = CreateNode( + "InvariantEnter", "Enter", "while/while_context", true, 1, {"0"}); + *graph.add_node() = CreateNode( + "InvariantAdd", "Add", {"InvariantEnter", "InvariantEnter"}); + *graph.add_node() = CreateNode( + "VariantAdd", "Add", {"InvariantAdd", "Identity"}); + *graph.add_node() = CreateNode( + "VariantEnter", "Enter", "while/while_context", false, 1, {"0"}); + *graph.add_node() = CreateNode( + "Merge", "Merge", {"VariantEnter", "NextIteration"}); + *graph.add_node() = CreateNode("Less/y", "Const", {"^Identity"}); + *graph.add_node() = CreateNode("Less", "Less", {"Exit2", "less/y"}); + *graph.add_node() = CreateNode("LoopCond", "LoopCond", {"Less"}); + *graph.add_node() = CreateNode("Switch", "Switch", {"Merge", "LoopCond"}); + *graph.add_node() = CreateNode("Identity", {"Switch:1"}); + *graph.add_node() = CreateNode( + "NextIteration", "NextIteration", {"Exit2"}); + *graph.add_node() = CreateNode("Exit", "Exit", {"Switch"}); + *graph.add_node() = CreateNode("1", {"Exit"}); + + *graph.add_node() = CreateNode( + "InvariantEnter2", "Enter", "while/while/while_context", true, 1, + {"InvariantAdd"}); + *graph.add_node() = CreateNode("Const2", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode( + "InvariantAdd2", "Add", {"InvariantEnter2", "Const2"}); + *graph.add_node() = CreateNode( + "VariantAdd2", "Add", {"InvariantAdd2", "Identity2"}); + *graph.add_node() = CreateNode( + "VariantEnter2", "Enter", "while/while/while_context", false, 1, + {"VariantEnter"}); + *graph.add_node() = CreateNode( + "Merge2", "Merge", {"VariantEnter2", "NextIteration2"}); + *graph.add_node() = CreateNode("Less2/y", "Const", {"^Identity2"}); + *graph.add_node() = CreateNode("Less2", "Less", {"VariantAdd2", "less2/y"}); + *graph.add_node() = CreateNode("LoopCond2", "LoopCond", {"Less2"}); + *graph.add_node() = CreateNode("Switch2", "Switch", {"Merge2", "LoopCond2"}); + *graph.add_node() = CreateNode("Identity2", 
{"Switch2:1"}); + *graph.add_node() = CreateNode( + "NextIteration2", "NextIteration", {"VariantAdd2"}); + *graph.add_node() = CreateNode("Exit2", "Exit", {"Switch2"}); + + GrapplerItem item; + item.graph = graph; + + LoopOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + std::unique_ptr node_map; + std::unordered_map> frames; + int num_frames; + + node_map.reset(new NodeMap(&graph)); + EXPECT_TRUE(IdentifyFrames(graph, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).back(), 1); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 2); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).back(), 1); + + node_map.reset(new NodeMap(&output)); + EXPECT_TRUE(IdentifyFrames(output, &frames, &num_frames).ok()); + EXPECT_EQ(num_frames, 2); + EXPECT_EQ(frames.at(node_map->GetNode("InvariantAdd2")).size(), 0); + EXPECT_EQ(frames.at(node_map->GetNode("Const2")).size(), 0); +} void VerifyGraphsEqual(const GraphDef& original_graph, const GraphDef& optimized_graph, const string& func) { diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 7ae77207af..39ecf017db 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -98,13 +98,13 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } - if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + if (cfg_.loop_optimization() == RewriterConfig::ON) { optimizers.push_back(std::unique_ptr( - new DependencyOptimizer(cfg_.dependency_optimization()))); + new LoopOptimizer(cfg_.loop_optimization()))); } - if (cfg_.loop_optimization() != 
RewriterConfig::OFF) { + if (cfg_.dependency_optimization() != RewriterConfig::OFF) { optimizers.push_back(std::unique_ptr( - new LoopOptimizer(cfg_.loop_optimization()))); + new DependencyOptimizer(cfg_.dependency_optimization()))); } if (cfg_.layout_optimizer() != RewriterConfig::OFF) { optimizers.push_back( -- GitLab From 1534cf92b4710d29dea780b1a17a6f7d2f10fc7b Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 2 Mar 2018 08:31:21 -0800 Subject: [PATCH 340/884] Internal-only change. PiperOrigin-RevId: 187623121 --- tensorflow/contrib/tpu/python/tpu/datasets.py | 2 +- tensorflow/contrib/tpu/python/tpu/datasets_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 29aea98542..71a3a92540 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -116,7 +116,7 @@ def StreamingFilesDataset(files, file_reader_job = file_reader_job or 'coordinator' - worker_job = worker_job or 'worker' + worker_job = worker_job or 'tpu_worker' if filename_shuffle_buffer_size is None: filename_shuffle_buffer_size = 4096 diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index 2c40797792..0173aac4f7 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -44,7 +44,7 @@ class DatasetsTest(test.TestCase): self._cluster_def = cluster_pb2.ClusterDef() worker_job = self._cluster_def.job.add() - worker_job.name = 'worker' + worker_job.name = 'tpu_worker' worker_job.tasks[0] = self._worker.target[len('grpc://'):] coord_job = self._cluster_def.job.add() coord_job.name = 'coordinator' -- GitLab From 4397f80b34d28144ed523a3f31a0fcbd1f3a9ba1 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 2 Mar 2018 08:45:01 -0800 Subject: [PATCH 341/884] Add a testing utility that can be called from compiled code, and which can mock a TF module for internal tests. Use it in api_test.py PiperOrigin-RevId: 187624343 --- tensorflow/contrib/py2tf/impl/api_test.py | 35 ++++++++++++---------- tensorflow/contrib/py2tf/utils/BUILD | 1 + tensorflow/contrib/py2tf/utils/__init__.py | 1 + tensorflow/contrib/py2tf/utils/testing.py | 35 ++++++++++++++++++++++ 4 files changed, 56 insertions(+), 16 deletions(-) create mode 100644 tensorflow/contrib/py2tf/utils/testing.py diff --git a/tensorflow/contrib/py2tf/impl/api_test.py b/tensorflow/contrib/py2tf/impl/api_test.py index 51e99864ad..13f8e66018 100644 --- a/tensorflow/contrib/py2tf/impl/api_test.py +++ b/tensorflow/contrib/py2tf/impl/api_test.py @@ -18,23 +18,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.impl import api from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.pyct import parser from tensorflow.python.framework import constant_op -from tensorflow.python.ops import math_ops from tensorflow.python.platform import test +tf = utils.fake_tf() + + class ApiTest(test.TestCase): def setUp(self): - config.DEFAULT_UNCOMPILED_MODULES.add((math_ops.__name__,)) config.COMPILED_IMPORT_STATEMENTS = ( - 'from tensorflow.python.framework ' - 'import ops as tf', + 'from __future__ import print_function', 'from tensorflow.contrib.py2tf import utils as ' - 'py2tf_utils') + 'py2tf_utils', + 'tf = py2tf_utils.fake_tf()' + ) def test_decorator_recurses(self): @@ -47,7 +50,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -63,11 +66,11 @@ class ApiTest(test.TestCase): class TestClass(object): 
def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=False) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -84,11 +87,11 @@ class ApiTest(test.TestCase): @api.graph_ready def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -111,7 +114,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= self.called_member(a) return x @@ -133,7 +136,7 @@ class ApiTest(test.TestCase): @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= api.convert_inline(self.called_member, a) return x @@ -149,11 +152,11 @@ class ApiTest(test.TestCase): class TestClass(object): def called_member(self, a): - return math_ops.negative(a) + return tf.negative(a) @api.convert(recursive=True) def test_method(self, x, s, a): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= api.graph_ready(self.called_member(a)) return x @@ -166,7 +169,7 @@ class ApiTest(test.TestCase): def test_to_graph_basic(self): def test_fn(x, s): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x //= 2 return x @@ -178,7 +181,7 @@ class ApiTest(test.TestCase): def test_to_code_basic(self): def test_fn(x, s): - while math_ops.reduce_sum(x) > s: + while tf.reduce_sum(x) > s: x /= 2 return x diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 2086a9ef60..63261d5043 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -26,6 +26,7 @@ py_library( "multiple_dispatch.py", "py_func.py", 
"tensor_list.py", + "testing.py", "type_check.py", ], srcs_version = "PY2AND3", diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index 19bf2272bc..313e5c97cc 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -25,4 +25,5 @@ from tensorflow.contrib.py2tf.utils.misc import alias_tensors from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func +from tensorflow.contrib.py2tf.utils.testing import fake_tf from tensorflow.contrib.py2tf.utils.type_check import is_tensor diff --git a/tensorflow/contrib/py2tf/utils/testing.py b/tensorflow/contrib/py2tf/utils/testing.py new file mode 100644 index 0000000000..cb4785d0dc --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/testing.py @@ -0,0 +1,35 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Testing utilities.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import imp + +from tensorflow.python.framework import ops +from tensorflow.python.ops import math_ops + + +def fake_tf(): + """Creates a fake module that looks like TensorFlow, for testing.""" + mod = imp.new_module('tensorflow') + mod_contents = dict() + mod_contents.update(math_ops.__dict__) + mod_contents.update(ops.__dict__) + mod_contents.update(mod.__dict__) + mod.__dict__.update(mod_contents) + return mod -- GitLab From 75adc3da8b6b61fafd9f88f7828ee6aa73f3f9fb Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 2 Mar 2018 09:20:17 -0800 Subject: [PATCH 342/884] Uncomment google preprocessor conditionals --- tensorflow/contrib/tensorrt/convert/convert_graph.cc | 8 ++++---- tensorflow/contrib/tensorrt/convert/convert_graph.h | 8 ++++---- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 8 ++++---- tensorflow/contrib/tensorrt/convert/convert_nodes.h | 8 ++++---- tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc | 8 ++++---- tensorflow/contrib/tensorrt/trt_conversion.i | 2 +- 6 files changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index d753e272f4..44e9dda7b9 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -40,8 +40,8 @@ limitations under the License. 
#include "tensorflow/core/platform/types.h" #include "tensorflow/core/protobuf/device_properties.pb.h" // NOLINT -//#if GOOGLE_CUDA -//#if GOOGLE_TENSORRT +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT #include "tensorrt/include/NvInfer.h" namespace tensorflow { @@ -422,5 +422,5 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // namespace tensorrt } // namespace tensorflow -//#endif // GOOGLE_TENSORRT -//#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 905824cdc8..8401791f76 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -21,8 +21,8 @@ limitations under the License. #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/platform/types.h" -//#if GOOGLE_CUDA -//#if GOOGLE_TENSORRT +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT namespace tensorflow { namespace tensorrt { @@ -43,7 +43,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( } // namespace tensorrt } // namespace tensorflow -//#endif // GOOGLE_TENSORRT -//#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA #endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_GRAPH_H_ diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 1bd60c650e..a36851a336 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -43,8 +43,8 @@ limitations under the License. #include "tensorflow/core/platform/tensor_coding.h" #include "tensorflow/core/platform/types.h" -//#if GOOGLE_CUDA -//#if GOOGLE_TENSORRT +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT #include "tensorrt/include/NvInfer.h" // Check if the types are equal. 
Cast to int first so that failure log message @@ -2696,5 +2696,5 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } // namespace tensorrt } // namespace tensorflow -//#endif // GOOGLE_TENSORRT -//#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 7e9f8a9b4b..48fe51a954 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -27,8 +27,8 @@ limitations under the License. #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/lib/core/status.h" -//#if GOOGLE_CUDA -//#if GOOGLE_TENSORRT +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT namespace tensorflow { namespace tensorrt { @@ -74,7 +74,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, } // namespace tensorrt } // namespace tensorflow -//#endif // GOOGLE_TENSORRT -//#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA #endif // TENSORFLOW_CONTRIB_TENSORRT_CONVERT_CONVERT_NODES_H_ diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index f8360ac547..03f80dd506 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -19,8 +19,8 @@ limitations under the License. 
#include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" -//#if GOOGLE_CUDA -//#if GOOGLE_TENSORRT +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { @@ -152,5 +152,5 @@ REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); } // namespace tensorrt } // namespace tensorflow -//#endif // GOOGLE_TENSORRT -//#endif // GOOGLE_CUDA +#endif // GOOGLE_TENSORRT +#endif // GOOGLE_CUDA diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 28334e26a9..09e58e8ce9 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -106,7 +106,7 @@ std::pair trt_convert( tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertGraphDefToTensorRT( graph_def, output_names, max_batch_size, max_workspace_size_bytes, - &outGraph, precision_mode,minimum_segment_size); + &outGraph, precision_mode, minimum_segment_size); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; -- GitLab From 7013a5ae241cd0c5375065f549aec27fcee6465d Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 2 Mar 2018 09:24:26 -0800 Subject: [PATCH 343/884] Take into account the return value mapping of functions PiperOrigin-RevId: 187628382 --- .../grappler/optimizers/function_optimizer.cc | 6 +- .../optimizers/function_optimizer_test.cc | 156 +++++++++++++++++- tensorflow/core/grappler/utils/functions.cc | 17 +- .../core/grappler/utils/functions_test.cc | 85 +++++++++- 4 files changed, 256 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index a5cf00c155..167e5a153a 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -102,7 +102,8 @@ Status 
InlineFunction(const NodeDef& node, const FunctionDef& func, func_outputs->set_op("IdentityN"); func_outputs->set_device(node.device()); type_list = (*func_outputs->mutable_attr())["T"].mutable_list(); - for (const OpDef::ArgDef& arg : func.signature().output_arg()) { + for (int i = 0; i < func.signature().output_arg_size(); ++i) { + const OpDef::ArgDef& arg = func.signature().output_arg(i); if (arg.type() != DT_INVALID) { type_list->add_type(arg.type()); } else { @@ -114,7 +115,8 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, } type_list->add_type(it->second.type()); } - func_outputs->add_input(strings::StrCat(node.name(), "/", arg.name())); + // Use the fetch names since they take into account the output mapping. + func_outputs->add_input(strings::StrCat(node.name(), "/", item->fetch[i])); } return Status::OK(); diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index fd61c067ed..5072abaac7 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -79,7 +79,7 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { EXPECT_EQ("IdentityN", node.op()); EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("y/y", node.input(0)); + EXPECT_EQ("y/y:0", node.input(0)); } else if (node.name() == "z") { count++; EXPECT_EQ("Identity", node.op()); @@ -166,7 +166,7 @@ TEST_F(FunctionOptimizerTest, FixedTypeFunction) { EXPECT_EQ("IdentityN", node.op()); EXPECT_EQ(device, node.device()); EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("y/y", node.input(0)); + EXPECT_EQ("y/y:0", node.input(0)); } else if (node.name() == "z") { count++; EXPECT_EQ("Identity", node.op()); @@ -187,6 +187,158 @@ TEST_F(FunctionOptimizerTest, FixedTypeFunction) { test::ExpectTensorEqual(tensors_expected[0], tensors[0]); } +TEST_F(FunctionOptimizerTest, FunctionWithOutputMapping) { + 
FunctionDef func = FunctionDefHelper::Create( + // Name + "Exp_func", + // Args + {"in: float"}, + // Return values + {"out: float"}, + // Attr def + {}, + // Nodes + {{{"Linear_func"}, "Identity", {"in"}, {{"T", DT_FLOAT}}}, + {{"Exp"}, "Exp", {"Linear_func:output:0"}, {{"T", DT_FLOAT}}}}, + // Mapping + {{"out", "Exp:y:0"}}); + + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("x", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "Exp_func", {"x"}, {}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + func, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int count = 0; + for (const NodeDef& node : output.node()) { + if (node.name() == "y/inlined_inputs") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("x", node.input(0)); + } else if (node.name() == "y/in") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/inlined_inputs:0", node.input(0)); + } else if (node.name() == "y/Linear_func") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/in", node.input(0)); + } else if (node.name() == "y/Exp") { + count++; + EXPECT_EQ("Exp", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/Linear_func:0", node.input(0)); + } else if (node.name() == "y") { + count++; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y/Exp:0", node.input(0)); + } else if (node.name() == "z") { + count++; + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(device, 
node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("y", node.input(0)); + } + } + EXPECT_EQ(6, count); + + item.fetch = {"z"}; + Tensor pi(DT_FLOAT, {}); + pi.flat()(0) = 3.14f; + item.feed.emplace_back("x", pi); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); +} + +TEST_F(FunctionOptimizerTest, FunctionWithInputForwarding) { + FunctionDef func = FunctionDefHelper::Create( + // Name + "ForwardInputs", + // Args + {"in0: float", "in1: float", "arg2: float", "arg3: int32", "arg4: float"}, + // Return values + {"out0: float", "arg2: float", "arg3: int32"}, + // Attr def + {}, + // Nodes + {}, + // Mapping + {{"out0", "in0"}, {"arg2", "arg2"}, {"arg3", "arg3"}}); + + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("x0", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("x1", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("x2", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("x3", "Placeholder", {}, {{"dtype", DT_INT32}}, + device), + test::function::NDef("x4", "Placeholder", {}, {{"dtype", DT_FLOAT}}, + device), + test::function::NDef("y", "ForwardInputs", + {"x0", "x1", "x2", "x3", "x4"}, {}, device), + test::function::NDef("z0", "Identity", {"y:0"}, {{"T", DT_FLOAT}}, + device), + test::function::NDef("z1", "Identity", {"y:1"}, {{"T", DT_FLOAT}}, + device), + test::function::NDef("z2", "Identity", {"y:2"}, {{"T", DT_INT32}}, + device)}, + // FunctionLib + { + func, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + item.fetch = {"z0", "z1", "z2"}; + Tensor in(DT_FLOAT, {}); + in.flat()(0) = 3.14f; + item.feed.emplace_back("x0", in); + 
in.flat()(0) = 2.7f; + item.feed.emplace_back("x1", in); + in.flat()(0) = 1.0f; + item.feed.emplace_back("x2", in); + in.flat()(0) = -1.0f; + item.feed.emplace_back("x4", in); + Tensor in_int(DT_INT32, {}); + in_int.flat()(0) = 1234; + item.feed.emplace_back("x3", in_int); + auto tensors_expected = EvaluateFetchNodes(item); + GrapplerItem optimized(item, std::move(output)); + auto tensors = EvaluateFetchNodes(optimized); + test::ExpectTensorEqual(tensors_expected[0], tensors[0]); + test::ExpectTensorEqual(tensors_expected[1], tensors[1]); + test::ExpectTensorEqual(tensors_expected[2], tensors[2]); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions.cc b/tensorflow/core/grappler/utils/functions.cc index 37b00e0a30..4f286ce1c8 100644 --- a/tensorflow/core/grappler/utils/functions.cc +++ b/tensorflow/core/grappler/utils/functions.cc @@ -124,9 +124,22 @@ std::unique_ptr GrapplerItemFromFunctionDef( } } - // Add the function outputs to the list of fetch nodes. + // Add the function outputs to the list of fetch nodes, taking into account + // the output mapping if any. for (const auto& out : func.signature().output_arg()) { - new_item->fetch.emplace_back(out.name()); + auto it = func.ret().find(out.name()); + if (it != func.ret().end()) { + auto it2 = port_map.find(it->second); + if (it2 == port_map.end()) { + LOG(ERROR) << "Unknown output mapping: " << it->first << " to " + << it->second; + return nullptr; + } else { + new_item->fetch.emplace_back(it2->second); + } + } else { + new_item->fetch.emplace_back(out.name()); + } } // Add the function inputs to the list of feeds. 
for (const auto& inp : func.signature().input_arg()) { diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc index 25ccb50084..25ec50d478 100644 --- a/tensorflow/core/grappler/utils/functions_test.cc +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -54,7 +54,7 @@ TEST_F(FunctionsTest, FromSimpleFunctionDef) { CHECK(item); EXPECT_EQ("XTimesTwo", item->id); EXPECT_EQ(4, item->graph.node_size()); - EXPECT_EQ(std::vector({"y"}), item->fetch); + EXPECT_EQ(std::vector({"y:0"}), item->fetch); EXPECT_EQ(1, item->feed.size()); EXPECT_EQ("x", item->feed[0].first); @@ -121,7 +121,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithMultiOutputNodes) { CHECK(item); EXPECT_EQ("SubGrad", item->id); EXPECT_EQ(12, item->graph.node_size()); - EXPECT_EQ(std::vector({"dx", "dy"}), item->fetch); + EXPECT_EQ(std::vector({"dx:0", "dy:0"}), item->fetch); EXPECT_EQ(3, item->feed.size()); EXPECT_EQ("x", item->feed[0].first); EXPECT_EQ("y", item->feed[1].first); @@ -184,6 +184,7 @@ TEST_F(FunctionsTest, FromFunctionDefWithNestedFuncs) { {{"x2"}, "Mul", {"x", "x"}, {{"T", DT_FLOAT}}}, {{"y2"}, "Mul", {"y", "y"}, {{"T", DT_FLOAT}}, {"a1"}}, {{"o"}, "Add", {"x2:z:0", "y2:z:0"}, {{"T", DT_FLOAT}}}}, + // Output Mapping {{"o", "o:z:0"}}); std::unordered_map func_attr; @@ -227,6 +228,86 @@ TEST_F(FunctionsTest, FromFunctionDefWithNestedFuncs) { } } +TEST_F(FunctionsTest, FromFunctionDefWithOutputMappings) { + FunctionDef func = FunctionDefHelper::Create( + // Name + "Exp_func", + // Args + {"in: float"}, + // Return values + {"out: float"}, + // Attr def + {}, + // Nodes + {{{"Linear_func"}, "Identity", {"in"}, {{"T", DT_FLOAT}}}, + {{"Exp"}, "Exp", {"Linear_func:output:0"}, {{"T", DT_FLOAT}}}}, + // Mapping + {{"out", "Exp:y:0"}}); + + std::unordered_map func_attr; + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + EXPECT_EQ(1, item->fetch.size()); + EXPECT_EQ("Exp:0", 
item->fetch[0]); + + for (const NodeDef &node : item->graph.node()) { + if (node.name() == "in") { + EXPECT_EQ("Placeholder", node.op()); + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + EXPECT_EQ(0, node.input_size()); + } else if (node.name() == "Linear_func") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("in", node.input(0)); + } else if (node.name() == "Exp") { + EXPECT_EQ("Exp", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("Linear_func:0", node.input(0)); + } + } +} + +TEST_F(FunctionsTest, FromFunctionDefWithInputForwarding) { + FunctionDef func = FunctionDefHelper::Create( + // Name + "ForwardInputs", + // Args + {"in0: float", "in1: float", "arg2: float", "arg3: int32", "arg4: float"}, + // Return values + {"out0: float", "arg2: float", "arg3: int32"}, + // Attr def + {}, + // Nodes + {}, + // Mapping + {{"out0", "in0"}}); + + std::unordered_map func_attr; + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + EXPECT_EQ(3, item->fetch.size()); + EXPECT_EQ("in0", item->fetch[0]); + EXPECT_EQ("arg2", item->fetch[1]); + EXPECT_EQ("arg3", item->fetch[2]); + + EXPECT_EQ(5, item->graph.node_size()); + for (const NodeDef &node : item->graph.node()) { + EXPECT_TRUE(node.name() == "in0" || node.name() == "in1" || + node.name() == "arg2" || node.name() == "arg3" || + node.name() == "arg4"); + EXPECT_EQ("Placeholder", node.op()); + if (node.name() == "arg3") { + EXPECT_EQ(DT_INT32, node.attr().at("T").type()); + } else { + EXPECT_EQ(DT_FLOAT, node.attr().at("T").type()); + } + } +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 96845a7c31aea72d44b4e16084ab5350896ca5c8 Mon Sep 17 00:00:00 2001 From: Andrew Selle Date: Fri, 2 Mar 2018 09:32:36 -0800 Subject: [PATCH 344/884] Only use softfp for android builds to make odroid builds work. 
PiperOrigin-RevId: 187629282 --- tensorflow/contrib/lite/kernels/internal/BUILD | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index f47fb04cba..6ccad3b1ce 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -10,21 +10,25 @@ tflite_deps_intel = [ "@arm_neon_2_x86_sse", ] +HARD_FP_FLAGS_IF_APPLICABLE = select({ + "//tensorflow:android_arm": ["-mfloat-abi=softfp"], + "//tensorflow:android_arm64": ["-mfloat-abi=softfp"], + "//tensorflow:android_armeabi": ["-mfloat-abi=softfp"], + "//conditions:default": [], +}) + NEON_FLAGS_IF_APPLICABLE = select({ ":arm": [ "-O3", "-mfpu=neon", - "-mfloat-abi=softfp", ], ":armeabi-v7a": [ "-O3", "-mfpu=neon", - "-mfloat-abi=softfp", ], ":armv7a": [ "-O3", "-mfpu=neon", - "-mfloat-abi=softfp", ], "//conditions:default": [ "-O3", @@ -283,7 +287,7 @@ cc_library( "optimized/neon_tensor_utils.h", "optimized/tensor_utils_impl.h", ], - copts = NEON_FLAGS_IF_APPLICABLE, + copts = NEON_FLAGS_IF_APPLICABLE + HARD_FP_FLAGS_IF_APPLICABLE, deps = [ ":cpu_check", ":portable_tensor_utils", -- GitLab From cd810e21bdb0a5631836c69e5273135e4b15a441 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Fri, 2 Mar 2018 09:53:54 -0800 Subject: [PATCH 345/884] No need to override _handle_device for variables anymore PiperOrigin-RevId: 187631915 --- .../resource_variable_ops_test.py | 1 - .../python/ops/resource_variable_ops.py | 25 ++++++------------- 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 71699fe0ad..10ba9fa674 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -526,7 +526,6 @@ class 
ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual(dtypes.int32, v.dtype) self.assertEqual("foo/var7:0", v.name) self.assertAllEqual([10, 20, 35], v.shape.as_list()) - self.assertEqual(context.get_default_context().device_name, v.device) self.assertTrue(isinstance(v.handle, ops.EagerTensor)) self.assertEqual(constraint, v.constraint) self.assertAllEqual(init.numpy(), v.read_value().numpy()) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index bf186f1734..cbac3c686d 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -384,9 +384,6 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, graph_mode=self._in_graph_mode) - self._handle_device = ( - self._handle.device if self._in_graph_mode else - context.get_default_context().device_name) self._shape = initial_value.get_shape() else: initial_value = initial_value() @@ -399,9 +396,6 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, graph_mode=False) - self._handle_device = ( - self._handle.device if self._in_graph_mode else - context.get_default_context().device_name) self._shape = initial_value.get_shape() # pylint: enable=protected-access @@ -425,8 +419,6 @@ class ResourceVariable(variables.Variable): shared_name=handle_name, name=name, graph_mode=self._in_graph_mode) - self._handle_device = (self._handle.device if self._in_graph_mode else - context.get_default_context().device_name) self._shape = initial_value.get_shape() self._initial_value = initial_value if self._in_graph_mode else None @@ -449,7 +441,7 @@ class ResourceVariable(variables.Variable): with ops.name_scope("Read"), ops.colocate_with(self._handle): # Manually assign reads to the handle's device to avoid log # messages. 
- with ops.device(self._handle_device): + with ops.device(self._handle.device): value = self._read_variable_op() self._graph_element = value if caching_device is not None: @@ -489,7 +481,7 @@ class ResourceVariable(variables.Variable): # cycles being uncollectable, and means that no __del__ will be defined at # all in graph mode. self._handle_deleter = EagerResourceDeleter( - handle=self._handle, handle_device=self._handle_device) + handle=self._handle, handle_device=self._handle.device) def _init_from_proto(self, variable_def, import_scope=None): """Initializes from `VariableDef` proto.""" @@ -507,7 +499,6 @@ class ResourceVariable(variables.Variable): variable_def.variable_name, import_scope=import_scope)) self._shape = tensor_shape.TensorShape( self._handle.op.get_attr("shape")) - self._handle_device = self._handle.device self._handle_name = self._handle.name self._initializer_op = g.as_graph_element( ops.prepend_name_scope( @@ -552,7 +543,7 @@ class ResourceVariable(variables.Variable): @property def device(self): """The device this variable is on.""" - return self._handle_device + return self._handle.device @property def graph(self): @@ -586,7 +577,7 @@ class ResourceVariable(variables.Variable): if self._cached_value is not None: return self._cached_value with ops.colocate_with(None, ignore_existing=True): - with ops.device(self._handle_device): + with ops.device(self._handle.device): return self._read_variable_op() def _as_graph_element(self): @@ -683,7 +674,7 @@ class ResourceVariable(variables.Variable): """ with ops.name_scope("Read"): # Ensure we read the variable in the same device as the handle. - with ops.device(self._handle_device): + with ops.device(self._handle.device): value = self._read_variable_op() # Return an identity so it can get placed on whatever device the context # specifies instead of the device where the variable is. 
@@ -840,8 +831,7 @@ class ResourceVariable(variables.Variable): if hasattr(self, "_trainable") and self._trainable: tape.watch_variable(self) return _UnreadVariable( - self._handle, self.dtype, self._handle_device, self._shape, - self._in_graph_mode, + self._handle, self.dtype, self._shape, self._in_graph_mode, self._handle_deleter if not self._in_graph_mode else None, op) def assign(self, value, use_locking=None, name=None, read_value=True): @@ -952,7 +942,7 @@ class _UnreadVariable(ResourceVariable): Pretends to be the tensor if anyone looks. """ - def __init__(self, handle, dtype, handle_device, # pylint: disable=super-init-not-called + def __init__(self, handle, dtype, # pylint: disable=super-init-not-called shape, in_graph_mode, deleter, parent_op): # We do not call super init on purpose. self._trainable = False @@ -960,7 +950,6 @@ class _UnreadVariable(ResourceVariable): self._graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access self._in_graph_mode = in_graph_mode self._handle = handle - self._handle_device = handle_device self._shape = shape self._initial_value = None if isinstance(self._handle, ops.EagerTensor): -- GitLab From 929c435bcba105cf558e1942b63389812b62aff3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 10:05:14 -0800 Subject: [PATCH 346/884] Add bfloat16 support for CPU ops. 
PiperOrigin-RevId: 187633511 --- tensorflow/core/kernels/check_numerics_op.cc | 3 ++ tensorflow/core/kernels/cwise_op_add_1.cc | 8 +++--- tensorflow/core/kernels/cwise_op_isnan.cc | 3 +- tensorflow/core/kernels/cwise_op_mul_1.cc | 4 +-- tensorflow/core/kernels/cwise_op_square.cc | 4 +-- tensorflow/core/kernels/cwise_op_sub.cc | 4 +-- tensorflow/core/kernels/cwise_ops_common.h | 2 ++ tensorflow/core/kernels/training_ops.cc | 17 +++++++++++ tensorflow/core/lib/bfloat16/bfloat16.h | 30 ++++++++++++++++++++ 9 files changed, 64 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/check_numerics_op.cc b/tensorflow/core/kernels/check_numerics_op.cc index 6040b2b399..d3b67f4614 100644 --- a/tensorflow/core/kernels/check_numerics_op.cc +++ b/tensorflow/core/kernels/check_numerics_op.cc @@ -15,6 +15,8 @@ limitations under the License. // See docs in ../ops/array_ops.cc. +#include "tensorflow/core/lib/bfloat16/bfloat16.h" + #include #include #include @@ -219,6 +221,7 @@ class CheckNumericsOp : public AsyncOpKernel { Name("CheckNumerics").Device(DEVICE_CPU).TypeConstraint("T"), \ CheckNumericsOp); TF_CALL_half(REGISTER_CPU_KERNEL); +TF_CALL_bfloat16(REGISTER_CPU_KERNEL); TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); diff --git a/tensorflow/core/kernels/cwise_op_add_1.cc b/tensorflow/core/kernels/cwise_op_add_1.cc index bf32c8a54b..9e4ffe950c 100644 --- a/tensorflow/core/kernels/cwise_op_add_1.cc +++ b/tensorflow/core/kernels/cwise_op_add_1.cc @@ -16,10 +16,10 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32, - int64); -REGISTER5(BinaryOp, CPU, "AddV2", functor::add, float, Eigen::half, double, - int32, int64); +REGISTER6(BinaryOp, CPU, "Add", functor::add, float, Eigen::half, double, int32, + int64, bfloat16); +REGISTER6(BinaryOp, CPU, "AddV2", functor::add, float, Eigen::half, double, + int32, int64, bfloat16); #if GOOGLE_CUDA REGISTER3(BinaryOp, GPU, "Add", functor::add, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_isnan.cc b/tensorflow/core/kernels/cwise_op_isnan.cc index aa180c247e..707dc9e49c 100644 --- a/tensorflow/core/kernels/cwise_op_isnan.cc +++ b/tensorflow/core/kernels/cwise_op_isnan.cc @@ -16,7 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER3(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double); +REGISTER4(UnaryOp, CPU, "IsNan", functor::isnan, float, Eigen::half, double, + bfloat16); #if GOOGLE_CUDA REGISTER3(UnaryOp, GPU, "IsNan", functor::isnan, float, Eigen::half, double); diff --git a/tensorflow/core/kernels/cwise_op_mul_1.cc b/tensorflow/core/kernels/cwise_op_mul_1.cc index 0e8d2e3735..cff0407b83 100644 --- a/tensorflow/core/kernels/cwise_op_mul_1.cc +++ b/tensorflow/core/kernels/cwise_op_mul_1.cc @@ -17,8 +17,8 @@ limitations under the License. 
namespace tensorflow { -REGISTER5(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double, uint8, - int32); +REGISTER6(BinaryOp, CPU, "Mul", functor::mul, float, Eigen::half, double, uint8, + int32, bfloat16); #if defined(__ANDROID_TYPES_SLIM__) // We only register the first type when we have multi-argument calls in the // case where we're trying to reduce executable size, but it turns out that the diff --git a/tensorflow/core/kernels/cwise_op_square.cc b/tensorflow/core/kernels/cwise_op_square.cc index 7fc2f6bf08..84f695ddc2 100644 --- a/tensorflow/core/kernels/cwise_op_square.cc +++ b/tensorflow/core/kernels/cwise_op_square.cc @@ -16,8 +16,8 @@ limitations under the License. #include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER7(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double, - int32, int64, complex64, complex128); +REGISTER8(UnaryOp, CPU, "Square", functor::square, float, Eigen::half, double, + int32, int64, complex64, complex128, bfloat16); #if GOOGLE_CUDA REGISTER4(UnaryOp, GPU, "Square", functor::square, float, Eigen::half, double, diff --git a/tensorflow/core/kernels/cwise_op_sub.cc b/tensorflow/core/kernels/cwise_op_sub.cc index 025041946a..eb27bddb78 100644 --- a/tensorflow/core/kernels/cwise_op_sub.cc +++ b/tensorflow/core/kernels/cwise_op_sub.cc @@ -16,8 +16,8 @@ limitations under the License. 
#include "tensorflow/core/kernels/cwise_ops_common.h" namespace tensorflow { -REGISTER7(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, - int64, complex64, complex128); +REGISTER8(BinaryOp, CPU, "Sub", functor::sub, float, Eigen::half, double, int32, + int64, bfloat16, complex64, complex128); #if !defined(__ANDROID_TYPES_SLIM__) // Sub op for int8, uint8, int16, uint16 REGISTER4(BinaryOp, CPU, "Sub", functor::sub, int8, uint8, int16, uint16); diff --git a/tensorflow/core/kernels/cwise_ops_common.h b/tensorflow/core/kernels/cwise_ops_common.h index 8295fa939e..e32eccf547 100644 --- a/tensorflow/core/kernels/cwise_ops_common.h +++ b/tensorflow/core/kernels/cwise_ops_common.h @@ -20,6 +20,8 @@ limitations under the License. #define EIGEN_USE_THREADS +#include "tensorflow/core/lib/bfloat16/bfloat16.h" + #ifdef TENSORFLOW_USE_SYCL #include "tensorflow/core/kernels/cwise_ops_sycl_common.h" #endif diff --git a/tensorflow/core/kernels/training_ops.cc b/tensorflow/core/kernels/training_ops.cc index 233aa03c32..f53c567c4d 100644 --- a/tensorflow/core/kernels/training_ops.cc +++ b/tensorflow/core/kernels/training_ops.cc @@ -15,6 +15,8 @@ limitations under the License. 
#define EIGEN_USE_THREADS +#include "tensorflow/core/lib/bfloat16/bfloat16.h" + #include #include "tensorflow/core/framework/op_kernel.h" @@ -494,6 +496,7 @@ class ApplyGradientDescentOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -647,6 +650,7 @@ class ApplyAdadeltaOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -822,6 +826,7 @@ class SparseApplyAdadeltaOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1107,6 +1112,7 @@ class ApplyAdagradOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1360,6 +1366,7 @@ class SparseApplyAdagradOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1961,6 +1968,7 @@ class ApplyFtrlOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -1982,6 +1990,7 @@ TF_CALL_double(REGISTER_CPU_KERNELS); #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); 
@@ -2230,6 +2239,7 @@ class SparseApplyFtrlOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2254,6 +2264,7 @@ TF_CALL_double(REGISTER_CPU_KERNELS); REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2332,6 +2343,7 @@ class ApplyMomentumOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2471,6 +2483,7 @@ class SparseApplyMomentumOp : public OpKernel { REGISTER_KERNELS(T, int64); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2698,6 +2711,7 @@ class ApplyAdamOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -2937,6 +2951,7 @@ class ApplyCenteredRMSPropOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -3352,6 +3367,7 @@ class ApplyAddSignOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); +TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); @@ -3457,6 +3473,7 @@ class ApplyPowerSignOp : public OpKernel { #define REGISTER_CPU_KERNELS(T) REGISTER_KERNELS(CPU, T); TF_CALL_half(REGISTER_CPU_KERNELS); 
+TF_CALL_bfloat16(REGISTER_CPU_KERNELS); TF_CALL_float(REGISTER_CPU_KERNELS); TF_CALL_double(REGISTER_CPU_KERNELS); diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h index f9cca0ef2a..de8f92d1eb 100644 --- a/tensorflow/core/lib/bfloat16/bfloat16.h +++ b/tensorflow/core/lib/bfloat16/bfloat16.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_ #define TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_ +#include #include #ifdef __CUDACC__ @@ -271,6 +272,35 @@ struct hash { return hash()(static_cast(v)); } }; + +using tensorflow::bfloat16; +inline bool isinf(const bfloat16& a) { return std::isinf(float(a)); } +inline bool isnan(const bfloat16& a) { return std::isnan(float(a)); } +inline bool isfinite(const bfloat16& a) { return std::isfinite(float(a)); } +inline bfloat16 abs(const bfloat16& a) { return bfloat16(std::abs(float(a))); } +inline bfloat16 exp(const bfloat16& a) { return bfloat16(std::exp(float(a))); } +inline bfloat16 log(const bfloat16& a) { return bfloat16(std::log(float(a))); } +inline bfloat16 log10(const bfloat16& a) { + return bfloat16(std::log10(float(a))); +} +inline bfloat16 sqrt(const bfloat16& a) { + return bfloat16(std::sqrt(float(a))); +} +inline bfloat16 pow(const bfloat16& a, const bfloat16& b) { + return bfloat16(std::pow(float(a), float(b))); +} +inline bfloat16 sin(const bfloat16& a) { return bfloat16(std::sin(float(a))); } +inline bfloat16 cos(const bfloat16& a) { return bfloat16(std::cos(float(a))); } +inline bfloat16 tan(const bfloat16& a) { return bfloat16(std::tan(float(a))); } +inline bfloat16 tanh(const bfloat16& a) { + return bfloat16(std::tanh(float(a))); +} +inline bfloat16 floor(const bfloat16& a) { + return bfloat16(std::floor(float(a))); +} +inline bfloat16 ceil(const bfloat16& a) { + return bfloat16(std::ceil(float(a))); +} } // namespace std #endif // TENSORFLOW_CORE_LIB_BFLOAT16_BFLOAT16_H_ -- GitLab From 
3942fbfcc3252e2e479e3dde8d996e8e156558c4 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 13:28:17 -0500 Subject: [PATCH 347/884] Disable loop_optimizer_test for now --- tensorflow/core/grappler/optimizers/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index b0a7587600..4e14f0ba40 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -538,6 +538,10 @@ tf_cc_test( name = "loop_optimizer_test", size = "small", srcs = ["loop_optimizer_test.cc"], + tags = [ + "manual", + "no_oss", + ], # b/74111495 deps = [ ":loop_optimizer", "//tensorflow/cc:cc_ops", -- GitLab From bce4f52b7201b943d544606dcca51ef4ba2b2c1a Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 2 Mar 2018 10:30:01 -0800 Subject: [PATCH 348/884] tf.keras: Remove unnecessary "with self.test_session()" statements in tests. The test decorator that runs the test twice (once with eager execution enabled, once without) doesn't require the block, and this makes the code appear more eager-friendly (as there is no concept of a session when eager execution is enabled). 
PiperOrigin-RevId: 187637008 --- .../_impl/keras/model_subclassing_test.py | 245 +++++++++--------- 1 file changed, 117 insertions(+), 128 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py index 3d71a620fc..58b144365b 100644 --- a/tensorflow/python/keras/_impl/keras/model_subclassing_test.py +++ b/tensorflow/python/keras/_impl/keras/model_subclassing_test.py @@ -174,19 +174,18 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = SimpleTestModel(num_classes=num_classes, - use_dp=True, - use_bn=True) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = SimpleTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) @test_util.run_in_graph_and_eager_modes() def test_multi_io_workflow_with_np_arrays(self): @@ -194,21 +193,20 @@ class ModelSubclassingTest(test.TestCase): num_samples = 1000 input_dim = 50 - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, - use_dp=True, - use_bn=True) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = MultiIOTestModel(num_classes=num_classes, + use_dp=True, + use_bn=True) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x1 = np.ones((num_samples, input_dim)) - x2 = np.ones((num_samples, input_dim)) - y1 = np.zeros((num_samples, 
num_classes[0])) - y2 = np.zeros((num_samples, num_classes[1])) + x1 = np.ones((num_samples, input_dim)) + x2 = np.ones((num_samples, input_dim)) + y1 = np.zeros((num_samples, num_classes[0])) + y2 = np.zeros((num_samples, num_classes[1])) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - _ = model.evaluate([x1, x2], [y1, y2], verbose=0) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + _ = model.evaluate([x1, x2], [y1, y2], verbose=0) def test_single_io_workflow_with_tensors(self): @@ -321,14 +319,13 @@ class ModelSubclassingTest(test.TestCase): x = np.ones((num_samples, input_dim)) y = np.ones((num_samples, input_dim)) - with self.test_session(): - model = BNNet() - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - y_ref = model.predict(x) + model = BNNet() + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + y_ref = model.predict(x) - model.train_on_batch(x, y) - y_new = model.predict(x) - self.assertGreater(np.sum(np.abs(y_ref - y_new)), 0.1) + model.train_on_batch(x, y) + y_new = model.predict(x) + self.assertGreater(np.sum(np.abs(y_ref - y_new)), 0.1) @test_util.run_in_graph_and_eager_modes() def test_training_and_inference_behavior(self): @@ -350,14 +347,13 @@ class ModelSubclassingTest(test.TestCase): x = self.dp(inputs) return self.dense(x) - with self.test_session(): - model = DPNet() - x = np.ones((num_samples, input_dim)) - y = model.predict(x) - self.assertEqual(np.sum(y), np.sum(x)) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - loss = model.train_on_batch(x, y) - self.assertGreater(loss, 0.1) + model = DPNet() + x = np.ones((num_samples, input_dim)) + y = model.predict(x) + self.assertEqual(np.sum(y), np.sum(x)) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + loss = model.train_on_batch(x, y) + self.assertGreater(loss, 0.1) @test_util.run_in_graph_and_eager_modes() def test_training_methods(self): 
@@ -373,21 +369,20 @@ class ModelSubclassingTest(test.TestCase): y1 = np.zeros((num_samples, num_classes[0])) y2 = np.zeros((num_samples, num_classes[1])) - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - model.fit({'input_1': x1, 'input_2': x2}, - {'output_1': y1, 'output_2': y2}, - epochs=2, batch_size=32) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0, - validation_data=([x1, x2], [y1, y2])) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + model.fit({'input_1': x1, 'input_2': x2}, + {'output_1': y1, 'output_2': y2}, + epochs=2, batch_size=32) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0, + validation_data=([x1, x2], [y1, y2])) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.train_on_batch([x1, x2], [y1, y2]) - model.train_on_batch({'input_1': x1, 'input_2': x2}, - {'output_1': y1, 'output_2': y2}) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.train_on_batch([x1, x2], [y1, y2]) + model.train_on_batch({'input_1': x1, 'input_2': x2}, + {'output_1': y1, 'output_2': y2}) @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def test_inference_methods(self): @@ -402,17 +397,16 @@ class ModelSubclassingTest(test.TestCase): y1 = np.zeros((num_samples, num_classes[0])) y2 = np.zeros((num_samples, num_classes[1])) - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', 
optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.evaluate([x1, x2], [y1, y2]) - model.test_on_batch([x1, x2], [y1, y2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.evaluate([x1, x2], [y1, y2]) + model.test_on_batch([x1, x2], [y1, y2]) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.predict([x1, x2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.predict([x1, x2]) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.predict_on_batch([x1, x2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.predict_on_batch([x1, x2]) @test_util.run_in_graph_and_eager_modes() def test_trainable_mutation(self): @@ -435,26 +429,25 @@ class ModelSubclassingTest(test.TestCase): y1 = np.zeros((num_samples, num_classes[0])) y2 = np.zeros((num_samples, num_classes[1])) - with self.test_session(): - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) - y_ref_1, y_ref_2 = model.predict([x1, x2]) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + model.fit([x1, x2], [y1, y2], epochs=2, batch_size=32, verbose=0) + y_ref_1, y_ref_2 = model.predict([x1, x2]) - fd, fname = tempfile.mkstemp('.h5') - model.save_weights(fname) + fd, fname = tempfile.mkstemp('.h5') + model.save_weights(fname) - model = MultiIOTestModel(num_classes=num_classes, use_bn=True) - # need to build the model before loading weights - # (otherwise no weights to load) - model._set_inputs([x1, x2]) - model.load_weights(fname) + model = MultiIOTestModel(num_classes=num_classes, use_bn=True) + # need to build the model before loading weights + # (otherwise no weights to load) + 
model._set_inputs([x1, x2]) + model.load_weights(fname) - y1, y2 = model.predict([x1, x2]) - self.assertAllClose(y_ref_1, y1, atol=1e-5) - self.assertAllClose(y_ref_2, y2, atol=1e-5) - os.close(fd) - os.remove(fname) + y1, y2 = model.predict([x1, x2]) + self.assertAllClose(y_ref_1, y1, atol=1e-5) + self.assertAllClose(y_ref_2, y2, atol=1e-5) + os.close(fd) + os.remove(fname) @test_util.run_in_graph_and_eager_modes() def test_summary(self): @@ -488,23 +481,22 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = NestedTestModel1(num_classes=num_classes) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = NestedTestModel1(num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) - self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) - self.assertEqual(len(model.non_trainable_weights), - 2 + len(model.test_net.non_trainable_weights)) - self.assertEqual(len(model.trainable_weights), - 6 + len(model.test_net.trainable_weights)) + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual(len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights)) + self.assertEqual(len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights)) @test_util.run_in_graph_and_eager_modes() def test_graph_nested_in_subclass(self): @@ -512,23 +504,22 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = 
NestedTestModel2(num_classes=num_classes) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = NestedTestModel2(num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) - self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) - self.assertEqual(len(model.non_trainable_weights), - 2 + len(model.test_net.non_trainable_weights)) - self.assertEqual(len(model.trainable_weights), - 6 + len(model.test_net.trainable_weights)) + self.assertEqual(len(model.weights), 8 + len(model.test_net.weights)) + self.assertEqual(len(model.non_trainable_weights), + 2 + len(model.test_net.non_trainable_weights)) + self.assertEqual(len(model.trainable_weights), + 6 + len(model.test_net.trainable_weights)) @test_util.run_in_graph_and_eager_modes() def test_subclass_nested_in_graph(self): @@ -536,22 +527,21 @@ class ModelSubclassingTest(test.TestCase): num_samples = 100 input_dim = 50 - with self.test_session(): - model = get_nested_model_3(input_dim=input_dim, num_classes=num_classes) - model.compile(loss='mse', - optimizer=RMSPropOptimizer(learning_rate=0.001), - metrics=['acc']) + model = get_nested_model_3(input_dim=input_dim, num_classes=num_classes) + model.compile(loss='mse', + optimizer=RMSPropOptimizer(learning_rate=0.001), + metrics=['acc']) - x = np.ones((num_samples, input_dim)) - y = np.zeros((num_samples, num_classes)) + x = np.ones((num_samples, input_dim)) + y = np.zeros((num_samples, num_classes)) - model.fit(x, y, epochs=2, batch_size=32, verbose=0) - _ = model.evaluate(x, y, verbose=0) + 
model.fit(x, y, epochs=2, batch_size=32, verbose=0) + _ = model.evaluate(x, y, verbose=0) - self.assertEqual(len(model.weights), 16) - self.assertEqual( - len(model.non_trainable_weights), 4) - self.assertEqual(len(model.trainable_weights), 12) + self.assertEqual(len(model.weights), 16) + self.assertEqual( + len(model.non_trainable_weights), 4) + self.assertEqual(len(model.trainable_weights), 12) @test_util.run_in_graph_and_eager_modes() def test_support_for_manual_training_arg(self): @@ -575,14 +565,13 @@ class ModelSubclassingTest(test.TestCase): x = self.dp(inputs, training=training) return self.dense(x) - with self.test_session(): - model = DPNet() - x = np.ones((10, 10)) - y = model.predict(x) - self.assertEqual(np.sum(y), np.sum(x)) - model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) - loss = model.train_on_batch(x, y) - self.assertGreater(loss, 0.1) + model = DPNet() + x = np.ones((10, 10)) + y = model.predict(x) + self.assertEqual(np.sum(y), np.sum(x)) + model.compile(loss='mse', optimizer=RMSPropOptimizer(learning_rate=0.001)) + loss = model.train_on_batch(x, y) + self.assertGreater(loss, 0.1) if __name__ == '__main__': -- GitLab From b253460fd13dcfcf27eca610c5d397ef6ac980d2 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 13:37:29 -0500 Subject: [PATCH 349/884] Fix formatting in grappler/optimizers/BUILD --- tensorflow/core/grappler/optimizers/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 4e14f0ba40..1381bfd18b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -540,8 +540,8 @@ tf_cc_test( srcs = ["loop_optimizer_test.cc"], tags = [ "manual", - "no_oss", - ], # b/74111495 + "no_oss", # b/74111495 + ], deps = [ ":loop_optimizer", "//tensorflow/cc:cc_ops", -- GitLab From b5fa6af52198570a758d88b4bd64495353d8e7c6 Mon Sep 17 00:00:00 2001 From: 
"A. Unique TensorFlower" Date: Fri, 2 Mar 2018 11:11:15 -0800 Subject: [PATCH 350/884] Updating toolchain configs for GPU builds PiperOrigin-RevId: 187643585 --- tensorflow/tools/ci_build/Dockerfile.rbe.gpu | 26 + third_party/gpus/cuda/remote.BUILD.tpl | 26 +- third_party/toolchains/gpus/crosstool/BUILD | 5 + .../toolchains/gpus/crosstool/CROSSTOOL | 6 +- third_party/toolchains/gpus/cuda/BUILD | 2016 ++++++++--------- third_party/toolchains/gpus/py/BUILD | 171 ++ 6 files changed, 1186 insertions(+), 1064 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.gpu create mode 100644 third_party/toolchains/gpus/py/BUILD diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.gpu b/tensorflow/tools/ci_build/Dockerfile.rbe.gpu new file mode 100644 index 0000000000..24ff4765a6 --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.gpu @@ -0,0 +1,26 @@ +FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04 + +LABEL maintainer="Nick Lopez " + +# In the Ubuntu 16.04 images, cudnn is placed in system paths. Move them to +# /usr/local/cuda +RUN cp -P /usr/include/cudnn.h /usr/local/cuda/include +RUN cp -P /usr/lib/x86_64-linux-gnu/libcudnn* /usr/local/cuda/lib64 + +# Copy and run the install scripts. 
+COPY install/*.sh /install/ +ARG DEBIAN_FRONTEND=noninteractive +RUN /install/install_bootstrap_deb_packages.sh +RUN add-apt-repository -y ppa:openjdk-r/ppa && \ + add-apt-repository -y ppa:george-edison55/cmake-3.x +RUN /install/install_deb_packages.sh +RUN /install/install_pip_packages.sh +RUN /install/install_golang.sh + +# Install clang from pre-built package +RUN cd /tmp && \ + wget https://storage.googleapis.com/clang-builds-stable/clang-ubuntu16_04/clang_r323528.tar.gz && \ + echo "26752d9f5785df07193fac8316ba5d5ba3bec36d970c29a1577360848818ac74 clang_r323528.tar.gz" | sha256sum -c && \ + tar -C /usr/local -xf clang_r323528.tar.gz && \ + rm clang_r323528.tar.gz + diff --git a/third_party/gpus/cuda/remote.BUILD.tpl b/third_party/gpus/cuda/remote.BUILD.tpl index d88d512b90..f774def5e6 100644 --- a/third_party/gpus/cuda/remote.BUILD.tpl +++ b/third_party/gpus/cuda/remote.BUILD.tpl @@ -41,65 +41,65 @@ config_setting( alias( name = "cuda_headers", - actual = "%{remote_cuda_repo}cuda:cuda_headers", + actual = "%{remote_cuda_repo}/cuda:cuda_headers", ) alias( name = "cudart_static", - actual = "%{remote_cuda_repo}cuda:cudart_static", + actual = "%{remote_cuda_repo}/cuda:cudart_static", ) alias( name = "cuda_driver", - actual = "%{remote_cuda_repo}cuda:cuda_driver", + actual = "%{remote_cuda_repo}/cuda:cuda_driver", ) alias( name = "cudart", - actual = "%{remote_cuda_repo}cuda:cudart", + actual = "%{remote_cuda_repo}/cuda:cudart", ) alias( name = "cublas", - actual = "%{remote_cuda_repo}cuda:cublas", + actual = "%{remote_cuda_repo}/cuda:cublas", ) alias( name = "cusolver", - actual = "%{remote_cuda_repo}cuda:cusolver", + actual = "%{remote_cuda_repo}/cuda:cusolver", ) alias( name = "cudnn", - actual = "%{remote_cuda_repo}cuda:cudnn", + actual = "%{remote_cuda_repo}/cuda:cudnn", ) alias( name = "cufft", - actual = "%{remote_cuda_repo}cuda:cufft", + actual = "%{remote_cuda_repo}/cuda:cufft", ) alias( name = "curand", - actual = "%{remote_cuda_repo}cuda:curand", + 
actual = "%{remote_cuda_repo}/cuda:curand", ) alias( name = "cuda", - actual = "%{remote_cuda_repo}cuda:cuda", + actual = "%{remote_cuda_repo}/cuda:cuda", ) alias( name = "cupti_headers", - actual = "%{remote_cuda_repo}cuda:cupti_headers", + actual = "%{remote_cuda_repo}/cuda:cupti_headers", ) alias( name = "cupti_dsos", - actual = "%{remote_cuda_repo}cuda:cupti_dsos", + actual = "%{remote_cuda_repo}/cuda:cupti_dsos", ) alias( name = "libdevice_root", - actual = "%{remote_cuda_repo}cuda:libdevice_root", + actual = "%{remote_cuda_repo}/cuda:libdevice_root", ) diff --git a/third_party/toolchains/gpus/crosstool/BUILD b/third_party/toolchains/gpus/crosstool/BUILD index a8c6b0f029..1f9065007c 100644 --- a/third_party/toolchains/gpus/crosstool/BUILD +++ b/third_party/toolchains/gpus/crosstool/BUILD @@ -50,3 +50,8 @@ filegroup( name = "empty", srcs = [], ) + +filegroup( + name = "crosstool_wrapper_driver_is_not_gcc", + srcs = ["clang/bin/crosstool_wrapper_driver_is_not_gcc"], +) diff --git a/third_party/toolchains/gpus/crosstool/CROSSTOOL b/third_party/toolchains/gpus/crosstool/CROSSTOOL index 16ee2f82c6..d6ee7e38c4 100644 --- a/third_party/toolchains/gpus/crosstool/CROSSTOOL +++ b/third_party/toolchains/gpus/crosstool/CROSSTOOL @@ -144,8 +144,8 @@ toolchain { flag_group { # All warnings are enabled. Maybe enable -Werror as well? flag: "-Wall" - # TODO(ngiraldo): Some parts of the codebase set -Werror and hit this - # warning, so switch it off for now. + # Some parts of the codebase set -Werror and hit this warning, so + # switch it off for now. 
flag: "-Wno-invalid-partial-specialization" } } @@ -303,7 +303,7 @@ toolchain { cxx_builtin_include_directory: "/usr/include/x86_64-linux-gnu/c++/5.4.0" cxx_builtin_include_directory: "/usr/include/c++/5.4.0/backward" cxx_builtin_include_directory: "/usr/local/include" - cxx_builtin_include_directory: "/usr/local/lib/clang/6.0.0/include" + cxx_builtin_include_directory: "/usr/local/lib/clang/7.0.0/include" cxx_builtin_include_directory: "/usr/include/x86_64-linux-gnu" cxx_builtin_include_directory: "/usr/include" } diff --git a/third_party/toolchains/gpus/cuda/BUILD b/third_party/toolchains/gpus/cuda/BUILD index 39136de99c..cfc6930851 100644 --- a/third_party/toolchains/gpus/cuda/BUILD +++ b/third_party/toolchains/gpus/cuda/BUILD @@ -51,6 +51,7 @@ cc_library( includes = [ ".", "cuda/include", + "cuda/include/crt", ], visibility = ["//visibility:public"], ) @@ -84,8 +85,8 @@ cc_library( cc_library( name = "cudart", - srcs = ["cuda/lib/libcudart.so.8.0"], - data = ["cuda/lib/libcudart.so.8.0"], + srcs = ["cuda/lib/libcudart.so.9.0"], + data = ["cuda/lib/libcudart.so.9.0"], includes = [ ".", "cuda/include", @@ -96,8 +97,8 @@ cc_library( cc_library( name = "cublas", - srcs = ["cuda/lib/libcublas.so.8.0"], - data = ["cuda/lib/libcublas.so.8.0"], + srcs = ["cuda/lib/libcublas.so.9.0"], + data = ["cuda/lib/libcublas.so.9.0"], includes = [ ".", "cuda/include", @@ -108,8 +109,8 @@ cc_library( cc_library( name = "cusolver", - srcs = ["cuda/lib/libcusolver.so.8.0"], - data = ["cuda/lib/libcusolver.so.8.0"], + srcs = ["cuda/lib/libcusolver.so.9.0"], + data = ["cuda/lib/libcusolver.so.9.0"], includes = [ ".", "cuda/include", @@ -121,8 +122,8 @@ cc_library( cc_library( name = "cudnn", - srcs = ["cuda/lib/libcudnn.so.6"], - data = ["cuda/lib/libcudnn.so.6"], + srcs = ["cuda/lib/libcudnn.so.7"], + data = ["cuda/lib/libcudnn.so.7"], includes = [ ".", "cuda/include", @@ -133,8 +134,8 @@ cc_library( cc_library( name = "cufft", - srcs = ["cuda/lib/libcufft.so.8.0"], - data = 
["cuda/lib/libcufft.so.8.0"], + srcs = ["cuda/lib/libcufft.so.9.0"], + data = ["cuda/lib/libcufft.so.9.0"], includes = [ ".", "cuda/include", @@ -145,8 +146,8 @@ cc_library( cc_library( name = "curand", - srcs = ["cuda/lib/libcurand.so.8.0"], - data = ["cuda/lib/libcurand.so.8.0"], + srcs = ["cuda/lib/libcurand.so.9.0"], + data = ["cuda/lib/libcurand.so.9.0"], includes = [ ".", "cuda/include", @@ -183,7 +184,7 @@ cc_library( cc_library( name = "cupti_dsos", - data = ["cuda/lib/libcupti.so.8.0"], + data = ["cuda/lib/libcupti.so.9.0"], includes = [ ".", "cuda/include", @@ -200,1063 +201,990 @@ cc_library( genrule( name = "cuda-include", outs = [ - "cuda/include/math_functions.hpp", - "cuda/include/cufft.h", - "cuda/include/nvgraph.h", - "cuda/include/curand_normal.h", - "cuda/include/curand_uniform.h", - "cuda/include/nppi_data_exchange_and_initialization.h", - "cuda/include/cuda_gl_interop.h", - "cuda/include/nppi_compression_functions.h", - "cuda/include/npp.h", + "cuda/include/CL/cl.h", + "cuda/include/CL/cl.hpp", + "cuda/include/CL/cl_egl.h", + "cuda/include/CL/cl_ext.h", + "cuda/include/CL/cl_gl.h", + "cuda/include/CL/cl_gl_ext.h", + "cuda/include/CL/cl_platform.h", + "cuda/include/CL/opencl.h", + "cuda/include/builtin_types.h", + "cuda/include/channel_descriptor.h", + "cuda/include/common_functions.h", + "cuda/include/cooperative_groups.h", + "cuda/include/cooperative_groups_helpers.h", + "cuda/include/crt/common_functions.h", + "cuda/include/crt/device_double_functions.h", + "cuda/include/crt/device_double_functions.hpp", + "cuda/include/crt/device_functions.h", + "cuda/include/crt/device_functions.hpp", + "cuda/include/crt/func_macro.h", + "cuda/include/crt/host_config.h", + "cuda/include/crt/host_defines.h", + "cuda/include/crt/host_runtime.h", + "cuda/include/crt/math_functions.h", + "cuda/include/crt/math_functions.hpp", + "cuda/include/crt/mma.h", + "cuda/include/crt/mma.hpp", + "cuda/include/crt/nvfunctional", + "cuda/include/crt/sm_70_rt.h", + 
"cuda/include/crt/sm_70_rt.hpp", + "cuda/include/crt/storage_class.h", + "cuda/include/cuComplex.h", + "cuda/include/cublas.h", + "cuda/include/cublasXt.h", + "cuda/include/cublas_api.h", + "cuda/include/cublas_v2.h", "cuda/include/cuda.h", - "cuda/include/nppi_statistics_functions.h", - "cuda/include/vector_functions.hpp", - "cuda/include/sm_32_intrinsics.hpp", - "cuda/include/sm_32_intrinsics.h", - "cuda/include/curand_discrete.h", + "cuda/include/cudaEGL.h", + "cuda/include/cudaGL.h", + "cuda/include/cudaProfiler.h", + "cuda/include/cudaVDPAU.h", + "cuda/include/cuda_device_runtime_api.h", + "cuda/include/cuda_fp16.h", + "cuda/include/cuda_fp16.hpp", + "cuda/include/cuda_gl_interop.h", + "cuda/include/cuda_occupancy.h", + "cuda/include/cuda_profiler_api.h", "cuda/include/cuda_runtime.h", + "cuda/include/cuda_runtime_api.h", + "cuda/include/cuda_surface_types.h", + "cuda/include/cuda_texture_types.h", + "cuda/include/cuda_vdpau_interop.h", + "cuda/include/cudalibxt.h", + "cuda/include/cudnn.h", + "cuda/include/cufft.h", "cuda/include/cufftXt.h", - "cuda/include/sm_61_intrinsics.h", - "cuda/include/texture_fetch_functions.h", + "cuda/include/cufftw.h", + "cuda/include/curand.h", + "cuda/include/curand_discrete.h", + "cuda/include/curand_discrete2.h", + "cuda/include/curand_globals.h", + "cuda/include/curand_kernel.h", + "cuda/include/curand_lognormal.h", "cuda/include/curand_mrg32k3a.h", - "cuda/include/host_defines.h", - "cuda/include/common_functions.h", - "cuda/include/nppi_support_functions.h", - "cuda/include/nppi_linear_transforms.h", - "cuda/include/device_double_functions.hpp", - "cuda/include/math_constants.h", - "cuda/include/nvToolsExtSync.h", - "cuda/include/npps_initialization.h", + "cuda/include/curand_mtgp32.h", + "cuda/include/curand_mtgp32_host.h", + "cuda/include/curand_mtgp32_kernel.h", + "cuda/include/curand_mtgp32dc_p_11213.h", + "cuda/include/curand_normal.h", + "cuda/include/curand_normal_static.h", + "cuda/include/curand_philox4x32_x.h", + 
"cuda/include/curand_poisson.h", + "cuda/include/curand_precalc.h", + "cuda/include/curand_uniform.h", + "cuda/include/cusolverDn.h", + "cuda/include/cusolverRf.h", + "cuda/include/cusolverSp.h", "cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h", - "cuda/include/texture_indirect_functions.hpp", - "cuda/include/cudaProfiler.h", - "cuda/include/npps_filtering_functions.h", + "cuda/include/cusolver_common.h", + "cuda/include/cusparse.h", "cuda/include/cusparse_v2.h", - "cuda/include/nppi.h", - "cuda/include/surface_indirect_functions.h", - "cuda/include/sm_30_intrinsics.h", + "cuda/include/device_atomic_functions.h", + "cuda/include/device_atomic_functions.hpp", "cuda/include/device_double_functions.h", - "cuda/include/sm_35_intrinsics.h", - "cuda/include/cusolverSp.h", - "cuda/include/library_types.h", - "cuda/include/surface_indirect_functions.hpp", - "cuda/include/cudalibxt.h", - "cuda/include/channel_descriptor.h", + "cuda/include/device_double_functions.hpp", + "cuda/include/device_functions.h", + "cuda/include/device_functions.hpp", "cuda/include/device_functions_decls.h", - "cuda/include/curand_kernel.h", - "cuda/include/curand_mtgp32_host.h", - "cuda/include/nvToolsExtCuda.h", - "cuda/include/nvToolsExt.h", - "cuda/include/cuComplex.h", - "cuda/include/sm_32_atomic_functions.h", - "cuda/include/texture_indirect_functions.h", - "cuda/include/sm_32_atomic_functions.hpp", - "cuda/include/sm_20_intrinsics.hpp", "cuda/include/device_launch_parameters.h", - "cuda/include/curand_mtgp32.h", - "cuda/include/texture_fetch_functions.hpp", - "cuda/include/cuda_occupancy.h", - "cuda/include/CL/opencl.h", - "cuda/include/CL/cl_platform.h", - "cuda/include/CL/cl_egl.h", - "cuda/include/CL/cl_gl.h", - "cuda/include/CL/cl.h", - "cuda/include/CL/cl_gl_ext.h", - "cuda/include/CL/cl_ext.h", - "cuda/include/CL/cl.hpp", + "cuda/include/device_types.h", + "cuda/include/driver_functions.h", + "cuda/include/driver_types.h", + "cuda/include/dynlink_cuda.h", + 
"cuda/include/dynlink_cuda_cuda.h", + "cuda/include/dynlink_cuviddec.h", + "cuda/include/dynlink_nvcuvid.h", + "cuda/include/fatBinaryCtl.h", + "cuda/include/fatbinary.h", "cuda/include/host_config.h", - "cuda/include/cuda_surface_types.h", + "cuda/include/host_defines.h", + "cuda/include/library_types.h", + "cuda/include/math_constants.h", "cuda/include/math_functions.h", + "cuda/include/math_functions.hpp", + "cuda/include/math_functions_dbl_ptx3.h", + "cuda/include/math_functions_dbl_ptx3.hpp", + "cuda/include/mma.h", + "cuda/include/npp.h", + "cuda/include/nppcore.h", + "cuda/include/nppdefs.h", + "cuda/include/nppi.h", + "cuda/include/nppi_arithmetic_and_logical_operations.h", + "cuda/include/nppi_color_conversion.h", + "cuda/include/nppi_compression_functions.h", + "cuda/include/nppi_computer_vision.h", + "cuda/include/nppi_data_exchange_and_initialization.h", + "cuda/include/nppi_filtering_functions.h", + "cuda/include/nppi_geometry_transforms.h", + "cuda/include/nppi_linear_transforms.h", + "cuda/include/nppi_morphological_operations.h", + "cuda/include/nppi_statistics_functions.h", + "cuda/include/nppi_support_functions.h", + "cuda/include/nppi_threshold_and_compare_operations.h", + "cuda/include/npps.h", + "cuda/include/npps_arithmetic_and_logical_operations.h", + "cuda/include/npps_conversion_functions.h", + "cuda/include/npps_filtering_functions.h", + "cuda/include/npps_initialization.h", + "cuda/include/npps_statistics_functions.h", + "cuda/include/npps_support_functions.h", + "cuda/include/nppversion.h", + "cuda/include/nvToolsExt.h", + "cuda/include/nvToolsExtCuda.h", + "cuda/include/nvToolsExtCudaRt.h", "cuda/include/nvToolsExtMeta.h", + "cuda/include/nvToolsExtSync.h", + "cuda/include/nvblas.h", + "cuda/include/nvfunctional", + "cuda/include/nvgraph.h", + "cuda/include/nvml.h", + "cuda/include/nvrtc.h", + "cuda/include/sm_20_atomic_functions.h", "cuda/include/sm_20_atomic_functions.hpp", - "cuda/include/device_functions.h", - 
"cuda/include/device_types.h", - "cuda/include/npps_conversion_functions.h", - "cuda/include/curand_precalc.h", - "cuda/include/cusolverRf.h", + "cuda/include/sm_20_intrinsics.h", + "cuda/include/sm_20_intrinsics.hpp", + "cuda/include/sm_30_intrinsics.h", + "cuda/include/sm_30_intrinsics.hpp", + "cuda/include/sm_32_atomic_functions.h", + "cuda/include/sm_32_atomic_functions.hpp", + "cuda/include/sm_32_intrinsics.h", + "cuda/include/sm_32_intrinsics.hpp", + "cuda/include/sm_35_atomic_functions.h", + "cuda/include/sm_35_intrinsics.h", + "cuda/include/sm_60_atomic_functions.h", "cuda/include/sm_60_atomic_functions.hpp", - "cuda/include/cuviddec.h", - "cuda/include/curand_discrete2.h", - "cuda/include/device_functions.hpp", - "cuda/include/thrust/transform_scan.h", - "cuda/include/thrust/system_error.h", - "cuda/include/thrust/device_malloc.h", - "cuda/include/thrust/partition.h", - "cuda/include/thrust/unique.h", - "cuda/include/thrust/device_delete.h", - "cuda/include/thrust/execution_policy.h", + "cuda/include/sm_61_intrinsics.h", + "cuda/include/sm_61_intrinsics.hpp", + "cuda/include/sobol_direction_vectors.h", + "cuda/include/surface_functions.h", + "cuda/include/surface_functions.hpp", + "cuda/include/surface_indirect_functions.h", + "cuda/include/surface_indirect_functions.hpp", + "cuda/include/surface_types.h", + "cuda/include/texture_fetch_functions.h", + "cuda/include/texture_fetch_functions.hpp", + "cuda/include/texture_indirect_functions.h", + "cuda/include/texture_indirect_functions.hpp", + "cuda/include/texture_types.h", "cuda/include/thrust/adjacent_difference.h", - "cuda/include/thrust/sequence.h", - "cuda/include/thrust/merge.h", - "cuda/include/thrust/device_new.h", - "cuda/include/thrust/transform_reduce.h", - "cuda/include/thrust/device_vector.h", - "cuda/include/thrust/gather.h", - "cuda/include/thrust/sort.h", - "cuda/include/thrust/scan.h", - "cuda/include/thrust/detail/temporary_array.h", - "cuda/include/thrust/detail/util/align.h", - 
"cuda/include/thrust/detail/util/blocking.h", - "cuda/include/thrust/detail/transform.inl", - "cuda/include/thrust/detail/device_vector.inl", + "cuda/include/thrust/advance.h", + "cuda/include/thrust/binary_search.h", + "cuda/include/thrust/complex.h", + "cuda/include/thrust/copy.h", + "cuda/include/thrust/count.h", + "cuda/include/thrust/detail/adjacent_difference.inl", + "cuda/include/thrust/detail/advance.inl", + "cuda/include/thrust/detail/allocator/allocator_traits.h", + "cuda/include/thrust/detail/allocator/allocator_traits.inl", + "cuda/include/thrust/detail/allocator/copy_construct_range.h", + "cuda/include/thrust/detail/allocator/copy_construct_range.inl", + "cuda/include/thrust/detail/allocator/default_construct_range.h", + "cuda/include/thrust/detail/allocator/default_construct_range.inl", + "cuda/include/thrust/detail/allocator/destroy_range.h", + "cuda/include/thrust/detail/allocator/destroy_range.inl", + "cuda/include/thrust/detail/allocator/fill_construct_range.h", + "cuda/include/thrust/detail/allocator/fill_construct_range.inl", + "cuda/include/thrust/detail/allocator/malloc_allocator.h", + "cuda/include/thrust/detail/allocator/malloc_allocator.inl", + "cuda/include/thrust/detail/allocator/no_throw_allocator.h", + "cuda/include/thrust/detail/allocator/tagged_allocator.h", + "cuda/include/thrust/detail/allocator/tagged_allocator.inl", + "cuda/include/thrust/detail/allocator/temporary_allocator.h", + "cuda/include/thrust/detail/allocator/temporary_allocator.inl", "cuda/include/thrust/detail/binary_search.inl", - "cuda/include/thrust/detail/overlapped_copy.h", - "cuda/include/thrust/detail/vector_base.inl", - "cuda/include/thrust/detail/device_reference.inl", - "cuda/include/thrust/detail/functional/actor.h", - "cuda/include/thrust/detail/functional/value.h", - "cuda/include/thrust/detail/functional/operators.h", - "cuda/include/thrust/detail/functional/operators/logical_operators.h", - 
"cuda/include/thrust/detail/functional/operators/relational_operators.h", - "cuda/include/thrust/detail/functional/operators/assignment_operator.h", - "cuda/include/thrust/detail/functional/operators/bitwise_operators.h", - "cuda/include/thrust/detail/functional/operators/operator_adaptors.h", - "cuda/include/thrust/detail/functional/operators/arithmetic_operators.h", - "cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h", - "cuda/include/thrust/detail/functional/argument.h", - "cuda/include/thrust/detail/functional/placeholder.h", - "cuda/include/thrust/detail/functional/actor.inl", - "cuda/include/thrust/detail/functional/composite.h", - "cuda/include/thrust/detail/static_map.h", - "cuda/include/thrust/detail/type_traits/has_nested_type.h", - "cuda/include/thrust/detail/type_traits/is_call_possible.h", - "cuda/include/thrust/detail/type_traits/function_traits.h", - "cuda/include/thrust/detail/type_traits/pointer_traits.h", - "cuda/include/thrust/detail/type_traits/has_member_function.h", - "cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h", - "cuda/include/thrust/detail/type_traits/minimum_type.h", - "cuda/include/thrust/detail/type_traits/has_trivial_assign.h", - "cuda/include/thrust/detail/type_traits/is_metafunction_defined.h", - "cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h", - "cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h", - "cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h", - "cuda/include/thrust/detail/reference.h", - "cuda/include/thrust/detail/inner_product.inl", - "cuda/include/thrust/detail/use_default.h", - "cuda/include/thrust/detail/sequence.inl", - "cuda/include/thrust/detail/sort.inl", - "cuda/include/thrust/detail/equal.inl", - "cuda/include/thrust/detail/execution_policy.h", - "cuda/include/thrust/detail/integer_traits.h", - "cuda/include/thrust/detail/type_traits.h", - 
"cuda/include/thrust/detail/reverse.inl", - "cuda/include/thrust/detail/tabulate.inl", - "cuda/include/thrust/detail/unique.inl", - "cuda/include/thrust/detail/scatter.inl", - "cuda/include/thrust/detail/set_operations.inl", - "cuda/include/thrust/detail/device_malloc.inl", - "cuda/include/thrust/detail/copy_if.inl", - "cuda/include/thrust/detail/fill.inl", - "cuda/include/thrust/detail/temporary_array.inl", - "cuda/include/thrust/detail/transform_scan.inl", - "cuda/include/thrust/detail/minmax.h", - "cuda/include/thrust/detail/swap.inl", - "cuda/include/thrust/detail/pointer.inl", - "cuda/include/thrust/detail/transform_reduce.inl", - "cuda/include/thrust/detail/config.h", - "cuda/include/thrust/detail/distance.inl", - "cuda/include/thrust/detail/pair.inl", - "cuda/include/thrust/detail/allocator/temporary_allocator.h", - "cuda/include/thrust/detail/allocator/tagged_allocator.h", - "cuda/include/thrust/detail/allocator/destroy_range.inl", - "cuda/include/thrust/detail/allocator/destroy_range.h", - "cuda/include/thrust/detail/allocator/no_throw_allocator.h", - "cuda/include/thrust/detail/allocator/default_construct_range.inl", - "cuda/include/thrust/detail/allocator/fill_construct_range.inl", - "cuda/include/thrust/detail/allocator/tagged_allocator.inl", - "cuda/include/thrust/detail/allocator/malloc_allocator.h", - "cuda/include/thrust/detail/allocator/allocator_traits.h", - "cuda/include/thrust/detail/allocator/copy_construct_range.h", - "cuda/include/thrust/detail/allocator/allocator_traits.inl", - "cuda/include/thrust/detail/allocator/default_construct_range.h", - "cuda/include/thrust/detail/allocator/copy_construct_range.inl", - "cuda/include/thrust/detail/allocator/malloc_allocator.inl", - "cuda/include/thrust/detail/allocator/temporary_allocator.inl", - "cuda/include/thrust/detail/allocator/fill_construct_range.h", - "cuda/include/thrust/detail/temporary_buffer.h", - "cuda/include/thrust/detail/reduce.inl", - "cuda/include/thrust/detail/device_new.inl", - 
"cuda/include/thrust/detail/pointer.h", - "cuda/include/thrust/detail/for_each.inl", - "cuda/include/thrust/detail/generate.inl", - "cuda/include/thrust/detail/dispatch/is_trivial_copy.h", - "cuda/include/thrust/detail/adjacent_difference.inl", - "cuda/include/thrust/detail/tuple_meta_transform.h", - "cuda/include/thrust/detail/functional.inl", - "cuda/include/thrust/detail/remove.inl", - "cuda/include/thrust/detail/tuple_transform.h", - "cuda/include/thrust/detail/merge.inl", - "cuda/include/thrust/detail/extrema.inl", - "cuda/include/thrust/detail/trivial_sequence.h", - "cuda/include/thrust/detail/vector_base.h", - "cuda/include/thrust/detail/count.inl", - "cuda/include/thrust/detail/uninitialized_copy.inl", - "cuda/include/thrust/detail/function.h", - "cuda/include/thrust/detail/swap_ranges.inl", - "cuda/include/thrust/detail/device_delete.inl", - "cuda/include/thrust/detail/static_assert.h", - "cuda/include/thrust/detail/logical.inl", - "cuda/include/thrust/detail/seq.h", - "cuda/include/thrust/detail/mpl/math.h", - "cuda/include/thrust/detail/mismatch.inl", - "cuda/include/thrust/detail/internal_functional.h", - "cuda/include/thrust/detail/get_iterator_value.h", - "cuda/include/thrust/detail/copy.inl", - "cuda/include/thrust/detail/copy.h", + "cuda/include/thrust/detail/complex/arithmetic.h", + "cuda/include/thrust/detail/complex/c99math.h", + "cuda/include/thrust/detail/complex/catrig.h", "cuda/include/thrust/detail/complex/catrigf.h", - "cuda/include/thrust/detail/complex/cpowf.h", - "cuda/include/thrust/detail/complex/csqrtf.h", + "cuda/include/thrust/detail/complex/ccosh.h", "cuda/include/thrust/detail/complex/ccoshf.h", - "cuda/include/thrust/detail/complex/csinhf.h", + "cuda/include/thrust/detail/complex/cexp.h", + "cuda/include/thrust/detail/complex/cexpf.h", + "cuda/include/thrust/detail/complex/clog.h", "cuda/include/thrust/detail/complex/clogf.h", - "cuda/include/thrust/detail/complex/ccosh.h", - "cuda/include/thrust/detail/complex/arithmetic.h", - 
"cuda/include/thrust/detail/complex/csqrt.h", - "cuda/include/thrust/detail/complex/cpow.h", "cuda/include/thrust/detail/complex/complex.inl", - "cuda/include/thrust/detail/complex/math_private.h", - "cuda/include/thrust/detail/complex/c99math.h", + "cuda/include/thrust/detail/complex/cpow.h", + "cuda/include/thrust/detail/complex/cpowf.h", "cuda/include/thrust/detail/complex/cproj.h", - "cuda/include/thrust/detail/complex/catrig.h", - "cuda/include/thrust/detail/complex/ctanhf.h", - "cuda/include/thrust/detail/complex/cexpf.h", "cuda/include/thrust/detail/complex/csinh.h", - "cuda/include/thrust/detail/complex/stream.h", + "cuda/include/thrust/detail/complex/csinhf.h", + "cuda/include/thrust/detail/complex/csqrt.h", + "cuda/include/thrust/detail/complex/csqrtf.h", "cuda/include/thrust/detail/complex/ctanh.h", - "cuda/include/thrust/detail/complex/cexp.h", - "cuda/include/thrust/detail/complex/clog.h", - "cuda/include/thrust/detail/range/head_flags.h", - "cuda/include/thrust/detail/range/tail_flags.h", - "cuda/include/thrust/detail/execute_with_allocator.h", - "cuda/include/thrust/detail/integer_math.h", - "cuda/include/thrust/detail/swap.h", - "cuda/include/thrust/detail/uninitialized_fill.inl", - "cuda/include/thrust/detail/scan.inl", - "cuda/include/thrust/detail/gather.inl", - "cuda/include/thrust/detail/reference_forward_declaration.h", - "cuda/include/thrust/detail/numeric_traits.h", - "cuda/include/thrust/detail/reference.inl", - "cuda/include/thrust/detail/cstdint.h", - "cuda/include/thrust/detail/device_free.inl", - "cuda/include/thrust/detail/copy_if.h", - "cuda/include/thrust/detail/partition.inl", - "cuda/include/thrust/detail/find.inl", - "cuda/include/thrust/detail/config/forceinline.h", - "cuda/include/thrust/detail/config/debug.h", - "cuda/include/thrust/detail/config/config.h", - "cuda/include/thrust/detail/config/host_device.h", - "cuda/include/thrust/detail/config/host_system.h", + "cuda/include/thrust/detail/complex/ctanhf.h", + 
"cuda/include/thrust/detail/complex/math_private.h", + "cuda/include/thrust/detail/complex/stream.h", + "cuda/include/thrust/detail/config.h", "cuda/include/thrust/detail/config/compiler.h", - "cuda/include/thrust/detail/config/device_system.h", "cuda/include/thrust/detail/config/compiler_fence.h", + "cuda/include/thrust/detail/config/config.h", + "cuda/include/thrust/detail/config/debug.h", + "cuda/include/thrust/detail/config/device_system.h", "cuda/include/thrust/detail/config/exec_check_disable.h", - "cuda/include/thrust/detail/config/simple_defines.h", + "cuda/include/thrust/detail/config/forceinline.h", "cuda/include/thrust/detail/config/global_workarounds.h", - "cuda/include/thrust/detail/replace.inl", + "cuda/include/thrust/detail/config/host_device.h", + "cuda/include/thrust/detail/config/host_system.h", + "cuda/include/thrust/detail/config/simple_defines.h", + "cuda/include/thrust/detail/contiguous_storage.h", + "cuda/include/thrust/detail/contiguous_storage.inl", + "cuda/include/thrust/detail/copy.h", + "cuda/include/thrust/detail/copy.inl", + "cuda/include/thrust/detail/copy_if.h", + "cuda/include/thrust/detail/copy_if.inl", + "cuda/include/thrust/detail/count.inl", + "cuda/include/thrust/detail/cstdint.h", + "cuda/include/thrust/detail/device_delete.inl", + "cuda/include/thrust/detail/device_free.inl", + "cuda/include/thrust/detail/device_malloc.inl", + "cuda/include/thrust/detail/device_new.inl", "cuda/include/thrust/detail/device_ptr.inl", - "cuda/include/thrust/detail/tuple.inl", - "cuda/include/thrust/detail/malloc_and_free.h", + "cuda/include/thrust/detail/device_reference.inl", + "cuda/include/thrust/detail/device_vector.inl", + "cuda/include/thrust/detail/dispatch/is_trivial_copy.h", + "cuda/include/thrust/detail/distance.inl", + "cuda/include/thrust/detail/equal.inl", + "cuda/include/thrust/detail/execute_with_allocator.h", + "cuda/include/thrust/detail/execution_policy.h", + "cuda/include/thrust/detail/extrema.inl", + 
"cuda/include/thrust/detail/fill.inl", + "cuda/include/thrust/detail/find.inl", + "cuda/include/thrust/detail/for_each.inl", + "cuda/include/thrust/detail/function.h", + "cuda/include/thrust/detail/functional.inl", + "cuda/include/thrust/detail/functional/actor.h", + "cuda/include/thrust/detail/functional/actor.inl", + "cuda/include/thrust/detail/functional/argument.h", + "cuda/include/thrust/detail/functional/composite.h", + "cuda/include/thrust/detail/functional/operators.h", + "cuda/include/thrust/detail/functional/operators/arithmetic_operators.h", + "cuda/include/thrust/detail/functional/operators/assignment_operator.h", + "cuda/include/thrust/detail/functional/operators/bitwise_operators.h", + "cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h", + "cuda/include/thrust/detail/functional/operators/logical_operators.h", + "cuda/include/thrust/detail/functional/operators/operator_adaptors.h", + "cuda/include/thrust/detail/functional/operators/relational_operators.h", + "cuda/include/thrust/detail/functional/placeholder.h", + "cuda/include/thrust/detail/functional/value.h", + "cuda/include/thrust/detail/gather.inl", + "cuda/include/thrust/detail/generate.inl", + "cuda/include/thrust/detail/get_iterator_value.h", "cuda/include/thrust/detail/host_vector.inl", + "cuda/include/thrust/detail/inner_product.inl", + "cuda/include/thrust/detail/integer_math.h", + "cuda/include/thrust/detail/integer_traits.h", + "cuda/include/thrust/detail/internal_functional.h", + "cuda/include/thrust/detail/logical.inl", + "cuda/include/thrust/detail/malloc_and_free.h", + "cuda/include/thrust/detail/merge.inl", + "cuda/include/thrust/detail/minmax.h", + "cuda/include/thrust/detail/mismatch.inl", + "cuda/include/thrust/detail/mpl/math.h", + "cuda/include/thrust/detail/numeric_traits.h", + "cuda/include/thrust/detail/overlapped_copy.h", + "cuda/include/thrust/detail/pair.inl", + "cuda/include/thrust/detail/partition.inl", + 
"cuda/include/thrust/detail/pointer.h", + "cuda/include/thrust/detail/pointer.inl", + "cuda/include/thrust/detail/range/head_flags.h", + "cuda/include/thrust/detail/range/tail_flags.h", "cuda/include/thrust/detail/raw_pointer_cast.h", - "cuda/include/thrust/detail/advance.inl", - "cuda/include/thrust/detail/contiguous_storage.h", "cuda/include/thrust/detail/raw_reference_cast.h", - "cuda/include/thrust/detail/contiguous_storage.inl", - "cuda/include/thrust/reverse.h", - "cuda/include/thrust/device_malloc_allocator.h", - "cuda/include/thrust/scatter.h", - "cuda/include/thrust/pair.h", - "cuda/include/thrust/advance.h", - "cuda/include/thrust/find.h", - "cuda/include/thrust/device_ptr.h", - "cuda/include/thrust/generate.h", - "cuda/include/thrust/uninitialized_fill.h", - "cuda/include/thrust/system/system_error.h", - "cuda/include/thrust/system/detail/bad_alloc.h", - "cuda/include/thrust/system/detail/adl/transform_scan.h", - "cuda/include/thrust/system/detail/adl/unique_by_key.h", - "cuda/include/thrust/system/detail/adl/partition.h", - "cuda/include/thrust/system/detail/adl/unique.h", - "cuda/include/thrust/system/detail/adl/adjacent_difference.h", - "cuda/include/thrust/system/detail/adl/sequence.h", - "cuda/include/thrust/system/detail/adl/merge.h", - "cuda/include/thrust/system/detail/adl/transform_reduce.h", - "cuda/include/thrust/system/detail/adl/gather.h", - "cuda/include/thrust/system/detail/adl/sort.h", - "cuda/include/thrust/system/detail/adl/scan.h", - "cuda/include/thrust/system/detail/adl/temporary_buffer.h", - "cuda/include/thrust/system/detail/adl/scan_by_key.h", - "cuda/include/thrust/system/detail/adl/reverse.h", - "cuda/include/thrust/system/detail/adl/assign_value.h", - "cuda/include/thrust/system/detail/adl/scatter.h", - "cuda/include/thrust/system/detail/adl/find.h", - "cuda/include/thrust/system/detail/adl/generate.h", - "cuda/include/thrust/system/detail/adl/uninitialized_fill.h", - "cuda/include/thrust/system/detail/adl/remove.h", - 
"cuda/include/thrust/system/detail/adl/tabulate.h", - "cuda/include/thrust/system/detail/adl/for_each.h", - "cuda/include/thrust/system/detail/adl/reduce_by_key.h", - "cuda/include/thrust/system/detail/adl/reduce.h", - "cuda/include/thrust/system/detail/adl/equal.h", - "cuda/include/thrust/system/detail/adl/copy.h", - "cuda/include/thrust/system/detail/adl/swap_ranges.h", - "cuda/include/thrust/system/detail/adl/uninitialized_copy.h", - "cuda/include/thrust/system/detail/adl/binary_search.h", - "cuda/include/thrust/system/detail/adl/set_operations.h", - "cuda/include/thrust/system/detail/adl/mismatch.h", - "cuda/include/thrust/system/detail/adl/extrema.h", - "cuda/include/thrust/system/detail/adl/count.h", - "cuda/include/thrust/system/detail/adl/replace.h", + "cuda/include/thrust/detail/reduce.inl", + "cuda/include/thrust/detail/reference.h", + "cuda/include/thrust/detail/reference.inl", + "cuda/include/thrust/detail/reference_forward_declaration.h", + "cuda/include/thrust/detail/remove.inl", + "cuda/include/thrust/detail/replace.inl", + "cuda/include/thrust/detail/reverse.inl", + "cuda/include/thrust/detail/scan.inl", + "cuda/include/thrust/detail/scatter.inl", + "cuda/include/thrust/detail/seq.h", + "cuda/include/thrust/detail/sequence.inl", + "cuda/include/thrust/detail/set_operations.inl", + "cuda/include/thrust/detail/sort.inl", + "cuda/include/thrust/detail/static_assert.h", + "cuda/include/thrust/detail/static_map.h", + "cuda/include/thrust/detail/swap.h", + "cuda/include/thrust/detail/swap.inl", + "cuda/include/thrust/detail/swap_ranges.inl", + "cuda/include/thrust/detail/tabulate.inl", + "cuda/include/thrust/detail/temporary_array.h", + "cuda/include/thrust/detail/temporary_array.inl", + "cuda/include/thrust/detail/temporary_buffer.h", + "cuda/include/thrust/detail/transform.inl", + "cuda/include/thrust/detail/transform_reduce.inl", + "cuda/include/thrust/detail/transform_scan.inl", + "cuda/include/thrust/detail/trivial_sequence.h", + 
"cuda/include/thrust/detail/tuple.inl", + "cuda/include/thrust/detail/tuple_meta_transform.h", + "cuda/include/thrust/detail/tuple_transform.h", + "cuda/include/thrust/detail/type_traits.h", + "cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h", + "cuda/include/thrust/detail/type_traits/function_traits.h", + "cuda/include/thrust/detail/type_traits/has_member_function.h", + "cuda/include/thrust/detail/type_traits/has_nested_type.h", + "cuda/include/thrust/detail/type_traits/has_trivial_assign.h", + "cuda/include/thrust/detail/type_traits/is_call_possible.h", + "cuda/include/thrust/detail/type_traits/is_metafunction_defined.h", + "cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h", + "cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h", + "cuda/include/thrust/detail/type_traits/minimum_type.h", + "cuda/include/thrust/detail/type_traits/pointer_traits.h", + "cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h", + "cuda/include/thrust/detail/uninitialized_copy.inl", + "cuda/include/thrust/detail/uninitialized_fill.inl", + "cuda/include/thrust/detail/unique.inl", + "cuda/include/thrust/detail/use_default.h", + "cuda/include/thrust/detail/util/align.h", + "cuda/include/thrust/detail/util/blocking.h", + "cuda/include/thrust/detail/vector_base.h", + "cuda/include/thrust/detail/vector_base.inl", + "cuda/include/thrust/device_allocator.h", + "cuda/include/thrust/device_delete.h", + "cuda/include/thrust/device_free.h", + "cuda/include/thrust/device_malloc.h", + "cuda/include/thrust/device_malloc_allocator.h", + "cuda/include/thrust/device_new.h", + "cuda/include/thrust/device_new_allocator.h", + "cuda/include/thrust/device_ptr.h", + "cuda/include/thrust/device_reference.h", + "cuda/include/thrust/device_vector.h", + "cuda/include/thrust/distance.h", + "cuda/include/thrust/equal.h", + "cuda/include/thrust/execution_policy.h", + "cuda/include/thrust/extrema.h", + 
"cuda/include/thrust/fill.h", + "cuda/include/thrust/find.h", + "cuda/include/thrust/for_each.h", + "cuda/include/thrust/functional.h", + "cuda/include/thrust/gather.h", + "cuda/include/thrust/generate.h", + "cuda/include/thrust/host_vector.h", + "cuda/include/thrust/inner_product.h", + "cuda/include/thrust/iterator/constant_iterator.h", + "cuda/include/thrust/iterator/counting_iterator.h", + "cuda/include/thrust/iterator/detail/any_assign.h", + "cuda/include/thrust/iterator/detail/any_system_tag.h", + "cuda/include/thrust/iterator/detail/constant_iterator_base.h", + "cuda/include/thrust/iterator/detail/counting_iterator.inl", + "cuda/include/thrust/iterator/detail/device_system_tag.h", + "cuda/include/thrust/iterator/detail/discard_iterator_base.h", + "cuda/include/thrust/iterator/detail/distance_from_result.h", + "cuda/include/thrust/iterator/detail/host_system_tag.h", + "cuda/include/thrust/iterator/detail/is_iterator_category.h", + "cuda/include/thrust/iterator/detail/is_trivial_iterator.h", + "cuda/include/thrust/iterator/detail/iterator_adaptor_base.h", + "cuda/include/thrust/iterator/detail/iterator_category_to_system.h", + "cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h", + "cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h", + "cuda/include/thrust/iterator/detail/iterator_facade_category.h", + "cuda/include/thrust/iterator/detail/iterator_traits.inl", + "cuda/include/thrust/iterator/detail/iterator_traversal_tags.h", + "cuda/include/thrust/iterator/detail/join_iterator.h", + "cuda/include/thrust/iterator/detail/minimum_category.h", + "cuda/include/thrust/iterator/detail/minimum_system.h", + "cuda/include/thrust/iterator/detail/normal_iterator.h", + "cuda/include/thrust/iterator/detail/permutation_iterator_base.h", + "cuda/include/thrust/iterator/detail/retag.h", + "cuda/include/thrust/iterator/detail/reverse_iterator.inl", + "cuda/include/thrust/iterator/detail/reverse_iterator_base.h", + 
"cuda/include/thrust/iterator/detail/tagged_iterator.h", + "cuda/include/thrust/iterator/detail/transform_iterator.inl", + "cuda/include/thrust/iterator/detail/transform_output_iterator.inl", + "cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h", + "cuda/include/thrust/iterator/detail/universal_categories.h", + "cuda/include/thrust/iterator/detail/zip_iterator.inl", + "cuda/include/thrust/iterator/detail/zip_iterator_base.h", + "cuda/include/thrust/iterator/discard_iterator.h", + "cuda/include/thrust/iterator/iterator_adaptor.h", + "cuda/include/thrust/iterator/iterator_categories.h", + "cuda/include/thrust/iterator/iterator_facade.h", + "cuda/include/thrust/iterator/iterator_traits.h", + "cuda/include/thrust/iterator/permutation_iterator.h", + "cuda/include/thrust/iterator/retag.h", + "cuda/include/thrust/iterator/reverse_iterator.h", + "cuda/include/thrust/iterator/transform_iterator.h", + "cuda/include/thrust/iterator/transform_output_iterator.h", + "cuda/include/thrust/iterator/zip_iterator.h", + "cuda/include/thrust/logical.h", + "cuda/include/thrust/memory.h", + "cuda/include/thrust/merge.h", + "cuda/include/thrust/mismatch.h", + "cuda/include/thrust/pair.h", + "cuda/include/thrust/partition.h", + "cuda/include/thrust/random.h", + "cuda/include/thrust/random/detail/discard_block_engine.inl", + "cuda/include/thrust/random/detail/linear_congruential_engine.inl", + "cuda/include/thrust/random/detail/linear_congruential_engine_discard.h", + "cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl", + "cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h", + "cuda/include/thrust/random/detail/mod.h", + "cuda/include/thrust/random/detail/normal_distribution.inl", + "cuda/include/thrust/random/detail/normal_distribution_base.h", + "cuda/include/thrust/random/detail/random_core_access.h", + "cuda/include/thrust/random/detail/subtract_with_carry_engine.inl", + 
"cuda/include/thrust/random/detail/uniform_int_distribution.inl", + "cuda/include/thrust/random/detail/uniform_real_distribution.inl", + "cuda/include/thrust/random/detail/xor_combine_engine.inl", + "cuda/include/thrust/random/detail/xor_combine_engine_max.h", + "cuda/include/thrust/random/discard_block_engine.h", + "cuda/include/thrust/random/linear_congruential_engine.h", + "cuda/include/thrust/random/linear_feedback_shift_engine.h", + "cuda/include/thrust/random/normal_distribution.h", + "cuda/include/thrust/random/subtract_with_carry_engine.h", + "cuda/include/thrust/random/uniform_int_distribution.h", + "cuda/include/thrust/random/uniform_real_distribution.h", + "cuda/include/thrust/random/xor_combine_engine.h", + "cuda/include/thrust/reduce.h", + "cuda/include/thrust/remove.h", + "cuda/include/thrust/replace.h", + "cuda/include/thrust/reverse.h", + "cuda/include/thrust/scan.h", + "cuda/include/thrust/scatter.h", + "cuda/include/thrust/sequence.h", + "cuda/include/thrust/set_operations.h", + "cuda/include/thrust/sort.h", + "cuda/include/thrust/swap.h", + "cuda/include/thrust/system/cpp/detail/adjacent_difference.h", + "cuda/include/thrust/system/cpp/detail/assign_value.h", + "cuda/include/thrust/system/cpp/detail/binary_search.h", + "cuda/include/thrust/system/cpp/detail/copy.h", + "cuda/include/thrust/system/cpp/detail/copy_if.h", + "cuda/include/thrust/system/cpp/detail/count.h", + "cuda/include/thrust/system/cpp/detail/equal.h", + "cuda/include/thrust/system/cpp/detail/execution_policy.h", + "cuda/include/thrust/system/cpp/detail/extrema.h", + "cuda/include/thrust/system/cpp/detail/fill.h", + "cuda/include/thrust/system/cpp/detail/find.h", + "cuda/include/thrust/system/cpp/detail/for_each.h", + "cuda/include/thrust/system/cpp/detail/gather.h", + "cuda/include/thrust/system/cpp/detail/generate.h", + "cuda/include/thrust/system/cpp/detail/get_value.h", + "cuda/include/thrust/system/cpp/detail/inner_product.h", + 
"cuda/include/thrust/system/cpp/detail/iter_swap.h", + "cuda/include/thrust/system/cpp/detail/logical.h", + "cuda/include/thrust/system/cpp/detail/malloc_and_free.h", + "cuda/include/thrust/system/cpp/detail/memory.inl", + "cuda/include/thrust/system/cpp/detail/merge.h", + "cuda/include/thrust/system/cpp/detail/mismatch.h", + "cuda/include/thrust/system/cpp/detail/par.h", + "cuda/include/thrust/system/cpp/detail/partition.h", + "cuda/include/thrust/system/cpp/detail/reduce.h", + "cuda/include/thrust/system/cpp/detail/reduce_by_key.h", + "cuda/include/thrust/system/cpp/detail/remove.h", + "cuda/include/thrust/system/cpp/detail/replace.h", + "cuda/include/thrust/system/cpp/detail/reverse.h", + "cuda/include/thrust/system/cpp/detail/scan.h", + "cuda/include/thrust/system/cpp/detail/scan_by_key.h", + "cuda/include/thrust/system/cpp/detail/scatter.h", + "cuda/include/thrust/system/cpp/detail/sequence.h", + "cuda/include/thrust/system/cpp/detail/set_operations.h", + "cuda/include/thrust/system/cpp/detail/sort.h", + "cuda/include/thrust/system/cpp/detail/swap_ranges.h", + "cuda/include/thrust/system/cpp/detail/tabulate.h", + "cuda/include/thrust/system/cpp/detail/temporary_buffer.h", + "cuda/include/thrust/system/cpp/detail/transform.h", + "cuda/include/thrust/system/cpp/detail/transform_reduce.h", + "cuda/include/thrust/system/cpp/detail/transform_scan.h", + "cuda/include/thrust/system/cpp/detail/uninitialized_copy.h", + "cuda/include/thrust/system/cpp/detail/uninitialized_fill.h", + "cuda/include/thrust/system/cpp/detail/unique.h", + "cuda/include/thrust/system/cpp/detail/unique_by_key.h", + "cuda/include/thrust/system/cpp/detail/vector.inl", + "cuda/include/thrust/system/cpp/execution_policy.h", + "cuda/include/thrust/system/cpp/memory.h", + "cuda/include/thrust/system/cpp/vector.h", + "cuda/include/thrust/system/cuda/config.h", + "cuda/include/thrust/system/cuda/detail/adjacent_difference.h", + "cuda/include/thrust/system/cuda/detail/assign_value.h", + 
"cuda/include/thrust/system/cuda/detail/binary_search.h", + "cuda/include/thrust/system/cuda/detail/copy.h", + "cuda/include/thrust/system/cuda/detail/copy_if.h", + "cuda/include/thrust/system/cuda/detail/core/agent_launcher.h", + "cuda/include/thrust/system/cuda/detail/core/alignment.h", + "cuda/include/thrust/system/cuda/detail/core/triple_chevron_launch.h", + "cuda/include/thrust/system/cuda/detail/core/util.h", + "cuda/include/thrust/system/cuda/detail/count.h", + "cuda/include/thrust/system/cuda/detail/cross_system.h", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh", + "cuda/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh", + 
"cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh", + "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh", + "cuda/include/thrust/system/cuda/detail/cub/cub.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh", + 
"cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh", + "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh", + "cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh", + "cuda/include/thrust/system/cuda/detail/cub/host/mutex.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh", + 
"cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh", + "cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_device.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh", + "cuda/include/thrust/system/cuda/detail/cub/util_type.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh", + "cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh", + "cuda/include/thrust/system/cuda/detail/equal.h", + "cuda/include/thrust/system/cuda/detail/error.inl", + "cuda/include/thrust/system/cuda/detail/execution_policy.h", + "cuda/include/thrust/system/cuda/detail/extrema.h", + "cuda/include/thrust/system/cuda/detail/fill.h", + "cuda/include/thrust/system/cuda/detail/find.h", + "cuda/include/thrust/system/cuda/detail/for_each.h", + 
"cuda/include/thrust/system/cuda/detail/gather.h", + "cuda/include/thrust/system/cuda/detail/generate.h", + "cuda/include/thrust/system/cuda/detail/get_value.h", + "cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h", + "cuda/include/thrust/system/cuda/detail/guarded_driver_types.h", + "cuda/include/thrust/system/cuda/detail/inner_product.h", + "cuda/include/thrust/system/cuda/detail/internal/copy_cross_system.h", + "cuda/include/thrust/system/cuda/detail/internal/copy_device_to_device.h", + "cuda/include/thrust/system/cuda/detail/iter_swap.h", + "cuda/include/thrust/system/cuda/detail/logical.h", + "cuda/include/thrust/system/cuda/detail/malloc_and_free.h", + "cuda/include/thrust/system/cuda/detail/memory.inl", + "cuda/include/thrust/system/cuda/detail/memory_buffer.h", + "cuda/include/thrust/system/cuda/detail/merge.h", + "cuda/include/thrust/system/cuda/detail/mismatch.h", + "cuda/include/thrust/system/cuda/detail/par.h", + "cuda/include/thrust/system/cuda/detail/par_to_seq.h", + "cuda/include/thrust/system/cuda/detail/parallel_for.h", + "cuda/include/thrust/system/cuda/detail/partition.h", + "cuda/include/thrust/system/cuda/detail/reduce.h", + "cuda/include/thrust/system/cuda/detail/reduce_by_key.h", + "cuda/include/thrust/system/cuda/detail/remove.h", + "cuda/include/thrust/system/cuda/detail/replace.h", + "cuda/include/thrust/system/cuda/detail/reverse.h", + "cuda/include/thrust/system/cuda/detail/scan.h", + "cuda/include/thrust/system/cuda/detail/scan_by_key.h", + "cuda/include/thrust/system/cuda/detail/scatter.h", + "cuda/include/thrust/system/cuda/detail/sequence.h", + "cuda/include/thrust/system/cuda/detail/set_operations.h", + "cuda/include/thrust/system/cuda/detail/sort.h", + "cuda/include/thrust/system/cuda/detail/swap_ranges.h", + "cuda/include/thrust/system/cuda/detail/tabulate.h", + "cuda/include/thrust/system/cuda/detail/temporary_buffer.h", + "cuda/include/thrust/system/cuda/detail/terminate.h", + 
"cuda/include/thrust/system/cuda/detail/transform.h", + "cuda/include/thrust/system/cuda/detail/transform_reduce.h", + "cuda/include/thrust/system/cuda/detail/transform_scan.h", + "cuda/include/thrust/system/cuda/detail/uninitialized_copy.h", + "cuda/include/thrust/system/cuda/detail/uninitialized_fill.h", + "cuda/include/thrust/system/cuda/detail/unique.h", + "cuda/include/thrust/system/cuda/detail/unique_by_key.h", + "cuda/include/thrust/system/cuda/detail/util.h", + "cuda/include/thrust/system/cuda/detail/vector.inl", + "cuda/include/thrust/system/cuda/error.h", + "cuda/include/thrust/system/cuda/execution_policy.h", + "cuda/include/thrust/system/cuda/experimental/pinned_allocator.h", + "cuda/include/thrust/system/cuda/memory.h", + "cuda/include/thrust/system/cuda/vector.h", + "cuda/include/thrust/system/detail/adl/adjacent_difference.h", + "cuda/include/thrust/system/detail/adl/assign_value.h", + "cuda/include/thrust/system/detail/adl/binary_search.h", + "cuda/include/thrust/system/detail/adl/copy.h", + "cuda/include/thrust/system/detail/adl/copy_if.h", + "cuda/include/thrust/system/detail/adl/count.h", + "cuda/include/thrust/system/detail/adl/equal.h", + "cuda/include/thrust/system/detail/adl/extrema.h", + "cuda/include/thrust/system/detail/adl/fill.h", + "cuda/include/thrust/system/detail/adl/find.h", + "cuda/include/thrust/system/detail/adl/for_each.h", + "cuda/include/thrust/system/detail/adl/gather.h", + "cuda/include/thrust/system/detail/adl/generate.h", "cuda/include/thrust/system/detail/adl/get_value.h", "cuda/include/thrust/system/detail/adl/inner_product.h", - "cuda/include/thrust/system/detail/adl/copy_if.h", - "cuda/include/thrust/system/detail/adl/logical.h", "cuda/include/thrust/system/detail/adl/iter_swap.h", + "cuda/include/thrust/system/detail/adl/logical.h", "cuda/include/thrust/system/detail/adl/malloc_and_free.h", - "cuda/include/thrust/system/detail/adl/fill.h", + "cuda/include/thrust/system/detail/adl/merge.h", + 
"cuda/include/thrust/system/detail/adl/mismatch.h", + "cuda/include/thrust/system/detail/adl/partition.h", + "cuda/include/thrust/system/detail/adl/reduce.h", + "cuda/include/thrust/system/detail/adl/reduce_by_key.h", + "cuda/include/thrust/system/detail/adl/remove.h", + "cuda/include/thrust/system/detail/adl/replace.h", + "cuda/include/thrust/system/detail/adl/reverse.h", + "cuda/include/thrust/system/detail/adl/scan.h", + "cuda/include/thrust/system/detail/adl/scan_by_key.h", + "cuda/include/thrust/system/detail/adl/scatter.h", + "cuda/include/thrust/system/detail/adl/sequence.h", + "cuda/include/thrust/system/detail/adl/set_operations.h", + "cuda/include/thrust/system/detail/adl/sort.h", + "cuda/include/thrust/system/detail/adl/swap_ranges.h", + "cuda/include/thrust/system/detail/adl/tabulate.h", + "cuda/include/thrust/system/detail/adl/temporary_buffer.h", "cuda/include/thrust/system/detail/adl/transform.h", + "cuda/include/thrust/system/detail/adl/transform_reduce.h", + "cuda/include/thrust/system/detail/adl/transform_scan.h", + "cuda/include/thrust/system/detail/adl/uninitialized_copy.h", + "cuda/include/thrust/system/detail/adl/uninitialized_fill.h", + "cuda/include/thrust/system/detail/adl/unique.h", + "cuda/include/thrust/system/detail/adl/unique_by_key.h", + "cuda/include/thrust/system/detail/bad_alloc.h", "cuda/include/thrust/system/detail/errno.h", "cuda/include/thrust/system/detail/error_category.inl", - "cuda/include/thrust/system/detail/sequential/transform_scan.h", - "cuda/include/thrust/system/detail/sequential/unique_by_key.h", - "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h", - "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl", - "cuda/include/thrust/system/detail/sequential/stable_merge_sort.h", - "cuda/include/thrust/system/detail/sequential/sort.inl", - "cuda/include/thrust/system/detail/sequential/partition.h", - "cuda/include/thrust/system/detail/sequential/unique.h", - 
"cuda/include/thrust/system/detail/sequential/execution_policy.h", - "cuda/include/thrust/system/detail/sequential/adjacent_difference.h", - "cuda/include/thrust/system/detail/sequential/sequence.h", - "cuda/include/thrust/system/detail/sequential/merge.h", - "cuda/include/thrust/system/detail/sequential/transform_reduce.h", - "cuda/include/thrust/system/detail/sequential/gather.h", - "cuda/include/thrust/system/detail/sequential/sort.h", - "cuda/include/thrust/system/detail/sequential/copy_backward.h", - "cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl", - "cuda/include/thrust/system/detail/sequential/scan.h", - "cuda/include/thrust/system/detail/sequential/temporary_buffer.h", - "cuda/include/thrust/system/detail/sequential/scan_by_key.h", - "cuda/include/thrust/system/detail/sequential/reverse.h", - "cuda/include/thrust/system/detail/sequential/assign_value.h", - "cuda/include/thrust/system/detail/sequential/scatter.h", - "cuda/include/thrust/system/detail/sequential/find.h", - "cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl", - "cuda/include/thrust/system/detail/sequential/merge.inl", - "cuda/include/thrust/system/detail/sequential/generate.h", - "cuda/include/thrust/system/detail/sequential/uninitialized_fill.h", - "cuda/include/thrust/system/detail/sequential/general_copy.h", - "cuda/include/thrust/system/detail/sequential/insertion_sort.h", - "cuda/include/thrust/system/detail/sequential/remove.h", - "cuda/include/thrust/system/detail/sequential/tabulate.h", - "cuda/include/thrust/system/detail/sequential/for_each.h", - "cuda/include/thrust/system/detail/sequential/reduce_by_key.h", - "cuda/include/thrust/system/detail/sequential/reduce.h", - "cuda/include/thrust/system/detail/sequential/equal.h", - "cuda/include/thrust/system/detail/sequential/stable_radix_sort.h", - "cuda/include/thrust/system/detail/sequential/copy.inl", - "cuda/include/thrust/system/detail/sequential/copy.h", - 
"cuda/include/thrust/system/detail/sequential/swap_ranges.h", - "cuda/include/thrust/system/detail/sequential/uninitialized_copy.h", - "cuda/include/thrust/system/detail/sequential/binary_search.h", - "cuda/include/thrust/system/detail/sequential/set_operations.h", - "cuda/include/thrust/system/detail/sequential/mismatch.h", - "cuda/include/thrust/system/detail/sequential/extrema.h", - "cuda/include/thrust/system/detail/sequential/count.h", - "cuda/include/thrust/system/detail/sequential/trivial_copy.h", - "cuda/include/thrust/system/detail/sequential/replace.h", - "cuda/include/thrust/system/detail/sequential/get_value.h", - "cuda/include/thrust/system/detail/sequential/inner_product.h", - "cuda/include/thrust/system/detail/sequential/copy_if.h", - "cuda/include/thrust/system/detail/sequential/logical.h", - "cuda/include/thrust/system/detail/sequential/iter_swap.h", - "cuda/include/thrust/system/detail/sequential/malloc_and_free.h", - "cuda/include/thrust/system/detail/sequential/fill.h", - "cuda/include/thrust/system/detail/sequential/transform.h", - "cuda/include/thrust/system/detail/error_condition.inl", - "cuda/include/thrust/system/detail/internal/decompose.h", "cuda/include/thrust/system/detail/error_code.inl", - "cuda/include/thrust/system/detail/generic/transform_scan.h", - "cuda/include/thrust/system/detail/generic/memory.inl", - "cuda/include/thrust/system/detail/generic/transform.inl", - "cuda/include/thrust/system/detail/generic/binary_search.inl", - "cuda/include/thrust/system/detail/generic/scan_by_key.inl", - "cuda/include/thrust/system/detail/generic/unique_by_key.h", - "cuda/include/thrust/system/detail/generic/inner_product.inl", - "cuda/include/thrust/system/detail/generic/select_system.h", - "cuda/include/thrust/system/detail/generic/sequence.inl", - "cuda/include/thrust/system/detail/generic/sort.inl", - "cuda/include/thrust/system/detail/generic/equal.inl", - "cuda/include/thrust/system/detail/generic/partition.h", - 
"cuda/include/thrust/system/detail/generic/unique.h", + "cuda/include/thrust/system/detail/error_condition.inl", "cuda/include/thrust/system/detail/generic/adjacent_difference.h", - "cuda/include/thrust/system/detail/generic/tag.h", - "cuda/include/thrust/system/detail/generic/unique_by_key.inl", - "cuda/include/thrust/system/detail/generic/sequence.h", - "cuda/include/thrust/system/detail/generic/type_traits.h", - "cuda/include/thrust/system/detail/generic/merge.h", - "cuda/include/thrust/system/detail/generic/reverse.inl", - "cuda/include/thrust/system/detail/generic/tabulate.inl", - "cuda/include/thrust/system/detail/generic/unique.inl", - "cuda/include/thrust/system/detail/generic/scatter.inl", - "cuda/include/thrust/system/detail/generic/set_operations.inl", - "cuda/include/thrust/system/detail/generic/copy_if.inl", - "cuda/include/thrust/system/detail/generic/transform_reduce.h", - "cuda/include/thrust/system/detail/generic/transform_scan.inl", - "cuda/include/thrust/system/detail/generic/gather.h", - "cuda/include/thrust/system/detail/generic/reduce_by_key.inl", - "cuda/include/thrust/system/detail/generic/transform_reduce.inl", - "cuda/include/thrust/system/detail/generic/sort.h", - "cuda/include/thrust/system/detail/generic/distance.inl", - "cuda/include/thrust/system/detail/generic/scan.h", - "cuda/include/thrust/system/detail/generic/temporary_buffer.h", - "cuda/include/thrust/system/detail/generic/reduce.inl", - "cuda/include/thrust/system/detail/generic/scan_by_key.h", - "cuda/include/thrust/system/detail/generic/reverse.h", - "cuda/include/thrust/system/detail/generic/temporary_buffer.inl", - "cuda/include/thrust/system/detail/generic/scatter.h", - "cuda/include/thrust/system/detail/generic/generate.inl", "cuda/include/thrust/system/detail/generic/adjacent_difference.inl", - "cuda/include/thrust/system/detail/generic/remove.inl", "cuda/include/thrust/system/detail/generic/advance.h", - "cuda/include/thrust/system/detail/generic/find.h", - 
"cuda/include/thrust/system/detail/generic/merge.inl", - "cuda/include/thrust/system/detail/generic/scalar/binary_search.inl", - "cuda/include/thrust/system/detail/generic/scalar/binary_search.h", - "cuda/include/thrust/system/detail/generic/extrema.inl", - "cuda/include/thrust/system/detail/generic/generate.h", - "cuda/include/thrust/system/detail/generic/uninitialized_fill.h", + "cuda/include/thrust/system/detail/generic/advance.inl", + "cuda/include/thrust/system/detail/generic/binary_search.h", + "cuda/include/thrust/system/detail/generic/binary_search.inl", + "cuda/include/thrust/system/detail/generic/copy.h", + "cuda/include/thrust/system/detail/generic/copy.inl", + "cuda/include/thrust/system/detail/generic/copy_if.h", + "cuda/include/thrust/system/detail/generic/copy_if.inl", + "cuda/include/thrust/system/detail/generic/count.h", "cuda/include/thrust/system/detail/generic/count.inl", - "cuda/include/thrust/system/detail/generic/remove.h", - "cuda/include/thrust/system/detail/generic/uninitialized_copy.inl", - "cuda/include/thrust/system/detail/generic/tabulate.h", - "cuda/include/thrust/system/detail/generic/for_each.h", "cuda/include/thrust/system/detail/generic/distance.h", - "cuda/include/thrust/system/detail/generic/swap_ranges.inl", - "cuda/include/thrust/system/detail/generic/reduce_by_key.h", - "cuda/include/thrust/system/detail/generic/reduce.h", + "cuda/include/thrust/system/detail/generic/distance.inl", "cuda/include/thrust/system/detail/generic/equal.h", - "cuda/include/thrust/system/detail/generic/mismatch.inl", - "cuda/include/thrust/system/detail/generic/copy.inl", - "cuda/include/thrust/system/detail/generic/copy.h", - "cuda/include/thrust/system/detail/generic/swap_ranges.h", - "cuda/include/thrust/system/detail/generic/uninitialized_copy.h", - "cuda/include/thrust/system/detail/generic/binary_search.h", - "cuda/include/thrust/system/detail/generic/set_operations.h", - "cuda/include/thrust/system/detail/generic/uninitialized_fill.inl", - 
"cuda/include/thrust/system/detail/generic/mismatch.h", - "cuda/include/thrust/system/detail/generic/scan.inl", - "cuda/include/thrust/system/detail/generic/gather.inl", + "cuda/include/thrust/system/detail/generic/equal.inl", "cuda/include/thrust/system/detail/generic/extrema.h", - "cuda/include/thrust/system/detail/generic/count.h", - "cuda/include/thrust/system/detail/generic/replace.h", + "cuda/include/thrust/system/detail/generic/extrema.inl", + "cuda/include/thrust/system/detail/generic/fill.h", + "cuda/include/thrust/system/detail/generic/find.h", + "cuda/include/thrust/system/detail/generic/find.inl", + "cuda/include/thrust/system/detail/generic/for_each.h", + "cuda/include/thrust/system/detail/generic/gather.h", + "cuda/include/thrust/system/detail/generic/gather.inl", + "cuda/include/thrust/system/detail/generic/generate.h", + "cuda/include/thrust/system/detail/generic/generate.inl", "cuda/include/thrust/system/detail/generic/inner_product.h", - "cuda/include/thrust/system/detail/generic/copy_if.h", + "cuda/include/thrust/system/detail/generic/inner_product.inl", "cuda/include/thrust/system/detail/generic/logical.h", - "cuda/include/thrust/system/detail/generic/partition.inl", "cuda/include/thrust/system/detail/generic/memory.h", - "cuda/include/thrust/system/detail/generic/find.inl", + "cuda/include/thrust/system/detail/generic/memory.inl", + "cuda/include/thrust/system/detail/generic/merge.h", + "cuda/include/thrust/system/detail/generic/merge.inl", + "cuda/include/thrust/system/detail/generic/mismatch.h", + "cuda/include/thrust/system/detail/generic/mismatch.inl", + "cuda/include/thrust/system/detail/generic/partition.h", + "cuda/include/thrust/system/detail/generic/partition.inl", + "cuda/include/thrust/system/detail/generic/reduce.h", + "cuda/include/thrust/system/detail/generic/reduce.inl", + "cuda/include/thrust/system/detail/generic/reduce_by_key.h", + "cuda/include/thrust/system/detail/generic/reduce_by_key.inl", + 
"cuda/include/thrust/system/detail/generic/remove.h", + "cuda/include/thrust/system/detail/generic/remove.inl", + "cuda/include/thrust/system/detail/generic/replace.h", "cuda/include/thrust/system/detail/generic/replace.inl", - "cuda/include/thrust/system/detail/generic/advance.inl", - "cuda/include/thrust/system/detail/generic/fill.h", + "cuda/include/thrust/system/detail/generic/reverse.h", + "cuda/include/thrust/system/detail/generic/reverse.inl", + "cuda/include/thrust/system/detail/generic/scalar/binary_search.h", + "cuda/include/thrust/system/detail/generic/scalar/binary_search.inl", + "cuda/include/thrust/system/detail/generic/scan.h", + "cuda/include/thrust/system/detail/generic/scan.inl", + "cuda/include/thrust/system/detail/generic/scan_by_key.h", + "cuda/include/thrust/system/detail/generic/scan_by_key.inl", + "cuda/include/thrust/system/detail/generic/scatter.h", + "cuda/include/thrust/system/detail/generic/scatter.inl", + "cuda/include/thrust/system/detail/generic/select_system.h", + "cuda/include/thrust/system/detail/generic/sequence.h", + "cuda/include/thrust/system/detail/generic/sequence.inl", + "cuda/include/thrust/system/detail/generic/set_operations.h", + "cuda/include/thrust/system/detail/generic/set_operations.inl", + "cuda/include/thrust/system/detail/generic/sort.h", + "cuda/include/thrust/system/detail/generic/sort.inl", + "cuda/include/thrust/system/detail/generic/swap_ranges.h", + "cuda/include/thrust/system/detail/generic/swap_ranges.inl", + "cuda/include/thrust/system/detail/generic/tabulate.h", + "cuda/include/thrust/system/detail/generic/tabulate.inl", + "cuda/include/thrust/system/detail/generic/tag.h", + "cuda/include/thrust/system/detail/generic/temporary_buffer.h", + "cuda/include/thrust/system/detail/generic/temporary_buffer.inl", "cuda/include/thrust/system/detail/generic/transform.h", + "cuda/include/thrust/system/detail/generic/transform.inl", + "cuda/include/thrust/system/detail/generic/transform_reduce.h", + 
"cuda/include/thrust/system/detail/generic/transform_reduce.inl", + "cuda/include/thrust/system/detail/generic/transform_scan.h", + "cuda/include/thrust/system/detail/generic/transform_scan.inl", + "cuda/include/thrust/system/detail/generic/type_traits.h", + "cuda/include/thrust/system/detail/generic/uninitialized_copy.h", + "cuda/include/thrust/system/detail/generic/uninitialized_copy.inl", + "cuda/include/thrust/system/detail/generic/uninitialized_fill.h", + "cuda/include/thrust/system/detail/generic/uninitialized_fill.inl", + "cuda/include/thrust/system/detail/generic/unique.h", + "cuda/include/thrust/system/detail/generic/unique.inl", + "cuda/include/thrust/system/detail/generic/unique_by_key.h", + "cuda/include/thrust/system/detail/generic/unique_by_key.inl", + "cuda/include/thrust/system/detail/internal/decompose.h", + "cuda/include/thrust/system/detail/sequential/adjacent_difference.h", + "cuda/include/thrust/system/detail/sequential/assign_value.h", + "cuda/include/thrust/system/detail/sequential/binary_search.h", + "cuda/include/thrust/system/detail/sequential/copy.h", + "cuda/include/thrust/system/detail/sequential/copy.inl", + "cuda/include/thrust/system/detail/sequential/copy_backward.h", + "cuda/include/thrust/system/detail/sequential/copy_if.h", + "cuda/include/thrust/system/detail/sequential/count.h", + "cuda/include/thrust/system/detail/sequential/equal.h", + "cuda/include/thrust/system/detail/sequential/execution_policy.h", + "cuda/include/thrust/system/detail/sequential/extrema.h", + "cuda/include/thrust/system/detail/sequential/fill.h", + "cuda/include/thrust/system/detail/sequential/find.h", + "cuda/include/thrust/system/detail/sequential/for_each.h", + "cuda/include/thrust/system/detail/sequential/gather.h", + "cuda/include/thrust/system/detail/sequential/general_copy.h", + "cuda/include/thrust/system/detail/sequential/generate.h", + "cuda/include/thrust/system/detail/sequential/get_value.h", + 
"cuda/include/thrust/system/detail/sequential/inner_product.h", + "cuda/include/thrust/system/detail/sequential/insertion_sort.h", + "cuda/include/thrust/system/detail/sequential/iter_swap.h", + "cuda/include/thrust/system/detail/sequential/logical.h", + "cuda/include/thrust/system/detail/sequential/malloc_and_free.h", + "cuda/include/thrust/system/detail/sequential/merge.h", + "cuda/include/thrust/system/detail/sequential/merge.inl", + "cuda/include/thrust/system/detail/sequential/mismatch.h", + "cuda/include/thrust/system/detail/sequential/partition.h", + "cuda/include/thrust/system/detail/sequential/reduce.h", + "cuda/include/thrust/system/detail/sequential/reduce_by_key.h", + "cuda/include/thrust/system/detail/sequential/remove.h", + "cuda/include/thrust/system/detail/sequential/replace.h", + "cuda/include/thrust/system/detail/sequential/reverse.h", + "cuda/include/thrust/system/detail/sequential/scan.h", + "cuda/include/thrust/system/detail/sequential/scan_by_key.h", + "cuda/include/thrust/system/detail/sequential/scatter.h", + "cuda/include/thrust/system/detail/sequential/sequence.h", + "cuda/include/thrust/system/detail/sequential/set_operations.h", + "cuda/include/thrust/system/detail/sequential/sort.h", + "cuda/include/thrust/system/detail/sequential/sort.inl", + "cuda/include/thrust/system/detail/sequential/stable_merge_sort.h", + "cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl", + "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h", + "cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl", + "cuda/include/thrust/system/detail/sequential/stable_radix_sort.h", + "cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl", + "cuda/include/thrust/system/detail/sequential/swap_ranges.h", + "cuda/include/thrust/system/detail/sequential/tabulate.h", + "cuda/include/thrust/system/detail/sequential/temporary_buffer.h", + "cuda/include/thrust/system/detail/sequential/transform.h", + 
"cuda/include/thrust/system/detail/sequential/transform_reduce.h", + "cuda/include/thrust/system/detail/sequential/transform_scan.h", + "cuda/include/thrust/system/detail/sequential/trivial_copy.h", + "cuda/include/thrust/system/detail/sequential/uninitialized_copy.h", + "cuda/include/thrust/system/detail/sequential/uninitialized_fill.h", + "cuda/include/thrust/system/detail/sequential/unique.h", + "cuda/include/thrust/system/detail/sequential/unique_by_key.h", "cuda/include/thrust/system/detail/system_error.inl", - "cuda/include/thrust/system/omp/execution_policy.h", - "cuda/include/thrust/system/omp/vector.h", - "cuda/include/thrust/system/omp/detail/transform_scan.h", - "cuda/include/thrust/system/omp/detail/memory.inl", - "cuda/include/thrust/system/omp/detail/reduce_intervals.inl", - "cuda/include/thrust/system/omp/detail/unique_by_key.h", - "cuda/include/thrust/system/omp/detail/sort.inl", - "cuda/include/thrust/system/omp/detail/partition.h", - "cuda/include/thrust/system/omp/detail/unique.h", - "cuda/include/thrust/system/omp/detail/execution_policy.h", + "cuda/include/thrust/system/error_code.h", "cuda/include/thrust/system/omp/detail/adjacent_difference.h", - "cuda/include/thrust/system/omp/detail/unique_by_key.inl", - "cuda/include/thrust/system/omp/detail/sequence.h", - "cuda/include/thrust/system/omp/detail/merge.h", - "cuda/include/thrust/system/omp/detail/unique.inl", + "cuda/include/thrust/system/omp/detail/assign_value.h", + "cuda/include/thrust/system/omp/detail/binary_search.h", + "cuda/include/thrust/system/omp/detail/copy.h", + "cuda/include/thrust/system/omp/detail/copy.inl", + "cuda/include/thrust/system/omp/detail/copy_if.h", "cuda/include/thrust/system/omp/detail/copy_if.inl", - "cuda/include/thrust/system/omp/detail/transform_reduce.h", - "cuda/include/thrust/system/omp/detail/gather.h", - "cuda/include/thrust/system/omp/detail/reduce_by_key.inl", - "cuda/include/thrust/system/omp/detail/sort.h", - 
"cuda/include/thrust/system/omp/detail/scan.h", - "cuda/include/thrust/system/omp/detail/temporary_buffer.h", + "cuda/include/thrust/system/omp/detail/count.h", "cuda/include/thrust/system/omp/detail/default_decomposition.h", - "cuda/include/thrust/system/omp/detail/reduce.inl", - "cuda/include/thrust/system/omp/detail/scan_by_key.h", - "cuda/include/thrust/system/omp/detail/reverse.h", - "cuda/include/thrust/system/omp/detail/assign_value.h", - "cuda/include/thrust/system/omp/detail/scatter.h", - "cuda/include/thrust/system/omp/detail/for_each.inl", "cuda/include/thrust/system/omp/detail/default_decomposition.inl", - "cuda/include/thrust/system/omp/detail/remove.inl", - "cuda/include/thrust/system/omp/detail/vector.inl", - "cuda/include/thrust/system/omp/detail/find.h", - "cuda/include/thrust/system/omp/detail/generate.h", - "cuda/include/thrust/system/omp/detail/uninitialized_fill.h", - "cuda/include/thrust/system/omp/detail/remove.h", - "cuda/include/thrust/system/omp/detail/tabulate.h", - "cuda/include/thrust/system/omp/detail/for_each.h", - "cuda/include/thrust/system/omp/detail/reduce_by_key.h", - "cuda/include/thrust/system/omp/detail/reduce.h", "cuda/include/thrust/system/omp/detail/equal.h", - "cuda/include/thrust/system/omp/detail/copy.inl", - "cuda/include/thrust/system/omp/detail/copy.h", - "cuda/include/thrust/system/omp/detail/swap_ranges.h", - "cuda/include/thrust/system/omp/detail/uninitialized_copy.h", - "cuda/include/thrust/system/omp/detail/binary_search.h", - "cuda/include/thrust/system/omp/detail/set_operations.h", - "cuda/include/thrust/system/omp/detail/mismatch.h", + "cuda/include/thrust/system/omp/detail/execution_policy.h", "cuda/include/thrust/system/omp/detail/extrema.h", - "cuda/include/thrust/system/omp/detail/count.h", - "cuda/include/thrust/system/omp/detail/replace.h", + "cuda/include/thrust/system/omp/detail/fill.h", + "cuda/include/thrust/system/omp/detail/find.h", + "cuda/include/thrust/system/omp/detail/for_each.h", + 
"cuda/include/thrust/system/omp/detail/for_each.inl", + "cuda/include/thrust/system/omp/detail/gather.h", + "cuda/include/thrust/system/omp/detail/generate.h", "cuda/include/thrust/system/omp/detail/get_value.h", "cuda/include/thrust/system/omp/detail/inner_product.h", - "cuda/include/thrust/system/omp/detail/copy_if.h", - "cuda/include/thrust/system/omp/detail/logical.h", - "cuda/include/thrust/system/omp/detail/partition.inl", "cuda/include/thrust/system/omp/detail/iter_swap.h", + "cuda/include/thrust/system/omp/detail/logical.h", + "cuda/include/thrust/system/omp/detail/malloc_and_free.h", + "cuda/include/thrust/system/omp/detail/memory.inl", + "cuda/include/thrust/system/omp/detail/merge.h", + "cuda/include/thrust/system/omp/detail/mismatch.h", "cuda/include/thrust/system/omp/detail/par.h", + "cuda/include/thrust/system/omp/detail/partition.h", + "cuda/include/thrust/system/omp/detail/partition.inl", + "cuda/include/thrust/system/omp/detail/reduce.h", + "cuda/include/thrust/system/omp/detail/reduce.inl", + "cuda/include/thrust/system/omp/detail/reduce_by_key.h", + "cuda/include/thrust/system/omp/detail/reduce_by_key.inl", "cuda/include/thrust/system/omp/detail/reduce_intervals.h", - "cuda/include/thrust/system/omp/detail/malloc_and_free.h", - "cuda/include/thrust/system/omp/detail/fill.h", + "cuda/include/thrust/system/omp/detail/reduce_intervals.inl", + "cuda/include/thrust/system/omp/detail/remove.h", + "cuda/include/thrust/system/omp/detail/remove.inl", + "cuda/include/thrust/system/omp/detail/replace.h", + "cuda/include/thrust/system/omp/detail/reverse.h", + "cuda/include/thrust/system/omp/detail/scan.h", + "cuda/include/thrust/system/omp/detail/scan_by_key.h", + "cuda/include/thrust/system/omp/detail/scatter.h", + "cuda/include/thrust/system/omp/detail/sequence.h", + "cuda/include/thrust/system/omp/detail/set_operations.h", + "cuda/include/thrust/system/omp/detail/sort.h", + "cuda/include/thrust/system/omp/detail/sort.inl", + 
"cuda/include/thrust/system/omp/detail/swap_ranges.h", + "cuda/include/thrust/system/omp/detail/tabulate.h", + "cuda/include/thrust/system/omp/detail/temporary_buffer.h", "cuda/include/thrust/system/omp/detail/transform.h", - "cuda/include/thrust/system/omp/memory.h", - "cuda/include/thrust/system/tbb/execution_policy.h", - "cuda/include/thrust/system/tbb/vector.h", - "cuda/include/thrust/system/tbb/detail/transform_scan.h", - "cuda/include/thrust/system/tbb/detail/memory.inl", - "cuda/include/thrust/system/tbb/detail/unique_by_key.h", - "cuda/include/thrust/system/tbb/detail/sort.inl", - "cuda/include/thrust/system/tbb/detail/partition.h", - "cuda/include/thrust/system/tbb/detail/unique.h", - "cuda/include/thrust/system/tbb/detail/execution_policy.h", + "cuda/include/thrust/system/omp/detail/transform_reduce.h", + "cuda/include/thrust/system/omp/detail/transform_scan.h", + "cuda/include/thrust/system/omp/detail/uninitialized_copy.h", + "cuda/include/thrust/system/omp/detail/uninitialized_fill.h", + "cuda/include/thrust/system/omp/detail/unique.h", + "cuda/include/thrust/system/omp/detail/unique.inl", + "cuda/include/thrust/system/omp/detail/unique_by_key.h", + "cuda/include/thrust/system/omp/detail/unique_by_key.inl", + "cuda/include/thrust/system/omp/detail/vector.inl", + "cuda/include/thrust/system/omp/execution_policy.h", + "cuda/include/thrust/system/omp/memory.h", + "cuda/include/thrust/system/omp/vector.h", + "cuda/include/thrust/system/system_error.h", "cuda/include/thrust/system/tbb/detail/adjacent_difference.h", - "cuda/include/thrust/system/tbb/detail/unique_by_key.inl", - "cuda/include/thrust/system/tbb/detail/sequence.h", - "cuda/include/thrust/system/tbb/detail/merge.h", - "cuda/include/thrust/system/tbb/detail/unique.inl", - "cuda/include/thrust/system/tbb/detail/copy_if.inl", - "cuda/include/thrust/system/tbb/detail/transform_reduce.h", - "cuda/include/thrust/system/tbb/detail/gather.h", - "cuda/include/thrust/system/tbb/detail/reduce_by_key.inl", - 
"cuda/include/thrust/system/tbb/detail/sort.h", - "cuda/include/thrust/system/tbb/detail/scan.h", - "cuda/include/thrust/system/tbb/detail/temporary_buffer.h", - "cuda/include/thrust/system/tbb/detail/reduce.inl", - "cuda/include/thrust/system/tbb/detail/scan_by_key.h", - "cuda/include/thrust/system/tbb/detail/reverse.h", "cuda/include/thrust/system/tbb/detail/assign_value.h", - "cuda/include/thrust/system/tbb/detail/scatter.h", - "cuda/include/thrust/system/tbb/detail/for_each.inl", - "cuda/include/thrust/system/tbb/detail/remove.inl", - "cuda/include/thrust/system/tbb/detail/vector.inl", - "cuda/include/thrust/system/tbb/detail/find.h", - "cuda/include/thrust/system/tbb/detail/merge.inl", - "cuda/include/thrust/system/tbb/detail/generate.h", - "cuda/include/thrust/system/tbb/detail/uninitialized_fill.h", - "cuda/include/thrust/system/tbb/detail/remove.h", - "cuda/include/thrust/system/tbb/detail/tabulate.h", - "cuda/include/thrust/system/tbb/detail/for_each.h", - "cuda/include/thrust/system/tbb/detail/reduce_by_key.h", - "cuda/include/thrust/system/tbb/detail/reduce.h", - "cuda/include/thrust/system/tbb/detail/equal.h", - "cuda/include/thrust/system/tbb/detail/copy.inl", - "cuda/include/thrust/system/tbb/detail/copy.h", - "cuda/include/thrust/system/tbb/detail/swap_ranges.h", - "cuda/include/thrust/system/tbb/detail/uninitialized_copy.h", "cuda/include/thrust/system/tbb/detail/binary_search.h", - "cuda/include/thrust/system/tbb/detail/set_operations.h", - "cuda/include/thrust/system/tbb/detail/mismatch.h", - "cuda/include/thrust/system/tbb/detail/scan.inl", - "cuda/include/thrust/system/tbb/detail/extrema.h", + "cuda/include/thrust/system/tbb/detail/copy.h", + "cuda/include/thrust/system/tbb/detail/copy.inl", + "cuda/include/thrust/system/tbb/detail/copy_if.h", + "cuda/include/thrust/system/tbb/detail/copy_if.inl", "cuda/include/thrust/system/tbb/detail/count.h", - "cuda/include/thrust/system/tbb/detail/replace.h", + 
"cuda/include/thrust/system/tbb/detail/equal.h", + "cuda/include/thrust/system/tbb/detail/execution_policy.h", + "cuda/include/thrust/system/tbb/detail/extrema.h", + "cuda/include/thrust/system/tbb/detail/fill.h", + "cuda/include/thrust/system/tbb/detail/find.h", + "cuda/include/thrust/system/tbb/detail/for_each.h", + "cuda/include/thrust/system/tbb/detail/for_each.inl", + "cuda/include/thrust/system/tbb/detail/gather.h", + "cuda/include/thrust/system/tbb/detail/generate.h", "cuda/include/thrust/system/tbb/detail/get_value.h", "cuda/include/thrust/system/tbb/detail/inner_product.h", - "cuda/include/thrust/system/tbb/detail/copy_if.h", - "cuda/include/thrust/system/tbb/detail/logical.h", - "cuda/include/thrust/system/tbb/detail/partition.inl", "cuda/include/thrust/system/tbb/detail/iter_swap.h", + "cuda/include/thrust/system/tbb/detail/logical.h", + "cuda/include/thrust/system/tbb/detail/malloc_and_free.h", + "cuda/include/thrust/system/tbb/detail/memory.inl", + "cuda/include/thrust/system/tbb/detail/merge.h", + "cuda/include/thrust/system/tbb/detail/merge.inl", + "cuda/include/thrust/system/tbb/detail/mismatch.h", "cuda/include/thrust/system/tbb/detail/par.h", + "cuda/include/thrust/system/tbb/detail/partition.h", + "cuda/include/thrust/system/tbb/detail/partition.inl", + "cuda/include/thrust/system/tbb/detail/reduce.h", + "cuda/include/thrust/system/tbb/detail/reduce.inl", + "cuda/include/thrust/system/tbb/detail/reduce_by_key.h", + "cuda/include/thrust/system/tbb/detail/reduce_by_key.inl", "cuda/include/thrust/system/tbb/detail/reduce_intervals.h", - "cuda/include/thrust/system/tbb/detail/malloc_and_free.h", - "cuda/include/thrust/system/tbb/detail/fill.h", + "cuda/include/thrust/system/tbb/detail/remove.h", + "cuda/include/thrust/system/tbb/detail/remove.inl", + "cuda/include/thrust/system/tbb/detail/replace.h", + "cuda/include/thrust/system/tbb/detail/reverse.h", + "cuda/include/thrust/system/tbb/detail/scan.h", + 
"cuda/include/thrust/system/tbb/detail/scan.inl", + "cuda/include/thrust/system/tbb/detail/scan_by_key.h", + "cuda/include/thrust/system/tbb/detail/scatter.h", + "cuda/include/thrust/system/tbb/detail/sequence.h", + "cuda/include/thrust/system/tbb/detail/set_operations.h", + "cuda/include/thrust/system/tbb/detail/sort.h", + "cuda/include/thrust/system/tbb/detail/sort.inl", + "cuda/include/thrust/system/tbb/detail/swap_ranges.h", + "cuda/include/thrust/system/tbb/detail/tabulate.h", + "cuda/include/thrust/system/tbb/detail/temporary_buffer.h", "cuda/include/thrust/system/tbb/detail/transform.h", - "cuda/include/thrust/system/tbb/memory.h", - "cuda/include/thrust/system/error_code.h", - "cuda/include/thrust/system/cpp/execution_policy.h", - "cuda/include/thrust/system/cpp/vector.h", - "cuda/include/thrust/system/cpp/detail/transform_scan.h", - "cuda/include/thrust/system/cpp/detail/memory.inl", - "cuda/include/thrust/system/cpp/detail/unique_by_key.h", - "cuda/include/thrust/system/cpp/detail/partition.h", - "cuda/include/thrust/system/cpp/detail/unique.h", - "cuda/include/thrust/system/cpp/detail/execution_policy.h", - "cuda/include/thrust/system/cpp/detail/adjacent_difference.h", - "cuda/include/thrust/system/cpp/detail/sequence.h", - "cuda/include/thrust/system/cpp/detail/merge.h", - "cuda/include/thrust/system/cpp/detail/transform_reduce.h", - "cuda/include/thrust/system/cpp/detail/gather.h", - "cuda/include/thrust/system/cpp/detail/sort.h", - "cuda/include/thrust/system/cpp/detail/scan.h", - "cuda/include/thrust/system/cpp/detail/temporary_buffer.h", - "cuda/include/thrust/system/cpp/detail/scan_by_key.h", - "cuda/include/thrust/system/cpp/detail/reverse.h", - "cuda/include/thrust/system/cpp/detail/assign_value.h", - "cuda/include/thrust/system/cpp/detail/scatter.h", - "cuda/include/thrust/system/cpp/detail/vector.inl", - "cuda/include/thrust/system/cpp/detail/find.h", - "cuda/include/thrust/system/cpp/detail/generate.h", - 
"cuda/include/thrust/system/cpp/detail/uninitialized_fill.h", - "cuda/include/thrust/system/cpp/detail/remove.h", - "cuda/include/thrust/system/cpp/detail/tabulate.h", - "cuda/include/thrust/system/cpp/detail/for_each.h", - "cuda/include/thrust/system/cpp/detail/reduce_by_key.h", - "cuda/include/thrust/system/cpp/detail/reduce.h", - "cuda/include/thrust/system/cpp/detail/equal.h", - "cuda/include/thrust/system/cpp/detail/copy.h", - "cuda/include/thrust/system/cpp/detail/swap_ranges.h", - "cuda/include/thrust/system/cpp/detail/uninitialized_copy.h", - "cuda/include/thrust/system/cpp/detail/binary_search.h", - "cuda/include/thrust/system/cpp/detail/set_operations.h", - "cuda/include/thrust/system/cpp/detail/mismatch.h", - "cuda/include/thrust/system/cpp/detail/extrema.h", - "cuda/include/thrust/system/cpp/detail/count.h", - "cuda/include/thrust/system/cpp/detail/replace.h", - "cuda/include/thrust/system/cpp/detail/get_value.h", - "cuda/include/thrust/system/cpp/detail/inner_product.h", - "cuda/include/thrust/system/cpp/detail/copy_if.h", - "cuda/include/thrust/system/cpp/detail/logical.h", - "cuda/include/thrust/system/cpp/detail/iter_swap.h", - "cuda/include/thrust/system/cpp/detail/par.h", - "cuda/include/thrust/system/cpp/detail/malloc_and_free.h", - "cuda/include/thrust/system/cpp/detail/fill.h", - "cuda/include/thrust/system/cpp/detail/transform.h", - "cuda/include/thrust/system/cpp/memory.h", - "cuda/include/thrust/system/cuda/execution_policy.h", - "cuda/include/thrust/system/cuda/vector.h", - "cuda/include/thrust/system/cuda/error.h", - "cuda/include/thrust/system/cuda/detail/copy_device_to_device.h", - "cuda/include/thrust/system/cuda/detail/transform_scan.h", - "cuda/include/thrust/system/cuda/detail/memory.inl", - "cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh", - "cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh", - "cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_device.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_rle_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_histogram_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_by_key_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_scan_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_select_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_radix_sort_dispatch.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_histo.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_downsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_upsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_satomic.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_gatomic.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_select.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_scan_prefix_operators.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce_by_key.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_upsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_histogram_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_rle_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_select_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_satomic_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_sort_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_gatomic_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_downsweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_by_key_sweep.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_prefix_operators.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_type.cuh", - "cuda/include/thrust/system/cuda/detail/cub/host/spinlock.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh", - "cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh", - "cuda/include/thrust/system/cuda/detail/cub/cub.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_shift.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh", - 
"cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh", - "cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh", - "cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh", - "cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh", - "cuda/include/thrust/system/cuda/detail/reduce_intervals.inl", - "cuda/include/thrust/system/cuda/detail/copy_cross_system.inl", - "cuda/include/thrust/system/cuda/detail/unique_by_key.h", - "cuda/include/thrust/system/cuda/detail/bulk.h", - "cuda/include/thrust/system/cuda/detail/sort.inl", - "cuda/include/thrust/system/cuda/detail/partition.h", - "cuda/include/thrust/system/cuda/detail/unique.h", - "cuda/include/thrust/system/cuda/detail/execution_policy.h", - "cuda/include/thrust/system/cuda/detail/cuda_launch_config.h", - "cuda/include/thrust/system/cuda/detail/cub.h", - "cuda/include/thrust/system/cuda/detail/adjacent_difference.h", - "cuda/include/thrust/system/cuda/detail/sequence.h", - "cuda/include/thrust/system/cuda/detail/merge.h", - 
"cuda/include/thrust/system/cuda/detail/set_symmetric_difference.inl", - "cuda/include/thrust/system/cuda/detail/copy_if.inl", - "cuda/include/thrust/system/cuda/detail/transform_reduce.h", - "cuda/include/thrust/system/cuda/detail/error.inl", - "cuda/include/thrust/system/cuda/detail/gather.h", - "cuda/include/thrust/system/cuda/detail/reduce_by_key.inl", - "cuda/include/thrust/system/cuda/detail/sort.h", - "cuda/include/thrust/system/cuda/detail/synchronize.h", - "cuda/include/thrust/system/cuda/detail/scan.h", - "cuda/include/thrust/system/cuda/detail/temporary_indirect_permutation.h", - "cuda/include/thrust/system/cuda/detail/extern_shared_ptr.h", - "cuda/include/thrust/system/cuda/detail/detail/set_operation.inl", - "cuda/include/thrust/system/cuda/detail/detail/balanced_path.h", - "cuda/include/thrust/system/cuda/detail/detail/virtualized_smem_closure.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.h", - "cuda/include/thrust/system/cuda/detail/detail/set_operation.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.inl", - "cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.h", - "cuda/include/thrust/system/cuda/detail/detail/launch_closure.inl", - "cuda/include/thrust/system/cuda/detail/detail/merge.h", - "cuda/include/thrust/system/cuda/detail/detail/alignment.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.inl", - "cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.h", - "cuda/include/thrust/system/cuda/detail/detail/launch_calculator.inl", - "cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.inl", - "cuda/include/thrust/system/cuda/detail/detail/launch_closure.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.h", - "cuda/include/thrust/system/cuda/detail/detail/uninitialized.h", - "cuda/include/thrust/system/cuda/detail/detail/cached_temporary_allocator.h", - 
"cuda/include/thrust/system/cuda/detail/detail/launch_calculator.h", - "cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.inl", - "cuda/include/thrust/system/cuda/detail/temporary_buffer.h", - "cuda/include/thrust/system/cuda/detail/default_decomposition.h", - "cuda/include/thrust/system/cuda/detail/reduce.inl", - "cuda/include/thrust/system/cuda/detail/scan_by_key.h", - "cuda/include/thrust/system/cuda/detail/reverse.h", - "cuda/include/thrust/system/cuda/detail/assign_value.h", - "cuda/include/thrust/system/cuda/detail/scatter.h", - "cuda/include/thrust/system/cuda/detail/reduce_intervals.hpp", - "cuda/include/thrust/system/cuda/detail/for_each.inl", - "cuda/include/thrust/system/cuda/detail/default_decomposition.inl", - "cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h", - "cuda/include/thrust/system/cuda/detail/adjacent_difference.inl", - "cuda/include/thrust/system/cuda/detail/vector.inl", - "cuda/include/thrust/system/cuda/detail/throw_on_error.h", - "cuda/include/thrust/system/cuda/detail/find.h", - "cuda/include/thrust/system/cuda/detail/terminate.h", - "cuda/include/thrust/system/cuda/detail/merge.inl", - "cuda/include/thrust/system/cuda/detail/trivial_copy.inl", - "cuda/include/thrust/system/cuda/detail/generate.h", - "cuda/include/thrust/system/cuda/detail/execute_on_stream.h", - "cuda/include/thrust/system/cuda/detail/uninitialized_fill.h", - "cuda/include/thrust/system/cuda/detail/remove.h", - "cuda/include/thrust/system/cuda/detail/tabulate.h", - "cuda/include/thrust/system/cuda/detail/for_each.h", - "cuda/include/thrust/system/cuda/detail/reduce_by_key.h", - "cuda/include/thrust/system/cuda/detail/decomposition.h", - "cuda/include/thrust/system/cuda/detail/reduce.h", - "cuda/include/thrust/system/cuda/detail/equal.h", - "cuda/include/thrust/system/cuda/detail/runtime_introspection.h", - "cuda/include/thrust/system/cuda/detail/copy.inl", - "cuda/include/thrust/system/cuda/detail/copy.h", - 
"cuda/include/thrust/system/cuda/detail/swap_ranges.h", - "cuda/include/thrust/system/cuda/detail/uninitialized_copy.h", - "cuda/include/thrust/system/cuda/detail/binary_search.h", - "cuda/include/thrust/system/cuda/detail/runtime_introspection.inl", - "cuda/include/thrust/system/cuda/detail/set_operations.h", - "cuda/include/thrust/system/cuda/detail/mismatch.h", - "cuda/include/thrust/system/cuda/detail/scan.inl", - "cuda/include/thrust/system/cuda/detail/synchronize.inl", - "cuda/include/thrust/system/cuda/detail/extrema.h", - "cuda/include/thrust/system/cuda/detail/set_union.inl", - "cuda/include/thrust/system/cuda/detail/set_intersection.inl", - "cuda/include/thrust/system/cuda/detail/count.h", - "cuda/include/thrust/system/cuda/detail/trivial_copy.h", - "cuda/include/thrust/system/cuda/detail/copy_device_to_device.inl", - "cuda/include/thrust/system/cuda/detail/replace.h", - "cuda/include/thrust/system/cuda/detail/bulk/malloc.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/config.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/closure.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/tail_flags.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/terminate.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/alignment.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/guarded_cuda_runtime_api.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/choose_sizes.inl", - "cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_meta_transform.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/head_flags.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/synchronize.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/throw_on_error.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/parameter_ptr.hpp", - 
"cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launcher.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/triple_chevron_launcher.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.inl", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launch_config.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/async.inl", - "cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_transform.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/pointer_traits.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/detail/is_contiguous_iterator.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/iterator.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/choose_sizes.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/copy.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/merge.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/accumulate.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/scan.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/detail/stable_merge_sort.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/gather.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/sort.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/scatter.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/adjacent_difference.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/algorithm/for_each.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/bulk.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/execution_policy.hpp", - 
"cuda/include/thrust/system/cuda/detail/bulk/iterator/strided_iterator.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/uninitialized.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/async.hpp", - "cuda/include/thrust/system/cuda/detail/bulk/future.hpp", - "cuda/include/thrust/system/cuda/detail/guarded_driver_types.h", - "cuda/include/thrust/system/cuda/detail/get_value.h", - "cuda/include/thrust/system/cuda/detail/inner_product.h", - "cuda/include/thrust/system/cuda/detail/copy_if.h", - "cuda/include/thrust/system/cuda/detail/logical.h", - "cuda/include/thrust/system/cuda/detail/iter_swap.h", - "cuda/include/thrust/system/cuda/detail/block/merge.h", - "cuda/include/thrust/system/cuda/detail/block/inclusive_scan.h", - "cuda/include/thrust/system/cuda/detail/block/merge.inl", - "cuda/include/thrust/system/cuda/detail/block/merging_sort.h", - "cuda/include/thrust/system/cuda/detail/block/exclusive_scan.h", - "cuda/include/thrust/system/cuda/detail/block/reduce.h", - "cuda/include/thrust/system/cuda/detail/block/copy.h", - "cuda/include/thrust/system/cuda/detail/block/odd_even_sort.h", - "cuda/include/thrust/system/cuda/detail/par.h", - "cuda/include/thrust/system/cuda/detail/copy_cross_system.h", - "cuda/include/thrust/system/cuda/detail/reduce_intervals.h", - "cuda/include/thrust/system/cuda/detail/malloc_and_free.h", - "cuda/include/thrust/system/cuda/detail/fill.h", - "cuda/include/thrust/system/cuda/detail/set_difference.inl", - "cuda/include/thrust/system/cuda/detail/transform.h", - "cuda/include/thrust/system/cuda/experimental/pinned_allocator.h", - "cuda/include/thrust/system/cuda/memory.h", - "cuda/include/thrust/remove.h", + "cuda/include/thrust/system/tbb/detail/transform_reduce.h", + "cuda/include/thrust/system/tbb/detail/transform_scan.h", + "cuda/include/thrust/system/tbb/detail/uninitialized_copy.h", + "cuda/include/thrust/system/tbb/detail/uninitialized_fill.h", + "cuda/include/thrust/system/tbb/detail/unique.h", + 
"cuda/include/thrust/system/tbb/detail/unique.inl", + "cuda/include/thrust/system/tbb/detail/unique_by_key.h", + "cuda/include/thrust/system/tbb/detail/unique_by_key.inl", + "cuda/include/thrust/system/tbb/detail/vector.inl", + "cuda/include/thrust/system/tbb/execution_policy.h", + "cuda/include/thrust/system/tbb/memory.h", + "cuda/include/thrust/system/tbb/vector.h", + "cuda/include/thrust/system_error.h", "cuda/include/thrust/tabulate.h", - "cuda/include/thrust/for_each.h", - "cuda/include/thrust/distance.h", - "cuda/include/thrust/reduce.h", - "cuda/include/thrust/equal.h", - "cuda/include/thrust/complex.h", - "cuda/include/thrust/device_allocator.h", - "cuda/include/thrust/copy.h", + "cuda/include/thrust/transform.h", + "cuda/include/thrust/transform_reduce.h", + "cuda/include/thrust/transform_scan.h", + "cuda/include/thrust/tuple.h", "cuda/include/thrust/uninitialized_copy.h", - "cuda/include/thrust/device_reference.h", - "cuda/include/thrust/binary_search.h", - "cuda/include/thrust/set_operations.h", - "cuda/include/thrust/swap.h", - "cuda/include/thrust/mismatch.h", - "cuda/include/thrust/extrema.h", - "cuda/include/thrust/count.h", - "cuda/include/thrust/device_free.h", - "cuda/include/thrust/random/discard_block_engine.h", - "cuda/include/thrust/random/normal_distribution.h", - "cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h", - "cuda/include/thrust/random/detail/subtract_with_carry_engine.inl", - "cuda/include/thrust/random/detail/xor_combine_engine_max.h", - "cuda/include/thrust/random/detail/linear_congruential_engine_discard.h", - "cuda/include/thrust/random/detail/uniform_int_distribution.inl", - "cuda/include/thrust/random/detail/discard_block_engine.inl", - "cuda/include/thrust/random/detail/uniform_real_distribution.inl", - "cuda/include/thrust/random/detail/random_core_access.h", - "cuda/include/thrust/random/detail/mod.h", - "cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl", - 
"cuda/include/thrust/random/detail/linear_congruential_engine.inl", - "cuda/include/thrust/random/detail/xor_combine_engine.inl", - "cuda/include/thrust/random/detail/normal_distribution.inl", - "cuda/include/thrust/random/detail/normal_distribution_base.h", - "cuda/include/thrust/random/uniform_int_distribution.h", - "cuda/include/thrust/random/linear_feedback_shift_engine.h", - "cuda/include/thrust/random/xor_combine_engine.h", - "cuda/include/thrust/random/subtract_with_carry_engine.h", - "cuda/include/thrust/random/linear_congruential_engine.h", - "cuda/include/thrust/random/uniform_real_distribution.h", - "cuda/include/thrust/functional.h", - "cuda/include/thrust/replace.h", - "cuda/include/thrust/device_new_allocator.h", - "cuda/include/thrust/host_vector.h", + "cuda/include/thrust/uninitialized_fill.h", + "cuda/include/thrust/unique.h", "cuda/include/thrust/version.h", - "cuda/include/thrust/inner_product.h", - "cuda/include/thrust/iterator/iterator_traits.h", - "cuda/include/thrust/iterator/discard_iterator.h", - "cuda/include/thrust/iterator/retag.h", - "cuda/include/thrust/iterator/permutation_iterator.h", - "cuda/include/thrust/iterator/transform_iterator.h", - "cuda/include/thrust/iterator/detail/reverse_iterator.inl", - "cuda/include/thrust/iterator/detail/zip_iterator.inl", - "cuda/include/thrust/iterator/detail/counting_iterator.inl", - "cuda/include/thrust/iterator/detail/distance_from_result.h", - "cuda/include/thrust/iterator/detail/host_system_tag.h", - "cuda/include/thrust/iterator/detail/iterator_traversal_tags.h", - "cuda/include/thrust/iterator/detail/retag.h", - "cuda/include/thrust/iterator/detail/tagged_iterator.h", - "cuda/include/thrust/iterator/detail/iterator_traits.inl", - "cuda/include/thrust/iterator/detail/minimum_category.h", - "cuda/include/thrust/iterator/detail/discard_iterator_base.h", - "cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h", - "cuda/include/thrust/iterator/detail/zip_iterator_base.h", - 
"cuda/include/thrust/iterator/detail/normal_iterator.h", - "cuda/include/thrust/iterator/detail/join_iterator.h", - "cuda/include/thrust/iterator/detail/device_system_tag.h", - "cuda/include/thrust/iterator/detail/universal_categories.h", - "cuda/include/thrust/iterator/detail/reverse_iterator_base.h", - "cuda/include/thrust/iterator/detail/minimum_system.h", - "cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h", - "cuda/include/thrust/iterator/detail/is_iterator_category.h", - "cuda/include/thrust/iterator/detail/permutation_iterator_base.h", - "cuda/include/thrust/iterator/detail/any_assign.h", - "cuda/include/thrust/iterator/detail/any_system_tag.h", - "cuda/include/thrust/iterator/detail/is_trivial_iterator.h", - "cuda/include/thrust/iterator/detail/iterator_category_to_system.h", - "cuda/include/thrust/iterator/detail/iterator_adaptor_base.h", - "cuda/include/thrust/iterator/detail/constant_iterator_base.h", - "cuda/include/thrust/iterator/detail/transform_iterator.inl", - "cuda/include/thrust/iterator/detail/iterator_facade_category.h", - "cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h", - "cuda/include/thrust/iterator/constant_iterator.h", - "cuda/include/thrust/iterator/counting_iterator.h", - "cuda/include/thrust/iterator/iterator_adaptor.h", - "cuda/include/thrust/iterator/iterator_facade.h", - "cuda/include/thrust/iterator/iterator_categories.h", - "cuda/include/thrust/iterator/reverse_iterator.h", - "cuda/include/thrust/iterator/zip_iterator.h", - "cuda/include/thrust/logical.h", - "cuda/include/thrust/tuple.h", - "cuda/include/thrust/memory.h", - "cuda/include/thrust/random.h", - "cuda/include/thrust/fill.h", - "cuda/include/thrust/transform.h", - "cuda/include/texture_types.h", - "cuda/include/nppversion.h", - "cuda/include/cuda_texture_types.h", - "cuda/include/fatbinary.h", - "cuda/include/cublasXt.h", - "cuda/include/cuda_fp16.h", "cuda/include/vector_functions.h", - "cuda/include/cusparse.h", - 
"cuda/include/nppi_filtering_functions.h", - "cuda/include/nppi_morphological_operations.h", - "cuda/include/sobol_direction_vectors.h", - "cuda/include/nvblas.h", - "cuda/include/curand_mtgp32dc_p_11213.h", - "cuda/include/nvcuvid.h", - "cuda/include/cuda_runtime_api.h", - "cuda/include/curand_mtgp32_kernel.h", - "cuda/include/cublas_v2.h", - "cuda/include/builtin_types.h", - "cuda/include/nppi_geometry_transforms.h", - "cuda/include/npps_support_functions.h", - "cuda/include/cufftw.h", - "cuda/include/cuda_device_runtime_api.h", - "cuda/include/sm_30_intrinsics.hpp", + "cuda/include/vector_functions.hpp", "cuda/include/vector_types.h", - "cuda/include/sm_35_atomic_functions.h", - "cuda/include/sm_20_intrinsics.h", - "cuda/include/driver_types.h", - "cuda/include/nvToolsExtCudaRt.h", - "cuda/include/curand_globals.h", - "cuda/include/device_atomic_functions.h", - "cuda/include/surface_types.h", - "cuda/include/nvrtc.h", - "cuda/include/nppdefs.h", - "cuda/include/sm_60_atomic_functions.h", - "cuda/include/driver_functions.h", - "cuda/include/cusolver_common.h", - "cuda/include/cublas.h", - "cuda/include/curand_lognormal.h", - "cuda/include/device_atomic_functions.hpp", - "cuda/include/crt/device_runtime.h", - "cuda/include/crt/storage_class.h", - "cuda/include/crt/func_macro.h", - "cuda/include/crt/host_runtime.h", - "cuda/include/nppi_arithmetic_and_logical_operations.h", - "cuda/include/npps_arithmetic_and_logical_operations.h", - "cuda/include/nppi_computer_vision.h", - "cuda/include/surface_functions.hpp", - "cuda/include/surface_functions.h", - "cuda/include/curand_normal_static.h", - "cuda/include/curand.h", - "cuda/include/math_functions_dbl_ptx3.h", - "cuda/include/curand_philox4x32_x.h", - "cuda/include/nppi_threshold_and_compare_operations.h", - "cuda/include/nvml.h", - "cuda/include/npps.h", - "cuda/include/cuda_vdpau_interop.h", - "cuda/include/sm_61_intrinsics.hpp", - "cuda/include/cublas_api.h", - "cuda/include/nppi_color_conversion.h", - 
"cuda/include/math_functions_dbl_ptx3.hpp", - "cuda/include/nppcore.h", - "cuda/include/cudaGL.h", - "cuda/include/fatBinaryCtl.h", - "cuda/include/npps_statistics_functions.h", - "cuda/include/cudaVDPAU.h", - "cuda/include/curand_poisson.h", - "cuda/include/cusolverDn.h", - "cuda/include/cuda_profiler_api.h", - "cuda/include/sm_20_atomic_functions.h", - "cuda/include/nvfunctional", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/include/math_functions.hpp" "$(@D)/cuda/include/math_functions.hpp" && cp "/usr/local/cuda-8.0/include/cufft.h" "$(@D)/cuda/include/cufft.h" && cp "/usr/local/cuda-8.0/include/nvgraph.h" "$(@D)/cuda/include/nvgraph.h" && cp "/usr/local/cuda-8.0/include/curand_normal.h" "$(@D)/cuda/include/curand_normal.h" && cp "/usr/local/cuda-8.0/include/curand_uniform.h" "$(@D)/cuda/include/curand_uniform.h" && cp "/usr/local/cuda-8.0/include/nppi_data_exchange_and_initialization.h" "$(@D)/cuda/include/nppi_data_exchange_and_initialization.h" && cp "/usr/local/cuda-8.0/include/cuda_gl_interop.h" "$(@D)/cuda/include/cuda_gl_interop.h" && cp "/usr/local/cuda-8.0/include/nppi_compression_functions.h" "$(@D)/cuda/include/nppi_compression_functions.h" && cp "/usr/local/cuda-8.0/include/npp.h" "$(@D)/cuda/include/npp.h" && cp "/usr/local/cuda-8.0/include/cuda.h" "$(@D)/cuda/include/cuda.h" && cp "/usr/local/cuda-8.0/include/nppi_statistics_functions.h" "$(@D)/cuda/include/nppi_statistics_functions.h" && cp "/usr/local/cuda-8.0/include/vector_functions.hpp" "$(@D)/cuda/include/vector_functions.hpp" && cp "/usr/local/cuda-8.0/include/sm_32_intrinsics.hpp" "$(@D)/cuda/include/sm_32_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/sm_32_intrinsics.h" "$(@D)/cuda/include/sm_32_intrinsics.h" && cp "/usr/local/cuda-8.0/include/curand_discrete.h" 
"$(@D)/cuda/include/curand_discrete.h" && cp "/usr/local/cuda-8.0/include/cuda_runtime.h" "$(@D)/cuda/include/cuda_runtime.h" && cp "/usr/local/cuda-8.0/include/cufftXt.h" "$(@D)/cuda/include/cufftXt.h" && cp "/usr/local/cuda-8.0/include/sm_61_intrinsics.h" "$(@D)/cuda/include/sm_61_intrinsics.h" && cp "/usr/local/cuda-8.0/include/texture_fetch_functions.h" "$(@D)/cuda/include/texture_fetch_functions.h" && cp "/usr/local/cuda-8.0/include/curand_mrg32k3a.h" "$(@D)/cuda/include/curand_mrg32k3a.h" && cp "/usr/local/cuda-8.0/include/host_defines.h" "$(@D)/cuda/include/host_defines.h" && cp "/usr/local/cuda-8.0/include/common_functions.h" "$(@D)/cuda/include/common_functions.h" && cp "/usr/local/cuda-8.0/include/nppi_support_functions.h" "$(@D)/cuda/include/nppi_support_functions.h" && cp "/usr/local/cuda-8.0/include/nppi_linear_transforms.h" "$(@D)/cuda/include/nppi_linear_transforms.h" && cp "/usr/local/cuda-8.0/include/device_double_functions.hpp" "$(@D)/cuda/include/device_double_functions.hpp" && cp "/usr/local/cuda-8.0/include/math_constants.h" "$(@D)/cuda/include/math_constants.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtSync.h" "$(@D)/cuda/include/nvToolsExtSync.h" && cp "/usr/local/cuda-8.0/include/npps_initialization.h" "$(@D)/cuda/include/npps_initialization.h" && cp "/usr/local/cuda-8.0/include/cusolverSp_LOWLEVEL_PREVIEW.h" "$(@D)/cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h" && cp "/usr/local/cuda-8.0/include/texture_indirect_functions.hpp" "$(@D)/cuda/include/texture_indirect_functions.hpp" && cp "/usr/local/cuda-8.0/include/cudaProfiler.h" "$(@D)/cuda/include/cudaProfiler.h" && cp "/usr/local/cuda-8.0/include/npps_filtering_functions.h" "$(@D)/cuda/include/npps_filtering_functions.h" && cp "/usr/local/cuda-8.0/include/cusparse_v2.h" "$(@D)/cuda/include/cusparse_v2.h" && cp "/usr/local/cuda-8.0/include/nppi.h" "$(@D)/cuda/include/nppi.h" && cp "/usr/local/cuda-8.0/include/surface_indirect_functions.h" "$(@D)/cuda/include/surface_indirect_functions.h" && 
cp "/usr/local/cuda-8.0/include/sm_30_intrinsics.h" "$(@D)/cuda/include/sm_30_intrinsics.h" && cp "/usr/local/cuda-8.0/include/device_double_functions.h" "$(@D)/cuda/include/device_double_functions.h" && cp "/usr/local/cuda-8.0/include/sm_35_intrinsics.h" "$(@D)/cuda/include/sm_35_intrinsics.h" && cp "/usr/local/cuda-8.0/include/cusolverSp.h" "$(@D)/cuda/include/cusolverSp.h" && cp "/usr/local/cuda-8.0/include/library_types.h" "$(@D)/cuda/include/library_types.h" && cp "/usr/local/cuda-8.0/include/surface_indirect_functions.hpp" "$(@D)/cuda/include/surface_indirect_functions.hpp" && cp "/usr/local/cuda-8.0/include/cudalibxt.h" "$(@D)/cuda/include/cudalibxt.h" && cp "/usr/local/cuda-8.0/include/channel_descriptor.h" "$(@D)/cuda/include/channel_descriptor.h" && cp "/usr/local/cuda-8.0/include/device_functions_decls.h" "$(@D)/cuda/include/device_functions_decls.h" && cp "/usr/local/cuda-8.0/include/curand_kernel.h" "$(@D)/cuda/include/curand_kernel.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32_host.h" "$(@D)/cuda/include/curand_mtgp32_host.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtCuda.h" "$(@D)/cuda/include/nvToolsExtCuda.h" && cp "/usr/local/cuda-8.0/include/nvToolsExt.h" "$(@D)/cuda/include/nvToolsExt.h" && cp "/usr/local/cuda-8.0/include/cuComplex.h" "$(@D)/cuda/include/cuComplex.h" && cp "/usr/local/cuda-8.0/include/sm_32_atomic_functions.h" "$(@D)/cuda/include/sm_32_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/texture_indirect_functions.h" "$(@D)/cuda/include/texture_indirect_functions.h" && cp "/usr/local/cuda-8.0/include/sm_32_atomic_functions.hpp" "$(@D)/cuda/include/sm_32_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/sm_20_intrinsics.hpp" "$(@D)/cuda/include/sm_20_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/device_launch_parameters.h" "$(@D)/cuda/include/device_launch_parameters.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32.h" "$(@D)/cuda/include/curand_mtgp32.h" && cp 
"/usr/local/cuda-8.0/include/texture_fetch_functions.hpp" "$(@D)/cuda/include/texture_fetch_functions.hpp" && cp "/usr/local/cuda-8.0/include/cuda_occupancy.h" "$(@D)/cuda/include/cuda_occupancy.h" && cp "/usr/local/cuda-8.0/include/CL/opencl.h" "$(@D)/cuda/include/CL/opencl.h" && cp "/usr/local/cuda-8.0/include/CL/cl_platform.h" "$(@D)/cuda/include/CL/cl_platform.h" && cp "/usr/local/cuda-8.0/include/CL/cl_egl.h" "$(@D)/cuda/include/CL/cl_egl.h" && cp "/usr/local/cuda-8.0/include/CL/cl_gl.h" "$(@D)/cuda/include/CL/cl_gl.h" && cp "/usr/local/cuda-8.0/include/CL/cl.h" "$(@D)/cuda/include/CL/cl.h" && cp "/usr/local/cuda-8.0/include/CL/cl_gl_ext.h" "$(@D)/cuda/include/CL/cl_gl_ext.h" && cp "/usr/local/cuda-8.0/include/CL/cl_ext.h" "$(@D)/cuda/include/CL/cl_ext.h" && cp "/usr/local/cuda-8.0/include/CL/cl.hpp" "$(@D)/cuda/include/CL/cl.hpp" && cp "/usr/local/cuda-8.0/include/host_config.h" "$(@D)/cuda/include/host_config.h" && cp "/usr/local/cuda-8.0/include/cuda_surface_types.h" "$(@D)/cuda/include/cuda_surface_types.h" && cp "/usr/local/cuda-8.0/include/math_functions.h" "$(@D)/cuda/include/math_functions.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtMeta.h" "$(@D)/cuda/include/nvToolsExtMeta.h" && cp "/usr/local/cuda-8.0/include/sm_20_atomic_functions.hpp" "$(@D)/cuda/include/sm_20_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/device_functions.h" "$(@D)/cuda/include/device_functions.h" && cp "/usr/local/cuda-8.0/include/device_types.h" "$(@D)/cuda/include/device_types.h" && cp "/usr/local/cuda-8.0/include/npps_conversion_functions.h" "$(@D)/cuda/include/npps_conversion_functions.h" && cp "/usr/local/cuda-8.0/include/curand_precalc.h" "$(@D)/cuda/include/curand_precalc.h" && cp "/usr/local/cuda-8.0/include/cusolverRf.h" "$(@D)/cuda/include/cusolverRf.h" && cp "/usr/local/cuda-8.0/include/sm_60_atomic_functions.hpp" "$(@D)/cuda/include/sm_60_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/cuviddec.h" "$(@D)/cuda/include/cuviddec.h" && cp 
"/usr/local/cuda-8.0/include/curand_discrete2.h" "$(@D)/cuda/include/curand_discrete2.h" && cp "/usr/local/cuda-8.0/include/device_functions.hpp" "$(@D)/cuda/include/device_functions.hpp" && cp "/usr/local/cuda-8.0/include/thrust/transform_scan.h" "$(@D)/cuda/include/thrust/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system_error.h" "$(@D)/cuda/include/thrust/system_error.h" && cp "/usr/local/cuda-8.0/include/thrust/device_malloc.h" "$(@D)/cuda/include/thrust/device_malloc.h" && cp "/usr/local/cuda-8.0/include/thrust/partition.h" "$(@D)/cuda/include/thrust/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/unique.h" "$(@D)/cuda/include/thrust/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/device_delete.h" "$(@D)/cuda/include/thrust/device_delete.h" && cp "/usr/local/cuda-8.0/include/thrust/execution_policy.h" "$(@D)/cuda/include/thrust/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/adjacent_difference.h" "$(@D)/cuda/include/thrust/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/sequence.h" "$(@D)/cuda/include/thrust/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/merge.h" "$(@D)/cuda/include/thrust/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/device_new.h" "$(@D)/cuda/include/thrust/device_new.h" && cp "/usr/local/cuda-8.0/include/thrust/transform_reduce.h" "$(@D)/cuda/include/thrust/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/device_vector.h" "$(@D)/cuda/include/thrust/device_vector.h" && cp "/usr/local/cuda-8.0/include/thrust/gather.h" "$(@D)/cuda/include/thrust/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/sort.h" "$(@D)/cuda/include/thrust/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/scan.h" "$(@D)/cuda/include/thrust/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/temporary_array.h" "$(@D)/cuda/include/thrust/detail/temporary_array.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/util/align.h" "$(@D)/cuda/include/thrust/detail/util/align.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/util/blocking.h" "$(@D)/cuda/include/thrust/detail/util/blocking.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/transform.inl" "$(@D)/cuda/include/thrust/detail/transform.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_vector.inl" "$(@D)/cuda/include/thrust/detail/device_vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/binary_search.inl" "$(@D)/cuda/include/thrust/detail/binary_search.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/overlapped_copy.h" "$(@D)/cuda/include/thrust/detail/overlapped_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/vector_base.inl" "$(@D)/cuda/include/thrust/detail/vector_base.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_reference.inl" "$(@D)/cuda/include/thrust/detail/device_reference.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/actor.h" "$(@D)/cuda/include/thrust/detail/functional/actor.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/value.h" "$(@D)/cuda/include/thrust/detail/functional/value.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/logical_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/logical_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/relational_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/relational_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/assignment_operator.h" "$(@D)/cuda/include/thrust/detail/functional/operators/assignment_operator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/bitwise_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/bitwise_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/operator_adaptors.h" 
"$(@D)/cuda/include/thrust/detail/functional/operators/operator_adaptors.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/arithmetic_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/arithmetic_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/operators/compound_assignment_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/argument.h" "$(@D)/cuda/include/thrust/detail/functional/argument.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/placeholder.h" "$(@D)/cuda/include/thrust/detail/functional/placeholder.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/actor.inl" "$(@D)/cuda/include/thrust/detail/functional/actor.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional/composite.h" "$(@D)/cuda/include/thrust/detail/functional/composite.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/static_map.h" "$(@D)/cuda/include/thrust/detail/static_map.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/has_nested_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_nested_type.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/is_call_possible.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_call_possible.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/function_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/function_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/pointer_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/pointer_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/has_member_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_member_function.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" 
"$(@D)/cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/minimum_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/minimum_type.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/has_trivial_assign.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_trivial_assign.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/is_metafunction_defined.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_metafunction_defined.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/iterator/is_output_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/type_traits/result_of_adaptable_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/reference.h" "$(@D)/cuda/include/thrust/detail/reference.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/inner_product.inl" "$(@D)/cuda/include/thrust/detail/inner_product.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/use_default.h" "$(@D)/cuda/include/thrust/detail/use_default.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/sequence.inl" "$(@D)/cuda/include/thrust/detail/sequence.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/sort.inl" "$(@D)/cuda/include/thrust/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/equal.inl" "$(@D)/cuda/include/thrust/detail/equal.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/execution_policy.h" "$(@D)/cuda/include/thrust/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/integer_traits.h" "$(@D)/cuda/include/thrust/detail/integer_traits.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/type_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/reverse.inl" "$(@D)/cuda/include/thrust/detail/reverse.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tabulate.inl" "$(@D)/cuda/include/thrust/detail/tabulate.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/unique.inl" "$(@D)/cuda/include/thrust/detail/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/scatter.inl" "$(@D)/cuda/include/thrust/detail/scatter.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/set_operations.inl" "$(@D)/cuda/include/thrust/detail/set_operations.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_malloc.inl" "$(@D)/cuda/include/thrust/detail/device_malloc.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/copy_if.inl" "$(@D)/cuda/include/thrust/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/fill.inl" "$(@D)/cuda/include/thrust/detail/fill.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/temporary_array.inl" "$(@D)/cuda/include/thrust/detail/temporary_array.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/transform_scan.inl" "$(@D)/cuda/include/thrust/detail/transform_scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/minmax.h" "$(@D)/cuda/include/thrust/detail/minmax.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/swap.inl" "$(@D)/cuda/include/thrust/detail/swap.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/pointer.inl" "$(@D)/cuda/include/thrust/detail/pointer.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/transform_reduce.inl" "$(@D)/cuda/include/thrust/detail/transform_reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/config.h" "$(@D)/cuda/include/thrust/detail/config.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/distance.inl" "$(@D)/cuda/include/thrust/detail/distance.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/pair.inl" 
"$(@D)/cuda/include/thrust/detail/pair.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/temporary_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/tagged_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/destroy_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/destroy_range.h" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/no_throw_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/no_throw_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/default_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/fill_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/tagged_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/malloc_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/allocator_traits.h" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/copy_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/allocator_traits.inl" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/default_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/allocator/copy_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/malloc_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/temporary_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/allocator/fill_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/reduce.inl" "$(@D)/cuda/include/thrust/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_new.inl" "$(@D)/cuda/include/thrust/detail/device_new.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/pointer.h" "$(@D)/cuda/include/thrust/detail/pointer.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/for_each.inl" "$(@D)/cuda/include/thrust/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/generate.inl" "$(@D)/cuda/include/thrust/detail/generate.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/dispatch/is_trivial_copy.h" "$(@D)/cuda/include/thrust/detail/dispatch/is_trivial_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/detail/adjacent_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tuple_meta_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_meta_transform.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/functional.inl" "$(@D)/cuda/include/thrust/detail/functional.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/remove.inl" "$(@D)/cuda/include/thrust/detail/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tuple_transform.h" 
"$(@D)/cuda/include/thrust/detail/tuple_transform.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/merge.inl" "$(@D)/cuda/include/thrust/detail/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/extrema.inl" "$(@D)/cuda/include/thrust/detail/extrema.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/trivial_sequence.h" "$(@D)/cuda/include/thrust/detail/trivial_sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/vector_base.h" "$(@D)/cuda/include/thrust/detail/vector_base.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/count.inl" "$(@D)/cuda/include/thrust/detail/count.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/function.h" "$(@D)/cuda/include/thrust/detail/function.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/swap_ranges.inl" "$(@D)/cuda/include/thrust/detail/swap_ranges.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_delete.inl" "$(@D)/cuda/include/thrust/detail/device_delete.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/static_assert.h" "$(@D)/cuda/include/thrust/detail/static_assert.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/logical.inl" "$(@D)/cuda/include/thrust/detail/logical.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/seq.h" "$(@D)/cuda/include/thrust/detail/seq.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/mpl/math.h" "$(@D)/cuda/include/thrust/detail/mpl/math.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/mismatch.inl" "$(@D)/cuda/include/thrust/detail/mismatch.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/internal_functional.h" "$(@D)/cuda/include/thrust/detail/internal_functional.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/get_iterator_value.h" "$(@D)/cuda/include/thrust/detail/get_iterator_value.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/copy.inl" "$(@D)/cuda/include/thrust/detail/copy.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/copy.h" "$(@D)/cuda/include/thrust/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/catrigf.h" "$(@D)/cuda/include/thrust/detail/complex/catrigf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cpowf.h" "$(@D)/cuda/include/thrust/detail/complex/cpowf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csqrtf.h" "$(@D)/cuda/include/thrust/detail/complex/csqrtf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ccoshf.h" "$(@D)/cuda/include/thrust/detail/complex/ccoshf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csinhf.h" "$(@D)/cuda/include/thrust/detail/complex/csinhf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/clogf.h" "$(@D)/cuda/include/thrust/detail/complex/clogf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ccosh.h" "$(@D)/cuda/include/thrust/detail/complex/ccosh.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/arithmetic.h" "$(@D)/cuda/include/thrust/detail/complex/arithmetic.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csqrt.h" "$(@D)/cuda/include/thrust/detail/complex/csqrt.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cpow.h" "$(@D)/cuda/include/thrust/detail/complex/cpow.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/complex.inl" "$(@D)/cuda/include/thrust/detail/complex/complex.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/math_private.h" "$(@D)/cuda/include/thrust/detail/complex/math_private.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/c99math.h" "$(@D)/cuda/include/thrust/detail/complex/c99math.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cproj.h" "$(@D)/cuda/include/thrust/detail/complex/cproj.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/catrig.h" "$(@D)/cuda/include/thrust/detail/complex/catrig.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ctanhf.h" 
"$(@D)/cuda/include/thrust/detail/complex/ctanhf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cexpf.h" "$(@D)/cuda/include/thrust/detail/complex/cexpf.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/csinh.h" "$(@D)/cuda/include/thrust/detail/complex/csinh.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/stream.h" "$(@D)/cuda/include/thrust/detail/complex/stream.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/ctanh.h" "$(@D)/cuda/include/thrust/detail/complex/ctanh.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/cexp.h" "$(@D)/cuda/include/thrust/detail/complex/cexp.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/complex/clog.h" "$(@D)/cuda/include/thrust/detail/complex/clog.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/range/head_flags.h" "$(@D)/cuda/include/thrust/detail/range/head_flags.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/range/tail_flags.h" "$(@D)/cuda/include/thrust/detail/range/tail_flags.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/execute_with_allocator.h" "$(@D)/cuda/include/thrust/detail/execute_with_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/integer_math.h" "$(@D)/cuda/include/thrust/detail/integer_math.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/swap.h" "$(@D)/cuda/include/thrust/detail/swap.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_fill.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/scan.inl" "$(@D)/cuda/include/thrust/detail/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/gather.inl" "$(@D)/cuda/include/thrust/detail/gather.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/reference_forward_declaration.h" "$(@D)/cuda/include/thrust/detail/reference_forward_declaration.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/numeric_traits.h" "$(@D)/cuda/include/thrust/detail/numeric_traits.h" && cp 
"/usr/local/cuda-8.0/include/thrust/detail/reference.inl" "$(@D)/cuda/include/thrust/detail/reference.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/cstdint.h" "$(@D)/cuda/include/thrust/detail/cstdint.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_free.inl" "$(@D)/cuda/include/thrust/detail/device_free.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/copy_if.h" "$(@D)/cuda/include/thrust/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/partition.inl" "$(@D)/cuda/include/thrust/detail/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/find.inl" "$(@D)/cuda/include/thrust/detail/find.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/forceinline.h" "$(@D)/cuda/include/thrust/detail/config/forceinline.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/debug.h" "$(@D)/cuda/include/thrust/detail/config/debug.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/config.h" "$(@D)/cuda/include/thrust/detail/config/config.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/host_device.h" "$(@D)/cuda/include/thrust/detail/config/host_device.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/host_system.h" "$(@D)/cuda/include/thrust/detail/config/host_system.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/compiler.h" "$(@D)/cuda/include/thrust/detail/config/compiler.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/device_system.h" "$(@D)/cuda/include/thrust/detail/config/device_system.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/compiler_fence.h" "$(@D)/cuda/include/thrust/detail/config/compiler_fence.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/exec_check_disable.h" "$(@D)/cuda/include/thrust/detail/config/exec_check_disable.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/simple_defines.h" "$(@D)/cuda/include/thrust/detail/config/simple_defines.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/config/global_workarounds.h" 
"$(@D)/cuda/include/thrust/detail/config/global_workarounds.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/replace.inl" "$(@D)/cuda/include/thrust/detail/replace.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/device_ptr.inl" "$(@D)/cuda/include/thrust/detail/device_ptr.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/tuple.inl" "$(@D)/cuda/include/thrust/detail/tuple.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/host_vector.inl" "$(@D)/cuda/include/thrust/detail/host_vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/raw_pointer_cast.h" "$(@D)/cuda/include/thrust/detail/raw_pointer_cast.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/advance.inl" "$(@D)/cuda/include/thrust/detail/advance.inl" && cp "/usr/local/cuda-8.0/include/thrust/detail/contiguous_storage.h" "$(@D)/cuda/include/thrust/detail/contiguous_storage.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/raw_reference_cast.h" "$(@D)/cuda/include/thrust/detail/raw_reference_cast.h" && cp "/usr/local/cuda-8.0/include/thrust/detail/contiguous_storage.inl" "$(@D)/cuda/include/thrust/detail/contiguous_storage.inl" && cp "/usr/local/cuda-8.0/include/thrust/reverse.h" "$(@D)/cuda/include/thrust/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/device_malloc_allocator.h" "$(@D)/cuda/include/thrust/device_malloc_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/scatter.h" "$(@D)/cuda/include/thrust/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/pair.h" "$(@D)/cuda/include/thrust/pair.h" && cp "/usr/local/cuda-8.0/include/thrust/advance.h" "$(@D)/cuda/include/thrust/advance.h" && cp "/usr/local/cuda-8.0/include/thrust/find.h" "$(@D)/cuda/include/thrust/find.h" && cp "/usr/local/cuda-8.0/include/thrust/device_ptr.h" "$(@D)/cuda/include/thrust/device_ptr.h" && cp "/usr/local/cuda-8.0/include/thrust/generate.h" 
"$(@D)/cuda/include/thrust/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/uninitialized_fill.h" "$(@D)/cuda/include/thrust/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/system_error.h" "$(@D)/cuda/include/thrust/system/system_error.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/bad_alloc.h" "$(@D)/cuda/include/thrust/system/detail/bad_alloc.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/partition.h" "$(@D)/cuda/include/thrust/system/detail/adl/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/unique.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/adl/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/sequence.h" "$(@D)/cuda/include/thrust/system/detail/adl/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/merge.h" "$(@D)/cuda/include/thrust/system/detail/adl/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/gather.h" "$(@D)/cuda/include/thrust/system/detail/adl/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/sort.h" "$(@D)/cuda/include/thrust/system/detail/adl/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/adl/temporary_buffer.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/adl/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/reverse.h" "$(@D)/cuda/include/thrust/system/detail/adl/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/scatter.h" "$(@D)/cuda/include/thrust/system/detail/adl/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/find.h" "$(@D)/cuda/include/thrust/system/detail/adl/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/generate.h" "$(@D)/cuda/include/thrust/system/detail/adl/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/remove.h" "$(@D)/cuda/include/thrust/system/detail/adl/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/adl/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/for_each.h" "$(@D)/cuda/include/thrust/system/detail/adl/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/equal.h" "$(@D)/cuda/include/thrust/system/detail/adl/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/adl/swap_ranges.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/adl/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/adl/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/adl/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/adl/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/extrema.h" "$(@D)/cuda/include/thrust/system/detail/adl/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/count.h" "$(@D)/cuda/include/thrust/system/detail/adl/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/replace.h" "$(@D)/cuda/include/thrust/system/detail/adl/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/get_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/adl/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/logical.h" "$(@D)/cuda/include/thrust/system/detail/adl/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/adl/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/adl/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/adl/transform.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/errno.h" "$(@D)/cuda/include/thrust/system/detail/errno.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/error_category.inl" "$(@D)/cuda/include/thrust/system/detail/error_category.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/partition.h" "$(@D)/cuda/include/thrust/system/detail/sequential/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/unique.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/execution_policy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/sequential/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/sequence.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sequence.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/sequential/merge.h" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/gather.h" "$(@D)/cuda/include/thrust/system/detail/sequential/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy_backward.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_backward.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/sequential/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/reverse.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/scatter.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/find.h" "$(@D)/cuda/include/thrust/system/detail/sequential/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_merge_sort.inl" 
"$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/merge.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/generate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/general_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/general_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/insertion_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/insertion_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/remove.h" "$(@D)/cuda/include/thrust/system/detail/sequential/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/for_each.h" "$(@D)/cuda/include/thrust/system/detail/sequential/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/equal.h" "$(@D)/cuda/include/thrust/system/detail/sequential/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/sequential/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/sequential/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/sequential/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/sequential/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/extrema.h" "$(@D)/cuda/include/thrust/system/detail/sequential/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/count.h" "$(@D)/cuda/include/thrust/system/detail/sequential/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/trivial_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/trivial_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/replace.h" "$(@D)/cuda/include/thrust/system/detail/sequential/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/get_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/sequential/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/logical.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/sequential/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/sequential/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/sequential/transform.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/error_condition.inl" "$(@D)/cuda/include/thrust/system/detail/error_condition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/internal/decompose.h" "$(@D)/cuda/include/thrust/system/detail/internal/decompose.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/error_code.inl" "$(@D)/cuda/include/thrust/system/detail/error_code.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/memory.inl" "$(@D)/cuda/include/thrust/system/detail/generic/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/inner_product.inl" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/select_system.h" "$(@D)/cuda/include/thrust/system/detail/generic/select_system.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sequence.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sort.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/equal.inl" "$(@D)/cuda/include/thrust/system/detail/generic/equal.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/partition.h" "$(@D)/cuda/include/thrust/system/detail/generic/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/tag.h" "$(@D)/cuda/include/thrust/system/detail/generic/tag.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sequence.h" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/type_traits.h" "$(@D)/cuda/include/thrust/system/detail/generic/type_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/merge.h" "$(@D)/cuda/include/thrust/system/detail/generic/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reverse.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/tabulate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/unique.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scatter.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/set_operations.inl" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy_if.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/gather.h" "$(@D)/cuda/include/thrust/system/detail/generic/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform_reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/sort.h" "$(@D)/cuda/include/thrust/system/detail/generic/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/distance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/distance.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/temporary_buffer.h" 
"$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reverse.h" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/temporary_buffer.inl" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scatter.h" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/generate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/generate.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/remove.inl" "$(@D)/cuda/include/thrust/system/detail/generic/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/advance.h" "$(@D)/cuda/include/thrust/system/detail/generic/advance.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/find.h" "$(@D)/cuda/include/thrust/system/detail/generic/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/merge.inl" "$(@D)/cuda/include/thrust/system/detail/generic/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scalar/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scalar/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/extrema.inl" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/generate.h" "$(@D)/cuda/include/thrust/system/detail/generic/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/count.inl" "$(@D)/cuda/include/thrust/system/detail/generic/count.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/remove.h" "$(@D)/cuda/include/thrust/system/detail/generic/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/for_each.h" "$(@D)/cuda/include/thrust/system/detail/generic/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/distance.h" "$(@D)/cuda/include/thrust/system/detail/generic/distance.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/swap_ranges.inl" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/equal.h" "$(@D)/cuda/include/thrust/system/detail/generic/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/mismatch.inl" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/gather.inl" "$(@D)/cuda/include/thrust/system/detail/generic/gather.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/extrema.h" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/count.h" "$(@D)/cuda/include/thrust/system/detail/generic/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/replace.h" "$(@D)/cuda/include/thrust/system/detail/generic/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/detail/generic/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/logical.h" "$(@D)/cuda/include/thrust/system/detail/generic/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/partition.inl" "$(@D)/cuda/include/thrust/system/detail/generic/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/memory.h" "$(@D)/cuda/include/thrust/system/detail/generic/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/find.inl" "$(@D)/cuda/include/thrust/system/detail/generic/find.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/replace.inl" "$(@D)/cuda/include/thrust/system/detail/generic/replace.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/advance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/advance.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/generic/transform.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/detail/system_error.inl" "$(@D)/cuda/include/thrust/system/detail/system_error.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/vector.h" "$(@D)/cuda/include/thrust/system/omp/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/omp/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_intervals.inl" 
"$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/sort.inl" "$(@D)/cuda/include/thrust/system/omp/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/partition.h" "$(@D)/cuda/include/thrust/system/omp/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/omp/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/omp/detail/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/merge.h" "$(@D)/cuda/include/thrust/system/omp/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/unique.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/gather.h" "$(@D)/cuda/include/thrust/system/omp/detail/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/omp/detail/sort.h" "$(@D)/cuda/include/thrust/system/omp/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/omp/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/omp/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/omp/detail/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/remove.inl" "$(@D)/cuda/include/thrust/system/omp/detail/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/omp/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/find.h" "$(@D)/cuda/include/thrust/system/omp/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/generate.h" "$(@D)/cuda/include/thrust/system/omp/detail/generate.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/omp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/remove.h" "$(@D)/cuda/include/thrust/system/omp/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/omp/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/equal.h" "$(@D)/cuda/include/thrust/system/omp/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/omp/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/omp/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/omp/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/omp/detail/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/omp/detail/extrema.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/omp/detail/count.h" "$(@D)/cuda/include/thrust/system/omp/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/replace.h" "$(@D)/cuda/include/thrust/system/omp/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/omp/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/logical.h" "$(@D)/cuda/include/thrust/system/omp/detail/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/partition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/omp/detail/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/par.h" "$(@D)/cuda/include/thrust/system/omp/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/omp/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/detail/transform.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/omp/memory.h" "$(@D)/cuda/include/thrust/system/omp/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/vector.h" 
"$(@D)/cuda/include/thrust/system/tbb/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/memory.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/sort.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/partition.h" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/tbb/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/sequence.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/merge.h" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/unique.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_reduce.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/detail/gather.h" "$(@D)/cuda/include/thrust/system/tbb/detail/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/sort.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/tbb/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reverse.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scatter.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/remove.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/vector.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/find.h" "$(@D)/cuda/include/thrust/system/tbb/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/merge.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/detail/generate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/remove.h" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/for_each.h" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/equal.h" "$(@D)/cuda/include/thrust/system/tbb/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/tbb/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/tbb/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/tbb/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/tbb/detail/mismatch.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/detail/scan.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/extrema.h" "$(@D)/cuda/include/thrust/system/tbb/detail/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/count.h" "$(@D)/cuda/include/thrust/system/tbb/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/replace.h" "$(@D)/cuda/include/thrust/system/tbb/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/get_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/tbb/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/logical.h" "$(@D)/cuda/include/thrust/system/tbb/detail/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/partition.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/tbb/detail/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/par.h" "$(@D)/cuda/include/thrust/system/tbb/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_intervals.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/tbb/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/tbb/detail/transform.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/tbb/memory.h" "$(@D)/cuda/include/thrust/system/tbb/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/error_code.h" "$(@D)/cuda/include/thrust/system/error_code.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/vector.h" "$(@D)/cuda/include/thrust/system/cpp/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/partition.h" "$(@D)/cuda/include/thrust/system/cpp/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/unique.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cpp/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sequence.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/merge.h" "$(@D)/cuda/include/thrust/system/cpp/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/gather.h" "$(@D)/cuda/include/thrust/system/cpp/detail/gather.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cpp/detail/sort.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cpp/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scatter.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/find.h" "$(@D)/cuda/include/thrust/system/cpp/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/generate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/remove.h" "$(@D)/cuda/include/thrust/system/cpp/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cpp/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce_by_key.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cpp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/equal.h" "$(@D)/cuda/include/thrust/system/cpp/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cpp/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cpp/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cpp/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cpp/detail/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cpp/detail/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/count.h" "$(@D)/cuda/include/thrust/system/cpp/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/replace.h" "$(@D)/cuda/include/thrust/system/cpp/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cpp/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/logical.h" "$(@D)/cuda/include/thrust/system/cpp/detail/logical.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cpp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cpp/detail/iter_swap.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/par.h" "$(@D)/cuda/include/thrust/system/cpp/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cpp/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/detail/transform.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cpp/memory.h" "$(@D)/cuda/include/thrust/system/cpp/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/vector.h" "$(@D)/cuda/include/thrust/system/cuda/vector.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/error.h" "$(@D)/cuda/include/thrust/system/cuda/error.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_device_to_device.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_device_to_device.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/memory.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_allocator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_device.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_device.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_rle_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_rle_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_histogram_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_histogram_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_by_key_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_by_key_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_scan_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_scan_dispatch.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_select_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_select_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_reduce_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/dispatch/device_radix_sort_dispatch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/device_radix_sort_dispatch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_histo.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_histo.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_downsweep.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_upsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_satomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_satomic.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_gatomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/specializations/block_range_histo_gatomic.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_select.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_scan_prefix_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_scan_prefix_operators.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_range/block_range_reduce_by_key.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_macro.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_namespace.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_upsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_histogram_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_histogram_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_rle_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_rle_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_select_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_select_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_satomic_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_satomic_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_sort_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_sort_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_gatomic_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/specializations/block_histogram_gatomic_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_downsweep.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_by_key_sweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_reduce_by_key_sweep.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_prefix_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block_sweep/block_scan_prefix_operators.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_type.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_type.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/host/spinlock.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/host/spinlock.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_ptx.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_debug.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/cub.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/cub.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_shift.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_shift.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub/util_arch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_intervals.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_intervals.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_cross_system.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_cross_system.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk.h" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/partition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/partition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/unique.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execution_policy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cuda_launch_config.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cuda_launch_config.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/cub.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cub.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sequence.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_symmetric_difference.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_symmetric_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/error.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/error.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/gather.h" "$(@D)/cuda/include/thrust/system/cuda/detail/gather.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/synchronize.h" "$(@D)/cuda/include/thrust/system/cuda/detail/synchronize.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/temporary_indirect_permutation.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_indirect_permutation.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/extern_shared_ptr.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extern_shared_ptr.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/set_operation.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/set_operation.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/balanced_path.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/balanced_path.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/virtualized_smem_closure.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/virtualized_smem_closure.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/set_operation.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/set_operation.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_primitive_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_closure.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_closure.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/alignment.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/alignment.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_sort_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_calculator.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_calculator.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_merge_sort.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_merge_sort.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_closure.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_closure.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_radix_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/uninitialized.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/uninitialized.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/cached_temporary_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/cached_temporary_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/launch_calculator.h" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/launch_calculator.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/detail/stable_sort_each.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/detail/stable_sort_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_buffer.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/default_decomposition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reverse.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/assign_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scatter.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_intervals.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_intervals.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/default_decomposition.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/vector.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/throw_on_error.h" "$(@D)/cuda/include/thrust/system/cuda/detail/throw_on_error.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/find.h" "$(@D)/cuda/include/thrust/system/cuda/detail/find.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/terminate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/terminate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/merge.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/trivial_copy.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/trivial_copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/generate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/generate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/execute_on_stream.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execute_on_stream.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_fill.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/remove.h" "$(@D)/cuda/include/thrust/system/cuda/detail/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/decomposition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/decomposition.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/equal.h" "$(@D)/cuda/include/thrust/system/cuda/detail/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/runtime_introspection.h" "$(@D)/cuda/include/thrust/system/cuda/detail/runtime_introspection.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cuda/detail/swap_ranges.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cuda/detail/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/runtime_introspection.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/runtime_introspection.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_operations.h" 
"$(@D)/cuda/include/thrust/system/cuda/detail/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/scan.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/synchronize.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/synchronize.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_union.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_union.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_intersection.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_intersection.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/count.h" "$(@D)/cuda/include/thrust/system/cuda/detail/count.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/trivial_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/trivial_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_device_to_device.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_device_to_device.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/replace.h" "$(@D)/cuda/include/thrust/system/cuda/detail/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/malloc.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/malloc.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/config.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/config.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/closure.hpp" 
"$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/closure.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/tail_flags.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/tail_flags.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/terminate.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/terminate.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/alignment.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/alignment.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/guarded_cuda_runtime_api.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/guarded_cuda_runtime_api.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/choose_sizes.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/choose_sizes.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/tuple_meta_transform.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_meta_transform.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_task.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/head_flags.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/head_flags.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/synchronize.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/synchronize.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/throw_on_error.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/throw_on_error.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/parameter_ptr.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/parameter_ptr.hpp" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launcher.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launcher.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/triple_chevron_launcher.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/triple_chevron_launcher.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launch_config.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/cuda_launch_config.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/cuda_launcher/runtime_introspection.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/async.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/async.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/tuple_transform.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/tuple_transform.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/pointer_traits.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/pointer_traits.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/apply_from_tuple.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/detail/is_contiguous_iterator.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/detail/is_contiguous_iterator.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/iterator.hpp" 
"$(@D)/cuda/include/thrust/system/cuda/detail/bulk/iterator.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/choose_sizes.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/choose_sizes.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/copy.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/copy.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/merge.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/merge.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/accumulate.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/accumulate.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/scan.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/scan.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/detail/stable_merge_sort.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/detail/stable_merge_sort.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/gather.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/gather.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/sort.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/sort.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/reduce.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/scatter.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/scatter.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/adjacent_difference.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/adjacent_difference.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp" 
"$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/reduce_by_key.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/algorithm/for_each.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/algorithm/for_each.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/bulk.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/bulk.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/execution_policy.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/execution_policy.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/iterator/strided_iterator.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/iterator/strided_iterator.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/uninitialized.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/uninitialized.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/async.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/async.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/bulk/future.hpp" "$(@D)/cuda/include/thrust/system/cuda/detail/bulk/future.hpp" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/guarded_driver_types.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_driver_types.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/get_value.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cuda/detail/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/logical.h" "$(@D)/cuda/include/thrust/system/cuda/detail/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cuda/detail/iter_swap.h" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/merge.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/inclusive_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/inclusive_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/merge.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/block/merge.inl" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/merging_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/merging_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/exclusive_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/exclusive_scan.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/block/odd_even_sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/block/odd_even_sort.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/par.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/copy_cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_cross_system.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_intervals.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cuda/detail/malloc_and_free.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/detail/set_difference.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/set_difference.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/system/cuda/detail/transform.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/experimental/pinned_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/experimental/pinned_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/system/cuda/memory.h" "$(@D)/cuda/include/thrust/system/cuda/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/remove.h" "$(@D)/cuda/include/thrust/remove.h" && cp "/usr/local/cuda-8.0/include/thrust/tabulate.h" "$(@D)/cuda/include/thrust/tabulate.h" && cp "/usr/local/cuda-8.0/include/thrust/for_each.h" "$(@D)/cuda/include/thrust/for_each.h" && cp "/usr/local/cuda-8.0/include/thrust/distance.h" "$(@D)/cuda/include/thrust/distance.h" && cp "/usr/local/cuda-8.0/include/thrust/reduce.h" "$(@D)/cuda/include/thrust/reduce.h" && cp "/usr/local/cuda-8.0/include/thrust/equal.h" "$(@D)/cuda/include/thrust/equal.h" && cp "/usr/local/cuda-8.0/include/thrust/complex.h" "$(@D)/cuda/include/thrust/complex.h" && cp "/usr/local/cuda-8.0/include/thrust/device_allocator.h" "$(@D)/cuda/include/thrust/device_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/copy.h" "$(@D)/cuda/include/thrust/copy.h" && cp "/usr/local/cuda-8.0/include/thrust/uninitialized_copy.h" "$(@D)/cuda/include/thrust/uninitialized_copy.h" && cp "/usr/local/cuda-8.0/include/thrust/device_reference.h" "$(@D)/cuda/include/thrust/device_reference.h" && cp "/usr/local/cuda-8.0/include/thrust/binary_search.h" "$(@D)/cuda/include/thrust/binary_search.h" && cp "/usr/local/cuda-8.0/include/thrust/set_operations.h" "$(@D)/cuda/include/thrust/set_operations.h" && cp "/usr/local/cuda-8.0/include/thrust/swap.h" "$(@D)/cuda/include/thrust/swap.h" && cp "/usr/local/cuda-8.0/include/thrust/mismatch.h" "$(@D)/cuda/include/thrust/mismatch.h" && cp "/usr/local/cuda-8.0/include/thrust/extrema.h" "$(@D)/cuda/include/thrust/extrema.h" && cp "/usr/local/cuda-8.0/include/thrust/count.h" 
"$(@D)/cuda/include/thrust/count.h" && cp "/usr/local/cuda-8.0/include/thrust/device_free.h" "$(@D)/cuda/include/thrust/device_free.h" && cp "/usr/local/cuda-8.0/include/thrust/random/discard_block_engine.h" "$(@D)/cuda/include/thrust/random/discard_block_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/normal_distribution.h" "$(@D)/cuda/include/thrust/random/normal_distribution.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/subtract_with_carry_engine.inl" "$(@D)/cuda/include/thrust/random/detail/subtract_with_carry_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/xor_combine_engine_max.h" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine_max.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/linear_congruential_engine_discard.h" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine_discard.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/uniform_int_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_int_distribution.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/discard_block_engine.inl" "$(@D)/cuda/include/thrust/random/detail/discard_block_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/uniform_real_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_real_distribution.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/random_core_access.h" "$(@D)/cuda/include/thrust/random/detail/random_core_access.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/mod.h" "$(@D)/cuda/include/thrust/random/detail/mod.h" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/linear_feedback_shift_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/random/detail/linear_congruential_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/xor_combine_engine.inl" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/normal_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/normal_distribution.inl" && cp "/usr/local/cuda-8.0/include/thrust/random/detail/normal_distribution_base.h" "$(@D)/cuda/include/thrust/random/detail/normal_distribution_base.h" && cp "/usr/local/cuda-8.0/include/thrust/random/uniform_int_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_int_distribution.h" && cp "/usr/local/cuda-8.0/include/thrust/random/linear_feedback_shift_engine.h" "$(@D)/cuda/include/thrust/random/linear_feedback_shift_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/xor_combine_engine.h" "$(@D)/cuda/include/thrust/random/xor_combine_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/subtract_with_carry_engine.h" "$(@D)/cuda/include/thrust/random/subtract_with_carry_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/linear_congruential_engine.h" "$(@D)/cuda/include/thrust/random/linear_congruential_engine.h" && cp "/usr/local/cuda-8.0/include/thrust/random/uniform_real_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_real_distribution.h" && cp "/usr/local/cuda-8.0/include/thrust/functional.h" "$(@D)/cuda/include/thrust/functional.h" && cp "/usr/local/cuda-8.0/include/thrust/replace.h" "$(@D)/cuda/include/thrust/replace.h" && cp "/usr/local/cuda-8.0/include/thrust/device_new_allocator.h" "$(@D)/cuda/include/thrust/device_new_allocator.h" && cp "/usr/local/cuda-8.0/include/thrust/host_vector.h" "$(@D)/cuda/include/thrust/host_vector.h" && cp "/usr/local/cuda-8.0/include/thrust/version.h" "$(@D)/cuda/include/thrust/version.h" && cp "/usr/local/cuda-8.0/include/thrust/inner_product.h" 
"$(@D)/cuda/include/thrust/inner_product.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_traits.h" "$(@D)/cuda/include/thrust/iterator/iterator_traits.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/discard_iterator.h" "$(@D)/cuda/include/thrust/iterator/discard_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/retag.h" "$(@D)/cuda/include/thrust/iterator/retag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/permutation_iterator.h" "$(@D)/cuda/include/thrust/iterator/permutation_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/transform_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/reverse_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/zip_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/counting_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/counting_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/distance_from_result.h" "$(@D)/cuda/include/thrust/iterator/detail/distance_from_result.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/host_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/host_system_tag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_traversal_tags.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traversal_tags.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/retag.h" "$(@D)/cuda/include/thrust/iterator/detail/retag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/tagged_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/tagged_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_traits.inl" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traits.inl" && cp 
"/usr/local/cuda-8.0/include/thrust/iterator/detail/minimum_category.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_category.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/discard_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/discard_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_category_to_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/zip_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/normal_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/normal_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/join_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/join_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/device_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/device_system_tag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/universal_categories.h" "$(@D)/cuda/include/thrust/iterator/detail/universal_categories.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/reverse_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/minimum_system.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_system.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/tuple_of_iterator_references.h" "$(@D)/cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/is_iterator_category.h" "$(@D)/cuda/include/thrust/iterator/detail/is_iterator_category.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/permutation_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/permutation_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/any_assign.h" 
"$(@D)/cuda/include/thrust/iterator/detail/any_assign.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/any_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/any_system_tag.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/is_trivial_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/is_trivial_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_category_to_system.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_system.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_adaptor_base.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_adaptor_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/constant_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/constant_iterator_base.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/transform_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_iterator.inl" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_facade_category.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_facade_category.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/constant_iterator.h" "$(@D)/cuda/include/thrust/iterator/constant_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/counting_iterator.h" "$(@D)/cuda/include/thrust/iterator/counting_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_adaptor.h" "$(@D)/cuda/include/thrust/iterator/iterator_adaptor.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_facade.h" "$(@D)/cuda/include/thrust/iterator/iterator_facade.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/iterator_categories.h" "$(@D)/cuda/include/thrust/iterator/iterator_categories.h" && cp 
"/usr/local/cuda-8.0/include/thrust/iterator/reverse_iterator.h" "$(@D)/cuda/include/thrust/iterator/reverse_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/iterator/zip_iterator.h" "$(@D)/cuda/include/thrust/iterator/zip_iterator.h" && cp "/usr/local/cuda-8.0/include/thrust/logical.h" "$(@D)/cuda/include/thrust/logical.h" && cp "/usr/local/cuda-8.0/include/thrust/tuple.h" "$(@D)/cuda/include/thrust/tuple.h" && cp "/usr/local/cuda-8.0/include/thrust/memory.h" "$(@D)/cuda/include/thrust/memory.h" && cp "/usr/local/cuda-8.0/include/thrust/random.h" "$(@D)/cuda/include/thrust/random.h" && cp "/usr/local/cuda-8.0/include/thrust/fill.h" "$(@D)/cuda/include/thrust/fill.h" && cp "/usr/local/cuda-8.0/include/thrust/transform.h" "$(@D)/cuda/include/thrust/transform.h" && cp "/usr/local/cuda-8.0/include/texture_types.h" "$(@D)/cuda/include/texture_types.h" && cp "/usr/local/cuda-8.0/include/nppversion.h" "$(@D)/cuda/include/nppversion.h" && cp "/usr/local/cuda-8.0/include/cuda_texture_types.h" "$(@D)/cuda/include/cuda_texture_types.h" && cp "/usr/local/cuda-8.0/include/fatbinary.h" "$(@D)/cuda/include/fatbinary.h" && cp "/usr/local/cuda-8.0/include/cublasXt.h" "$(@D)/cuda/include/cublasXt.h" && cp "/usr/local/cuda-8.0/include/cuda_fp16.h" "$(@D)/cuda/include/cuda_fp16.h" && cp "/usr/local/cuda-8.0/include/vector_functions.h" "$(@D)/cuda/include/vector_functions.h" && cp "/usr/local/cuda-8.0/include/cusparse.h" "$(@D)/cuda/include/cusparse.h" && cp "/usr/local/cuda-8.0/include/nppi_filtering_functions.h" "$(@D)/cuda/include/nppi_filtering_functions.h" && cp "/usr/local/cuda-8.0/include/nppi_morphological_operations.h" "$(@D)/cuda/include/nppi_morphological_operations.h" && cp "/usr/local/cuda-8.0/include/sobol_direction_vectors.h" "$(@D)/cuda/include/sobol_direction_vectors.h" && cp "/usr/local/cuda-8.0/include/nvblas.h" "$(@D)/cuda/include/nvblas.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32dc_p_11213.h" "$(@D)/cuda/include/curand_mtgp32dc_p_11213.h" && cp 
"/usr/local/cuda-8.0/include/nvcuvid.h" "$(@D)/cuda/include/nvcuvid.h" && cp "/usr/local/cuda-8.0/include/cuda_runtime_api.h" "$(@D)/cuda/include/cuda_runtime_api.h" && cp "/usr/local/cuda-8.0/include/curand_mtgp32_kernel.h" "$(@D)/cuda/include/curand_mtgp32_kernel.h" && cp "/usr/local/cuda-8.0/include/cublas_v2.h" "$(@D)/cuda/include/cublas_v2.h" && cp "/usr/local/cuda-8.0/include/builtin_types.h" "$(@D)/cuda/include/builtin_types.h" && cp "/usr/local/cuda-8.0/include/nppi_geometry_transforms.h" "$(@D)/cuda/include/nppi_geometry_transforms.h" && cp "/usr/local/cuda-8.0/include/npps_support_functions.h" "$(@D)/cuda/include/npps_support_functions.h" && cp "/usr/local/cuda-8.0/include/cufftw.h" "$(@D)/cuda/include/cufftw.h" && cp "/usr/local/cuda-8.0/include/cuda_device_runtime_api.h" "$(@D)/cuda/include/cuda_device_runtime_api.h" && cp "/usr/local/cuda-8.0/include/sm_30_intrinsics.hpp" "$(@D)/cuda/include/sm_30_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/vector_types.h" "$(@D)/cuda/include/vector_types.h" && cp "/usr/local/cuda-8.0/include/sm_35_atomic_functions.h" "$(@D)/cuda/include/sm_35_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/sm_20_intrinsics.h" "$(@D)/cuda/include/sm_20_intrinsics.h" && cp "/usr/local/cuda-8.0/include/driver_types.h" "$(@D)/cuda/include/driver_types.h" && cp "/usr/local/cuda-8.0/include/nvToolsExtCudaRt.h" "$(@D)/cuda/include/nvToolsExtCudaRt.h" && cp "/usr/local/cuda-8.0/include/curand_globals.h" "$(@D)/cuda/include/curand_globals.h" && cp "/usr/local/cuda-8.0/include/device_atomic_functions.h" "$(@D)/cuda/include/device_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/surface_types.h" "$(@D)/cuda/include/surface_types.h" && cp "/usr/local/cuda-8.0/include/nvrtc.h" "$(@D)/cuda/include/nvrtc.h" && cp "/usr/local/cuda-8.0/include/nppdefs.h" "$(@D)/cuda/include/nppdefs.h" && cp "/usr/local/cuda-8.0/include/sm_60_atomic_functions.h" "$(@D)/cuda/include/sm_60_atomic_functions.h" && cp 
"/usr/local/cuda-8.0/include/driver_functions.h" "$(@D)/cuda/include/driver_functions.h" && cp "/usr/local/cuda-8.0/include/cusolver_common.h" "$(@D)/cuda/include/cusolver_common.h" && cp "/usr/local/cuda-8.0/include/cublas.h" "$(@D)/cuda/include/cublas.h" && cp "/usr/local/cuda-8.0/include/curand_lognormal.h" "$(@D)/cuda/include/curand_lognormal.h" && cp "/usr/local/cuda-8.0/include/device_atomic_functions.hpp" "$(@D)/cuda/include/device_atomic_functions.hpp" && cp "/usr/local/cuda-8.0/include/crt/device_runtime.h" "$(@D)/cuda/include/crt/device_runtime.h" && cp "/usr/local/cuda-8.0/include/crt/storage_class.h" "$(@D)/cuda/include/crt/storage_class.h" && cp "/usr/local/cuda-8.0/include/crt/func_macro.h" "$(@D)/cuda/include/crt/func_macro.h" && cp "/usr/local/cuda-8.0/include/crt/host_runtime.h" "$(@D)/cuda/include/crt/host_runtime.h" && cp "/usr/local/cuda-8.0/include/nppi_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/nppi_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-8.0/include/npps_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/npps_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-8.0/include/nppi_computer_vision.h" "$(@D)/cuda/include/nppi_computer_vision.h" && cp "/usr/local/cuda-8.0/include/surface_functions.hpp" "$(@D)/cuda/include/surface_functions.hpp" && cp "/usr/local/cuda-8.0/include/surface_functions.h" "$(@D)/cuda/include/surface_functions.h" && cp "/usr/local/cuda-8.0/include/curand_normal_static.h" "$(@D)/cuda/include/curand_normal_static.h" && cp "/usr/local/cuda-8.0/include/curand.h" "$(@D)/cuda/include/curand.h" && cp "/usr/local/cuda-8.0/include/math_functions_dbl_ptx3.h" "$(@D)/cuda/include/math_functions_dbl_ptx3.h" && cp "/usr/local/cuda-8.0/include/curand_philox4x32_x.h" "$(@D)/cuda/include/curand_philox4x32_x.h" && cp "/usr/local/cuda-8.0/include/nppi_threshold_and_compare_operations.h" "$(@D)/cuda/include/nppi_threshold_and_compare_operations.h" && cp "/usr/local/cuda-8.0/include/nvml.h" 
"$(@D)/cuda/include/nvml.h" && cp "/usr/local/cuda-8.0/include/npps.h" "$(@D)/cuda/include/npps.h" && cp "/usr/local/cuda-8.0/include/cuda_vdpau_interop.h" "$(@D)/cuda/include/cuda_vdpau_interop.h" && cp "/usr/local/cuda-8.0/include/sm_61_intrinsics.hpp" "$(@D)/cuda/include/sm_61_intrinsics.hpp" && cp "/usr/local/cuda-8.0/include/cublas_api.h" "$(@D)/cuda/include/cublas_api.h" && cp "/usr/local/cuda-8.0/include/nppi_color_conversion.h" "$(@D)/cuda/include/nppi_color_conversion.h" && cp "/usr/local/cuda-8.0/include/math_functions_dbl_ptx3.hpp" "$(@D)/cuda/include/math_functions_dbl_ptx3.hpp" && cp "/usr/local/cuda-8.0/include/nppcore.h" "$(@D)/cuda/include/nppcore.h" && cp "/usr/local/cuda-8.0/include/cudaGL.h" "$(@D)/cuda/include/cudaGL.h" && cp "/usr/local/cuda-8.0/include/fatBinaryCtl.h" "$(@D)/cuda/include/fatBinaryCtl.h" && cp "/usr/local/cuda-8.0/include/npps_statistics_functions.h" "$(@D)/cuda/include/npps_statistics_functions.h" && cp "/usr/local/cuda-8.0/include/cudaVDPAU.h" "$(@D)/cuda/include/cudaVDPAU.h" && cp "/usr/local/cuda-8.0/include/curand_poisson.h" "$(@D)/cuda/include/curand_poisson.h" && cp "/usr/local/cuda-8.0/include/cusolverDn.h" "$(@D)/cuda/include/cusolverDn.h" && cp "/usr/local/cuda-8.0/include/cuda_profiler_api.h" "$(@D)/cuda/include/cuda_profiler_api.h" && cp "/usr/local/cuda-8.0/include/sm_20_atomic_functions.h" "$(@D)/cuda/include/sm_20_atomic_functions.h" && cp "/usr/local/cuda-8.0/include/nvfunctional" "$(@D)/cuda/include/nvfunctional" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/include/CL/cl.h" "$(@D)/cuda/include/CL/cl.h" && cp "/usr/local/cuda-9.0/include/CL/cl.hpp" "$(@D)/cuda/include/CL/cl.hpp" && cp "/usr/local/cuda-9.0/include/CL/cl_egl.h" "$(@D)/cuda/include/CL/cl_egl.h" && cp "/usr/local/cuda-9.0/include/CL/cl_ext.h" 
"$(@D)/cuda/include/CL/cl_ext.h" && cp "/usr/local/cuda-9.0/include/CL/cl_gl.h" "$(@D)/cuda/include/CL/cl_gl.h" && cp "/usr/local/cuda-9.0/include/CL/cl_gl_ext.h" "$(@D)/cuda/include/CL/cl_gl_ext.h" && cp "/usr/local/cuda-9.0/include/CL/cl_platform.h" "$(@D)/cuda/include/CL/cl_platform.h" && cp "/usr/local/cuda-9.0/include/CL/opencl.h" "$(@D)/cuda/include/CL/opencl.h" && cp "/usr/local/cuda-9.0/include/builtin_types.h" "$(@D)/cuda/include/builtin_types.h" && cp "/usr/local/cuda-9.0/include/channel_descriptor.h" "$(@D)/cuda/include/channel_descriptor.h" && cp "/usr/local/cuda-9.0/include/common_functions.h" "$(@D)/cuda/include/common_functions.h" && cp "/usr/local/cuda-9.0/include/cooperative_groups.h" "$(@D)/cuda/include/cooperative_groups.h" && cp "/usr/local/cuda-9.0/include/cooperative_groups_helpers.h" "$(@D)/cuda/include/cooperative_groups_helpers.h" && cp "/usr/local/cuda-9.0/include/crt/common_functions.h" "$(@D)/cuda/include/crt/common_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_double_functions.h" "$(@D)/cuda/include/crt/device_double_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_double_functions.hpp" "$(@D)/cuda/include/crt/device_double_functions.hpp" && cp "/usr/local/cuda-9.0/include/crt/device_functions.h" "$(@D)/cuda/include/crt/device_functions.h" && cp "/usr/local/cuda-9.0/include/crt/device_functions.hpp" "$(@D)/cuda/include/crt/device_functions.hpp" && cp "/usr/local/cuda-9.0/include/crt/func_macro.h" "$(@D)/cuda/include/crt/func_macro.h" && cp "/usr/local/cuda-9.0/include/crt/host_config.h" "$(@D)/cuda/include/crt/host_config.h" && cp "/usr/local/cuda-9.0/include/crt/host_defines.h" "$(@D)/cuda/include/crt/host_defines.h" && cp "/usr/local/cuda-9.0/include/crt/host_runtime.h" "$(@D)/cuda/include/crt/host_runtime.h" && cp "/usr/local/cuda-9.0/include/crt/math_functions.h" "$(@D)/cuda/include/crt/math_functions.h" && cp "/usr/local/cuda-9.0/include/crt/math_functions.hpp" "$(@D)/cuda/include/crt/math_functions.hpp" && 
cp "/usr/local/cuda-9.0/include/crt/mma.h" "$(@D)/cuda/include/crt/mma.h" && cp "/usr/local/cuda-9.0/include/crt/mma.hpp" "$(@D)/cuda/include/crt/mma.hpp" && cp "/usr/local/cuda-9.0/include/crt/nvfunctional" "$(@D)/cuda/include/crt/nvfunctional" && cp "/usr/local/cuda-9.0/include/crt/sm_70_rt.h" "$(@D)/cuda/include/crt/sm_70_rt.h" && cp "/usr/local/cuda-9.0/include/crt/sm_70_rt.hpp" "$(@D)/cuda/include/crt/sm_70_rt.hpp" && cp "/usr/local/cuda-9.0/include/crt/storage_class.h" "$(@D)/cuda/include/crt/storage_class.h" && cp "/usr/local/cuda-9.0/include/cuComplex.h" "$(@D)/cuda/include/cuComplex.h" && cp "/usr/local/cuda-9.0/include/cublas.h" "$(@D)/cuda/include/cublas.h" && cp "/usr/local/cuda-9.0/include/cublasXt.h" "$(@D)/cuda/include/cublasXt.h" && cp "/usr/local/cuda-9.0/include/cublas_api.h" "$(@D)/cuda/include/cublas_api.h" && cp "/usr/local/cuda-9.0/include/cublas_v2.h" "$(@D)/cuda/include/cublas_v2.h" && cp "/usr/local/cuda-9.0/include/cuda.h" "$(@D)/cuda/include/cuda.h" && cp "/usr/local/cuda-9.0/include/cudaEGL.h" "$(@D)/cuda/include/cudaEGL.h" && cp "/usr/local/cuda-9.0/include/cudaGL.h" "$(@D)/cuda/include/cudaGL.h" && cp "/usr/local/cuda-9.0/include/cudaProfiler.h" "$(@D)/cuda/include/cudaProfiler.h" && cp "/usr/local/cuda-9.0/include/cudaVDPAU.h" "$(@D)/cuda/include/cudaVDPAU.h" && cp "/usr/local/cuda-9.0/include/cuda_device_runtime_api.h" "$(@D)/cuda/include/cuda_device_runtime_api.h" && cp "/usr/local/cuda-9.0/include/cuda_fp16.h" "$(@D)/cuda/include/cuda_fp16.h" && cp "/usr/local/cuda-9.0/include/cuda_fp16.hpp" "$(@D)/cuda/include/cuda_fp16.hpp" && cp "/usr/local/cuda-9.0/include/cuda_gl_interop.h" "$(@D)/cuda/include/cuda_gl_interop.h" && cp "/usr/local/cuda-9.0/include/cuda_occupancy.h" "$(@D)/cuda/include/cuda_occupancy.h" && cp "/usr/local/cuda-9.0/include/cuda_profiler_api.h" "$(@D)/cuda/include/cuda_profiler_api.h" && cp "/usr/local/cuda-9.0/include/cuda_runtime.h" "$(@D)/cuda/include/cuda_runtime.h" && cp 
"/usr/local/cuda-9.0/include/cuda_runtime_api.h" "$(@D)/cuda/include/cuda_runtime_api.h" && cp "/usr/local/cuda-9.0/include/cuda_surface_types.h" "$(@D)/cuda/include/cuda_surface_types.h" && cp "/usr/local/cuda-9.0/include/cuda_texture_types.h" "$(@D)/cuda/include/cuda_texture_types.h" && cp "/usr/local/cuda-9.0/include/cuda_vdpau_interop.h" "$(@D)/cuda/include/cuda_vdpau_interop.h" && cp "/usr/local/cuda-9.0/include/cudalibxt.h" "$(@D)/cuda/include/cudalibxt.h" && cp "/usr/local/cuda-9.0/include/cudnn.h" "$(@D)/cuda/include/cudnn.h" && cp "/usr/local/cuda-9.0/include/cufft.h" "$(@D)/cuda/include/cufft.h" && cp "/usr/local/cuda-9.0/include/cufftXt.h" "$(@D)/cuda/include/cufftXt.h" && cp "/usr/local/cuda-9.0/include/cufftw.h" "$(@D)/cuda/include/cufftw.h" && cp "/usr/local/cuda-9.0/include/curand.h" "$(@D)/cuda/include/curand.h" && cp "/usr/local/cuda-9.0/include/curand_discrete.h" "$(@D)/cuda/include/curand_discrete.h" && cp "/usr/local/cuda-9.0/include/curand_discrete2.h" "$(@D)/cuda/include/curand_discrete2.h" && cp "/usr/local/cuda-9.0/include/curand_globals.h" "$(@D)/cuda/include/curand_globals.h" && cp "/usr/local/cuda-9.0/include/curand_kernel.h" "$(@D)/cuda/include/curand_kernel.h" && cp "/usr/local/cuda-9.0/include/curand_lognormal.h" "$(@D)/cuda/include/curand_lognormal.h" && cp "/usr/local/cuda-9.0/include/curand_mrg32k3a.h" "$(@D)/cuda/include/curand_mrg32k3a.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32.h" "$(@D)/cuda/include/curand_mtgp32.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32_host.h" "$(@D)/cuda/include/curand_mtgp32_host.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32_kernel.h" "$(@D)/cuda/include/curand_mtgp32_kernel.h" && cp "/usr/local/cuda-9.0/include/curand_mtgp32dc_p_11213.h" "$(@D)/cuda/include/curand_mtgp32dc_p_11213.h" && cp "/usr/local/cuda-9.0/include/curand_normal.h" "$(@D)/cuda/include/curand_normal.h" && cp "/usr/local/cuda-9.0/include/curand_normal_static.h" "$(@D)/cuda/include/curand_normal_static.h" && cp 
"/usr/local/cuda-9.0/include/curand_philox4x32_x.h" "$(@D)/cuda/include/curand_philox4x32_x.h" && cp "/usr/local/cuda-9.0/include/curand_poisson.h" "$(@D)/cuda/include/curand_poisson.h" && cp "/usr/local/cuda-9.0/include/curand_precalc.h" "$(@D)/cuda/include/curand_precalc.h" && cp "/usr/local/cuda-9.0/include/curand_uniform.h" "$(@D)/cuda/include/curand_uniform.h" && cp "/usr/local/cuda-9.0/include/cusolverDn.h" "$(@D)/cuda/include/cusolverDn.h" && cp "/usr/local/cuda-9.0/include/cusolverRf.h" "$(@D)/cuda/include/cusolverRf.h" && cp "/usr/local/cuda-9.0/include/cusolverSp.h" "$(@D)/cuda/include/cusolverSp.h" && cp "/usr/local/cuda-9.0/include/cusolverSp_LOWLEVEL_PREVIEW.h" "$(@D)/cuda/include/cusolverSp_LOWLEVEL_PREVIEW.h" && cp "/usr/local/cuda-9.0/include/cusolver_common.h" "$(@D)/cuda/include/cusolver_common.h" && cp "/usr/local/cuda-9.0/include/cusparse.h" "$(@D)/cuda/include/cusparse.h" && cp "/usr/local/cuda-9.0/include/cusparse_v2.h" "$(@D)/cuda/include/cusparse_v2.h" && cp "/usr/local/cuda-9.0/include/device_atomic_functions.h" "$(@D)/cuda/include/device_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/device_atomic_functions.hpp" "$(@D)/cuda/include/device_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_double_functions.h" "$(@D)/cuda/include/device_double_functions.h" && cp "/usr/local/cuda-9.0/include/device_double_functions.hpp" "$(@D)/cuda/include/device_double_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_functions.h" "$(@D)/cuda/include/device_functions.h" && cp "/usr/local/cuda-9.0/include/device_functions.hpp" "$(@D)/cuda/include/device_functions.hpp" && cp "/usr/local/cuda-9.0/include/device_functions_decls.h" "$(@D)/cuda/include/device_functions_decls.h" && cp "/usr/local/cuda-9.0/include/device_launch_parameters.h" "$(@D)/cuda/include/device_launch_parameters.h" && cp "/usr/local/cuda-9.0/include/device_types.h" "$(@D)/cuda/include/device_types.h" && cp "/usr/local/cuda-9.0/include/driver_functions.h" 
"$(@D)/cuda/include/driver_functions.h" && cp "/usr/local/cuda-9.0/include/driver_types.h" "$(@D)/cuda/include/driver_types.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuda.h" "$(@D)/cuda/include/dynlink_cuda.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuda_cuda.h" "$(@D)/cuda/include/dynlink_cuda_cuda.h" && cp "/usr/local/cuda-9.0/include/dynlink_cuviddec.h" "$(@D)/cuda/include/dynlink_cuviddec.h" && cp "/usr/local/cuda-9.0/include/dynlink_nvcuvid.h" "$(@D)/cuda/include/dynlink_nvcuvid.h" && cp "/usr/local/cuda-9.0/include/fatBinaryCtl.h" "$(@D)/cuda/include/fatBinaryCtl.h" && cp "/usr/local/cuda-9.0/include/fatbinary.h" "$(@D)/cuda/include/fatbinary.h" && cp "/usr/local/cuda-9.0/include/host_config.h" "$(@D)/cuda/include/host_config.h" && cp "/usr/local/cuda-9.0/include/host_defines.h" "$(@D)/cuda/include/host_defines.h" && cp "/usr/local/cuda-9.0/include/library_types.h" "$(@D)/cuda/include/library_types.h" && cp "/usr/local/cuda-9.0/include/math_constants.h" "$(@D)/cuda/include/math_constants.h" && cp "/usr/local/cuda-9.0/include/math_functions.h" "$(@D)/cuda/include/math_functions.h" && cp "/usr/local/cuda-9.0/include/math_functions.hpp" "$(@D)/cuda/include/math_functions.hpp" && cp "/usr/local/cuda-9.0/include/math_functions_dbl_ptx3.h" "$(@D)/cuda/include/math_functions_dbl_ptx3.h" && cp "/usr/local/cuda-9.0/include/math_functions_dbl_ptx3.hpp" "$(@D)/cuda/include/math_functions_dbl_ptx3.hpp" && cp "/usr/local/cuda-9.0/include/mma.h" "$(@D)/cuda/include/mma.h" && cp "/usr/local/cuda-9.0/include/npp.h" "$(@D)/cuda/include/npp.h" && cp "/usr/local/cuda-9.0/include/nppcore.h" "$(@D)/cuda/include/nppcore.h" && cp "/usr/local/cuda-9.0/include/nppdefs.h" "$(@D)/cuda/include/nppdefs.h" && cp "/usr/local/cuda-9.0/include/nppi.h" "$(@D)/cuda/include/nppi.h" && cp "/usr/local/cuda-9.0/include/nppi_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/nppi_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-9.0/include/nppi_color_conversion.h" 
"$(@D)/cuda/include/nppi_color_conversion.h" && cp "/usr/local/cuda-9.0/include/nppi_compression_functions.h" "$(@D)/cuda/include/nppi_compression_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_computer_vision.h" "$(@D)/cuda/include/nppi_computer_vision.h" && cp "/usr/local/cuda-9.0/include/nppi_data_exchange_and_initialization.h" "$(@D)/cuda/include/nppi_data_exchange_and_initialization.h" && cp "/usr/local/cuda-9.0/include/nppi_filtering_functions.h" "$(@D)/cuda/include/nppi_filtering_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_geometry_transforms.h" "$(@D)/cuda/include/nppi_geometry_transforms.h" && cp "/usr/local/cuda-9.0/include/nppi_linear_transforms.h" "$(@D)/cuda/include/nppi_linear_transforms.h" && cp "/usr/local/cuda-9.0/include/nppi_morphological_operations.h" "$(@D)/cuda/include/nppi_morphological_operations.h" && cp "/usr/local/cuda-9.0/include/nppi_statistics_functions.h" "$(@D)/cuda/include/nppi_statistics_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_support_functions.h" "$(@D)/cuda/include/nppi_support_functions.h" && cp "/usr/local/cuda-9.0/include/nppi_threshold_and_compare_operations.h" "$(@D)/cuda/include/nppi_threshold_and_compare_operations.h" && cp "/usr/local/cuda-9.0/include/npps.h" "$(@D)/cuda/include/npps.h" && cp "/usr/local/cuda-9.0/include/npps_arithmetic_and_logical_operations.h" "$(@D)/cuda/include/npps_arithmetic_and_logical_operations.h" && cp "/usr/local/cuda-9.0/include/npps_conversion_functions.h" "$(@D)/cuda/include/npps_conversion_functions.h" && cp "/usr/local/cuda-9.0/include/npps_filtering_functions.h" "$(@D)/cuda/include/npps_filtering_functions.h" && cp "/usr/local/cuda-9.0/include/npps_initialization.h" "$(@D)/cuda/include/npps_initialization.h" && cp "/usr/local/cuda-9.0/include/npps_statistics_functions.h" "$(@D)/cuda/include/npps_statistics_functions.h" && cp "/usr/local/cuda-9.0/include/npps_support_functions.h" "$(@D)/cuda/include/npps_support_functions.h" && cp 
"/usr/local/cuda-9.0/include/nppversion.h" "$(@D)/cuda/include/nppversion.h" && cp "/usr/local/cuda-9.0/include/nvToolsExt.h" "$(@D)/cuda/include/nvToolsExt.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtCuda.h" "$(@D)/cuda/include/nvToolsExtCuda.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtCudaRt.h" "$(@D)/cuda/include/nvToolsExtCudaRt.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtMeta.h" "$(@D)/cuda/include/nvToolsExtMeta.h" && cp "/usr/local/cuda-9.0/include/nvToolsExtSync.h" "$(@D)/cuda/include/nvToolsExtSync.h" && cp "/usr/local/cuda-9.0/include/nvblas.h" "$(@D)/cuda/include/nvblas.h" && cp "/usr/local/cuda-9.0/include/nvfunctional" "$(@D)/cuda/include/nvfunctional" && cp "/usr/local/cuda-9.0/include/nvgraph.h" "$(@D)/cuda/include/nvgraph.h" && cp "/usr/local/cuda-9.0/include/nvml.h" "$(@D)/cuda/include/nvml.h" && cp "/usr/local/cuda-9.0/include/nvrtc.h" "$(@D)/cuda/include/nvrtc.h" && cp "/usr/local/cuda-9.0/include/sm_20_atomic_functions.h" "$(@D)/cuda/include/sm_20_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_20_atomic_functions.hpp" "$(@D)/cuda/include/sm_20_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_20_intrinsics.h" "$(@D)/cuda/include/sm_20_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_20_intrinsics.hpp" "$(@D)/cuda/include/sm_20_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_30_intrinsics.h" "$(@D)/cuda/include/sm_30_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_30_intrinsics.hpp" "$(@D)/cuda/include/sm_30_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_32_atomic_functions.h" "$(@D)/cuda/include/sm_32_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_32_atomic_functions.hpp" "$(@D)/cuda/include/sm_32_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_32_intrinsics.h" "$(@D)/cuda/include/sm_32_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_32_intrinsics.hpp" "$(@D)/cuda/include/sm_32_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sm_35_atomic_functions.h" 
"$(@D)/cuda/include/sm_35_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_35_intrinsics.h" "$(@D)/cuda/include/sm_35_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_60_atomic_functions.h" "$(@D)/cuda/include/sm_60_atomic_functions.h" && cp "/usr/local/cuda-9.0/include/sm_60_atomic_functions.hpp" "$(@D)/cuda/include/sm_60_atomic_functions.hpp" && cp "/usr/local/cuda-9.0/include/sm_61_intrinsics.h" "$(@D)/cuda/include/sm_61_intrinsics.h" && cp "/usr/local/cuda-9.0/include/sm_61_intrinsics.hpp" "$(@D)/cuda/include/sm_61_intrinsics.hpp" && cp "/usr/local/cuda-9.0/include/sobol_direction_vectors.h" "$(@D)/cuda/include/sobol_direction_vectors.h" && cp "/usr/local/cuda-9.0/include/surface_functions.h" "$(@D)/cuda/include/surface_functions.h" && cp "/usr/local/cuda-9.0/include/surface_functions.hpp" "$(@D)/cuda/include/surface_functions.hpp" && cp "/usr/local/cuda-9.0/include/surface_indirect_functions.h" "$(@D)/cuda/include/surface_indirect_functions.h" && cp "/usr/local/cuda-9.0/include/surface_indirect_functions.hpp" "$(@D)/cuda/include/surface_indirect_functions.hpp" && cp "/usr/local/cuda-9.0/include/surface_types.h" "$(@D)/cuda/include/surface_types.h" && cp "/usr/local/cuda-9.0/include/texture_fetch_functions.h" "$(@D)/cuda/include/texture_fetch_functions.h" && cp "/usr/local/cuda-9.0/include/texture_fetch_functions.hpp" "$(@D)/cuda/include/texture_fetch_functions.hpp" && cp "/usr/local/cuda-9.0/include/texture_indirect_functions.h" "$(@D)/cuda/include/texture_indirect_functions.h" && cp "/usr/local/cuda-9.0/include/texture_indirect_functions.hpp" "$(@D)/cuda/include/texture_indirect_functions.hpp" && cp "/usr/local/cuda-9.0/include/texture_types.h" "$(@D)/cuda/include/texture_types.h" && cp "/usr/local/cuda-9.0/include/thrust/adjacent_difference.h" "$(@D)/cuda/include/thrust/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/advance.h" "$(@D)/cuda/include/thrust/advance.h" && cp "/usr/local/cuda-9.0/include/thrust/binary_search.h" 
"$(@D)/cuda/include/thrust/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/complex.h" "$(@D)/cuda/include/thrust/complex.h" && cp "/usr/local/cuda-9.0/include/thrust/copy.h" "$(@D)/cuda/include/thrust/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/count.h" "$(@D)/cuda/include/thrust/count.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/adjacent_difference.inl" "$(@D)/cuda/include/thrust/detail/adjacent_difference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/advance.inl" "$(@D)/cuda/include/thrust/detail/advance.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/allocator_traits.h" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/allocator_traits.inl" "$(@D)/cuda/include/thrust/detail/allocator/allocator_traits.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/copy_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/copy_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/copy_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/default_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/default_construct_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/default_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/destroy_range.h" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/destroy_range.inl" "$(@D)/cuda/include/thrust/detail/allocator/destroy_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/fill_construct_range.h" "$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/fill_construct_range.inl" 
"$(@D)/cuda/include/thrust/detail/allocator/fill_construct_range.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/malloc_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/malloc_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/malloc_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/no_throw_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/no_throw_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/tagged_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/tagged_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/tagged_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/temporary_allocator.h" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/allocator/temporary_allocator.inl" "$(@D)/cuda/include/thrust/detail/allocator/temporary_allocator.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/binary_search.inl" "$(@D)/cuda/include/thrust/detail/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/arithmetic.h" "$(@D)/cuda/include/thrust/detail/complex/arithmetic.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/c99math.h" "$(@D)/cuda/include/thrust/detail/complex/c99math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/catrig.h" "$(@D)/cuda/include/thrust/detail/complex/catrig.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/catrigf.h" "$(@D)/cuda/include/thrust/detail/complex/catrigf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ccosh.h" "$(@D)/cuda/include/thrust/detail/complex/ccosh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ccoshf.h" "$(@D)/cuda/include/thrust/detail/complex/ccoshf.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/complex/cexp.h" "$(@D)/cuda/include/thrust/detail/complex/cexp.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cexpf.h" "$(@D)/cuda/include/thrust/detail/complex/cexpf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/clog.h" "$(@D)/cuda/include/thrust/detail/complex/clog.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/clogf.h" "$(@D)/cuda/include/thrust/detail/complex/clogf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/complex.inl" "$(@D)/cuda/include/thrust/detail/complex/complex.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cpow.h" "$(@D)/cuda/include/thrust/detail/complex/cpow.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cpowf.h" "$(@D)/cuda/include/thrust/detail/complex/cpowf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/cproj.h" "$(@D)/cuda/include/thrust/detail/complex/cproj.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csinh.h" "$(@D)/cuda/include/thrust/detail/complex/csinh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csinhf.h" "$(@D)/cuda/include/thrust/detail/complex/csinhf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csqrt.h" "$(@D)/cuda/include/thrust/detail/complex/csqrt.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/csqrtf.h" "$(@D)/cuda/include/thrust/detail/complex/csqrtf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ctanh.h" "$(@D)/cuda/include/thrust/detail/complex/ctanh.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/ctanhf.h" "$(@D)/cuda/include/thrust/detail/complex/ctanhf.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/math_private.h" "$(@D)/cuda/include/thrust/detail/complex/math_private.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/complex/stream.h" "$(@D)/cuda/include/thrust/detail/complex/stream.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config.h" "$(@D)/cuda/include/thrust/detail/config.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/config/compiler.h" "$(@D)/cuda/include/thrust/detail/config/compiler.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/compiler_fence.h" "$(@D)/cuda/include/thrust/detail/config/compiler_fence.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/config.h" "$(@D)/cuda/include/thrust/detail/config/config.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/debug.h" "$(@D)/cuda/include/thrust/detail/config/debug.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/device_system.h" "$(@D)/cuda/include/thrust/detail/config/device_system.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/exec_check_disable.h" "$(@D)/cuda/include/thrust/detail/config/exec_check_disable.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/forceinline.h" "$(@D)/cuda/include/thrust/detail/config/forceinline.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/global_workarounds.h" "$(@D)/cuda/include/thrust/detail/config/global_workarounds.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/host_device.h" "$(@D)/cuda/include/thrust/detail/config/host_device.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/host_system.h" "$(@D)/cuda/include/thrust/detail/config/host_system.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/config/simple_defines.h" "$(@D)/cuda/include/thrust/detail/config/simple_defines.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/contiguous_storage.h" "$(@D)/cuda/include/thrust/detail/contiguous_storage.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/contiguous_storage.inl" "$(@D)/cuda/include/thrust/detail/contiguous_storage.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy.h" "$(@D)/cuda/include/thrust/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy.inl" "$(@D)/cuda/include/thrust/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/copy_if.h" "$(@D)/cuda/include/thrust/detail/copy_if.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/copy_if.inl" "$(@D)/cuda/include/thrust/detail/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/count.inl" "$(@D)/cuda/include/thrust/detail/count.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/cstdint.h" "$(@D)/cuda/include/thrust/detail/cstdint.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_delete.inl" "$(@D)/cuda/include/thrust/detail/device_delete.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_free.inl" "$(@D)/cuda/include/thrust/detail/device_free.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_malloc.inl" "$(@D)/cuda/include/thrust/detail/device_malloc.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_new.inl" "$(@D)/cuda/include/thrust/detail/device_new.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_ptr.inl" "$(@D)/cuda/include/thrust/detail/device_ptr.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_reference.inl" "$(@D)/cuda/include/thrust/detail/device_reference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/device_vector.inl" "$(@D)/cuda/include/thrust/detail/device_vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/dispatch/is_trivial_copy.h" "$(@D)/cuda/include/thrust/detail/dispatch/is_trivial_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/distance.inl" "$(@D)/cuda/include/thrust/detail/distance.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/equal.inl" "$(@D)/cuda/include/thrust/detail/equal.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/execute_with_allocator.h" "$(@D)/cuda/include/thrust/detail/execute_with_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/execution_policy.h" "$(@D)/cuda/include/thrust/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/extrema.inl" "$(@D)/cuda/include/thrust/detail/extrema.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/fill.inl" "$(@D)/cuda/include/thrust/detail/fill.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/find.inl" "$(@D)/cuda/include/thrust/detail/find.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/for_each.inl" "$(@D)/cuda/include/thrust/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/function.h" "$(@D)/cuda/include/thrust/detail/function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional.inl" "$(@D)/cuda/include/thrust/detail/functional.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/actor.h" "$(@D)/cuda/include/thrust/detail/functional/actor.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/actor.inl" "$(@D)/cuda/include/thrust/detail/functional/actor.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/argument.h" "$(@D)/cuda/include/thrust/detail/functional/argument.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/composite.h" "$(@D)/cuda/include/thrust/detail/functional/composite.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/arithmetic_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/arithmetic_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/assignment_operator.h" "$(@D)/cuda/include/thrust/detail/functional/operators/assignment_operator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/bitwise_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/bitwise_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/compound_assignment_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/compound_assignment_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/logical_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/logical_operators.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/functional/operators/operator_adaptors.h" "$(@D)/cuda/include/thrust/detail/functional/operators/operator_adaptors.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/operators/relational_operators.h" "$(@D)/cuda/include/thrust/detail/functional/operators/relational_operators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/placeholder.h" "$(@D)/cuda/include/thrust/detail/functional/placeholder.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/functional/value.h" "$(@D)/cuda/include/thrust/detail/functional/value.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/gather.inl" "$(@D)/cuda/include/thrust/detail/gather.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/generate.inl" "$(@D)/cuda/include/thrust/detail/generate.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/get_iterator_value.h" "$(@D)/cuda/include/thrust/detail/get_iterator_value.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/host_vector.inl" "$(@D)/cuda/include/thrust/detail/host_vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/inner_product.inl" "$(@D)/cuda/include/thrust/detail/inner_product.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/integer_math.h" "$(@D)/cuda/include/thrust/detail/integer_math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/integer_traits.h" "$(@D)/cuda/include/thrust/detail/integer_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/internal_functional.h" "$(@D)/cuda/include/thrust/detail/internal_functional.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/logical.inl" "$(@D)/cuda/include/thrust/detail/logical.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/merge.inl" "$(@D)/cuda/include/thrust/detail/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/minmax.h" "$(@D)/cuda/include/thrust/detail/minmax.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/mismatch.inl" "$(@D)/cuda/include/thrust/detail/mismatch.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/mpl/math.h" "$(@D)/cuda/include/thrust/detail/mpl/math.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/numeric_traits.h" "$(@D)/cuda/include/thrust/detail/numeric_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/overlapped_copy.h" "$(@D)/cuda/include/thrust/detail/overlapped_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/pair.inl" "$(@D)/cuda/include/thrust/detail/pair.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/partition.inl" "$(@D)/cuda/include/thrust/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/pointer.h" "$(@D)/cuda/include/thrust/detail/pointer.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/pointer.inl" "$(@D)/cuda/include/thrust/detail/pointer.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/range/head_flags.h" "$(@D)/cuda/include/thrust/detail/range/head_flags.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/range/tail_flags.h" "$(@D)/cuda/include/thrust/detail/range/tail_flags.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/raw_pointer_cast.h" "$(@D)/cuda/include/thrust/detail/raw_pointer_cast.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/raw_reference_cast.h" "$(@D)/cuda/include/thrust/detail/raw_reference_cast.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/reduce.inl" "$(@D)/cuda/include/thrust/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference.h" "$(@D)/cuda/include/thrust/detail/reference.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference.inl" "$(@D)/cuda/include/thrust/detail/reference.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reference_forward_declaration.h" "$(@D)/cuda/include/thrust/detail/reference_forward_declaration.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/remove.inl" "$(@D)/cuda/include/thrust/detail/remove.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/replace.inl" "$(@D)/cuda/include/thrust/detail/replace.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/reverse.inl" "$(@D)/cuda/include/thrust/detail/reverse.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/scan.inl" "$(@D)/cuda/include/thrust/detail/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/scatter.inl" "$(@D)/cuda/include/thrust/detail/scatter.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/seq.h" "$(@D)/cuda/include/thrust/detail/seq.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/sequence.inl" "$(@D)/cuda/include/thrust/detail/sequence.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/set_operations.inl" "$(@D)/cuda/include/thrust/detail/set_operations.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/sort.inl" "$(@D)/cuda/include/thrust/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/static_assert.h" "$(@D)/cuda/include/thrust/detail/static_assert.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/static_map.h" "$(@D)/cuda/include/thrust/detail/static_map.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap.h" "$(@D)/cuda/include/thrust/detail/swap.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap.inl" "$(@D)/cuda/include/thrust/detail/swap.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/swap_ranges.inl" "$(@D)/cuda/include/thrust/detail/swap_ranges.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/tabulate.inl" "$(@D)/cuda/include/thrust/detail/tabulate.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_array.h" "$(@D)/cuda/include/thrust/detail/temporary_array.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_array.inl" "$(@D)/cuda/include/thrust/detail/temporary_array.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/transform.inl" "$(@D)/cuda/include/thrust/detail/transform.inl" 
&& cp "/usr/local/cuda-9.0/include/thrust/detail/transform_reduce.inl" "$(@D)/cuda/include/thrust/detail/transform_reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/transform_scan.inl" "$(@D)/cuda/include/thrust/detail/transform_scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/trivial_sequence.h" "$(@D)/cuda/include/thrust/detail/trivial_sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple.inl" "$(@D)/cuda/include/thrust/detail/tuple.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple_meta_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_meta_transform.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/tuple_transform.h" "$(@D)/cuda/include/thrust/detail/tuple_transform.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" "$(@D)/cuda/include/thrust/detail/type_traits/algorithm/intermediate_type_from_function_and_iterators.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/function_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/function_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_member_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_member_function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_nested_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_nested_type.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/has_trivial_assign.h" "$(@D)/cuda/include/thrust/detail/type_traits/has_trivial_assign.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/is_call_possible.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_call_possible.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/is_metafunction_defined.h" "$(@D)/cuda/include/thrust/detail/type_traits/is_metafunction_defined.h" && cp 
"/usr/local/cuda-9.0/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_discard_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/iterator/is_output_iterator.h" "$(@D)/cuda/include/thrust/detail/type_traits/iterator/is_output_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/minimum_type.h" "$(@D)/cuda/include/thrust/detail/type_traits/minimum_type.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/pointer_traits.h" "$(@D)/cuda/include/thrust/detail/type_traits/pointer_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/type_traits/result_of_adaptable_function.h" "$(@D)/cuda/include/thrust/detail/type_traits/result_of_adaptable_function.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/detail/uninitialized_fill.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/unique.inl" "$(@D)/cuda/include/thrust/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/detail/use_default.h" "$(@D)/cuda/include/thrust/detail/use_default.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/util/align.h" "$(@D)/cuda/include/thrust/detail/util/align.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/util/blocking.h" "$(@D)/cuda/include/thrust/detail/util/blocking.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/vector_base.h" "$(@D)/cuda/include/thrust/detail/vector_base.h" && cp "/usr/local/cuda-9.0/include/thrust/detail/vector_base.inl" "$(@D)/cuda/include/thrust/detail/vector_base.inl" && cp "/usr/local/cuda-9.0/include/thrust/device_allocator.h" "$(@D)/cuda/include/thrust/device_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_delete.h" "$(@D)/cuda/include/thrust/device_delete.h" && cp "/usr/local/cuda-9.0/include/thrust/device_free.h" 
"$(@D)/cuda/include/thrust/device_free.h" && cp "/usr/local/cuda-9.0/include/thrust/device_malloc.h" "$(@D)/cuda/include/thrust/device_malloc.h" && cp "/usr/local/cuda-9.0/include/thrust/device_malloc_allocator.h" "$(@D)/cuda/include/thrust/device_malloc_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_new.h" "$(@D)/cuda/include/thrust/device_new.h" && cp "/usr/local/cuda-9.0/include/thrust/device_new_allocator.h" "$(@D)/cuda/include/thrust/device_new_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/device_ptr.h" "$(@D)/cuda/include/thrust/device_ptr.h" && cp "/usr/local/cuda-9.0/include/thrust/device_reference.h" "$(@D)/cuda/include/thrust/device_reference.h" && cp "/usr/local/cuda-9.0/include/thrust/device_vector.h" "$(@D)/cuda/include/thrust/device_vector.h" && cp "/usr/local/cuda-9.0/include/thrust/distance.h" "$(@D)/cuda/include/thrust/distance.h" && cp "/usr/local/cuda-9.0/include/thrust/equal.h" "$(@D)/cuda/include/thrust/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/execution_policy.h" "$(@D)/cuda/include/thrust/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/extrema.h" "$(@D)/cuda/include/thrust/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/fill.h" "$(@D)/cuda/include/thrust/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/find.h" "$(@D)/cuda/include/thrust/find.h" && cp "/usr/local/cuda-9.0/include/thrust/for_each.h" "$(@D)/cuda/include/thrust/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/functional.h" "$(@D)/cuda/include/thrust/functional.h" && cp "/usr/local/cuda-9.0/include/thrust/gather.h" "$(@D)/cuda/include/thrust/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/generate.h" "$(@D)/cuda/include/thrust/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/host_vector.h" "$(@D)/cuda/include/thrust/host_vector.h" && cp "/usr/local/cuda-9.0/include/thrust/inner_product.h" "$(@D)/cuda/include/thrust/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/constant_iterator.h" 
"$(@D)/cuda/include/thrust/iterator/constant_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/counting_iterator.h" "$(@D)/cuda/include/thrust/iterator/counting_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/any_assign.h" "$(@D)/cuda/include/thrust/iterator/detail/any_assign.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/any_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/any_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/constant_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/constant_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/counting_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/counting_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/device_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/device_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/discard_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/discard_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/distance_from_result.h" "$(@D)/cuda/include/thrust/iterator/detail/distance_from_result.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/host_system_tag.h" "$(@D)/cuda/include/thrust/iterator/detail/host_system_tag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/is_iterator_category.h" "$(@D)/cuda/include/thrust/iterator/detail/is_iterator_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/is_trivial_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/is_trivial_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_adaptor_base.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_adaptor_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_to_system.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_system.h" && cp 
"/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_to_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_to_traversal.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_category_with_system_and_traversal.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_facade_category.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_facade_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_traits.inl" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traits.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/iterator_traversal_tags.h" "$(@D)/cuda/include/thrust/iterator/detail/iterator_traversal_tags.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/join_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/join_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/minimum_category.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_category.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/minimum_system.h" "$(@D)/cuda/include/thrust/iterator/detail/minimum_system.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/normal_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/normal_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/permutation_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/permutation_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/retag.h" "$(@D)/cuda/include/thrust/iterator/detail/retag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/reverse_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/reverse_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/reverse_iterator_base.h" && cp 
"/usr/local/cuda-9.0/include/thrust/iterator/detail/tagged_iterator.h" "$(@D)/cuda/include/thrust/iterator/detail/tagged_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/transform_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/transform_output_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/transform_output_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/tuple_of_iterator_references.h" "$(@D)/cuda/include/thrust/iterator/detail/tuple_of_iterator_references.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/universal_categories.h" "$(@D)/cuda/include/thrust/iterator/detail/universal_categories.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/zip_iterator.inl" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator.inl" && cp "/usr/local/cuda-9.0/include/thrust/iterator/detail/zip_iterator_base.h" "$(@D)/cuda/include/thrust/iterator/detail/zip_iterator_base.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/discard_iterator.h" "$(@D)/cuda/include/thrust/iterator/discard_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_adaptor.h" "$(@D)/cuda/include/thrust/iterator/iterator_adaptor.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_categories.h" "$(@D)/cuda/include/thrust/iterator/iterator_categories.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_facade.h" "$(@D)/cuda/include/thrust/iterator/iterator_facade.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/iterator_traits.h" "$(@D)/cuda/include/thrust/iterator/iterator_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/permutation_iterator.h" "$(@D)/cuda/include/thrust/iterator/permutation_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/retag.h" "$(@D)/cuda/include/thrust/iterator/retag.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/reverse_iterator.h" 
"$(@D)/cuda/include/thrust/iterator/reverse_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/transform_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/transform_output_iterator.h" "$(@D)/cuda/include/thrust/iterator/transform_output_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/iterator/zip_iterator.h" "$(@D)/cuda/include/thrust/iterator/zip_iterator.h" && cp "/usr/local/cuda-9.0/include/thrust/logical.h" "$(@D)/cuda/include/thrust/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/memory.h" "$(@D)/cuda/include/thrust/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/merge.h" "$(@D)/cuda/include/thrust/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/mismatch.h" "$(@D)/cuda/include/thrust/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/pair.h" "$(@D)/cuda/include/thrust/pair.h" && cp "/usr/local/cuda-9.0/include/thrust/partition.h" "$(@D)/cuda/include/thrust/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/random.h" "$(@D)/cuda/include/thrust/random.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/discard_block_engine.inl" "$(@D)/cuda/include/thrust/random/detail/discard_block_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_congruential_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_congruential_engine_discard.h" "$(@D)/cuda/include/thrust/random/detail/linear_congruential_engine_discard.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_feedback_shift_engine.inl" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" "$(@D)/cuda/include/thrust/random/detail/linear_feedback_shift_engine_wordmask.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/mod.h" 
"$(@D)/cuda/include/thrust/random/detail/mod.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/normal_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/normal_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/normal_distribution_base.h" "$(@D)/cuda/include/thrust/random/detail/normal_distribution_base.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/random_core_access.h" "$(@D)/cuda/include/thrust/random/detail/random_core_access.h" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/subtract_with_carry_engine.inl" "$(@D)/cuda/include/thrust/random/detail/subtract_with_carry_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/uniform_int_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_int_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/uniform_real_distribution.inl" "$(@D)/cuda/include/thrust/random/detail/uniform_real_distribution.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/xor_combine_engine.inl" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine.inl" && cp "/usr/local/cuda-9.0/include/thrust/random/detail/xor_combine_engine_max.h" "$(@D)/cuda/include/thrust/random/detail/xor_combine_engine_max.h" && cp "/usr/local/cuda-9.0/include/thrust/random/discard_block_engine.h" "$(@D)/cuda/include/thrust/random/discard_block_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/linear_congruential_engine.h" "$(@D)/cuda/include/thrust/random/linear_congruential_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/linear_feedback_shift_engine.h" "$(@D)/cuda/include/thrust/random/linear_feedback_shift_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/random/normal_distribution.h" "$(@D)/cuda/include/thrust/random/normal_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/subtract_with_carry_engine.h" "$(@D)/cuda/include/thrust/random/subtract_with_carry_engine.h" && cp 
"/usr/local/cuda-9.0/include/thrust/random/uniform_int_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_int_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/uniform_real_distribution.h" "$(@D)/cuda/include/thrust/random/uniform_real_distribution.h" && cp "/usr/local/cuda-9.0/include/thrust/random/xor_combine_engine.h" "$(@D)/cuda/include/thrust/random/xor_combine_engine.h" && cp "/usr/local/cuda-9.0/include/thrust/reduce.h" "$(@D)/cuda/include/thrust/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/remove.h" "$(@D)/cuda/include/thrust/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/replace.h" "$(@D)/cuda/include/thrust/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/reverse.h" "$(@D)/cuda/include/thrust/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/scan.h" "$(@D)/cuda/include/thrust/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/scatter.h" "$(@D)/cuda/include/thrust/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/sequence.h" "$(@D)/cuda/include/thrust/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/set_operations.h" "$(@D)/cuda/include/thrust/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/sort.h" "$(@D)/cuda/include/thrust/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/swap.h" "$(@D)/cuda/include/thrust/swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/cpp/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cpp/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cpp/detail/copy_if.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cpp/detail/count.h" "$(@D)/cuda/include/thrust/system/cpp/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/equal.h" "$(@D)/cuda/include/thrust/system/cpp/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cpp/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/find.h" "$(@D)/cuda/include/thrust/system/cpp/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cpp/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/gather.h" "$(@D)/cuda/include/thrust/system/cpp/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/generate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cpp/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cpp/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cpp/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/logical.h" "$(@D)/cuda/include/thrust/system/cpp/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cpp/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/memory.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cpp/detail/merge.h" "$(@D)/cuda/include/thrust/system/cpp/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cpp/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/par.h" "$(@D)/cuda/include/thrust/system/cpp/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/partition.h" "$(@D)/cuda/include/thrust/system/cpp/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/remove.h" "$(@D)/cuda/include/thrust/system/cpp/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/replace.h" "$(@D)/cuda/include/thrust/system/cpp/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cpp/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cpp/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cpp/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/sort.h" "$(@D)/cuda/include/thrust/system/cpp/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/swap_ranges.h" 
"$(@D)/cuda/include/thrust/system/cpp/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cpp/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cpp/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cpp/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/cpp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/unique.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cpp/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cpp/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/execution_policy.h" "$(@D)/cuda/include/thrust/system/cpp/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/memory.h" "$(@D)/cuda/include/thrust/system/cpp/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cpp/vector.h" "$(@D)/cuda/include/thrust/system/cpp/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/config.h" "$(@D)/cuda/include/thrust/system/cuda/config.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/adjacent_difference.h" 
"$(@D)/cuda/include/thrust/system/cuda/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/cuda/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/cuda/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/agent_launcher.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/agent_launcher.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/alignment.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/alignment.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/triple_chevron_launch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/core/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/core/util.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/count.h" "$(@D)/cuda/include/thrust/system/cuda/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/cross_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_downsweep.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_radix_sort_upsweep.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_reduce_by_key.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_rle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_segment_fixup.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_select_if.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_csrt.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_orig.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/agent_spmv_row_based.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/agent/single_pass_scan_operators.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_adjacent_difference.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_discontinuity.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_exchange.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_load.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_rank.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_raking_layout.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_shuffle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/block_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/block_store.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_atomic.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_histogram_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_raking_commutative_only.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_reduce_warp_reductions.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_raking.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans2.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/block/specializations/block_scan_warp_scans3.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/cub.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/cub.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_histogram.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_partition.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_run_length_encode.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_segmented_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_select.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_select.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/device_spmv.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_histogram.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_radix_sort.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_reduce_by_key.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_rle.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_select_if.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_csrt.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_orig.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/device/dispatch/dispatch_spmv_row_based.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_barrier.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" 
"$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_even_share.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_mapping.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/grid/grid_queue.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/host/mutex.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/host/mutex.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/arg_index_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/cache_modified_output_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/constant_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/counting_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/discard_output_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_obj_input_iterator.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/tex_ref_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/iterator/transform_input_iterator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_load.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_operators.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_search.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/thread/thread_store.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_allocator.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_allocator.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_arch.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_arch.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_debug.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_debug.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_device.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_device.cuh" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_macro.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_macro.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_namespace.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_namespace.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_ptx.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_ptx.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/util_type.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/util_type.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_shfl.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_reduce_smem.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_shfl.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/specializations/warp_scan_smem.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_reduce.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" "$(@D)/cuda/include/thrust/system/cuda/detail/cub/warp/warp_scan.cuh" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/equal.h" "$(@D)/cuda/include/thrust/system/cuda/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/error.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/error.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/extrema.h" "$(@D)/cuda/include/thrust/system/cuda/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/fill.h" "$(@D)/cuda/include/thrust/system/cuda/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/find.h" "$(@D)/cuda/include/thrust/system/cuda/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/for_each.h" "$(@D)/cuda/include/thrust/system/cuda/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/gather.h" "$(@D)/cuda/include/thrust/system/cuda/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/generate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/get_value.h" "$(@D)/cuda/include/thrust/system/cuda/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_cuda_runtime_api.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/guarded_driver_types.h" "$(@D)/cuda/include/thrust/system/cuda/detail/guarded_driver_types.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/cuda/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/internal/copy_cross_system.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_cross_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" "$(@D)/cuda/include/thrust/system/cuda/detail/internal/copy_device_to_device.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/cuda/detail/iter_swap.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/logical.h" "$(@D)/cuda/include/thrust/system/cuda/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/cuda/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/memory.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/memory_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/memory_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/merge.h" "$(@D)/cuda/include/thrust/system/cuda/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/cuda/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/par.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/par_to_seq.h" "$(@D)/cuda/include/thrust/system/cuda/detail/par_to_seq.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/parallel_for.h" "$(@D)/cuda/include/thrust/system/cuda/detail/parallel_for.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/partition.h" "$(@D)/cuda/include/thrust/system/cuda/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/remove.h" "$(@D)/cuda/include/thrust/system/cuda/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/replace.h" "$(@D)/cuda/include/thrust/system/cuda/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/reverse.h" "$(@D)/cuda/include/thrust/system/cuda/detail/reverse.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/scatter.h" "$(@D)/cuda/include/thrust/system/cuda/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/sequence.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/cuda/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/sort.h" "$(@D)/cuda/include/thrust/system/cuda/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/cuda/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/cuda/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/terminate.h" "$(@D)/cuda/include/thrust/system/cuda/detail/terminate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/cuda/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/uninitialized_fill.h" 
"$(@D)/cuda/include/thrust/system/cuda/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/unique.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/cuda/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/util.h" "$(@D)/cuda/include/thrust/system/cuda/detail/util.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/detail/vector.inl" "$(@D)/cuda/include/thrust/system/cuda/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/error.h" "$(@D)/cuda/include/thrust/system/cuda/error.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/execution_policy.h" "$(@D)/cuda/include/thrust/system/cuda/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/experimental/pinned_allocator.h" "$(@D)/cuda/include/thrust/system/cuda/experimental/pinned_allocator.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/memory.h" "$(@D)/cuda/include/thrust/system/cuda/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/cuda/vector.h" "$(@D)/cuda/include/thrust/system/cuda/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/adl/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/adl/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/adl/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/count.h" 
"$(@D)/cuda/include/thrust/system/detail/adl/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/equal.h" "$(@D)/cuda/include/thrust/system/detail/adl/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/extrema.h" "$(@D)/cuda/include/thrust/system/detail/adl/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/find.h" "$(@D)/cuda/include/thrust/system/detail/adl/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/for_each.h" "$(@D)/cuda/include/thrust/system/detail/adl/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/gather.h" "$(@D)/cuda/include/thrust/system/detail/adl/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/generate.h" "$(@D)/cuda/include/thrust/system/detail/adl/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/get_value.h" "$(@D)/cuda/include/thrust/system/detail/adl/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/adl/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/adl/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/logical.h" "$(@D)/cuda/include/thrust/system/detail/adl/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/adl/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/merge.h" "$(@D)/cuda/include/thrust/system/detail/adl/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/adl/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/partition.h" "$(@D)/cuda/include/thrust/system/detail/adl/partition.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/adl/reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/remove.h" "$(@D)/cuda/include/thrust/system/detail/adl/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/replace.h" "$(@D)/cuda/include/thrust/system/detail/adl/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/reverse.h" "$(@D)/cuda/include/thrust/system/detail/adl/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/scatter.h" "$(@D)/cuda/include/thrust/system/detail/adl/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/sequence.h" "$(@D)/cuda/include/thrust/system/detail/adl/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/adl/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/sort.h" "$(@D)/cuda/include/thrust/system/detail/adl/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/adl/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/adl/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/adl/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/adl/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/adl/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/unique.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/adl/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/adl/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/bad_alloc.h" "$(@D)/cuda/include/thrust/system/detail/bad_alloc.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/errno.h" "$(@D)/cuda/include/thrust/system/detail/errno.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_category.inl" "$(@D)/cuda/include/thrust/system/detail/error_category.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_code.inl" "$(@D)/cuda/include/thrust/system/detail/error_code.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/error_condition.inl" "$(@D)/cuda/include/thrust/system/detail/error_condition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/adjacent_difference.inl" "$(@D)/cuda/include/thrust/system/detail/generic/adjacent_difference.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/advance.h" "$(@D)/cuda/include/thrust/system/detail/generic/advance.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/advance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/advance.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/copy_if.inl" "$(@D)/cuda/include/thrust/system/detail/generic/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/count.h" "$(@D)/cuda/include/thrust/system/detail/generic/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/count.inl" "$(@D)/cuda/include/thrust/system/detail/generic/count.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/distance.h" "$(@D)/cuda/include/thrust/system/detail/generic/distance.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/distance.inl" "$(@D)/cuda/include/thrust/system/detail/generic/distance.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/equal.h" "$(@D)/cuda/include/thrust/system/detail/generic/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/equal.inl" "$(@D)/cuda/include/thrust/system/detail/generic/equal.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/extrema.h" "$(@D)/cuda/include/thrust/system/detail/generic/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/extrema.inl" 
"$(@D)/cuda/include/thrust/system/detail/generic/extrema.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/find.h" "$(@D)/cuda/include/thrust/system/detail/generic/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/find.inl" "$(@D)/cuda/include/thrust/system/detail/generic/find.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/for_each.h" "$(@D)/cuda/include/thrust/system/detail/generic/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/gather.h" "$(@D)/cuda/include/thrust/system/detail/generic/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/gather.inl" "$(@D)/cuda/include/thrust/system/detail/generic/gather.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/generate.h" "$(@D)/cuda/include/thrust/system/detail/generic/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/generate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/generate.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/inner_product.inl" "$(@D)/cuda/include/thrust/system/detail/generic/inner_product.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/logical.h" "$(@D)/cuda/include/thrust/system/detail/generic/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/memory.h" "$(@D)/cuda/include/thrust/system/detail/generic/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/memory.inl" "$(@D)/cuda/include/thrust/system/detail/generic/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/merge.h" "$(@D)/cuda/include/thrust/system/detail/generic/merge.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/merge.inl" "$(@D)/cuda/include/thrust/system/detail/generic/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/mismatch.inl" "$(@D)/cuda/include/thrust/system/detail/generic/mismatch.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/partition.h" "$(@D)/cuda/include/thrust/system/detail/generic/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/partition.inl" "$(@D)/cuda/include/thrust/system/detail/generic/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/remove.h" "$(@D)/cuda/include/thrust/system/detail/generic/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/remove.inl" "$(@D)/cuda/include/thrust/system/detail/generic/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/replace.h" "$(@D)/cuda/include/thrust/system/detail/generic/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/replace.inl" "$(@D)/cuda/include/thrust/system/detail/generic/replace.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/reverse.h" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/reverse.inl" "$(@D)/cuda/include/thrust/system/detail/generic/reverse.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scalar/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scalar/binary_search.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scalar/binary_search.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scan_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scan_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scatter.h" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/scatter.inl" "$(@D)/cuda/include/thrust/system/detail/generic/scatter.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/select_system.h" "$(@D)/cuda/include/thrust/system/detail/generic/select_system.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sequence.h" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sequence.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sequence.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/generic/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/set_operations.inl" 
"$(@D)/cuda/include/thrust/system/detail/generic/set_operations.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sort.h" "$(@D)/cuda/include/thrust/system/detail/generic/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/sort.inl" "$(@D)/cuda/include/thrust/system/detail/generic/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/swap_ranges.inl" "$(@D)/cuda/include/thrust/system/detail/generic/swap_ranges.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tabulate.inl" "$(@D)/cuda/include/thrust/system/detail/generic/tabulate.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/tag.h" "$(@D)/cuda/include/thrust/system/detail/generic/tag.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/temporary_buffer.inl" "$(@D)/cuda/include/thrust/system/detail/generic/temporary_buffer.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_reduce.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_reduce.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/transform_scan.inl" "$(@D)/cuda/include/thrust/system/detail/generic/transform_scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/type_traits.h" "$(@D)/cuda/include/thrust/system/detail/generic/type_traits.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_copy.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/uninitialized_fill.inl" "$(@D)/cuda/include/thrust/system/detail/generic/uninitialized_fill.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/generic/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/detail/generic/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/internal/decompose.h" "$(@D)/cuda/include/thrust/system/detail/internal/decompose.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/detail/sequential/adjacent_difference.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/assign_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/binary_search.h" "$(@D)/cuda/include/thrust/system/detail/sequential/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy_backward.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_backward.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/copy_if.h" "$(@D)/cuda/include/thrust/system/detail/sequential/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/count.h" "$(@D)/cuda/include/thrust/system/detail/sequential/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/equal.h" "$(@D)/cuda/include/thrust/system/detail/sequential/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/execution_policy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/extrema.h" "$(@D)/cuda/include/thrust/system/detail/sequential/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/find.h" "$(@D)/cuda/include/thrust/system/detail/sequential/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/for_each.h" "$(@D)/cuda/include/thrust/system/detail/sequential/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/gather.h" "$(@D)/cuda/include/thrust/system/detail/sequential/gather.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/general_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/general_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/generate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/get_value.h" "$(@D)/cuda/include/thrust/system/detail/sequential/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/inner_product.h" "$(@D)/cuda/include/thrust/system/detail/sequential/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/insertion_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/insertion_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/iter_swap.h" "$(@D)/cuda/include/thrust/system/detail/sequential/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/logical.h" "$(@D)/cuda/include/thrust/system/detail/sequential/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/detail/sequential/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/merge.h" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/merge.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/mismatch.h" "$(@D)/cuda/include/thrust/system/detail/sequential/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/partition.h" "$(@D)/cuda/include/thrust/system/detail/sequential/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reduce_by_key.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/remove.h" "$(@D)/cuda/include/thrust/system/detail/sequential/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/replace.h" "$(@D)/cuda/include/thrust/system/detail/sequential/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/reverse.h" "$(@D)/cuda/include/thrust/system/detail/sequential/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scan_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/scatter.h" "$(@D)/cuda/include/thrust/system/detail/sequential/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sequence.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/set_operations.h" "$(@D)/cuda/include/thrust/system/detail/sequential/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_merge_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_merge_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_merge_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_primitive_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_primitive_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_primitive_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_radix_sort.h" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/stable_radix_sort.inl" "$(@D)/cuda/include/thrust/system/detail/sequential/stable_radix_sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/swap_ranges.h" "$(@D)/cuda/include/thrust/system/detail/sequential/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/tabulate.h" "$(@D)/cuda/include/thrust/system/detail/sequential/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/detail/sequential/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform_reduce.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/transform_scan.h" "$(@D)/cuda/include/thrust/system/detail/sequential/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/trivial_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/trivial_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/detail/sequential/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/unique.h" 
"$(@D)/cuda/include/thrust/system/detail/sequential/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/sequential/unique_by_key.h" "$(@D)/cuda/include/thrust/system/detail/sequential/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/detail/system_error.inl" "$(@D)/cuda/include/thrust/system/detail/system_error.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/error_code.h" "$(@D)/cuda/include/thrust/system/error_code.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/omp/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/omp/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/omp/detail/copy_if.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/count.h" "$(@D)/cuda/include/thrust/system/omp/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/default_decomposition.h" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/default_decomposition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/default_decomposition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/equal.h" "$(@D)/cuda/include/thrust/system/omp/detail/equal.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/omp/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/extrema.h" "$(@D)/cuda/include/thrust/system/omp/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/find.h" "$(@D)/cuda/include/thrust/system/omp/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/for_each.h" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/omp/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/gather.h" "$(@D)/cuda/include/thrust/system/omp/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/generate.h" "$(@D)/cuda/include/thrust/system/omp/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/get_value.h" "$(@D)/cuda/include/thrust/system/omp/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/omp/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/omp/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/logical.h" "$(@D)/cuda/include/thrust/system/omp/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/omp/detail/malloc_and_free.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/memory.inl" "$(@D)/cuda/include/thrust/system/omp/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/merge.h" "$(@D)/cuda/include/thrust/system/omp/detail/merge.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/omp/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/omp/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/par.h" "$(@D)/cuda/include/thrust/system/omp/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/partition.h" "$(@D)/cuda/include/thrust/system/omp/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/partition.inl" "$(@D)/cuda/include/thrust/system/omp/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reduce_intervals.inl" "$(@D)/cuda/include/thrust/system/omp/detail/reduce_intervals.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/remove.h" "$(@D)/cuda/include/thrust/system/omp/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/remove.inl" "$(@D)/cuda/include/thrust/system/omp/detail/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/replace.h" "$(@D)/cuda/include/thrust/system/omp/detail/replace.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/reverse.h" "$(@D)/cuda/include/thrust/system/omp/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/omp/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/scatter.h" "$(@D)/cuda/include/thrust/system/omp/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sequence.h" "$(@D)/cuda/include/thrust/system/omp/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/omp/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sort.h" "$(@D)/cuda/include/thrust/system/omp/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/sort.inl" "$(@D)/cuda/include/thrust/system/omp/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/omp/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/omp/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/omp/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/omp/detail/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/omp/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique.h" 
"$(@D)/cuda/include/thrust/system/omp/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/omp/detail/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/detail/vector.inl" "$(@D)/cuda/include/thrust/system/omp/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/execution_policy.h" "$(@D)/cuda/include/thrust/system/omp/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/memory.h" "$(@D)/cuda/include/thrust/system/omp/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/omp/vector.h" "$(@D)/cuda/include/thrust/system/omp/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system/system_error.h" "$(@D)/cuda/include/thrust/system/system_error.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/adjacent_difference.h" "$(@D)/cuda/include/thrust/system/tbb/detail/adjacent_difference.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/assign_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/assign_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/binary_search.h" "$(@D)/cuda/include/thrust/system/tbb/detail/binary_search.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy_if.h" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/copy_if.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/copy_if.inl" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/count.h" "$(@D)/cuda/include/thrust/system/tbb/detail/count.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/equal.h" "$(@D)/cuda/include/thrust/system/tbb/detail/equal.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/extrema.h" "$(@D)/cuda/include/thrust/system/tbb/detail/extrema.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/find.h" "$(@D)/cuda/include/thrust/system/tbb/detail/find.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/for_each.h" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/for_each.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/for_each.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/gather.h" "$(@D)/cuda/include/thrust/system/tbb/detail/gather.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/generate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/generate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/get_value.h" "$(@D)/cuda/include/thrust/system/tbb/detail/get_value.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/inner_product.h" "$(@D)/cuda/include/thrust/system/tbb/detail/inner_product.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/iter_swap.h" "$(@D)/cuda/include/thrust/system/tbb/detail/iter_swap.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/logical.h" "$(@D)/cuda/include/thrust/system/tbb/detail/logical.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/malloc_and_free.h" "$(@D)/cuda/include/thrust/system/tbb/detail/malloc_and_free.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/memory.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/memory.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/merge.h" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/merge.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/merge.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/mismatch.h" "$(@D)/cuda/include/thrust/system/tbb/detail/mismatch.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/par.h" "$(@D)/cuda/include/thrust/system/tbb/detail/par.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/partition.h" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/partition.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/partition.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reduce_intervals.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reduce_intervals.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/remove.h" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/remove.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/remove.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/replace.h" "$(@D)/cuda/include/thrust/system/tbb/detail/replace.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/reverse.h" "$(@D)/cuda/include/thrust/system/tbb/detail/reverse.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/scan.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scan_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scan_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/scatter.h" "$(@D)/cuda/include/thrust/system/tbb/detail/scatter.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sequence.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sequence.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/set_operations.h" "$(@D)/cuda/include/thrust/system/tbb/detail/set_operations.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sort.h" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/sort.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/sort.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/swap_ranges.h" "$(@D)/cuda/include/thrust/system/tbb/detail/swap_ranges.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/tabulate.h" "$(@D)/cuda/include/thrust/system/tbb/detail/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/temporary_buffer.h" "$(@D)/cuda/include/thrust/system/tbb/detail/temporary_buffer.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform_reduce.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/transform_scan.h" "$(@D)/cuda/include/thrust/system/tbb/detail/transform_scan.h" && cp 
"/usr/local/cuda-9.0/include/thrust/system/tbb/detail/uninitialized_copy.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/uninitialized_fill.h" "$(@D)/cuda/include/thrust/system/tbb/detail/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique_by_key.h" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/unique_by_key.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/unique_by_key.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/detail/vector.inl" "$(@D)/cuda/include/thrust/system/tbb/detail/vector.inl" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/execution_policy.h" "$(@D)/cuda/include/thrust/system/tbb/execution_policy.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/memory.h" "$(@D)/cuda/include/thrust/system/tbb/memory.h" && cp "/usr/local/cuda-9.0/include/thrust/system/tbb/vector.h" "$(@D)/cuda/include/thrust/system/tbb/vector.h" && cp "/usr/local/cuda-9.0/include/thrust/system_error.h" "$(@D)/cuda/include/thrust/system_error.h" && cp "/usr/local/cuda-9.0/include/thrust/tabulate.h" "$(@D)/cuda/include/thrust/tabulate.h" && cp "/usr/local/cuda-9.0/include/thrust/transform.h" "$(@D)/cuda/include/thrust/transform.h" && cp "/usr/local/cuda-9.0/include/thrust/transform_reduce.h" "$(@D)/cuda/include/thrust/transform_reduce.h" && cp "/usr/local/cuda-9.0/include/thrust/transform_scan.h" "$(@D)/cuda/include/thrust/transform_scan.h" && cp "/usr/local/cuda-9.0/include/thrust/tuple.h" "$(@D)/cuda/include/thrust/tuple.h" && cp "/usr/local/cuda-9.0/include/thrust/uninitialized_copy.h" 
"$(@D)/cuda/include/thrust/uninitialized_copy.h" && cp "/usr/local/cuda-9.0/include/thrust/uninitialized_fill.h" "$(@D)/cuda/include/thrust/uninitialized_fill.h" && cp "/usr/local/cuda-9.0/include/thrust/unique.h" "$(@D)/cuda/include/thrust/unique.h" && cp "/usr/local/cuda-9.0/include/thrust/version.h" "$(@D)/cuda/include/thrust/version.h" && cp "/usr/local/cuda-9.0/include/vector_functions.h" "$(@D)/cuda/include/vector_functions.h" && cp "/usr/local/cuda-9.0/include/vector_functions.hpp" "$(@D)/cuda/include/vector_functions.hpp" && cp "/usr/local/cuda-9.0/include/vector_types.h" "$(@D)/cuda/include/vector_types.h" """, ) @@ -1264,72 +1192,69 @@ genrule( name = "cuda-nvvm", outs = [ "cuda/nvvm/bin/cicc", - "cuda/nvvm/libdevice/libdevice.compute_50.10.bc", - "cuda/nvvm/libdevice/libdevice.compute_30.10.bc", - "cuda/nvvm/libdevice/libdevice.compute_20.10.bc", - "cuda/nvvm/libdevice/libdevice.compute_35.10.bc", - "cuda/nvvm/lib64/libnvvm.so.3", - "cuda/nvvm/lib64/libnvvm.so", - "cuda/nvvm/lib64/libnvvm.so.3.1.0", "cuda/nvvm/include/nvvm.h", - "cuda/nvvm/libnvvm-samples/ptxgen/README.txt", - "cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c", - "cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt", + "cuda/nvvm/lib64/libnvvm.so", + "cuda/nvvm/lib64/libnvvm.so.3", + "cuda/nvvm/lib64/libnvvm.so.3.2.0", + "cuda/nvvm/libdevice/libdevice.10.bc", + "cuda/nvvm/libnvvm-samples/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/README.txt", "cuda/nvvm/libnvvm-samples/build.bat", - "cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt", - "cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu", + "cuda/nvvm/libnvvm-samples/build.sh", + "cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h", + "cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h", "cuda/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt", "cuda/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp", - "cuda/nvvm/libnvvm-samples/README.txt", - 
"cuda/nvvm/libnvvm-samples/simple/simple.c", - "cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll", + "cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu", + "cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/ptxgen/README.txt", + "cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c", + "cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt", "cuda/nvvm/libnvvm-samples/simple/README.txt", + "cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll", "cuda/nvvm/libnvvm-samples/simple/simple-gpu64.ll", - "cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt", - "cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h", - "cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h", - "cuda/nvvm/libnvvm-samples/build.sh", - "cuda/nvvm/libnvvm-samples/CMakeLists.txt", + "cuda/nvvm/libnvvm-samples/simple/simple.c", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/nvvm/bin/cicc" "$(@D)/cuda/nvvm/bin/cicc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_50.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_50.10.bc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_30.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_30.10.bc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_20.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_20.10.bc" && cp "/usr/local/cuda-8.0/nvvm/libdevice/libdevice.compute_35.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.compute_35.10.bc" && cp "/usr/local/cuda-8.0/nvvm/lib64/libnvvm.so.3" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3" && cp "/usr/local/cuda-8.0/nvvm/lib64/libnvvm.so" "$(@D)/cuda/nvvm/lib64/libnvvm.so" && cp "/usr/local/cuda-8.0/nvvm/lib64/libnvvm.so.3.1.0" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3.1.0" && cp "/usr/local/cuda-8.0/nvvm/include/nvvm.h" "$(@D)/cuda/nvvm/include/nvvm.h" && cp 
"/usr/local/cuda-8.0/nvvm/libnvvm-samples/ptxgen/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/ptxgen/ptxgen.c" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/build.bat" "$(@D)/cuda/nvvm/libnvvm-samples/build.bat" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/simple.c" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple.c" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/simple-gpu.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/README.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/simple-gpu64.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu64.ll" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/simple/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/common/include/DDSWriter.h" "$(@D)/cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" 
"$(@D)/cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/build.sh" "$(@D)/cuda/nvvm/libnvvm-samples/build.sh" && cp "/usr/local/cuda-8.0/nvvm/libnvvm-samples/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/CMakeLists.txt" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/nvvm/bin/cicc" "$(@D)/cuda/nvvm/bin/cicc" && cp "/usr/local/cuda-9.0/nvvm/include/nvvm.h" "$(@D)/cuda/nvvm/include/nvvm.h" && cp "/usr/local/cuda-9.0/nvvm/lib64/libnvvm.so" "$(@D)/cuda/nvvm/lib64/libnvvm.so" && cp "/usr/local/cuda-9.0/nvvm/lib64/libnvvm.so.3" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3" && cp "/usr/local/cuda-9.0/nvvm/lib64/libnvvm.so.3.2.0" "$(@D)/cuda/nvvm/lib64/libnvvm.so.3.2.0" && cp "/usr/local/cuda-9.0/nvvm/libdevice/libdevice.10.bc" "$(@D)/cuda/nvvm/libdevice/libdevice.10.bc" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/build.bat" "$(@D)/cuda/nvvm/libnvvm-samples/build.bat" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/build.sh" "$(@D)/cuda/nvvm/libnvvm-samples/build.sh" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/common/include/DDSWriter.h" "$(@D)/cuda/nvvm/libnvvm-samples/common/include/DDSWriter.h" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" "$(@D)/cuda/nvvm/libnvvm-samples/common/include/drvapi_error_string.h" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/README.txt" 
"$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/cuda-c-linking.cpp" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" "$(@D)/cuda/nvvm/libnvvm-samples/cuda-c-linking/math-funcs.cu" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/ptxgen/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/ptxgen/ptxgen.c" "$(@D)/cuda/nvvm/libnvvm-samples/ptxgen/ptxgen.c" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/CMakeLists.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/CMakeLists.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/README.txt" "$(@D)/cuda/nvvm/libnvvm-samples/simple/README.txt" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/simple-gpu.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu.ll" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/simple-gpu64.ll" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple-gpu64.ll" && cp "/usr/local/cuda-9.0/nvvm/libnvvm-samples/simple/simple.c" "$(@D)/cuda/nvvm/libnvvm-samples/simple/simple.c" """, ) genrule( name = "cuda-extras", outs = [ - "cuda/extras/CUPTI/include/cupti_result.h", + "cuda/extras/CUPTI/include/GL/gl.h", + "cuda/extras/CUPTI/include/GL/glew.h", + "cuda/extras/CUPTI/include/GL/glext.h", + "cuda/extras/CUPTI/include/GL/glu.h", + "cuda/extras/CUPTI/include/GL/glut.h", + "cuda/extras/CUPTI/include/GL/glx.h", + "cuda/extras/CUPTI/include/GL/glxext.h", + "cuda/extras/CUPTI/include/GL/wglew.h", + "cuda/extras/CUPTI/include/GL/wglext.h", + "cuda/extras/CUPTI/include/cuda_stdint.h", + "cuda/extras/CUPTI/include/cupti.h", + "cuda/extras/CUPTI/include/cupti_activity.h", + "cuda/extras/CUPTI/include/cupti_callbacks.h", + 
"cuda/extras/CUPTI/include/cupti_driver_cbid.h", "cuda/extras/CUPTI/include/cupti_events.h", - "cuda/extras/CUPTI/include/openacc/cupti_openacc.h", + "cuda/extras/CUPTI/include/cupti_metrics.h", + "cuda/extras/CUPTI/include/cupti_nvtx_cbid.h", + "cuda/extras/CUPTI/include/cupti_result.h", + "cuda/extras/CUPTI/include/cupti_runtime_cbid.h", "cuda/extras/CUPTI/include/cupti_version.h", - "cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h", + "cuda/extras/CUPTI/include/generated_cudaGL_meta.h", "cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h", - "cuda/extras/CUPTI/include/cupti_activity.h", - "cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h", + "cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h", "cuda/extras/CUPTI/include/generated_cuda_meta.h", - "cuda/extras/CUPTI/include/cupti_nvtx_cbid.h", - "cuda/extras/CUPTI/include/cuda_stdint.h", - "cuda/extras/CUPTI/include/generated_cudaGL_meta.h", + "cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h", "cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h", - "cuda/extras/CUPTI/include/cupti_metrics.h", - "cuda/extras/CUPTI/include/cupti_callbacks.h", - "cuda/extras/CUPTI/include/cupti_runtime_cbid.h", - "cuda/extras/CUPTI/include/cupti.h", - "cuda/extras/CUPTI/include/GL/glut.h", - "cuda/extras/CUPTI/include/GL/glu.h", - "cuda/extras/CUPTI/include/GL/glxext.h", - "cuda/extras/CUPTI/include/GL/wglext.h", - "cuda/extras/CUPTI/include/GL/glx.h", - "cuda/extras/CUPTI/include/GL/glext.h", - "cuda/extras/CUPTI/include/GL/wglew.h", - "cuda/extras/CUPTI/include/GL/gl.h", - "cuda/extras/CUPTI/include/GL/glew.h", - "cuda/extras/CUPTI/include/cupti_driver_cbid.h", "cuda/extras/CUPTI/include/generated_nvtx_meta.h", + "cuda/extras/CUPTI/include/openacc/cupti_openacc.h", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then 
rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_result.h" "$(@D)/cuda/extras/CUPTI/include/cupti_result.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_events.h" "$(@D)/cuda/extras/CUPTI/include/cupti_events.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/openacc/cupti_openacc.h" "$(@D)/cuda/extras/CUPTI/include/openacc/cupti_openacc.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_version.h" "$(@D)/cuda/extras/CUPTI/include/cupti_version.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cudaVDPAU_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_activity.h" "$(@D)/cuda/extras/CUPTI/include/cupti_activity.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_nvtx_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_nvtx_cbid.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cuda_stdint.h" "$(@D)/cuda/extras/CUPTI/include/cuda_stdint.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cudaGL_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaGL_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_metrics.h" "$(@D)/cuda/extras/CUPTI/include/cupti_metrics.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_callbacks.h" "$(@D)/cuda/extras/CUPTI/include/cupti_callbacks.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_runtime_cbid.h" 
"$(@D)/cuda/extras/CUPTI/include/cupti_runtime_cbid.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti.h" "$(@D)/cuda/extras/CUPTI/include/cupti.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glut.h" "$(@D)/cuda/extras/CUPTI/include/GL/glut.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glu.h" "$(@D)/cuda/extras/CUPTI/include/GL/glu.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glxext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glxext.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/wglext.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglext.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glx.h" "$(@D)/cuda/extras/CUPTI/include/GL/glx.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glext.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/wglew.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglew.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/gl.h" "$(@D)/cuda/extras/CUPTI/include/GL/gl.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/GL/glew.h" "$(@D)/cuda/extras/CUPTI/include/GL/glew.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/cupti_driver_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_driver_cbid.h" && cp "/usr/local/cuda-8.0/extras/CUPTI/include/generated_nvtx_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_nvtx_meta.h" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/gl.h" "$(@D)/cuda/extras/CUPTI/include/GL/gl.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glew.h" "$(@D)/cuda/extras/CUPTI/include/GL/glew.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glu.h" "$(@D)/cuda/extras/CUPTI/include/GL/glu.h" && cp 
"/usr/local/cuda-9.0/extras/CUPTI/include/GL/glut.h" "$(@D)/cuda/extras/CUPTI/include/GL/glut.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glx.h" "$(@D)/cuda/extras/CUPTI/include/GL/glx.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/glxext.h" "$(@D)/cuda/extras/CUPTI/include/GL/glxext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/wglew.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglew.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/GL/wglext.h" "$(@D)/cuda/extras/CUPTI/include/GL/wglext.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cuda_stdint.h" "$(@D)/cuda/extras/CUPTI/include/cuda_stdint.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti.h" "$(@D)/cuda/extras/CUPTI/include/cupti.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_activity.h" "$(@D)/cuda/extras/CUPTI/include/cupti_activity.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_callbacks.h" "$(@D)/cuda/extras/CUPTI/include/cupti_callbacks.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_driver_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_driver_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_events.h" "$(@D)/cuda/extras/CUPTI/include/cupti_events.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_metrics.h" "$(@D)/cuda/extras/CUPTI/include/cupti_metrics.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_nvtx_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_nvtx_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_result.h" "$(@D)/cuda/extras/CUPTI/include/cupti_result.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_runtime_cbid.h" "$(@D)/cuda/extras/CUPTI/include/cupti_runtime_cbid.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/cupti_version.h" "$(@D)/cuda/extras/CUPTI/include/cupti_version.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cudaGL_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cudaGL_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cudaVDPAU_meta.h" 
"$(@D)/cuda/extras/CUPTI/include/generated_cudaVDPAU_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_gl_interop_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_runtime_api_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_cuda_vdpau_interop_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/generated_nvtx_meta.h" "$(@D)/cuda/extras/CUPTI/include/generated_nvtx_meta.h" && cp "/usr/local/cuda-9.0/extras/CUPTI/include/openacc/cupti_openacc.h" "$(@D)/cuda/extras/CUPTI/include/openacc/cupti_openacc.h" """, ) @@ -1337,26 +1262,21 @@ genrule( name = "cuda-lib", outs = [ "cuda/lib/libcuda.so", - "cuda/lib/libcudart.so.8.0", + "cuda/lib/libcudart.so.9.0", "cuda/lib/libcudart_static.a", - "cuda/lib/libcublas.so.8.0", - "cuda/lib/libcusolver.so.8.0", - "cuda/lib/libcurand.so.8.0", - "cuda/lib/libcufft.so.8.0", - "cuda/lib/libcudnn.so.6", - "cuda/lib/libcupti.so.8.0", + "cuda/lib/libcublas.so.9.0", + "cuda/lib/libcusolver.so.9.0", + "cuda/lib/libcurand.so.9.0", + "cuda/lib/libcufft.so.9.0", + "cuda/lib/libcudnn.so.7", + "cuda/lib/libcupti.so.9.0", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcudart.so.8.0.61" "$(@D)/cuda/lib/libcudart.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcudart_static.a" 
"$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcublas.so.8.0.88" "$(@D)/cuda/lib/libcublas.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcusolver.so.8.0.61" "$(@D)/cuda/lib/libcusolver.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcurand.so.8.0.61" "$(@D)/cuda/lib/libcurand.so.8.0" && cp "/usr/local/cuda-8.0/targets/x86_64-linux/lib/libcufft.so.8.0.61" "$(@D)/cuda/lib/libcufft.so.8.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.6.0.21" "$(@D)/cuda/lib/libcudnn.so.6" && cp "/usr/local/cuda-8.0/extras/CUPTI/lib64/libcupti.so.8.0.61" "$(@D)/cuda/lib/libcupti.so.8.0" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.282" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7.0.5" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0" """, ) -genrule( +filegroup( name = "cudnn-include", - outs = [ - "cuda/include/cudnn.h", - ], - cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; 
then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/include/cudnn.h" "$(@D)/cudnn.h" - """, + srcs = [], ) diff --git a/third_party/toolchains/gpus/py/BUILD b/third_party/toolchains/gpus/py/BUILD new file mode 100644 index 0000000000..2d5ace93ff --- /dev/null +++ b/third_party/toolchains/gpus/py/BUILD @@ -0,0 +1,171 @@ +# A build file to configure python remote repository used with Bazel remote +# execution service +# DO NOT EDIT: automatically generated BUILD file + +licenses(["restricted"]) + +package(default_visibility = ["//visibility:public"]) + +cc_library( + name = "python_headers", + hdrs = [":python_include"], + data = select({ + ":windows": [":python_import_lib"], + "//conditions:default": [], + }), + includes = ["python_include"], + linkopts = select({ + # TODO(pcloudy): Ideally, this should just go into deps after resolving + # https://github.com/bazelbuild/bazel/issues/3237, + ":windows": ["$(locations :python_import_lib)"], + "//conditions:default": [], + }), +) + +cc_library( + name = "numpy_headers", + hdrs = [":numpy_include"], + includes = ["numpy_include"], +) + +config_setting( + name = "windows", + values = {"cpu": "x64_windows"}, + visibility = ["//visibility:public"], +) + +genrule( + name = "python_include", + outs = [ + "python_include/Python-ast.h", + "python_include/Python.h", + "python_include/abstract.h", + "python_include/asdl.h", + "python_include/ast.h", + "python_include/bitset.h", + "python_include/boolobject.h", + "python_include/bufferobject.h", + "python_include/bytearrayobject.h", + "python_include/bytes_methods.h", + "python_include/bytesobject.h", + "python_include/cStringIO.h", + "python_include/cellobject.h", + "python_include/ceval.h", + "python_include/classobject.h", + "python_include/cobject.h", + "python_include/code.h", + "python_include/codecs.h", + "python_include/compile.h", + "python_include/complexobject.h", + 
"python_include/datetime.h", + "python_include/descrobject.h", + "python_include/dictobject.h", + "python_include/dtoa.h", + "python_include/enumobject.h", + "python_include/errcode.h", + "python_include/eval.h", + "python_include/fileobject.h", + "python_include/floatobject.h", + "python_include/frameobject.h", + "python_include/funcobject.h", + "python_include/genobject.h", + "python_include/graminit.h", + "python_include/grammar.h", + "python_include/import.h", + "python_include/intobject.h", + "python_include/intrcheck.h", + "python_include/iterobject.h", + "python_include/listobject.h", + "python_include/longintrepr.h", + "python_include/longobject.h", + "python_include/marshal.h", + "python_include/memoryobject.h", + "python_include/metagrammar.h", + "python_include/methodobject.h", + "python_include/modsupport.h", + "python_include/moduleobject.h", + "python_include/node.h", + "python_include/object.h", + "python_include/objimpl.h", + "python_include/opcode.h", + "python_include/osdefs.h", + "python_include/parsetok.h", + "python_include/patchlevel.h", + "python_include/pgen.h", + "python_include/pgenheaders.h", + "python_include/py_curses.h", + "python_include/pyarena.h", + "python_include/pycapsule.h", + "python_include/pyconfig.h", + "python_include/pyctype.h", + "python_include/pydebug.h", + "python_include/pyerrors.h", + "python_include/pyexpat.h", + "python_include/pyfpe.h", + "python_include/pygetopt.h", + "python_include/pymacconfig.h", + "python_include/pymactoolbox.h", + "python_include/pymath.h", + "python_include/pymem.h", + "python_include/pyport.h", + "python_include/pystate.h", + "python_include/pystrcmp.h", + "python_include/pystrtod.h", + "python_include/pythonrun.h", + "python_include/pythread.h", + "python_include/rangeobject.h", + "python_include/setobject.h", + "python_include/sliceobject.h", + "python_include/stringobject.h", + "python_include/structmember.h", + "python_include/structseq.h", + "python_include/symtable.h", + 
"python_include/sysmodule.h", + "python_include/timefuncs.h", + "python_include/token.h", + "python_include/traceback.h", + "python_include/tupleobject.h", + "python_include/ucnhash.h", + "python_include/unicodeobject.h", + "python_include/warnings.h", + "python_include/weakrefobject.h", + ], + cmd = """ +cp "/usr/include/python2.7/Python-ast.h" "$(@D)/python_include/Python-ast.h" && cp "/usr/include/python2.7/Python.h" "$(@D)/python_include/Python.h" && cp "/usr/include/python2.7/abstract.h" "$(@D)/python_include/abstract.h" && cp "/usr/include/python2.7/asdl.h" "$(@D)/python_include/asdl.h" && cp "/usr/include/python2.7/ast.h" "$(@D)/python_include/ast.h" && cp "/usr/include/python2.7/bitset.h" "$(@D)/python_include/bitset.h" && cp "/usr/include/python2.7/boolobject.h" "$(@D)/python_include/boolobject.h" && cp "/usr/include/python2.7/bufferobject.h" "$(@D)/python_include/bufferobject.h" && cp "/usr/include/python2.7/bytearrayobject.h" "$(@D)/python_include/bytearrayobject.h" && cp "/usr/include/python2.7/bytes_methods.h" "$(@D)/python_include/bytes_methods.h" && cp "/usr/include/python2.7/bytesobject.h" "$(@D)/python_include/bytesobject.h" && cp "/usr/include/python2.7/cStringIO.h" "$(@D)/python_include/cStringIO.h" && cp "/usr/include/python2.7/cellobject.h" "$(@D)/python_include/cellobject.h" && cp "/usr/include/python2.7/ceval.h" "$(@D)/python_include/ceval.h" && cp "/usr/include/python2.7/classobject.h" "$(@D)/python_include/classobject.h" && cp "/usr/include/python2.7/cobject.h" "$(@D)/python_include/cobject.h" && cp "/usr/include/python2.7/code.h" "$(@D)/python_include/code.h" && cp "/usr/include/python2.7/codecs.h" "$(@D)/python_include/codecs.h" && cp "/usr/include/python2.7/compile.h" "$(@D)/python_include/compile.h" && cp "/usr/include/python2.7/complexobject.h" "$(@D)/python_include/complexobject.h" && cp "/usr/include/python2.7/datetime.h" "$(@D)/python_include/datetime.h" && cp "/usr/include/python2.7/descrobject.h" 
"$(@D)/python_include/descrobject.h" && cp "/usr/include/python2.7/dictobject.h" "$(@D)/python_include/dictobject.h" && cp "/usr/include/python2.7/dtoa.h" "$(@D)/python_include/dtoa.h" && cp "/usr/include/python2.7/enumobject.h" "$(@D)/python_include/enumobject.h" && cp "/usr/include/python2.7/errcode.h" "$(@D)/python_include/errcode.h" && cp "/usr/include/python2.7/eval.h" "$(@D)/python_include/eval.h" && cp "/usr/include/python2.7/fileobject.h" "$(@D)/python_include/fileobject.h" && cp "/usr/include/python2.7/floatobject.h" "$(@D)/python_include/floatobject.h" && cp "/usr/include/python2.7/frameobject.h" "$(@D)/python_include/frameobject.h" && cp "/usr/include/python2.7/funcobject.h" "$(@D)/python_include/funcobject.h" && cp "/usr/include/python2.7/genobject.h" "$(@D)/python_include/genobject.h" && cp "/usr/include/python2.7/graminit.h" "$(@D)/python_include/graminit.h" && cp "/usr/include/python2.7/grammar.h" "$(@D)/python_include/grammar.h" && cp "/usr/include/python2.7/import.h" "$(@D)/python_include/import.h" && cp "/usr/include/python2.7/intobject.h" "$(@D)/python_include/intobject.h" && cp "/usr/include/python2.7/intrcheck.h" "$(@D)/python_include/intrcheck.h" && cp "/usr/include/python2.7/iterobject.h" "$(@D)/python_include/iterobject.h" && cp "/usr/include/python2.7/listobject.h" "$(@D)/python_include/listobject.h" && cp "/usr/include/python2.7/longintrepr.h" "$(@D)/python_include/longintrepr.h" && cp "/usr/include/python2.7/longobject.h" "$(@D)/python_include/longobject.h" && cp "/usr/include/python2.7/marshal.h" "$(@D)/python_include/marshal.h" && cp "/usr/include/python2.7/memoryobject.h" "$(@D)/python_include/memoryobject.h" && cp "/usr/include/python2.7/metagrammar.h" "$(@D)/python_include/metagrammar.h" && cp "/usr/include/python2.7/methodobject.h" "$(@D)/python_include/methodobject.h" && cp "/usr/include/python2.7/modsupport.h" "$(@D)/python_include/modsupport.h" && cp "/usr/include/python2.7/moduleobject.h" "$(@D)/python_include/moduleobject.h" && 
cp "/usr/include/python2.7/node.h" "$(@D)/python_include/node.h" && cp "/usr/include/python2.7/object.h" "$(@D)/python_include/object.h" && cp "/usr/include/python2.7/objimpl.h" "$(@D)/python_include/objimpl.h" && cp "/usr/include/python2.7/opcode.h" "$(@D)/python_include/opcode.h" && cp "/usr/include/python2.7/osdefs.h" "$(@D)/python_include/osdefs.h" && cp "/usr/include/python2.7/parsetok.h" "$(@D)/python_include/parsetok.h" && cp "/usr/include/python2.7/patchlevel.h" "$(@D)/python_include/patchlevel.h" && cp "/usr/include/python2.7/pgen.h" "$(@D)/python_include/pgen.h" && cp "/usr/include/python2.7/pgenheaders.h" "$(@D)/python_include/pgenheaders.h" && cp "/usr/include/python2.7/py_curses.h" "$(@D)/python_include/py_curses.h" && cp "/usr/include/python2.7/pyarena.h" "$(@D)/python_include/pyarena.h" && cp "/usr/include/python2.7/pycapsule.h" "$(@D)/python_include/pycapsule.h" && cp "/usr/include/python2.7/pyconfig.h" "$(@D)/python_include/pyconfig.h" && cp "/usr/include/python2.7/pyctype.h" "$(@D)/python_include/pyctype.h" && cp "/usr/include/python2.7/pydebug.h" "$(@D)/python_include/pydebug.h" && cp "/usr/include/python2.7/pyerrors.h" "$(@D)/python_include/pyerrors.h" && cp "/usr/include/python2.7/pyexpat.h" "$(@D)/python_include/pyexpat.h" && cp "/usr/include/python2.7/pyfpe.h" "$(@D)/python_include/pyfpe.h" && cp "/usr/include/python2.7/pygetopt.h" "$(@D)/python_include/pygetopt.h" && cp "/usr/include/python2.7/pymacconfig.h" "$(@D)/python_include/pymacconfig.h" && cp "/usr/include/python2.7/pymactoolbox.h" "$(@D)/python_include/pymactoolbox.h" && cp "/usr/include/python2.7/pymath.h" "$(@D)/python_include/pymath.h" && cp "/usr/include/python2.7/pymem.h" "$(@D)/python_include/pymem.h" && cp "/usr/include/python2.7/pyport.h" "$(@D)/python_include/pyport.h" && cp "/usr/include/python2.7/pystate.h" "$(@D)/python_include/pystate.h" && cp "/usr/include/python2.7/pystrcmp.h" "$(@D)/python_include/pystrcmp.h" && cp "/usr/include/python2.7/pystrtod.h" 
"$(@D)/python_include/pystrtod.h" && cp "/usr/include/python2.7/pythonrun.h" "$(@D)/python_include/pythonrun.h" && cp "/usr/include/python2.7/pythread.h" "$(@D)/python_include/pythread.h" && cp "/usr/include/python2.7/rangeobject.h" "$(@D)/python_include/rangeobject.h" && cp "/usr/include/python2.7/setobject.h" "$(@D)/python_include/setobject.h" && cp "/usr/include/python2.7/sliceobject.h" "$(@D)/python_include/sliceobject.h" && cp "/usr/include/python2.7/stringobject.h" "$(@D)/python_include/stringobject.h" && cp "/usr/include/python2.7/structmember.h" "$(@D)/python_include/structmember.h" && cp "/usr/include/python2.7/structseq.h" "$(@D)/python_include/structseq.h" && cp "/usr/include/python2.7/symtable.h" "$(@D)/python_include/symtable.h" && cp "/usr/include/python2.7/sysmodule.h" "$(@D)/python_include/sysmodule.h" && cp "/usr/include/python2.7/timefuncs.h" "$(@D)/python_include/timefuncs.h" && cp "/usr/include/python2.7/token.h" "$(@D)/python_include/token.h" && cp "/usr/include/python2.7/traceback.h" "$(@D)/python_include/traceback.h" && cp "/usr/include/python2.7/tupleobject.h" "$(@D)/python_include/tupleobject.h" && cp "/usr/include/python2.7/ucnhash.h" "$(@D)/python_include/ucnhash.h" && cp "/usr/include/python2.7/unicodeobject.h" "$(@D)/python_include/unicodeobject.h" && cp "/usr/include/python2.7/warnings.h" "$(@D)/python_include/warnings.h" && cp "/usr/include/python2.7/weakrefobject.h" "$(@D)/python_include/weakrefobject.h" + """, +) + +genrule( + name = "numpy_include", + outs = [ + "numpy_include/numpy/__multiarray_api.h", + "numpy_include/numpy/__ufunc_api.h", + "numpy_include/numpy/_neighborhood_iterator_imp.h", + "numpy_include/numpy/_numpyconfig.h", + "numpy_include/numpy/arrayobject.h", + "numpy_include/numpy/arrayscalars.h", + "numpy_include/numpy/halffloat.h", + "numpy_include/numpy/multiarray_api.txt", + "numpy_include/numpy/ndarrayobject.h", + "numpy_include/numpy/ndarraytypes.h", + "numpy_include/numpy/noprefix.h", + 
"numpy_include/numpy/npy_1_7_deprecated_api.h", + "numpy_include/numpy/npy_3kcompat.h", + "numpy_include/numpy/npy_common.h", + "numpy_include/numpy/npy_cpu.h", + "numpy_include/numpy/npy_endian.h", + "numpy_include/numpy/npy_interrupt.h", + "numpy_include/numpy/npy_math.h", + "numpy_include/numpy/npy_no_deprecated_api.h", + "numpy_include/numpy/npy_os.h", + "numpy_include/numpy/numpyconfig.h", + "numpy_include/numpy/old_defines.h", + "numpy_include/numpy/oldnumeric.h", + "numpy_include/numpy/ufunc_api.txt", + "numpy_include/numpy/ufuncobject.h", + "numpy_include/numpy/utils.h", + ], + cmd = """ +cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/__multiarray_api.h" "$(@D)/numpy_include/numpy/__multiarray_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/__ufunc_api.h" "$(@D)/numpy_include/numpy/__ufunc_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/_neighborhood_iterator_imp.h" "$(@D)/numpy_include/numpy/_neighborhood_iterator_imp.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/_numpyconfig.h" "$(@D)/numpy_include/numpy/_numpyconfig.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/arrayobject.h" "$(@D)/numpy_include/numpy/arrayobject.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/arrayscalars.h" "$(@D)/numpy_include/numpy/arrayscalars.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/halffloat.h" "$(@D)/numpy_include/numpy/halffloat.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/multiarray_api.txt" "$(@D)/numpy_include/numpy/multiarray_api.txt" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ndarrayobject.h" "$(@D)/numpy_include/numpy/ndarrayobject.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ndarraytypes.h" "$(@D)/numpy_include/numpy/ndarraytypes.h" && cp 
"/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/noprefix.h" "$(@D)/numpy_include/numpy/noprefix.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_1_7_deprecated_api.h" "$(@D)/numpy_include/numpy/npy_1_7_deprecated_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_3kcompat.h" "$(@D)/numpy_include/numpy/npy_3kcompat.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_common.h" "$(@D)/numpy_include/numpy/npy_common.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_cpu.h" "$(@D)/numpy_include/numpy/npy_cpu.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_endian.h" "$(@D)/numpy_include/numpy/npy_endian.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_interrupt.h" "$(@D)/numpy_include/numpy/npy_interrupt.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_math.h" "$(@D)/numpy_include/numpy/npy_math.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_no_deprecated_api.h" "$(@D)/numpy_include/numpy/npy_no_deprecated_api.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/npy_os.h" "$(@D)/numpy_include/numpy/npy_os.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/numpyconfig.h" "$(@D)/numpy_include/numpy/numpyconfig.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/old_defines.h" "$(@D)/numpy_include/numpy/old_defines.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/oldnumeric.h" "$(@D)/numpy_include/numpy/oldnumeric.h" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ufunc_api.txt" "$(@D)/numpy_include/numpy/ufunc_api.txt" && cp "/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/ufuncobject.h" "$(@D)/numpy_include/numpy/ufuncobject.h" && cp 
"/usr/local/lib/python2.7/dist-packages/numpy/core/include/numpy/utils.h" "$(@D)/numpy_include/numpy/utils.h" + """, +) -- GitLab From cf11a4cb47cb550cc6a1de5e5eb4394a9d949e09 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Fri, 2 Mar 2018 11:15:14 -0800 Subject: [PATCH 351/884] [XLA] Support while loops and constant in HLO BF16 propagation. PiperOrigin-RevId: 187644155 --- tensorflow/compiler/xla/literal_util.cc | 18 + tensorflow/compiler/xla/literal_util.h | 5 + tensorflow/compiler/xla/service/BUILD | 2 + .../xla/service/bfloat16_propagation.cc | 390 ++++++++++++++---- .../xla/service/bfloat16_propagation.h | 41 +- .../xla/service/bfloat16_propagation_test.cc | 227 ++++++++++ 6 files changed, 598 insertions(+), 85 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index a345e95a8b..1d1418fc2f 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -1434,6 +1434,24 @@ StatusOr> Literal::Convert( } } +StatusOr> Literal::ConvertToShape( + const Shape& dest_shape) const { + if (!ShapeUtil::IsTuple(dest_shape)) { + return Convert(dest_shape.element_type()); + } + std::vector elements; + for (int i = 0; i < ShapeUtil::TupleElementCount(shape()); ++i) { + auto element = LiteralView::Create(*this, {i}); + TF_ASSIGN_OR_RETURN( + auto new_element, + element.ConvertToShape(ShapeUtil::GetSubshape(dest_shape, {i}))); + elements.push_back(std::move(*new_element)); + } + auto converted = MakeUnique(); + *converted = Literal::MoveIntoTuple(&elements); + return std::move(converted); +} + template bool Literal::Piece::EqualElementsInternal( const Literal::Piece& other, std::vector* multi_index) const { diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index 1d58f0cbc7..cdc5d807e0 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -333,6 +333,11 @@ class Literal { StatusOr> Convert( 
PrimitiveType primitive_dest_type) const; + // Converts this literal to the given shape. Returns an error if the + // conversion is not possible. + StatusOr> ConvertToShape( + const Shape& dest_shape) const; + // Creates a scalar literal value zero of the given primitive type. static Literal Zero(PrimitiveType primitive_type); diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index e4ae812532..d71790fb2d 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -129,6 +129,7 @@ cc_library( ":hlo_dce", ":hlo_pass", ":tuple_simplifier", + "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_tree", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:util", @@ -148,6 +149,7 @@ tf_cc_test( "//tensorflow/compiler/xla:test_helpers", "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:literal_test_util", "//tensorflow/compiler/xla/tests:xla_internal_test_main", # fixdeps: keep ], ) diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index 6145c690b9..7708504dc9 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -15,6 +15,7 @@ limitations under the License.
#include "tensorflow/compiler/xla/service/bfloat16_propagation.h" +#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/map_util.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" #include "tensorflow/compiler/xla/service/hlo_dce.h" @@ -68,33 +69,53 @@ void BFloat16Propagation::DetermineAndMutateFusionComputationPrecision( for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false); } + computations_visited_in_mutation_pass_.insert( + fusion->fused_instructions_computation()); } -void BFloat16Propagation::AdjustFusionParameters(HloInstruction* fusion) { - CHECK_EQ(fusion->fused_parameters().size(), fusion->operand_count()); - for (int64 i = 0; i < fusion->operand_count(); ++i) { - auto parameter = fusion->fused_parameter(i); - ShapeUtil::ForEachMutableSubshape( - parameter->mutable_shape(), - [&](Shape* subshape, const ShapeIndex& index) { - if (!ShapeUtil::IsLeafIndex(parameter->shape(), index)) { - return; - } - PrimitiveType operand_type = - ShapeUtil::GetSubshape(fusion->operand(i)->shape(), index) - .element_type(); - if (subshape->element_type() == operand_type) { - return; - } - CHECK(operand_type == F32 || operand_type == BF16); - subshape->set_element_type(operand_type); +void BFloat16Propagation::DetermineAndMutateWhileComputationsPrecision( + HloInstruction* while_hlo) { + CHECK_EQ(while_hlo->opcode(), HloOpcode::kWhile); + + // We are depending on the while node itself having already been analyzed for + // whether it can output BF16 and this has been adjusted in the output shape, + // and now we're looking to update the body and condition computations to + // match the new output shape, as well as recursively process the whole while + // node even if the output shape was not modified. 
+ HloComputation* body = while_hlo->while_body(); + auto body_root = body->root_instruction(); + HloComputation* condition = while_hlo->while_condition(); + + ShapeUtil::ForEachMutableSubshape( + body_root->mutable_shape(), + [this, while_hlo, body_root](Shape* subshape, const ShapeIndex& index) { + if (subshape->element_type() != F32) { + return; + } + if (ShapeUtil::GetSubshape(while_hlo->shape(), index).element_type() == + BF16) { + subshape->set_element_type(BF16); changed_ = true; - VLOG(2) << "Fused parameter " << parameter->ToString() + VLOG(2) << "While body root " << body_root->ToString() << " at shape index " << index - << " adjusted to match operand in fusion " - << fusion->ToString(); - }); + << " changed to BF16 precision for while " + << while_hlo->ToString(); + } + }); + + auto body_insts = body->MakeInstructionPostOrder(); + for (auto inst_it = body_insts.rbegin(); inst_it != body_insts.rend(); + ++inst_it) { + DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false); } + computations_visited_in_mutation_pass_.insert(body); + + auto condition_insts = condition->MakeInstructionPostOrder(); + for (auto inst_it = condition_insts.rbegin(); + inst_it != condition_insts.rend(); ++inst_it) { + DetermineAndMutateInstructionPrecision(*inst_it, /*skip_parameters=*/false); + } + computations_visited_in_mutation_pass_.insert(condition); } bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, @@ -108,14 +129,45 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, continue; } for (const HloUse& use : value->uses()) { + if (!ContainsKey(instructions_visited_in_mutation_pass_, + use.instruction)) { + // We don't know yet whether use.instruction will consume BF16 since it + // hasn't been visited. Although we visit instructions in reverse + // topological order, this is still possible because there may be + // unvisited instruction that alias the same buffer. 
In this case, we + // aggressively skip this use, and if this causes inconsistency (e.g., + // one use is in BF16 but another use is in F32), it will be resolved at + // the end of the BFloat16Propagation pass. + continue; + } + // Any visited user that can accept BF16 has already been updated if + // necessary, e.g., the output has been changed to BF16 if it propagates + // precision, or a called computation's parameters have been changed to + // BF16 for fusions or whiles. if (use.instruction->opcode() == HloOpcode::kFusion) { - auto fused_parameter = + const auto* fused_parameter = use.instruction->fused_parameter(use.operand_number); if (ShapeUtil::GetSubshape(fused_parameter->shape(), use.operand_index) .element_type() != BF16) { return false; } continue; + } else if (use.instruction->opcode() == HloOpcode::kWhile) { + const auto* cond_parameter = + use.instruction->while_condition()->parameter_instruction( + use.operand_number); + if (ShapeUtil::GetSubshape(cond_parameter->shape(), use.operand_index) + .element_type() != BF16) { + return false; + } + const auto* body_parameter = + use.instruction->while_body()->parameter_instruction( + use.operand_number); + if (ShapeUtil::GetSubshape(body_parameter->shape(), use.operand_index) + .element_type() != BF16) { + return false; + } + continue; } if (bfloat16_support_->EffectiveOperandPrecisionIsBF16( *use.instruction, use.operand_number)) { @@ -149,24 +201,36 @@ bool BFloat16Propagation::AllUsersConsumeBF16(const HloInstruction& hlo, void BFloat16Propagation::DetermineAndMutateInstructionPrecision( HloInstruction* hlo, bool skip_parameters) { - // We handle any fusion computation after the instruction is handled, because - // we need to know a fusion's output shape before propagating inside its fused - // computation. 
- auto cleaner = tensorflow::gtl::MakeCleanup([this, hlo] { - if (hlo->opcode() == HloOpcode::kFusion) { - DetermineAndMutateFusionComputationPrecision(hlo); - } - }); + // We handle any fusion computation or while body/condition after the + // instruction is handled, because we need to know the output shape of a + // fusion or while before propagating inside its computations. + bool postpone_processing_called_computations = false; + auto cleaner = tensorflow::gtl::MakeCleanup( + [this, hlo, &postpone_processing_called_computations] { + if (!postpone_processing_called_computations) { + if (hlo->opcode() == HloOpcode::kFusion) { + DetermineAndMutateFusionComputationPrecision(hlo); + } else if (hlo->opcode() == HloOpcode::kWhile) { + DetermineAndMutateWhileComputationsPrecision(hlo); + } + } + instructions_visited_in_mutation_pass_.insert(hlo); + }); + + if (hlo->opcode() == HloOpcode::kWhile && + (caller_counts_[hlo->while_condition()] > 1 || + caller_counts_[hlo->while_body()] > 1)) { + postpone_processing_called_computations = true; + return; + } // Do not change precision for instructions related to entry and exit of a // computation, and control flow, because this pass might break the interfaces // or assumptions for them. 
if (hlo->opcode() == HloOpcode::kInfeed || // hlo->opcode() == HloOpcode::kOutfeed || // - hlo->opcode() == HloOpcode::kConstant || // hlo->opcode() == HloOpcode::kCustomCall || // hlo->opcode() == HloOpcode::kCall || // - hlo->opcode() == HloOpcode::kWhile || // hlo->opcode() == HloOpcode::kConditional || // (hlo->opcode() == HloOpcode::kParameter && skip_parameters)) { return; @@ -231,60 +295,198 @@ bool BFloat16Propagation::InstructionIsCandidateForBF16Output( return true; } -Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( - HloModule* module) { - std::list computations_topological_order = - module->MakeComputationPostOrder(); - for (auto comp_it = computations_topological_order.rbegin(); - comp_it != computations_topological_order.rend(); ++comp_it) { - auto insts = (*comp_it)->MakeInstructionPostOrder(); - // Do the adjustment on each instruction in the computation in reverse - // topological order. - for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { - auto hlo = *inst_it; - auto adjust_buffer = [this, hlo](Shape* subshape, - const ShapeIndex& index) { - if (subshape->element_type() != F32 && - subshape->element_type() != BF16) { - return; +void BFloat16Propagation::AdjustCalledComputationParameters( + HloInstruction* hlo) { + auto adjust_computation = + [this, hlo](HloComputation* computation, + tensorflow::gtl::ArraySlice operands) { + // Adjust parameters. 
+ CHECK_EQ(operands.size(), computation->num_parameters()); + for (int64 i = 0; i < operands.size(); ++i) { + auto parameter = computation->parameter_instruction(i); + ShapeUtil::ForEachMutableSubshape( + parameter->mutable_shape(), + [this, i, hlo, &operands, parameter](Shape* subshape, + const ShapeIndex& index) { + if (!ShapeUtil::IsLeafIndex(parameter->shape(), index)) { + return; + } + PrimitiveType operand_type = + ShapeUtil::GetSubshape(operands[i]->shape(), index) + .element_type(); + if (subshape->element_type() == operand_type) { + return; + } + CHECK(operand_type == F32 || operand_type == BF16); + subshape->set_element_type(operand_type); + changed_ = true; + VLOG(2) << "Called computation parameter " + << parameter->ToString() << " at shape index " << index + << " adjusted to match operand in HLO " + << hlo->ToString(); + }); } - PrimitiveType type = BF16; - for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { - if (value->shape().element_type() == BF16) { - continue; + }; + + switch (hlo->opcode()) { + case HloOpcode::kFusion: + adjust_computation(hlo->fused_instructions_computation(), + hlo->operands()); + break; + case HloOpcode::kWhile: + adjust_computation(hlo->while_condition(), hlo->operands()); + adjust_computation(hlo->while_body(), hlo->operands()); + break; + default: + break; + } +} + +void BFloat16Propagation::AdjustCalledComputationRoot(HloInstruction* hlo) { + auto adjust_computation = [this, hlo](HloComputation* computation, + const Shape& output_shape) { + // Adjust root. 
+ HloInstruction* root = computation->root_instruction(); + ShapeUtil::ForEachMutableSubshape( + root->mutable_shape(), [this, hlo, root, &output_shape]( + Shape* subshape, const ShapeIndex& index) { + if (!ShapeUtil::IsLeafIndex(hlo->shape(), index)) { + return; } - CHECK_EQ(value->shape().element_type(), F32); - type = F32; - break; - } - // It's possible that a user has been changed from BF16 to F32 - // during this final adjustment pass, so we need to check - // AllUsersConsumeBF16() again. - if (type == BF16 && !AllUsersConsumeBF16(*hlo, index)) { - type = F32; - } - if (type == F32) { - for (const auto* value : - dataflow_->GetValueSet(hlo, index).values()) { - // We rely on the fact that this adjustment works in reverse - // topological order. Adding the value to - // values_that_must_be_kept_as_f32_ will ensure the correctness - // of the adjustment for HLOs that will be processed later. - values_that_must_be_kept_as_f32_.insert(value); + const PrimitiveType output_type = + ShapeUtil::GetSubshape(output_shape, index).element_type(); + if (subshape->element_type() == output_type) { + return; + } + CHECK(output_type == F32 || output_type == BF16); + subshape->set_element_type(output_type); + // It's possible that output_type is F32, but the root instruction's + // type is BF16; e.g., a fusion node's output was changed to BF16 + // initially but then adjusted back to F32, and the fusion computation + // is now being adjusted after the fusion node. + if (output_type == F32) { + for (const auto* value : + dataflow_->GetValueSet(root, index).values()) { + // We rely on the fact that this adjustment works in reverse + // topological order so that called computation will be + // processed later. Adding the value to + // values_that_must_be_kept_as_f32_ will ensure the + // correctness of the adjustment for HLOs that will be + // processed later. 
+ values_that_must_be_kept_as_f32_.insert(value); + } } + changed_ = true; + VLOG(2) << "Called computation root " << root->ToString() + << " at shape index " << index + << " adjusted to match output shape of " << hlo->ToString(); + }); + }; + + switch (hlo->opcode()) { + case HloOpcode::kFusion: + adjust_computation(hlo->fused_instructions_computation(), hlo->shape()); + break; + case HloOpcode::kWhile: + adjust_computation(hlo->while_condition(), hlo->shape()); + adjust_computation(hlo->while_body(), hlo->shape()); + break; + default: + break; + } +} + +bool BFloat16Propagation::ResolveInconsistencyOfAliasingBuffersHelper( + HloComputation* computation, + tensorflow::gtl::FlatSet* visited_computations) { + bool parameter_changed = false; + auto insts = computation->MakeInstructionPostOrder(); + // Do the adjustment on each instruction in the computation in reverse + // topological order. + for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { + auto hlo = *inst_it; + auto adjust_hlo_output = [this, hlo, &parameter_changed]( + Shape* subshape, const ShapeIndex& index) { + if (subshape->element_type() != F32 && subshape->element_type() != BF16) { + return; + } + PrimitiveType type = BF16; + for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { + if (value->shape().element_type() == BF16) { + continue; } + CHECK_EQ(value->shape().element_type(), F32); + type = F32; + break; + } + // It's possible that a user has been changed from BF16 to F32 + // during this final adjustment pass, so we need to check + // AllUsersConsumeBF16() again. + if (type == BF16 && !AllUsersConsumeBF16(*hlo, index)) { + type = F32; + } + if (type == F32) { + for (const auto* value : dataflow_->GetValueSet(hlo, index).values()) { + // We rely on the fact that this adjustment works in reverse + // topological order. Adding the value to + // values_that_must_be_kept_as_f32_ will ensure the correctness + // of the adjustment for HLOs that will be processed later.
+ values_that_must_be_kept_as_f32_.insert(value); + } + } + if (type != subshape->element_type()) { subshape->set_element_type(type); - }; - ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), adjust_buffer); - } - // Now adjust parameters of fusions inside this computation. - for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { - auto hlo = *inst_it; - if (hlo->opcode() == HloOpcode::kFusion) { - AdjustFusionParameters(hlo); + VLOG(2) << "HloInstruction output at shape index " << index + << " adjusted to " << *subshape << ": " << hlo->ToString(); + if (hlo->opcode() == HloOpcode::kParameter) { + parameter_changed = true; + } + } + }; + ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), adjust_hlo_output); + AdjustCalledComputationRoot(hlo); + if (hlo->opcode() == HloOpcode::kWhile) { + // We need to run on the while body and condition repeatedly until a fixed + // point is reached, i.e., the parameters do not change any more. We may + // need more than one iteration because the while input and output alias + // each other, so changing one input parameter requires changing the + // corresponding output element and thus may transitively require changing + // another input parameter. A fixed point will be reached because the + // parameters can only be changed from BF16 to F32, not the other way + // around. + tensorflow::gtl::FlatSet visited_in_while; + while (ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_condition(), + &visited_in_while) || + ResolveInconsistencyOfAliasingBuffersHelper(hlo->while_body(), + &visited_in_while)) { + visited_in_while.clear(); + ShapeUtil::ForEachMutableSubshape(hlo->mutable_shape(), + adjust_hlo_output); + AdjustCalledComputationRoot(hlo); } + visited_computations->insert(visited_in_while.begin(), + visited_in_while.end()); } } + // Now adjust parameters of called computations. 
+ for (auto inst_it = insts.rbegin(); inst_it != insts.rend(); ++inst_it) { + AdjustCalledComputationParameters(*inst_it); + } + return parameter_changed; +} + +Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( + HloModule* module) { + std::list computations_topological_order = + module->MakeComputationPostOrder(); + tensorflow::gtl::FlatSet resolved; + for (auto comp_it = computations_topological_order.rbegin(); + comp_it != computations_topological_order.rend(); ++comp_it) { + if (ContainsKey(resolved, *comp_it)) { + continue; + } + ResolveInconsistencyOfAliasingBuffersHelper(*comp_it, &resolved); + } // We could have changed a fusion computation's root shape to have a different // precision than the fusion node's output, if the fusion root does not @@ -382,9 +584,39 @@ Status BFloat16Propagation::ResolveInconsistencyOfAliasingBuffers( needs_tuple_simplifier |= ShapeUtil::IsTuple(hlo->shape()); } } + + // We may have converted some constants from F32 to BF16, so adjust the + // constant literals in such cases. We do this here instead of when the + // constant node's output is changed because 1) the HloInstruction interface does not + // allow resetting the literal so we have to create a new kConstant + // instruction to replace the old one, which invalidates dataflow analysis, + // and 2) it's possible that a kConstant's output gets changed to BF16 at the + // beginning but later on adjusted back to F32, so converting literals here + // can avoid repeated conversions. + // + // TODO(b/73833576): Consider resetting literal in HloInstruction.
+ bool needs_dce = needs_tuple_simplifier; + for (auto computation : computations_topological_order) { + for (auto hlo : computation->MakeInstructionPostOrder()) { + if (hlo->opcode() != HloOpcode::kConstant) { + continue; + } + if (!ShapeUtil::Equal(hlo->literal().shape(), hlo->shape())) { + TF_ASSIGN_OR_RETURN(auto converted_literal, + hlo->literal().ConvertToShape(hlo->shape())); + auto new_constant = computation->AddInstruction( + HloInstruction::CreateConstant(std::move(converted_literal))); + TF_RETURN_IF_ERROR(hlo->ReplaceAllUsesWith(new_constant)); + needs_dce = true; + } + } + } + if (needs_tuple_simplifier) { TupleSimplifier tuple_simplifier; TF_RETURN_IF_ERROR(tuple_simplifier.Run(module).status()); + } + if (needs_dce) { HloDCE dce; TF_RETURN_IF_ERROR(dce.Run(module).status()); } diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.h b/tensorflow/compiler/xla/service/bfloat16_propagation.h index ccf77d7b4e..89a5ac5db1 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.h +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.h @@ -38,7 +38,8 @@ namespace xla { // be bitwise identical to that without this pass; this is possible if the // backend already reduces precision to BF16 on some HLO instructions. // -// This pass will not modify the signature of any non-fusion computation. +// This pass will not modify the signature of a computation, unless it is a +// fusion computation or its only caller is a while. // // !!! WARNING !!! This pass can introduce mixed precision in individual HLOs, // which has two issues: @@ -92,8 +93,23 @@ class BFloat16Propagation : public HloPassInterface { bool skip_parameters); // Special handling in the mutation pass for fusion computations. + // + // Precondition: hlo->opcode() == kFusion void DetermineAndMutateFusionComputationPrecision(HloInstruction* fusion); + // Special handling in the mutation pass for while computations. 
+ // + // Precondition: hlo->opcode() == kWhile + void DetermineAndMutateWhileComputationsPrecision(HloInstruction* while_hlo); + + // The set of HloInstructions that have been visited in the mutation pass. + tensorflow::gtl::FlatSet + instructions_visited_in_mutation_pass_; + + // The set of HloComputations that have been visited in the mutation pass. + tensorflow::gtl::FlatSet + computations_visited_in_mutation_pass_; + // *************************** // Functions called by the final inconsistency resolving pass. @@ -102,9 +118,20 @@ class BFloat16Propagation : public HloPassInterface { // same precision. Status ResolveInconsistencyOfAliasingBuffers(HloModule* module); - // Makes the fusion parameters match the precision of the actual parameters - // passed to the fusion node. - void AdjustFusionParameters(HloInstruction* fusion); + // Resolves inconsistency of aliasing buffers for the given computation, and + // recursively runs on a while instruction's condition and body until a fixed + // point is reached. + bool ResolveInconsistencyOfAliasingBuffersHelper( + HloComputation* computation, + tensorflow::gtl::FlatSet* visited_computations); + + // Makes the parameters of called computations match how they are called by + // the given HLO. + void AdjustCalledComputationParameters(HloInstruction* hlo); + + // Makes the root instructions of called computations match how they are used + // by the given HLO. + void AdjustCalledComputationRoot(HloInstruction* hlo); // *************************** // Functions called and state used by two or more passes. @@ -117,8 +144,10 @@ class BFloat16Propagation : public HloPassInterface { // The set of F32 HLO values that must be kept in F32. tensorflow::gtl::FlatSet values_that_must_be_kept_as_f32_; - // *************************** - // State used by both passes. + // Mapping from each HloComputation to the number of callers to it in the + // module. Populated at the beginning of this pass. 
+ tensorflow::gtl::FlatMap caller_counts_; + const BFloat16Support* bfloat16_support_; std::unique_ptr dataflow_; diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc index 2047e2053a..5950b004b3 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tests/literal_test_util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" namespace xla { @@ -121,6 +122,41 @@ TEST_F(BFloat16PropagationTest, PropagateThroughSelectButNotAdd) { EXPECT_FALSE(OutputsBF16(c)); } +// Tests that if a constant is converted to BF16 then its literal must also be +// converted. +TEST_F(BFloat16PropagationTest, ConvertConstantLiteral) { + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + Array2D array_a(4, 4); + array_a.FillUnique(1.0f); + Array2D array_b(4, 4); + array_b.FillUnique(10.0f); + + HloInstruction* a = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateFromArray(array_a))); + HloInstruction* b = builder.AddInstruction( + HloInstruction::CreateConstant(Literal::CreateFromArray(array_b))); + HloInstruction* dot = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, a, b)); + + auto module = CreateNewModule(); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + + EXPECT_EQ(computation->root_instruction(), dot); + EXPECT_TRUE(OutputsBF16(dot->operand(0))); + EXPECT_TRUE(OutputsBF16(dot->operand(1))); + EXPECT_EQ(dot->operand(0)->opcode(), HloOpcode::kConstant); + EXPECT_EQ(dot->operand(1)->opcode(), HloOpcode::kConstant); 
+ LiteralTestUtil::ExpectEqual( + dot->operand(0)->literal(), + *LiteralTestUtil::ConvertF32ToBF16(*Literal::CreateFromArray(array_a))); + LiteralTestUtil::ExpectEqual( + dot->operand(1)->literal(), + *LiteralTestUtil::ConvertF32ToBF16(*Literal::CreateFromArray(array_b))); +} + // Tests that BF16 can be propagated through nested tuples. TEST_F(BFloat16PropagationTest, PropagateThroughTuples) { auto builder = HloComputation::Builder(TestName()); @@ -390,4 +426,195 @@ TEST_F(BFloat16PropagationTest, SelectOverTuples) { EXPECT_TRUE(OutputsBF16(xpose)); } +// Tests that BF16 is propagated properly through while computations. +TEST_F(BFloat16PropagationTest, PropagateThroughWhile) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, shape, "param1")); + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* tuple = + builder.AddInstruction(HloInstruction::CreateTuple({add0, add1})); + + auto builder_cond = HloComputation::Builder("cond"); + auto cond_param = builder_cond.AddInstruction( + HloInstruction::CreateParameter(0, tuple->shape(), "cond_param")); + auto cond_lhs = builder_cond.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond_param, 0)); + auto cond_rhs = builder_cond.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond_param, 1)); + // This add should prevent RHS from using BF16 + auto cond_add_rhs = builder_cond.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, cond_rhs, cond_rhs)); + auto cond_dot = 
builder_cond.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, cond_lhs, cond_add_rhs)); + builder_cond.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kGt, + builder_cond.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond_dot, {0, 0}, {1, 1}, {1, 1})), + builder_cond.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond_dot, {1, 1}, {2, 2}, {1, 1})))); + auto cond = module->AddEmbeddedComputation(builder_cond.Build()); + + auto builder_body = HloComputation::Builder("body"); + auto body_param = builder_body.AddInstruction( + HloInstruction::CreateParameter(0, tuple->shape(), "body_param")); + auto body_lhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 0)); + auto body_rhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 1)); + auto body_dot = builder_body.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, body_lhs, body_rhs)); + builder_body.AddInstruction( + HloInstruction::CreateTuple({body_dot, body_rhs})); + auto body = module->AddEmbeddedComputation(builder_body.Build()); + + auto while_hlo = builder.AddInstruction( + HloInstruction::CreateWhile(tuple->shape(), cond, body, tuple)); + + auto lhs = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while_hlo, 0)); + auto rhs = builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while_hlo, 1)); + auto dot = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, lhs, rhs)); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + + EXPECT_EQ(computation->root_instruction(), dot); + EXPECT_TRUE(OutputsBF16(lhs)); + EXPECT_FALSE(OutputsBF16(rhs)); + EXPECT_TRUE(OutputsBF16(body_dot)); + EXPECT_TRUE(OutputsBF16(body_lhs)); + EXPECT_FALSE(OutputsBF16(body_rhs)); + 
EXPECT_TRUE(OutputsBF16(cond_lhs)); + EXPECT_FALSE(OutputsBF16(cond_rhs)); + EXPECT_TRUE(OutputsBF16(add0)); + EXPECT_FALSE(OutputsBF16(add1)); +} + +// Tests that BF16 is not propagated through multiple whiles that invoke the +// same computation as long as one while prevents the propagation. +TEST_F(BFloat16PropagationTest, DoNotPropagateWhilesCallingSameComputation) { + auto module = CreateNewModule(); + auto builder = HloComputation::Builder(TestName()); + Shape shape = ShapeUtil::MakeShape(F32, {4, 4}); + + HloInstruction* param0 = builder.AddInstruction( + HloInstruction::CreateParameter(0, shape, "param0")); + HloInstruction* param1 = builder.AddInstruction( + HloInstruction::CreateParameter(1, shape, "param1")); + HloInstruction* add0 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add1 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add2 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* add3 = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1)); + HloInstruction* tuple0 = + builder.AddInstruction(HloInstruction::CreateTuple({add0, add1})); + HloInstruction* tuple1 = + builder.AddInstruction(HloInstruction::CreateTuple({add2, add3})); + + // Condition computation for the first while. 
+ auto builder_cond0 = HloComputation::Builder("cond0"); + auto cond0_param = builder_cond0.AddInstruction( + HloInstruction::CreateParameter(0, tuple0->shape(), "cond0_param")); + auto cond0_lhs = builder_cond0.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond0_param, 0)); + auto cond0_rhs = builder_cond0.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond0_param, 1)); + // This add should prevent RHS from using BF16 + auto cond0_add_rhs = + builder_cond0.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kAdd, cond0_rhs, cond0_rhs)); + auto cond0_dot = builder_cond0.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, cond0_lhs, cond0_add_rhs)); + builder_cond0.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kGt, + builder_cond0.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond0_dot, {0, 0}, {1, 1}, {1, 1})), + builder_cond0.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond0_dot, {1, 1}, {2, 2}, {1, 1})))); + auto cond0 = module->AddEmbeddedComputation(builder_cond0.Build()); + + // Condition computation for the second while. 
+ auto builder_cond1 = HloComputation::Builder("cond1"); + auto cond1_param = builder_cond1.AddInstruction( + HloInstruction::CreateParameter(0, tuple1->shape(), "cond1_param")); + auto cond1_lhs = builder_cond1.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond1_param, 0)); + auto cond1_rhs = builder_cond1.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, cond1_param, 1)); + // This add should prevent LHS from using BF16 + auto cond1_add_lhs = + builder_cond1.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kAdd, cond1_lhs, cond1_lhs)); + auto cond1_dot = builder_cond1.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, cond1_add_lhs, cond1_rhs)); + builder_cond1.AddInstruction(HloInstruction::CreateBinary( + ShapeUtil::MakeShape(PRED, {}), HloOpcode::kGt, + builder_cond1.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond1_dot, {0, 0}, {1, 1}, {1, 1})), + builder_cond1.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {}), cond1_dot, {1, 1}, {2, 2}, {1, 1})))); + auto cond1 = module->AddEmbeddedComputation(builder_cond1.Build()); + + // Body computation shared by both whiles. 
+ auto builder_body = HloComputation::Builder("body"); + auto body_param = builder_body.AddInstruction( + HloInstruction::CreateParameter(0, tuple0->shape(), "body_param")); + auto body_lhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 0)); + auto body_rhs = builder_body.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, body_param, 1)); + auto body_dot = builder_body.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, body_lhs, body_rhs)); + builder_body.AddInstruction( + HloInstruction::CreateTuple({body_dot, body_rhs})); + auto body = module->AddEmbeddedComputation(builder_body.Build()); + + auto while0 = builder.AddInstruction( + HloInstruction::CreateWhile(tuple0->shape(), cond0, body, tuple0)); + auto while1 = builder.AddInstruction( + HloInstruction::CreateWhile(tuple1->shape(), cond1, body, tuple1)); + + auto lhs = builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while0, 0)), + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while0, 1)))); + auto rhs = builder.AddInstruction(HloInstruction::CreateBinary( + shape, HloOpcode::kDot, + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while1, 0)), + builder.AddInstruction( + HloInstruction::CreateGetTupleElement(shape, while1, 1)))); + auto dot = builder.AddInstruction( + HloInstruction::CreateBinary(shape, HloOpcode::kDot, lhs, rhs)); + auto computation = module->AddEntryComputation(builder.Build()); + + EXPECT_TRUE(PropagatePrecision(module.get())); + EXPECT_FALSE(OutputsBF16(body_dot)); + EXPECT_FALSE(OutputsBF16(body_rhs)); + EXPECT_FALSE(OutputsBF16(body_lhs)); + EXPECT_FALSE(OutputsBF16(cond0_lhs)); + EXPECT_FALSE(OutputsBF16(cond0_rhs)); + EXPECT_FALSE(OutputsBF16(cond1_lhs)); + EXPECT_FALSE(OutputsBF16(cond1_rhs)); + EXPECT_TRUE(OutputsBF16(cond0_add_rhs)); + 
EXPECT_TRUE(OutputsBF16(cond1_add_lhs)); + EXPECT_EQ(computation->root_instruction(), dot); +} + } // namespace xla -- GitLab From 3fb65ed8667df659ea8634a7e142e989cecea9f8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 11:18:41 -0800 Subject: [PATCH 352/884] Add a configurable preference for scheduling fuller batches sooner to the adaptive shared batcher. A full batch will now be scheduled before an older, nearly empty batch as long as the age gap is less than full_batch_scheduling_boost_micros. This parameter improves latency under heavy load, but too large a value will harm tail latency. PiperOrigin-RevId: 187644796 --- .../adaptive_shared_batch_scheduler.h | 61 +++++++++------- .../adaptive_shared_batch_scheduler_test.cc | 71 +++++++++++++++++++ 2 files changed, 107 insertions(+), 25 deletions(-) diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h index 661ed239d3..339d792302 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler.h @@ -19,7 +19,6 @@ limitations under the License. #include #include #include -#include #include #include #include @@ -44,15 +43,14 @@ template class ASBSQueue; } // namespace internal -// EXPERIMENTAL: API MAY BE SUBJECTED TO SUDDEN CHANGES. -// // Shared batch scheduler designed to minimize latency. The scheduler keeps // track of a number of queues (one per model or model version) which are // continuously enqueuing requests. The scheduler groups the requests into // batches which it periodically sends off for processing (see // shared_batch_scheduler.h for more details). AdaptiveSharedBatchScheduler -// (ASBS) prioritizes batches by age (i.e. the batch's oldest request) -// irrespective of queue or batch size. +// (ASBS) prioritizes batches primarily by age (i.e. 
the batch's oldest request) +// along with a configurable preference for scheduling larger batches first. +// // // ASBS tries to keep the system busy by maintaining an adjustable number of // concurrently processed batches. If a new batch is created, and the number of @@ -93,6 +91,13 @@ class AdaptiveSharedBatchScheduler // for num_batch_threads allows for large in_flight_batches_limit_, which // will harm latency for some time once load increases again. int64 num_batch_threads = port::NumSchedulableCPUs(); + // Although batch selection is primarily based on age, this parameter + // specifies a preference for larger batches. A full batch will be + // scheduled before an older, nearly empty batch as long as the age gap is + // less than full_batch_scheduling_boost_micros. The optimal value for this + // parameter should be of order the batch processing latency, but must be + // chosen carefully, as too large a value will harm tail latency. + int64 full_batch_scheduling_boost_micros = 0; // The environment to use (typically only overridden by test code). Env* env = Env::Default(); // Initial limit for number of batches being concurrently processed. @@ -153,17 +158,9 @@ class AdaptiveSharedBatchScheduler const Options options_; - struct BatchCompare { - bool operator()(const internal::ASBSBatch* a, - const internal::ASBSBatch* b); - }; - // Collection of batches added by AddBatch, ordered by age. Owned by scheduler // until they are released for processing. - std::priority_queue*, - std::vector*>, - BatchCompare> - batches_ GUARDED_BY(mu_); + std::vector*> batches_ GUARDED_BY(mu_); // Unowned queues and callbacks added by AddQueue. 
std::unordered_map*, BatchProcessor> @@ -288,6 +285,11 @@ Status AdaptiveSharedBatchScheduler::Create( return errors::InvalidArgument("num_batch_threads must be positive; was ", options.num_batch_threads); } + if (options.full_batch_scheduling_boost_micros < 0) { + return errors::InvalidArgument( + "full_batch_scheduling_boost_micros can't be negative; was ", + options.full_batch_scheduling_boost_micros); + } if (options.initial_in_flight_batches_limit > options.num_batch_threads) { return errors::InvalidArgument( "initial_in_flight_batches_limit (", @@ -348,7 +350,7 @@ template void AdaptiveSharedBatchScheduler::AddBatch( const internal::ASBSBatch* batch) { mutex_lock l(mu_); - batches_.push(batch); + batches_.push_back(batch); MaybeScheduleNextBatch(); } @@ -366,10 +368,26 @@ void AdaptiveSharedBatchScheduler::MaybeScheduleNextBatch() { // Non-integer limit handled probabilistially. if (in_flight_batches_limit_ - in_flight_batches_ < 1 && rand_double_(rand_engine_) > - (in_flight_batches_limit_ - in_flight_batches_)) + in_flight_batches_limit_ - in_flight_batches_) { return; - const internal::ASBSBatch* batch = batches_.top(); - batches_.pop(); + } + auto best_it = batches_.begin(); + double best_score = + (*best_it)->creation_time_micros() - + options_.full_batch_scheduling_boost_micros * (*best_it)->size() / + static_cast((*best_it)->queue()->max_task_size()); + for (auto it = batches_.begin() + 1; it != batches_.end(); it++) { + const double score = + (*it)->creation_time_micros() - + options_.full_batch_scheduling_boost_micros * (*it)->size() / + static_cast((*it)->queue()->max_task_size()); + if (score < best_score) { + best_score = score; + best_it = it; + } + } + const internal::ASBSBatch* batch = *best_it; + batches_.erase(best_it); // Queue may destroy itself after ReleaseBatch is called. 
batch->queue()->ReleaseBatch(batch); batch_thread_pool_->Schedule( @@ -427,13 +445,6 @@ void AdaptiveSharedBatchScheduler::CallbackWrapper( MaybeScheduleNextBatch(); } -template -bool AdaptiveSharedBatchScheduler::BatchCompare::operator()( - const internal::ASBSBatch* a, - const internal::ASBSBatch* b) { - return a->creation_time_micros() > b->creation_time_micros(); -} - // ---------------- ASBSQueue ---------------- namespace internal { diff --git a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc index 109234287e..1be0c1f5c6 100644 --- a/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc +++ b/tensorflow/core/kernels/batching_util/adaptive_shared_batch_scheduler_test.cc @@ -180,6 +180,77 @@ TEST(AdaptiveSharedBatchSchedulerTest, InFlightBatchesLimitTuning) { stop_teardown.Notify(); } +TEST(AdaptiveSharedBatchSchedulerTest, FullBatchSchedulingBoostMicros) { + test_util::FakeClockEnv env(Env::Default()); + Notification start_teardown, stop_teardown; + std::unique_ptr teardown_thread = + CreateFakeClockAdvancerThread(&env, &start_teardown, &stop_teardown); + { + AdaptiveSharedBatchScheduler::Options options; + options.env = &env; + options.initial_in_flight_batches_limit = 1; + options.batches_to_average_over = 1000; + options.full_batch_scheduling_boost_micros = 100; + mutex mu; + int processed_batches = 0; + Notification finish_processing; + auto queue_callback = [&mu, &processed_batches, &finish_processing]( + std::unique_ptr> batch) { + ASSERT_TRUE(batch->IsClosed()); + finish_processing.WaitForNotification(); + mutex_lock l(mu); + processed_batches++; + switch (processed_batches) { + case 1: + EXPECT_EQ(100, batch->size()); + break; + case 2: + EXPECT_EQ(50, batch->size()); + break; + case 3: + EXPECT_EQ(900, batch->size()); + break; + case 4: + EXPECT_EQ(200, batch->size()); + break; + default: + EXPECT_TRUE(false) << "Should 
only have 4 batches"; + } + }; + std::shared_ptr> scheduler; + TF_ASSERT_OK( + AdaptiveSharedBatchScheduler::Create(options, &scheduler)); + AdaptiveSharedBatchScheduler::QueueOptions queue_options; + std::unique_ptr> queue1; + std::unique_ptr> queue2; + queue_options.max_batch_size = 1000; + TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue1)); + queue_options.max_batch_size = 100; + TF_ASSERT_OK(scheduler->AddQueue(queue_options, queue_callback, &queue2)); + + // First batch immediately processed. + TF_ASSERT_OK(ScheduleTask(100, queue1.get())); + + TF_ASSERT_OK(ScheduleTask(100, queue1.get())); + env.AdvanceByMicroseconds(10); + TF_ASSERT_OK(ScheduleTask(100, queue1.get())); + env.AdvanceByMicroseconds(10); + + TF_ASSERT_OK(ScheduleTask(50, queue2.get())); + env.AdvanceByMicroseconds(45); + + TF_ASSERT_OK(ScheduleTask(900, queue1.get())); + + // Second batch - creation time: 0, fullness: 0.2, sched score: -20 + // Third batch - creation time: 20, fullness: 0.5, sched score: -30 + // Fourth batch - creation time: 65, fullness: 0.9, sched score: -25 + + finish_processing.Notify(); + start_teardown.Notify(); + } + stop_teardown.Notify(); +} + TEST(AdaptiveSharedBatchSchedulerTest, DeleteQueue) { AdaptiveSharedBatchScheduler::Options options; options.initial_in_flight_batches_limit = 1; -- GitLab From 1ded0ecca819e8569f120a3eb35cc477636f3340 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 2 Mar 2018 11:35:53 -0800 Subject: [PATCH 353/884] GCS: Update throttle state even if disabled. 
PiperOrigin-RevId: 187647263 --- tensorflow/core/platform/cloud/gcs_throttle.cc | 4 +--- tensorflow/core/platform/cloud/gcs_throttle.h | 13 +++++++++++-- .../core/platform/cloud/gcs_throttle_test.cc | 18 ++++++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/platform/cloud/gcs_throttle.cc b/tensorflow/core/platform/cloud/gcs_throttle.cc index eb5f8958a3..27dd06a625 100644 --- a/tensorflow/core/platform/cloud/gcs_throttle.cc +++ b/tensorflow/core/platform/cloud/gcs_throttle.cc @@ -26,10 +26,9 @@ GcsThrottle::GcsThrottle(EnvTime* env_time) bool GcsThrottle::AdmitRequest() { mutex_lock l(mu_); - if (!config_.enabled) return true; UpdateState(); if (available_tokens_ < config_.tokens_per_request) { - return false; + return false || !config_.enabled; } available_tokens_ -= config_.tokens_per_request; return true; @@ -37,7 +36,6 @@ bool GcsThrottle::AdmitRequest() { void GcsThrottle::RecordResponse(size_t num_bytes) { mutex_lock l(mu_); - if (!config_.enabled) return; UpdateState(); available_tokens_ -= request_bytes_to_tokens(num_bytes); } diff --git a/tensorflow/core/platform/cloud/gcs_throttle.h b/tensorflow/core/platform/cloud/gcs_throttle.h index 1a89daef08..6d5eed7338 100644 --- a/tensorflow/core/platform/cloud/gcs_throttle.h +++ b/tensorflow/core/platform/cloud/gcs_throttle.h @@ -109,13 +109,22 @@ class GcsThrottle { * purpose of this function is to make available to monitoring or other * instrumentation the number of available tokens in the pool. */ - inline int64 available_tokens() { + inline int64 available_tokens() LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); - if (!config_.enabled) return 0; UpdateState(); return available_tokens_; } + /** + * is_enabled determines if the throttle is enabled. + * + * If !is_enabled(), AdmitRequest() will always return true. 
+ */ + bool is_enabled() LOCKS_EXCLUDED(mu_) { + mutex_lock l(mu_); + return config_.enabled; + } + private: /** * UpdateState updates the available_tokens_ and last_updated_secs_ variables. diff --git a/tensorflow/core/platform/cloud/gcs_throttle_test.cc b/tensorflow/core/platform/cloud/gcs_throttle_test.cc index 694756022e..57193ac405 100644 --- a/tensorflow/core/platform/cloud/gcs_throttle_test.cc +++ b/tensorflow/core/platform/cloud/gcs_throttle_test.cc @@ -96,6 +96,24 @@ TEST_F(GcsThrottleTest, ReverseTime) { EXPECT_EQ(200000, throttle_.available_tokens()); } +TEST(GcsThrottleDisabledTest, Disabled) { + TestTime time; + GcsThrottle throttle(&time); + ASSERT_FALSE(throttle.is_enabled()); // Verify throttle is disabled. + + EXPECT_EQ(0, throttle.available_tokens()); + time.AdvanceSeconds(1); + EXPECT_EQ(100000, throttle.available_tokens()); + EXPECT_TRUE(throttle.AdmitRequest()); + EXPECT_EQ(99900, throttle.available_tokens()); + time.AdvanceSeconds(1); + EXPECT_EQ(199900, throttle.available_tokens()); + throttle.RecordResponse(128000000); // 128 MB response. + EXPECT_LT(0, throttle.available_tokens()); + // Admit request even without available tokens + EXPECT_TRUE(throttle.AdmitRequest()); +} + } // namespace } // namespace tensorflow -- GitLab From 2abc47106624e0102c917535dd6df45561550ade Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Fri, 2 Mar 2018 11:59:02 -0800 Subject: [PATCH 354/884] Move the PS_OPS from Estimator to device_setter to benefit more users. 
PiperOrigin-RevId: 187650283 --- tensorflow/python/estimator/estimator.py | 10 ++-------- tensorflow/python/training/device_setter.py | 9 +++++++++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 1a2b33721a..60351471f1 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -49,6 +49,7 @@ from tensorflow.python.saved_model import builder as saved_model_builder from tensorflow.python.saved_model import tag_constants from tensorflow.python.summary import summary from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import device_setter from tensorflow.python.training import evaluation from tensorflow.python.training import monitored_session from tensorflow.python.training import saver @@ -1007,13 +1008,6 @@ def _get_replica_device_setter(config): Returns: A replica device setter, or None. """ - ps_ops = [ - 'Variable', 'VariableV2', 'AutoReloadVariable', 'MutableHashTable', - 'MutableHashTableV2', 'MutableHashTableOfTensors', - 'MutableHashTableOfTensorsV2', 'MutableDenseHashTable', - 'MutableDenseHashTableV2', 'VarHandleOp' - ] - if config.task_type: worker_device = '/job:%s/task:%d' % (config.task_type, config.task_id) else: @@ -1024,7 +1018,7 @@ def _get_replica_device_setter(config): ps_tasks=config.num_ps_replicas, worker_device=worker_device, merge_devices=True, - ps_ops=ps_ops, + ps_ops=list(device_setter.STANDARD_PS_OPS), cluster=config.cluster_spec) else: return None diff --git a/tensorflow/python/training/device_setter.py b/tensorflow/python/training/device_setter.py index 689088bb41..0e824d89e9 100644 --- a/tensorflow/python/training/device_setter.py +++ b/tensorflow/python/training/device_setter.py @@ -25,6 +25,15 @@ from tensorflow.python.platform import tf_logging as logging from tensorflow.python.training import server_lib from tensorflow.python.util.tf_export 
import tf_export +# This is a tuple of PS ops used by tf.estimator.Estimator which should work in +# almost all cases. +STANDARD_PS_OPS = ( + "Variable", "VariableV2", "AutoReloadVariable", "MutableHashTable", + "MutableHashTableV2", "MutableHashTableOfTensors", + "MutableHashTableOfTensorsV2", "MutableDenseHashTable", + "MutableDenseHashTableV2", "VarHandleOp" +) + class _RoundRobinStrategy(object): """Returns the next ps task index for placement in round-robin order. -- GitLab From 41aa3e75ca35c763c23aeedf2409589b7814c7f1 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 2 Mar 2018 12:19:23 -0800 Subject: [PATCH 355/884] GCS: Extract block cache interface from implementation. PiperOrigin-RevId: 187652953 --- tensorflow/core/platform/cloud/BUILD | 20 +- .../core/platform/cloud/file_block_cache.h | 161 +----------- .../core/platform/cloud/gcs_file_system.cc | 15 +- ...block_cache.cc => ram_file_block_cache.cc} | 35 +-- .../platform/cloud/ram_file_block_cache.h | 229 ++++++++++++++++++ ...e_test.cc => ram_file_block_cache_test.cc} | 60 ++--- 6 files changed, 311 insertions(+), 209 deletions(-) rename tensorflow/core/platform/cloud/{file_block_cache.cc => ram_file_block_cache.cc} (89%) create mode 100644 tensorflow/core/platform/cloud/ram_file_block_cache.h rename tensorflow/core/platform/cloud/{file_block_cache_test.cc => ram_file_block_cache_test.cc} (92%) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 9ba25dea4f..0a17a419d3 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -38,13 +38,24 @@ cc_library( cc_library( name = "file_block_cache", - srcs = ["file_block_cache.cc"], hdrs = ["file_block_cache.h"], copts = tf_copts(), visibility = ["//tensorflow:__subpackages__"], deps = ["//tensorflow/core:lib"], ) +cc_library( + name = "ram_file_block_cache", + srcs = ["ram_file_block_cache.cc"], + hdrs = ["ram_file_block_cache.h"], + copts = tf_copts(), + visibility =
["//tensorflow:__subpackages__"], + deps = [ + ":file_block_cache", + "//tensorflow/core:lib", + ], +) + cc_library( name = "gcs_dns_cache", srcs = ["gcs_dns_cache.cc"], @@ -83,6 +94,7 @@ cc_library( ":gcs_throttle", ":google_auth_provider", ":http_request", + ":ram_file_block_cache", ":retrying_file_system", ":retrying_utils", ":time_util", @@ -245,12 +257,12 @@ tf_cc_test( ) tf_cc_test( - name = "file_block_cache_test", + name = "ram_file_block_cache_test", size = "small", - srcs = ["file_block_cache_test.cc"], + srcs = ["ram_file_block_cache_test.cc"], deps = [ - ":file_block_cache", ":now_seconds_env", + ":ram_file_block_cache", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:test", diff --git a/tensorflow/core/platform/cloud/file_block_cache.h b/tensorflow/core/platform/cloud/file_block_cache.h index 5c180e2332..da16788247 100644 --- a/tensorflow/core/platform/cloud/file_block_cache.h +++ b/tensorflow/core/platform/cloud/file_block_cache.h @@ -32,7 +32,7 @@ limitations under the License. namespace tensorflow { -/// \brief An LRU block cache of file contents, keyed by {filename, offset}. +/// \brief A block cache of file contents, keyed by {filename, offset}. /// /// This class should be shared by read-only random access files on a remote /// filesystem (e.g. GCS). @@ -48,27 +48,7 @@ class FileBlockCache { size_t* bytes_transferred)> BlockFetcher; - FileBlockCache(size_t block_size, size_t max_bytes, uint64 max_staleness, - BlockFetcher block_fetcher, Env* env = Env::Default()) - : block_size_(block_size), - max_bytes_(max_bytes), - max_staleness_(max_staleness), - block_fetcher_(block_fetcher), - env_(env) { - if (max_staleness_ > 0) { - pruning_thread_.reset(env_->StartThread(ThreadOptions(), "TF_prune_FBC", - [this] { Prune(); })); - } - } - - ~FileBlockCache() { - if (pruning_thread_) { - stop_pruning_thread_.Notify(); - // Destroying pruning_thread_ will block until Prune() receives the above - // notification and returns. 
- pruning_thread_.reset(); - } - } + virtual ~FileBlockCache() {} /// Read `n` bytes from `filename` starting at `offset` into `out`. This /// method will return: @@ -84,143 +64,22 @@ class FileBlockCache { /// placed in `out`. /// 4) OK otherwise (i.e. the read succeeded, and at least one byte was placed /// in `out`). - Status Read(const string& filename, size_t offset, size_t n, char* buffer, - size_t* bytes_transferred); + virtual Status Read(const string& filename, size_t offset, size_t n, + char* buffer, size_t* bytes_transferred) = 0; /// Remove all cached blocks for `filename`. - void RemoveFile(const string& filename) LOCKS_EXCLUDED(mu_); + virtual void RemoveFile(const string& filename) = 0; /// Remove all cached data. - void Flush() LOCKS_EXCLUDED(mu_); + virtual void Flush() = 0; /// Accessors for cache parameters. - size_t block_size() const { return block_size_; } - size_t max_bytes() const { return max_bytes_; } - uint64 max_staleness() const { return max_staleness_; } + virtual size_t block_size() const = 0; + virtual size_t max_bytes() const = 0; + virtual uint64 max_staleness() const = 0; /// The current size (in bytes) of the cache. - size_t CacheSize() const LOCKS_EXCLUDED(mu_); - - private: - /// The size of the blocks stored in the LRU cache, as well as the size of the - /// reads from the underlying filesystem. - const size_t block_size_; - /// The maximum number of bytes (sum of block sizes) allowed in the LRU cache. - const size_t max_bytes_; - /// The maximum staleness of any block in the LRU cache, in seconds. - const uint64 max_staleness_; - /// The callback to read a block from the underlying filesystem. - const BlockFetcher block_fetcher_; - /// The Env from which we read timestamps. - Env* const env_; // not owned - - /// \brief The key type for the file block cache. - /// - /// The file block cache key is a {filename, offset} pair. - typedef std::pair Key; - - /// \brief The state of a block. 
- /// - /// A block begins in the CREATED stage. The first thread will attempt to read - /// the block from the filesystem, transitioning the state of the block to - /// FETCHING. After completing, if the read was successful the state should - /// be FINISHED. Otherwise the state should be ERROR. A subsequent read can - /// re-fetch the block if the state is ERROR. - enum class FetchState { - CREATED, - FETCHING, - FINISHED, - ERROR, - }; - - /// \brief A block of a file. - /// - /// A file block consists of the block data, the block's current position in - /// the LRU cache, the timestamp (seconds since epoch) at which the block - /// was cached, a coordination lock, and state & condition variables. - /// - /// Thread safety: - /// The iterator and timestamp fields should only be accessed while holding - /// the block-cache-wide mu_ instance variable. The state variable should only - /// be accessed while holding the Block's mu lock. The data vector should only - /// be accessed after state == FINISHED, and it should never be modified. - /// - /// In order to prevent deadlocks, never grab the block-cache-wide mu_ lock - /// AFTER grabbing any block's mu lock. It is safe to grab mu without locking - /// mu_. - struct Block { - /// The block data. - std::vector data; - /// A list iterator pointing to the block's position in the LRU list. - std::list::iterator lru_iterator; - /// A list iterator pointing to the block's position in the LRA list. - std::list::iterator lra_iterator; - /// The timestamp (seconds since epoch) at which the block was cached. - uint64 timestamp; - /// Mutex to guard state variable - mutex mu; - /// The state of the block. - FetchState state GUARDED_BY(mu) = FetchState::CREATED; - /// Wait on cond_var if state is FETCHING. - condition_variable cond_var; - }; - - /// \brief The block map type for the file block cache. - /// - /// The block map is an ordered map from Key to Block. 
- typedef std::map> BlockMap; - - /// Prune the cache by removing files with expired blocks. - void Prune() LOCKS_EXCLUDED(mu_); - - bool BlockNotStale(const std::shared_ptr& block) - EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// Look up a Key in the block cache. - std::shared_ptr Lookup(const Key& key) LOCKS_EXCLUDED(mu_); - - Status MaybeFetch(const Key& key, const std::shared_ptr& block) - LOCKS_EXCLUDED(mu_); - - /// Trim the block cache to make room for another entry. - void Trim() EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// Update the LRU iterator for the block at `key`. - Status UpdateLRU(const Key& key, const std::shared_ptr& block) - LOCKS_EXCLUDED(mu_); - - /// Remove all blocks of a file, with mu_ already held. - void RemoveFile_Locked(const string& filename) EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// Remove the block `entry` from the block map and LRU list, and update the - /// cache size accordingly. - void RemoveBlock(BlockMap::iterator entry) EXCLUSIVE_LOCKS_REQUIRED(mu_); - - /// The cache pruning thread that removes files with expired blocks. - std::unique_ptr pruning_thread_; - - /// Notification for stopping the cache pruning thread. - Notification stop_pruning_thread_; - - /// Guards access to the block map, LRU list, and cached byte count. - mutable mutex mu_; - - /// The block map (map from Key to Block). - BlockMap block_map_ GUARDED_BY(mu_); - - /// The LRU list of block keys. The front of the list identifies the most - /// recently accessed block. - std::list lru_list_ GUARDED_BY(mu_); - - /// The LRA (least recently added) list of block keys. The front of the list - /// identifies the most recently added block. - /// - /// Note: blocks are added to lra_list_ only after they have successfully been - /// fetched from the underlying block store. - std::list lra_list_ GUARDED_BY(mu_); - - /// The combined number of bytes in all of the cached blocks. 
- size_t cache_size_ GUARDED_BY(mu_) = 0; + virtual size_t CacheSize() const = 0; }; } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 01ca0d76ba..84b65cec4f 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -36,6 +36,7 @@ limitations under the License. #include "tensorflow/core/platform/cloud/curl_http_request.h" #include "tensorflow/core/platform/cloud/file_block_cache.h" #include "tensorflow/core/platform/cloud/google_auth_provider.h" +#include "tensorflow/core/platform/cloud/ram_file_block_cache.h" #include "tensorflow/core/platform/cloud/retrying_utils.h" #include "tensorflow/core/platform/cloud/time_util.h" #include "tensorflow/core/platform/env.h" @@ -783,13 +784,13 @@ Status GcsFileSystem::NewRandomAccessFile( // A helper function to build a FileBlockCache for GcsFileSystem. std::unique_ptr GcsFileSystem::MakeFileBlockCache( size_t block_size, size_t max_bytes, uint64 max_staleness) { - std::unique_ptr file_block_cache( - new FileBlockCache(block_size, max_bytes, max_staleness, - [this](const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred) { - return LoadBufferFromGCS(filename, offset, n, buffer, - bytes_transferred); - })); + std::unique_ptr file_block_cache(new RamFileBlockCache( + block_size, max_bytes, max_staleness, + [this](const string& filename, size_t offset, size_t n, char* buffer, + size_t* bytes_transferred) { + return LoadBufferFromGCS(filename, offset, n, buffer, + bytes_transferred); + })); return file_block_cache; } diff --git a/tensorflow/core/platform/cloud/file_block_cache.cc b/tensorflow/core/platform/cloud/ram_file_block_cache.cc similarity index 89% rename from tensorflow/core/platform/cloud/file_block_cache.cc rename to tensorflow/core/platform/cloud/ram_file_block_cache.cc index 6add1142a1..55a5657a50 100644 --- 
a/tensorflow/core/platform/cloud/file_block_cache.cc +++ b/tensorflow/core/platform/cloud/ram_file_block_cache.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/cloud/file_block_cache.h" +#include "tensorflow/core/platform/cloud/ram_file_block_cache.h" #include #include #include "tensorflow/core/lib/gtl/cleanup.h" @@ -21,7 +21,7 @@ limitations under the License. namespace tensorflow { -bool FileBlockCache::BlockNotStale(const std::shared_ptr& block) { +bool RamFileBlockCache::BlockNotStale(const std::shared_ptr& block) { mutex_lock l(block->mu); if (block->state != FetchState::FINISHED) { return true; // No need to check for staleness. @@ -30,7 +30,8 @@ bool FileBlockCache::BlockNotStale(const std::shared_ptr& block) { return env_->NowSeconds() - block->timestamp <= max_staleness_; } -std::shared_ptr FileBlockCache::Lookup(const Key& key) { +std::shared_ptr RamFileBlockCache::Lookup( + const Key& key) { mutex_lock lock(mu_); auto entry = block_map_.find(key); if (entry != block_map_.end()) { @@ -55,15 +56,15 @@ std::shared_ptr FileBlockCache::Lookup(const Key& key) { } // Remove blocks from the cache until we do not exceed our maximum size. -void FileBlockCache::Trim() { +void RamFileBlockCache::Trim() { while (!lru_list_.empty() && cache_size_ > max_bytes_) { RemoveBlock(block_map_.find(lru_list_.back())); } } /// Move the block to the front of the LRU list if it isn't already there. -Status FileBlockCache::UpdateLRU(const Key& key, - const std::shared_ptr& block) { +Status RamFileBlockCache::UpdateLRU(const Key& key, + const std::shared_ptr& block) { mutex_lock lock(mu_); if (block->timestamp == 0) { // The block was evicted from another thread. Allow it to remain evicted. 
@@ -92,8 +93,8 @@ Status FileBlockCache::UpdateLRU(const Key& key, return Status::OK(); } -Status FileBlockCache::MaybeFetch(const Key& key, - const std::shared_ptr& block) { +Status RamFileBlockCache::MaybeFetch(const Key& key, + const std::shared_ptr& block) { bool downloaded_block = false; auto reconcile_state = gtl::MakeCleanup([this, &downloaded_block, &key, &block] { @@ -151,11 +152,11 @@ Status FileBlockCache::MaybeFetch(const Key& key, } } return errors::Internal( - "Control flow should never reach the end of FileBlockCache::Fetch."); + "Control flow should never reach the end of RamFileBlockCache::Fetch."); } -Status FileBlockCache::Read(const string& filename, size_t offset, size_t n, - char* buffer, size_t* bytes_transferred) { +Status RamFileBlockCache::Read(const string& filename, size_t offset, size_t n, + char* buffer, size_t* bytes_transferred) { *bytes_transferred = 0; if (n == 0) { return Status::OK(); @@ -216,12 +217,12 @@ Status FileBlockCache::Read(const string& filename, size_t offset, size_t n, return Status::OK(); } -size_t FileBlockCache::CacheSize() const { +size_t RamFileBlockCache::CacheSize() const { mutex_lock lock(mu_); return cache_size_; } -void FileBlockCache::Prune() { +void RamFileBlockCache::Prune() { while (!WaitForNotificationWithTimeout(&stop_pruning_thread_, 1000000)) { mutex_lock lock(mu_); uint64 now = env_->NowSeconds(); @@ -238,7 +239,7 @@ void FileBlockCache::Prune() { } } -void FileBlockCache::Flush() { +void RamFileBlockCache::Flush() { mutex_lock lock(mu_); block_map_.clear(); lru_list_.clear(); @@ -246,12 +247,12 @@ void FileBlockCache::Flush() { cache_size_ = 0; } -void FileBlockCache::RemoveFile(const string& filename) { +void RamFileBlockCache::RemoveFile(const string& filename) { mutex_lock lock(mu_); RemoveFile_Locked(filename); } -void FileBlockCache::RemoveFile_Locked(const string& filename) { +void RamFileBlockCache::RemoveFile_Locked(const string& filename) { Key begin = std::make_pair(filename, 0); auto it 
= block_map_.lower_bound(begin); while (it != block_map_.end() && it->first.first == filename) { @@ -261,7 +262,7 @@ void FileBlockCache::RemoveFile_Locked(const string& filename) { } } -void FileBlockCache::RemoveBlock(BlockMap::iterator entry) { +void RamFileBlockCache::RemoveBlock(BlockMap::iterator entry) { // This signals that the block is removed, and should not be inadvertently // reinserted into the cache in UpdateLRU. entry->second->timestamp = 0; diff --git a/tensorflow/core/platform/cloud/ram_file_block_cache.h b/tensorflow/core/platform/cloud/ram_file_block_cache.h new file mode 100644 index 0000000000..7fdd7b2e02 --- /dev/null +++ b/tensorflow/core/platform/cloud/ram_file_block_cache.h @@ -0,0 +1,229 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CORE_PLATFORM_CLOUD_RAM_FILE_BLOCK_CACHE_H_ +#define TENSORFLOW_CORE_PLATFORM_CLOUD_RAM_FILE_BLOCK_CACHE_H_ + +#include +#include +#include +#include +#include +#include +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/stringpiece.h" +#include "tensorflow/core/platform/cloud/file_block_cache.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/mutex.h" +#include "tensorflow/core/platform/notification.h" +#include "tensorflow/core/platform/thread_annotations.h" +#include "tensorflow/core/platform/types.h" + +namespace tensorflow { + +/// \brief An LRU block cache of file contents, keyed by {filename, offset}. +/// +/// This class should be shared by read-only random access files on a remote +/// filesystem (e.g. GCS). +class RamFileBlockCache : public FileBlockCache { + public: + /// The callback executed when a block is not found in the cache, and needs to + /// be fetched from the backing filesystem. This callback is provided when the + /// cache is constructed. The returned Status should be OK as long as the + /// read from the remote filesystem succeeded (similar to the semantics of the + /// read(2) system call). + typedef std::function + BlockFetcher; + + RamFileBlockCache(size_t block_size, size_t max_bytes, uint64 max_staleness, + BlockFetcher block_fetcher, Env* env = Env::Default()) + : block_size_(block_size), + max_bytes_(max_bytes), + max_staleness_(max_staleness), + block_fetcher_(block_fetcher), + env_(env) { + if (max_staleness_ > 0) { + pruning_thread_.reset(env_->StartThread(ThreadOptions(), "TF_prune_FBC", + [this] { Prune(); })); + } + } + + ~RamFileBlockCache() override { + if (pruning_thread_) { + stop_pruning_thread_.Notify(); + // Destroying pruning_thread_ will block until Prune() receives the above + // notification and returns. 
+ pruning_thread_.reset(); + } + } + + /// Read `n` bytes from `filename` starting at `offset` into `out`. This + /// method will return: + /// + /// 1) The error from the remote filesystem, if the read from the remote + /// filesystem failed. + /// 2) PRECONDITION_FAILED if the read from the remote filesystem succeeded, + /// but the read returned a partial block, and the LRU cache contained a + /// block at a higher offset (indicating that the partial block should have + /// been a full block). + /// 3) OUT_OF_RANGE if the read from the remote filesystem succeeded, but + /// the file contents do not extend past `offset` and thus nothing was + /// placed in `out`. + /// 4) OK otherwise (i.e. the read succeeded, and at least one byte was placed + /// in `out`). + Status Read(const string& filename, size_t offset, size_t n, char* buffer, + size_t* bytes_transferred) override; + + /// Remove all cached blocks for `filename`. + void RemoveFile(const string& filename) override LOCKS_EXCLUDED(mu_); + + /// Remove all cached data. + void Flush() LOCKS_EXCLUDED(mu_) override; + + /// Accessors for cache parameters. + size_t block_size() const override { return block_size_; } + size_t max_bytes() const override { return max_bytes_; } + uint64 max_staleness() const override { return max_staleness_; } + + /// The current size (in bytes) of the cache. + size_t CacheSize() const override LOCKS_EXCLUDED(mu_); + + private: + /// The size of the blocks stored in the LRU cache, as well as the size of the + /// reads from the underlying filesystem. + const size_t block_size_; + /// The maximum number of bytes (sum of block sizes) allowed in the LRU cache. + const size_t max_bytes_; + /// The maximum staleness of any block in the LRU cache, in seconds. + const uint64 max_staleness_; + /// The callback to read a block from the underlying filesystem. + const BlockFetcher block_fetcher_; + /// The Env from which we read timestamps. 
+ Env* const env_; // not owned + + /// \brief The key type for the file block cache. + /// + /// The file block cache key is a {filename, offset} pair. + typedef std::pair Key; + + /// \brief The state of a block. + /// + /// A block begins in the CREATED stage. The first thread will attempt to read + /// the block from the filesystem, transitioning the state of the block to + /// FETCHING. After completing, if the read was successful the state should + /// be FINISHED. Otherwise the state should be ERROR. A subsequent read can + /// re-fetch the block if the state is ERROR. + enum class FetchState { + CREATED, + FETCHING, + FINISHED, + ERROR, + }; + + /// \brief A block of a file. + /// + /// A file block consists of the block data, the block's current position in + /// the LRU cache, the timestamp (seconds since epoch) at which the block + /// was cached, a coordination lock, and state & condition variables. + /// + /// Thread safety: + /// The iterator and timestamp fields should only be accessed while holding + /// the block-cache-wide mu_ instance variable. The state variable should only + /// be accessed while holding the Block's mu lock. The data vector should only + /// be accessed after state == FINISHED, and it should never be modified. + /// + /// In order to prevent deadlocks, never grab the block-cache-wide mu_ lock + /// AFTER grabbing any block's mu lock. It is safe to grab mu without locking + /// mu_. + struct Block { + /// The block data. + std::vector data; + /// A list iterator pointing to the block's position in the LRU list. + std::list::iterator lru_iterator; + /// A list iterator pointing to the block's position in the LRA list. + std::list::iterator lra_iterator; + /// The timestamp (seconds since epoch) at which the block was cached. + uint64 timestamp; + /// Mutex to guard state variable + mutex mu; + /// The state of the block. + FetchState state GUARDED_BY(mu) = FetchState::CREATED; + /// Wait on cond_var if state is FETCHING. 
+ condition_variable cond_var; + }; + + /// \brief The block map type for the file block cache. + /// + /// The block map is an ordered map from Key to Block. + typedef std::map> BlockMap; + + /// Prune the cache by removing files with expired blocks. + void Prune() LOCKS_EXCLUDED(mu_); + + bool BlockNotStale(const std::shared_ptr& block) + EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// Look up a Key in the block cache. + std::shared_ptr Lookup(const Key& key) LOCKS_EXCLUDED(mu_); + + Status MaybeFetch(const Key& key, const std::shared_ptr& block) + LOCKS_EXCLUDED(mu_); + + /// Trim the block cache to make room for another entry. + void Trim() EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// Update the LRU iterator for the block at `key`. + Status UpdateLRU(const Key& key, const std::shared_ptr& block) + LOCKS_EXCLUDED(mu_); + + /// Remove all blocks of a file, with mu_ already held. + void RemoveFile_Locked(const string& filename) EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// Remove the block `entry` from the block map and LRU list, and update the + /// cache size accordingly. + void RemoveBlock(BlockMap::iterator entry) EXCLUSIVE_LOCKS_REQUIRED(mu_); + + /// The cache pruning thread that removes files with expired blocks. + std::unique_ptr pruning_thread_; + + /// Notification for stopping the cache pruning thread. + Notification stop_pruning_thread_; + + /// Guards access to the block map, LRU list, and cached byte count. + mutable mutex mu_; + + /// The block map (map from Key to Block). + BlockMap block_map_ GUARDED_BY(mu_); + + /// The LRU list of block keys. The front of the list identifies the most + /// recently accessed block. + std::list lru_list_ GUARDED_BY(mu_); + + /// The LRA (least recently added) list of block keys. The front of the list + /// identifies the most recently added block. + /// + /// Note: blocks are added to lra_list_ only after they have successfully been + /// fetched from the underlying block store. 
+ std::list lra_list_ GUARDED_BY(mu_); + + /// The combined number of bytes in all of the cached blocks. + size_t cache_size_ GUARDED_BY(mu_) = 0; +}; + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_PLATFORM_CLOUD_RAM_FILE_BLOCK_CACHE_H_ diff --git a/tensorflow/core/platform/cloud/file_block_cache_test.cc b/tensorflow/core/platform/cloud/ram_file_block_cache_test.cc similarity index 92% rename from tensorflow/core/platform/cloud/file_block_cache_test.cc rename to tensorflow/core/platform/cloud/ram_file_block_cache_test.cc index 596fdbf19e..d555b682a6 100644 --- a/tensorflow/core/platform/cloud/file_block_cache_test.cc +++ b/tensorflow/core/platform/cloud/ram_file_block_cache_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/cloud/file_block_cache.h" +#include "tensorflow/core/platform/cloud/ram_file_block_cache.h" #include #include "tensorflow/core/lib/core/blocking_counter.h" #include "tensorflow/core/lib/core/status_test_util.h" @@ -25,8 +25,8 @@ limitations under the License. namespace tensorflow { namespace { -Status ReadCache(FileBlockCache* cache, const string& filename, size_t offset, - size_t n, std::vector* out) { +Status ReadCache(RamFileBlockCache* cache, const string& filename, + size_t offset, size_t n, std::vector* out) { out->clear(); out->resize(n, 0); size_t bytes_transferred = 0; @@ -37,7 +37,7 @@ Status ReadCache(FileBlockCache* cache, const string& filename, size_t offset, return status; } -TEST(FileBlockCacheTest, PassThrough) { +TEST(RamFileBlockCacheTest, PassThrough) { const string want_filename = "foo/bar"; const size_t want_offset = 42; const size_t want_n = 1024; @@ -54,9 +54,9 @@ TEST(FileBlockCacheTest, PassThrough) { return Status::OK(); }; // If block_size, max_bytes, or both are zero, the cache is a pass-through. 
- FileBlockCache cache1(1, 0, 0, fetcher); - FileBlockCache cache2(0, 1, 0, fetcher); - FileBlockCache cache3(0, 0, 0, fetcher); + RamFileBlockCache cache1(1, 0, 0, fetcher); + RamFileBlockCache cache2(0, 1, 0, fetcher); + RamFileBlockCache cache3(0, 0, 0, fetcher); std::vector out; TF_EXPECT_OK(ReadCache(&cache1, want_filename, want_offset, want_n, &out)); EXPECT_EQ(calls, 1); @@ -66,7 +66,7 @@ TEST(FileBlockCacheTest, PassThrough) { EXPECT_EQ(calls, 3); } -TEST(FileBlockCacheTest, BlockAlignment) { +TEST(RamFileBlockCacheTest, BlockAlignment) { // Initialize a 256-byte buffer. This is the file underlying the reads we'll // do in this test. const size_t size = 256; @@ -89,7 +89,7 @@ TEST(FileBlockCacheTest, BlockAlignment) { for (size_t block_size = 2; block_size <= 4; block_size++) { // Make a cache of N-byte block size (1 block) and verify that reads of // varying offsets and lengths return correct data. - FileBlockCache cache(block_size, block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_size, 0, fetcher); for (size_t offset = 0; offset < 10; offset++) { for (size_t n = block_size - 2; n <= block_size + 2; n++) { std::vector got; @@ -117,7 +117,7 @@ TEST(FileBlockCacheTest, BlockAlignment) { } } -TEST(FileBlockCacheTest, CacheHits) { +TEST(RamFileBlockCacheTest, CacheHits) { const size_t block_size = 16; std::set calls; auto fetcher = [&calls, block_size](const string& filename, size_t offset, @@ -132,7 +132,7 @@ TEST(FileBlockCacheTest, CacheHits) { return Status::OK(); }; const uint32 block_count = 256; - FileBlockCache cache(block_size, block_count * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_count * block_size, 0, fetcher); std::vector out; out.resize(block_count, 0); // The cache has space for `block_count` blocks. 
The loop with i = 0 should @@ -146,7 +146,7 @@ TEST(FileBlockCacheTest, CacheHits) { } } -TEST(FileBlockCacheTest, OutOfRange) { +TEST(RamFileBlockCacheTest, OutOfRange) { // Tests reads of a 24-byte file with block size 16. const size_t block_size = 16; const size_t file_size = 24; @@ -172,7 +172,7 @@ TEST(FileBlockCacheTest, OutOfRange) { *bytes_transferred = bytes_to_copy; return Status::OK(); }; - FileBlockCache cache(block_size, block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_size, 0, fetcher); std::vector out; // Reading the first 16 bytes should be fine. TF_EXPECT_OK(ReadCache(&cache, "", 0, block_size, &out)); @@ -191,7 +191,7 @@ TEST(FileBlockCacheTest, OutOfRange) { EXPECT_EQ(out.size(), file_size - block_size); } -TEST(FileBlockCacheTest, Inconsistent) { +TEST(RamFileBlockCacheTest, Inconsistent) { // Tests the detection of interrupted reads leading to partially filled blocks // where we expected complete blocks. const size_t block_size = 16; @@ -205,7 +205,7 @@ TEST(FileBlockCacheTest, Inconsistent) { *bytes_transferred = 1; return Status::OK(); }; - FileBlockCache cache(block_size, 2 * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, 2 * block_size, 0, fetcher); std::vector out; // Read the second block; this should yield an OK status and a single byte. 
TF_EXPECT_OK(ReadCache(&cache, "", block_size, block_size, &out)); @@ -216,7 +216,7 @@ TEST(FileBlockCacheTest, Inconsistent) { EXPECT_EQ(status.code(), error::INTERNAL); } -TEST(FileBlockCacheTest, LRU) { +TEST(RamFileBlockCacheTest, LRU) { const size_t block_size = 16; std::list calls; auto fetcher = [&calls, block_size](const string& filename, size_t offset, @@ -233,7 +233,7 @@ TEST(FileBlockCacheTest, LRU) { return Status::OK(); }; const uint32 block_count = 2; - FileBlockCache cache(block_size, block_count * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_count * block_size, 0, fetcher); std::vector out; // Read blocks from the cache, and verify the LRU behavior based on the // fetcher calls that the cache makes. @@ -265,7 +265,7 @@ TEST(FileBlockCacheTest, LRU) { TF_EXPECT_OK(ReadCache(&cache, "", 0, 1, &out)); } -TEST(FileBlockCacheTest, MaxStaleness) { +TEST(RamFileBlockCacheTest, MaxStaleness) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -278,7 +278,7 @@ TEST(FileBlockCacheTest, MaxStaleness) { std::unique_ptr env(new NowSecondsEnv); // Create a cache with max staleness of 2 seconds, and verify that it works as // expected. - FileBlockCache cache1(8, 16, 2 /* max staleness */, fetcher, env.get()); + RamFileBlockCache cache1(8, 16, 2 /* max staleness */, fetcher, env.get()); // Execute the first read to load the block. TF_EXPECT_OK(ReadCache(&cache1, "", 0, 1, &out)); EXPECT_EQ(calls, 1); @@ -294,7 +294,7 @@ TEST(FileBlockCacheTest, MaxStaleness) { // as expected. calls = 0; env->SetNowSeconds(0); - FileBlockCache cache2(8, 16, 0 /* max staleness */, fetcher, env.get()); + RamFileBlockCache cache2(8, 16, 0 /* max staleness */, fetcher, env.get()); // Execute the first read to load the block. 
TF_EXPECT_OK(ReadCache(&cache2, "", 0, 1, &out)); EXPECT_EQ(calls, 1); @@ -305,7 +305,7 @@ TEST(FileBlockCacheTest, MaxStaleness) { EXPECT_EQ(calls, 1); } -TEST(FileBlockCacheTest, RemoveFile) { +TEST(RamFileBlockCacheTest, RemoveFile) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -321,7 +321,7 @@ TEST(FileBlockCacheTest, RemoveFile) { }; // This cache has space for 4 blocks; we'll read from two files. const size_t n = 3; - FileBlockCache cache(8, 32, 0, fetcher); + RamFileBlockCache cache(8, 32, 0, fetcher); std::vector out; std::vector a(n, 'a'); std::vector b(n, 'b'); @@ -367,7 +367,7 @@ TEST(FileBlockCacheTest, RemoveFile) { EXPECT_EQ(calls, 6); } -TEST(FileBlockCacheTest, Prune) { +TEST(RamFileBlockCacheTest, Prune) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -381,7 +381,7 @@ TEST(FileBlockCacheTest, Prune) { std::unique_ptr env(new NowSecondsEnv); uint64 now = Env::Default()->NowSeconds(); env->SetNowSeconds(now); - FileBlockCache cache(8, 32, 1 /* max staleness */, fetcher, env.get()); + RamFileBlockCache cache(8, 32, 1 /* max staleness */, fetcher, env.get()); // Read three blocks into the cache, and advance the timestamp by one second // with each read. Start with a block of "a" at the current timestamp `now`. TF_EXPECT_OK(ReadCache(&cache, "a", 0, 1, &out)); @@ -426,7 +426,7 @@ TEST(FileBlockCacheTest, Prune) { EXPECT_EQ(cache.CacheSize(), 0); } -TEST(FileBlockCacheTest, ParallelReads) { +TEST(RamFileBlockCacheTest, ParallelReads) { // This fetcher won't respond until either `callers` threads are calling it // concurrently (at which point it will respond with success to all callers), // or 10 seconds have elapsed (at which point it will respond with an error). 
@@ -444,7 +444,7 @@ TEST(FileBlockCacheTest, ParallelReads) { return Status::OK(); }; const int block_size = 8; - FileBlockCache cache(block_size, 2 * callers * block_size, 0, fetcher); + RamFileBlockCache cache(block_size, 2 * callers * block_size, 0, fetcher); std::vector> threads; for (int i = 0; i < callers; i++) { threads.emplace_back( @@ -461,7 +461,7 @@ TEST(FileBlockCacheTest, ParallelReads) { // executed, or 10 seconds have passed). } -TEST(FileBlockCacheTest, CoalesceConcurrentReads) { +TEST(RamFileBlockCacheTest, CoalesceConcurrentReads) { // Concurrent reads to the same file blocks should be de-duplicated. const size_t block_size = 16; int num_requests = 0; @@ -479,7 +479,7 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) { Env::Default()->SleepForMicroseconds(100000); // 0.1 secs return Status::OK(); }; - FileBlockCache cache(block_size, block_size, 0, fetcher); + RamFileBlockCache cache(block_size, block_size, 0, fetcher); // Fork off thread for parallel read. std::unique_ptr concurrent( Env::Default()->StartThread({}, "concurrent", [&cache, block_size] { @@ -496,7 +496,7 @@ TEST(FileBlockCacheTest, CoalesceConcurrentReads) { EXPECT_EQ(1, num_requests); } -TEST(FileBlockCacheTest, Flush) { +TEST(RamFileBlockCacheTest, Flush) { int calls = 0; auto fetcher = [&calls](const string& filename, size_t offset, size_t n, char* buffer, size_t* bytes_transferred) { @@ -505,7 +505,7 @@ TEST(FileBlockCacheTest, Flush) { *bytes_transferred = n; return Status::OK(); }; - FileBlockCache cache(16, 32, 0, fetcher); + RamFileBlockCache cache(16, 32, 0, fetcher); std::vector out; TF_EXPECT_OK(ReadCache(&cache, "", 0, 16, &out)); TF_EXPECT_OK(ReadCache(&cache, "", 0, 16, &out)); -- GitLab From 45f56944c862a8c67c34efedcee501f365a08aee Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Fri, 2 Mar 2018 12:25:13 -0800 Subject: [PATCH 356/884] FreezeSavedModel support for ResourceVariables. 
PiperOrigin-RevId: 187653676 --- tensorflow/cc/tools/BUILD | 1 + tensorflow/cc/tools/freeze_saved_model.cc | 55 +++- .../cc/tools/freeze_saved_model_test.cc | 268 +++++++++++------- 3 files changed, 211 insertions(+), 113 deletions(-) diff --git a/tensorflow/cc/tools/BUILD b/tensorflow/cc/tools/BUILD index 97f66e79b8..f413a5cc52 100644 --- a/tensorflow/cc/tools/BUILD +++ b/tensorflow/cc/tools/BUILD @@ -32,6 +32,7 @@ tf_cc_test( deps = [ ":freeze_saved_model", "//tensorflow/cc:cc_ops", + "//tensorflow/cc:resource_variable_ops", "//tensorflow/core:core_cpu", "//tensorflow/core:framework_internal", "//tensorflow/core:protos_all_cc", diff --git a/tensorflow/cc/tools/freeze_saved_model.cc b/tensorflow/cc/tools/freeze_saved_model.cc index ddf372cdef..4ddddcb586 100644 --- a/tensorflow/cc/tools/freeze_saved_model.cc +++ b/tensorflow/cc/tools/freeze_saved_model.cc @@ -75,16 +75,13 @@ void GetNodeNameToNodeDefMap( // variable nodes to convert. void GetReachableNodesAndVariables( GraphDef* graph_def, const std::unordered_set& outputs, + const std::unordered_map& name_to_node_map, std::unordered_set* reachable_node_names, std::unordered_set* variable_node_names) { // TODO(suharshs): Add support for ResourceVariables. static const std::unordered_set* kVariableTypes = - new std::unordered_set({"Variable", "VariableV2"}); - // name_to_node_map is needed to get the inputs from the NodeDef corresponding - // the a string node name. These inputs are used when doing our backwards - // traversal. - std::unordered_map name_to_node_map; - GetNodeNameToNodeDefMap(graph_def, &name_to_node_map); + new std::unordered_set({"Variable", "VariableV2", "VarHandleOp"}); + std::queue nodes_to_visit; for (const string& tensor_name : outputs) { // We need to strip off the tensor part to get the node name. 
@@ -99,7 +96,7 @@ void GetReachableNodesAndVariables( continue; } reachable_node_names->insert(node_name); - NodeDef* node = name_to_node_map[node_name]; + NodeDef* node = name_to_node_map.at(node_name); if (kVariableTypes->find(node->op()) != kVariableTypes->end()) { variable_node_names->insert(node->name()); } @@ -111,7 +108,9 @@ void GetReachableNodesAndVariables( // Gets a map from variable name to variable value. Status GetVariableNameToTensorMap( - Session* session, std::unordered_set variable_names_set, + Session* session, + const std::unordered_map& name_to_node_map, + std::unordered_set variable_names_set, std::unordered_map* variable_name_to_value_map) { if (variable_names_set.empty()) { return Status::OK(); @@ -120,8 +119,14 @@ Status GetVariableNameToTensorMap( std::vector tensor_names; for (const string& node_name : variable_names_set) { variable_names.push_back(node_name); - // We need to run tensors, so append ":0". - tensor_names.push_back(node_name + ":0"); + NodeDef* node_def = name_to_node_map.at(node_name); + if (node_def->op() == "VarHandleOp") { + // If this is a resource variable, we have to run the corresponding + // ReadVariableOp. + tensor_names.push_back(node_name + "/Read/ReadVariableOp:0"); + } else { + tensor_names.push_back(node_name + ":0"); + } } std::vector outputs; TF_RETURN_IF_ERROR( @@ -143,6 +148,15 @@ void ConvertVariableToConstant(const NodeDef& variable_node, (*const_node->mutable_attr())["value"].mutable_tensor()); } +// Converts a ReadVariableOp NodeDef to an Identity NodeDef. +void ConvertReadVariableOpToIdentity(const NodeDef& node, + NodeDef* identity_node) { + identity_node->set_name(node.name()); + identity_node->set_op("Identity"); + (*identity_node->mutable_attr())["T"] = node.attr().at("dtype"); + identity_node->add_input(node.input(0)); +} + // Freezes the subgraph of all nodes needed by `outputs`. 
Status FreezeGraphDef(const SavedModelBundle& saved_model_bundle, const std::unordered_set& outputs, @@ -155,14 +169,19 @@ Status FreezeGraphDef(const SavedModelBundle& saved_model_bundle, if (graph_def.node_size() == 0) { return Status::OK(); } + // name_to_node_map is needed to get the inputs from the NodeDef corresponding + // the a string node name. These inputs are used when doing our backwards + // traversal. + std::unordered_map name_to_node_map; + GetNodeNameToNodeDefMap(&graph_def, &name_to_node_map); std::unordered_set reachable_node_names; std::unordered_set variable_node_names; - GetReachableNodesAndVariables(&graph_def, outputs, &reachable_node_names, - &variable_node_names); + GetReachableNodesAndVariables(&graph_def, outputs, name_to_node_map, + &reachable_node_names, &variable_node_names); std::unordered_map variable_to_value_map; - TF_RETURN_IF_ERROR( - GetVariableNameToTensorMap(saved_model_bundle.session.get(), - variable_node_names, &variable_to_value_map)); + TF_RETURN_IF_ERROR(GetVariableNameToTensorMap( + saved_model_bundle.session.get(), name_to_node_map, variable_node_names, + &variable_to_value_map)); // We copy the nodes in the same order they were in the original graph_def. for (const NodeDef& node : graph_def.node()) { if (reachable_node_names.find(node.name()) == reachable_node_names.end()) { @@ -171,6 +190,12 @@ Status FreezeGraphDef(const SavedModelBundle& saved_model_bundle, if (variable_node_names.find(node.name()) != variable_node_names.end()) { ConvertVariableToConstant(node, variable_to_value_map[node.name()], frozen_graph_def->add_node()); + } else if (node.op() == "ReadVariableOp" && + variable_node_names.find(node.input(0)) != + variable_node_names.end()) { + // If the node is a ReadVariableOp, its input VarHandleOp will be + // converted to a Constant, so we will need to convert it to an Identity. 
+ ConvertReadVariableOpToIdentity(node, frozen_graph_def->add_node()); } else { // If the node isn't a variable, just copy the node as-is. *frozen_graph_def->add_node() = node; diff --git a/tensorflow/cc/tools/freeze_saved_model_test.cc b/tensorflow/cc/tools/freeze_saved_model_test.cc index 52a81a5028..cd35fd3b95 100644 --- a/tensorflow/cc/tools/freeze_saved_model_test.cc +++ b/tensorflow/cc/tools/freeze_saved_model_test.cc @@ -15,6 +15,7 @@ limitations under the License. #include "tensorflow/cc/tools/freeze_saved_model.h" +#include "tensorflow/cc/ops/resource_variable_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/graph.pb.h" @@ -113,6 +114,160 @@ class FreezeTest : public ::testing::Test { test::ExpectTensorEqual(unfrozen_outputs[0], frozen_outputs[0]); } + + void TestFreezeGraphWithoutDependentVariables(bool use_resource) { + // Test freezing a graph with variables that are not needed by the outputs + // in the SignatureDef. The resulting graph shouldn't be frozen, but + // non-dependent nodes should be pruned. + SavedModelBundle saved_model_bundle; + GraphDef graph_def; + Scope scope = Scope::NewRootScope(); + Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); + Output b = ops::Const(scope.WithOpName("b"), 10.0f, {}); + Output c = ops::Mul(scope.WithOpName("c"), a, b); + if (use_resource) { + Output var = + ops::VarHandleOp(scope.WithOpName("var"), DataType::DT_FLOAT, {}); + Output read_var = ops::ReadVariableOp( + scope.WithOpName("var/Read/ReadVariableOp"), var, DataType::DT_FLOAT); + auto assign = ops::AssignVariableOp(scope.WithOpName("assign"), var, a); + } else { + Output var = + ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); + Output assign = ops::Assign(scope.WithOpName("assign"), var, a); + } + + TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); + // "c" isnt dependent on the variable, so nothing should be frozen. 
+ TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( + graph_def, {"c:0"}, "assign", &saved_model_bundle)); + + GraphDef frozen_graph_def; + std::unordered_set inputs; + std::unordered_set outputs; + TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, + &inputs, &outputs)); + + GraphDef expected_graph_def; + Scope expected_scope = Scope::NewRootScope(); + Output expected_a = ops::Const(expected_scope.WithOpName("a"), 10.0f, {}); + Output expected_b = ops::Const(expected_scope.WithOpName("b"), 10.0f, {}); + Output expected_c = + ops::Mul(expected_scope.WithOpName("c"), expected_a, expected_b); + TF_ASSERT_OK(expected_scope.ToGraphDef(&expected_graph_def)); + + GraphDefEqual(frozen_graph_def, expected_graph_def); + + RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), + frozen_graph_def, "c:0"); + } + + void TestFreezeGraphWithDependentVariables(bool use_resource) { + // Test freezing a graph with variables that are needed by outputs in the + // SignatureDef. The variables should be frozen. + SavedModelBundle saved_model_bundle; + GraphDef graph_def; + Scope scope = Scope::NewRootScope(); + Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); + Output read_var; + if (use_resource) { + Output var = + ops::VarHandleOp(scope.WithOpName("var"), DataType::DT_FLOAT, {}); + read_var = ops::ReadVariableOp( + scope.WithOpName("var/Read/ReadVariableOp"), var, DataType::DT_FLOAT); + auto assign = ops::AssignVariableOp(scope.WithOpName("assign"), var, a); + } else { + Output read_var = + ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); + Output assign = ops::Assign(scope.WithOpName("assign"), read_var, a); + } + Output c = ops::Mul(scope.WithOpName("c"), a, read_var); + TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); + // "c" isnt dependent on the variable, so nothing should be frozen. 
+ TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( + graph_def, {"c:0"}, "assign", &saved_model_bundle)); + + GraphDef frozen_graph_def; + std::unordered_set inputs; + std::unordered_set outputs; + TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, + &inputs, &outputs)); + + // If using normal variables there should be 3 nodes in the resulting + // graph_def. If using resource variables there should be 4 nodes in the + // resulting graph_def. + // In both cases, none should be variables. + size_t expected_nodes = use_resource ? 4 : 3; + EXPECT_EQ(frozen_graph_def.node_size(), expected_nodes); + for (const NodeDef& node : frozen_graph_def.node()) { + EXPECT_NE(node.op(), "Variable") << node.name(); + EXPECT_NE(node.op(), "VariableV2") << node.name(); + EXPECT_NE(node.op(), "VarHandleOp") << node.name(); + EXPECT_NE(node.op(), "ReadVariableOp") << node.name(); + } + + RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), + frozen_graph_def, "c:0"); + } + + void TestFreezeGraphWithAndWithoutDependentVariables(bool use_resource) { + // Test freezing a graph with some variables that are needed and not needed + // by + // the outputs in the SignatureDef. The resulting graph should only freeze + // dependent variables. 
+ SavedModelBundle saved_model_bundle; + GraphDef graph_def; + Scope scope = Scope::NewRootScope(); + Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); + Output read_var; + + if (use_resource) { + Output var = + ops::VarHandleOp(scope.WithOpName("var"), DataType::DT_FLOAT, {}); + read_var = ops::ReadVariableOp( + scope.WithOpName("var/Read/ReadVariableOp"), var, DataType::DT_FLOAT); + auto assign = ops::AssignVariableOp(scope.WithOpName("assign"), var, a); + Output var_1 = + ops::VarHandleOp(scope.WithOpName("var_1"), DataType::DT_FLOAT, {}); + Output read_var_1 = + ops::ReadVariableOp(scope.WithOpName("var_1/Read/ReadVariableOp"), + var, DataType::DT_FLOAT); + auto assign_1 = + ops::AssignVariableOp(scope.WithOpName("assign_1"), var_1, a); + } else { + read_var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); + Output assign = ops::Assign(scope.WithOpName("assign"), read_var, a); + Output var_1 = + ops::Variable(scope.WithOpName("var_1"), {}, DataType::DT_FLOAT); + Output assign_1 = ops::Assign(scope.WithOpName("assign_1"), var_1, a); + } + + Output c = ops::Mul(scope.WithOpName("c"), a, read_var); + TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); + // "c" isnt dependent on the variable, so nothing should be frozen. + TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( + graph_def, {"c:0"}, "assign", &saved_model_bundle)); + + GraphDef frozen_graph_def; + std::unordered_set inputs; + std::unordered_set outputs; + TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, + &inputs, &outputs)); + + // There should be 3 nodes in the resulting graph_def, and none should be + // variables. + size_t expected_nodes = use_resource ? 
4 : 3; + EXPECT_EQ(frozen_graph_def.node_size(), expected_nodes); + for (const NodeDef& node : frozen_graph_def.node()) { + EXPECT_NE(node.op(), "Variable") << node.name(); + EXPECT_NE(node.op(), "VariableV2") << node.name(); + EXPECT_NE(node.op(), "VarHandleOp") << node.name(); + EXPECT_NE(node.op(), "ReadVariableOp") << node.name(); + } + + RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), + frozen_graph_def, "c:0"); + } }; TEST_F(FreezeTest, InputsAndOutputsSingleSignatureDef) { @@ -196,111 +351,28 @@ TEST_F(FreezeTest, GraphDefWithNoVariables) { GraphDefEqual(frozen_graph_def, graph_def); } -TEST_F(FreezeTest, GraphDefWithVariablesNotNeededByOutputs) { - // Test freezing a graph with variables that are not needed by the outputs in - // the SignatureDef. The resulting graph shouldn't be frozen, but - // non-dependent nodes should be pruned. - SavedModelBundle saved_model_bundle; - GraphDef graph_def; - Scope scope = Scope::NewRootScope(); - Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); - Output b = ops::Const(scope.WithOpName("b"), 10.0f, {}); - Output c = ops::Mul(scope.WithOpName("c"), a, b); - Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); - Output assign = ops::Assign(scope.WithOpName("assign"), var, a); - TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); - // "c" isnt dependent on the variable, so nothing should be frozen. 
- TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( - graph_def, {"c:0"}, assign.name(), &saved_model_bundle)); - - GraphDef frozen_graph_def; - std::unordered_set inputs; - std::unordered_set outputs; - TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs, - &outputs)); - - GraphDef expected_graph_def; - Scope expected_scope = Scope::NewRootScope(); - Output expected_a = ops::Const(expected_scope.WithOpName("a"), 10.0f, {}); - Output expected_b = ops::Const(expected_scope.WithOpName("b"), 10.0f, {}); - Output expected_c = - ops::Mul(expected_scope.WithOpName("c"), expected_a, expected_b); - TF_ASSERT_OK(expected_scope.ToGraphDef(&expected_graph_def)); - - GraphDefEqual(frozen_graph_def, expected_graph_def); - - RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), - frozen_graph_def, "c:0"); +TEST_F(FreezeTest, GraphDefWithoutDependentVariables) { + TestFreezeGraphWithoutDependentVariables(false); } -TEST_F(FreezeTest, GraphDefWithVariablesNeededByOutputs) { - // Test freezing a graph with variables that are needed by outputs in the - // SignatureDef. The variables should be frozen. - SavedModelBundle saved_model_bundle; - GraphDef graph_def; - Scope scope = Scope::NewRootScope(); - Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); - Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); - Output c = ops::Mul(scope.WithOpName("c"), a, var); - Output assign = ops::Assign(scope.WithOpName("assign"), var, a); - TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); - // "c" isnt dependent on the variable, so nothing should be frozen. 
- TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( - graph_def, {"c:0"}, assign.name(), &saved_model_bundle)); - - GraphDef frozen_graph_def; - std::unordered_set inputs; - std::unordered_set outputs; - TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs, - &outputs)); - - // There should be 3 nodes in the resulting graph_def, and none should be - // variables. - EXPECT_EQ(frozen_graph_def.node_size(), 3); - for (const NodeDef& node : frozen_graph_def.node()) { - EXPECT_NE(node.op(), "Variable") << node.name(); - EXPECT_NE(node.op(), "VariableV2") << node.name(); - } - - RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), - frozen_graph_def, "c:0"); +TEST_F(FreezeTest, GraphDefWithoutDependentResourceVariables) { + TestFreezeGraphWithoutDependentVariables(true); } -TEST_F(FreezeTest, GraphDefWithVariablesNeededAndNotNeededByOutputs) { - // Test freezing a graph with some variables that are needed and not needed by - // the outputs in the SignatureDef. The resulting graph should only freeze - // dependent variables. - SavedModelBundle saved_model_bundle; - GraphDef graph_def; - Scope scope = Scope::NewRootScope(); - Output a = ops::Const(scope.WithOpName("a"), 10.0f, {}); - Output var = ops::Variable(scope.WithOpName("var"), {}, DataType::DT_FLOAT); - Output c = ops::Mul(scope.WithOpName("c"), a, var); - Output assign = ops::Assign(scope.WithOpName("assign"), var, a); - Output var_1 = - ops::Variable(scope.WithOpName("var_1"), {}, DataType::DT_FLOAT); - Output assign_1 = ops::Assign(scope.WithOpName("assign_1"), var, a); - TF_ASSERT_OK(scope.ToGraphDef(&graph_def)); - // "c" isnt dependent on the variable, so nothing should be frozen. 
- TF_ASSERT_OK(AddGraphDefWithOutputsToSavedModelBundle( - graph_def, {"c:0"}, assign.name(), &saved_model_bundle)); +TEST_F(FreezeTest, GraphDefWithDependentVariables) { + TestFreezeGraphWithDependentVariables(false); +} - GraphDef frozen_graph_def; - std::unordered_set inputs; - std::unordered_set outputs; - TF_ASSERT_OK(FreezeSavedModel(saved_model_bundle, &frozen_graph_def, &inputs, - &outputs)); +TEST_F(FreezeTest, GraphDefWithDependentResourceVariables) { + TestFreezeGraphWithDependentVariables(true); +} - // There should be 3 nodes in the resulting graph_def, and none should be - // variables. - EXPECT_EQ(frozen_graph_def.node_size(), 3); - for (const NodeDef& node : frozen_graph_def.node()) { - EXPECT_NE(node.op(), "Variable") << node.name(); - EXPECT_NE(node.op(), "VariableV2") << node.name(); - } +TEST_F(FreezeTest, GraphDefWithAndWithoutDependentVariables) { + TestFreezeGraphWithAndWithoutDependentVariables(false); +} - RunAndCompareFrozenAndUnfrozenGraphs(saved_model_bundle.session.get(), - frozen_graph_def, "c:0"); +TEST_F(FreezeTest, GraphDefWithAndWithoutDependentResourceVariables) { + TestFreezeGraphWithAndWithoutDependentVariables(true); } } // namespace -- GitLab From faab0cf5407dcf11967371b51b97f8eef6964a35 Mon Sep 17 00:00:00 2001 From: Guangda Lai Date: Fri, 2 Mar 2018 12:33:40 -0800 Subject: [PATCH 357/884] Exclude flaky tests for cuda_on_cpu. 
PiperOrigin-RevId: 187654568 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 1 + tensorflow/contrib/eager/python/examples/spinn/BUILD | 5 ++++- tensorflow/python/BUILD | 6 +++++- tensorflow/python/feature_column/BUILD | 5 ++++- 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 82cd276ce8..10cb05ece1 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -168,6 +168,7 @@ py_test( srcs = ["interleave_dataset_op_test.py"], srcs_version = "PY2AND3", tags = [ + "no_cuda_on_cpu_tap", "no_oss", "no_pip", ], diff --git a/tensorflow/contrib/eager/python/examples/spinn/BUILD b/tensorflow/contrib/eager/python/examples/spinn/BUILD index a1f8a759e2..98d01ad1d5 100644 --- a/tensorflow/contrib/eager/python/examples/spinn/BUILD +++ b/tensorflow/contrib/eager/python/examples/spinn/BUILD @@ -38,5 +38,8 @@ cuda_py_test( "//tensorflow/python:client_testlib", "//tensorflow/python:framework_test_lib", ], - tags = ["no_pip"], # because spinn.py is under third_party/. + tags = [ + "no_cuda_on_cpu_tap", + "no_pip", # because spinn.py is under third_party/. 
+ ], ) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index cb54cebf0f..f282abb0a5 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3945,7 +3945,10 @@ py_test( size = "small", srcs = ["training/checkpoint_utils_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], + tags = [ + "no_cuda_on_cpu_tap", + "no_windows", + ], deps = [ ":client", ":client_testlib", @@ -4739,6 +4742,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "grappler", + "no_cuda_on_cpu_tap", "no_pip", ], deps = [ diff --git a/tensorflow/python/feature_column/BUILD b/tensorflow/python/feature_column/BUILD index a758f8a4fc..238a90b67d 100644 --- a/tensorflow/python/feature_column/BUILD +++ b/tensorflow/python/feature_column/BUILD @@ -74,7 +74,10 @@ py_test( srcs = ["feature_column_test.py"], data = [":vocabulary_testdata"], srcs_version = "PY2AND3", - tags = ["no_pip"], + tags = [ + "no_cuda_on_cpu_tap", + "no_pip", + ], deps = [ ":feature_column", ":feature_column_py", -- GitLab From 85daa2e4553e49ca6ab2fbb412b18c23b5399524 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 12:43:22 -0800 Subject: [PATCH 358/884] TFTS: Switch more variables to ResourceVariables to avoid race conditions The LSTM example test was a bit flaky. 
PiperOrigin-RevId: 187655714 --- .../contrib/timeseries/python/timeseries/head.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py index f0330bfbbd..8731b10923 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/head.py +++ b/tensorflow/contrib/timeseries/python/timeseries/head.py @@ -73,7 +73,10 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _train_ops(self, features): """Add training ops to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope( + "model", + # Use ResourceVariables to avoid race conditions. + use_resource=True): model_outputs = self.state_manager.define_loss( self.model, features, estimator_lib.ModeKeys.TRAIN) @@ -107,7 +110,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _evaluate_ops(self, features): """Add ops for evaluation (aka filtering) to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope("model", use_resource=True): model_outputs = self.state_manager.define_loss( self.model, features, estimator_lib.ModeKeys.EVAL) metrics = {} @@ -128,7 +131,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _predict_ops(self, features): """Add ops for prediction to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope("model", use_resource=True): prediction = self.model.predict(features=features) prediction[feature_keys.PredictionResults.TIMES] = features[ feature_keys.PredictionFeatures.TIMES] @@ -137,7 +140,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def _serving_ops(self, features): """Add ops for serving to the graph.""" - with variable_scope.variable_scope("model"): + with variable_scope.variable_scope("model", 
use_resource=True): prediction_outputs = self.model.predict(features=features) with variable_scope.variable_scope("model", reuse=True): filtering_outputs = self.state_manager.define_loss( -- GitLab From a5f103a8bf6fb3a0822976cec363943e37b96dfc Mon Sep 17 00:00:00 2001 From: Jie Date: Fri, 2 Mar 2018 12:56:58 -0800 Subject: [PATCH 359/884] [removing converter type check] removing type check, since fp16 conversion will break the type consistency between TF & TRT More type check should be removed for now (and add back once TRT fp16 is fixed) --- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index a36851a336..fe36c14527 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -878,10 +878,8 @@ tensorflow::Status BinaryTensorOpWeight( // Check type consistency auto dtype = TFAttrs(node_def).get("T"); - CHECK_EQ_TYPE(tensor->getType(), dtype); // Cast to int for error messages nvinfer1::DataType ttype; TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); - CHECK_EQ_TYPE(ttype, dtype); // Cast to int for error message // Check scale mode auto dims_w = weights.shape_; -- GitLab From e6ee32508264c6562d8a2ed19ca3187e8ac2e2e0 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 2 Mar 2018 12:59:28 -0800 Subject: [PATCH 360/884] Fixes for PR --- .../contrib/tensorrt/convert/convert_nodes.cc | 1 - .../contrib/tensorrt/python/trt_convert.py | 32 +++++++++---------- .../contrib/tensorrt/test/test_tftrt.py | 6 ++-- 3 files changed, 19 insertions(+), 20 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index a36851a336..a7287e4af4 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2067,7 
+2067,6 @@ void Converter::register_op_converters() { // This could be really handled as ConvertBinary op_registry_["BiasAdd"] = ConvertScale; op_registry_["Const"] = ConvertConst; - // op_registry_["MatMul"] = ConvertFullyConnected; // not used in vgg // TODO(ben,jie): this is a temp hack. op_registry_["Identity"] = ConvertIdentity; // Identity should be removed diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 071f09d37b..d1f9f8acb9 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -23,7 +23,7 @@ import six as _six from tensorflow.core.framework import graph_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl -from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert,calib_convert +from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert, calib_convert from tensorflow.python.util import compat import tensorflow as tf from tensorflow.python.grappler import tf_optimizer @@ -32,9 +32,6 @@ from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops -from tensorflow.python.framework import ops - - # TODO(skama): get outputs from session when implemented as c++ # optimization pass def create_inference_graph(input_graph_def, @@ -58,13 +55,14 @@ def create_inference_graph(input_graph_def, Raises: RuntimeError: if the returned status message is malformed. """ - supported_precision_modes={"FP32":0, - "FP16":1, - "INT8":2} + supported_precision_modes = {"FP32": 0, + "FP16": 1, + "INT8": 2} if precision_mode.upper() not in supported_precision_modes: raise ValueError(("precision mode '{}' is not supported." 
- "It should be one of {}").format(precision_mode,"{'FP32','FP16','INT8'}")) - mode=supported_precision_modes[precision_mode.upper()] + "It should be one of {}").format(precision_mode, + "{'FP32', 'FP16', 'INT8'}")) + mode = supported_precision_modes[precision_mode.upper()] def py2bytes(inp): return inp @@ -99,7 +97,7 @@ def create_inference_graph(input_graph_def, # pair or strings where first one is encoded status and the second # one is the transformed graphs protobuf string. out = trt_convert(input_graph_def_str, out_names, max_batch_size, - max_workspace_size_bytes,mode,minimum_segment_size) + max_workspace_size_bytes, mode, minimum_segment_size) status = to_string(out[0]) output_graph_def_string = out[1] del input_graph_def_str # Save some memory @@ -119,6 +117,8 @@ def create_inference_graph(input_graph_def, return output_graph_def def calib_graph_to_infer_graph(calibration_graph_def): + """Convert an existing calibration graph containing calibration data + to inference graph""" def py2bytes(inp): return inp @@ -132,21 +132,19 @@ def calib_graph_to_infer_graph(calibration_graph_def): return inp.decode("utf-8") if _six.PY2: - to_bytes = py2bytes to_string = py2string else: - to_bytes = py3bytes to_string = py3string - graph_str=calibration_graph_def.SerializeToString() - out=calib_convert(graph_str) - status=to_string(out[0]) + graph_str = calibration_graph_def.SerializeToString() + out = calib_convert(graph_str) + status = to_string(out[0]) output_graph_def_string = out[1] del graph_str #save some memory if len(status) < 2: - raise _impl.UnknownError(None,None,status) + raise _impl.UnknownError(None, None, status) if status[:2] != "OK": - msg=status.split(";") + msg = status.split(";") if len(msg) == 1: raise RuntimeError("Status message is malformed {}".format(status)) raise _impl._make_specific_exception(None,None,";".join(msg[1:]), int(msg[0])) diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py 
index cfa18ab187..385a9f72af 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -89,7 +89,9 @@ def run_calibration(gdef, dumm_inp): out = out.outputs[0] with csess.Session( config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: - for _ in range(30): + # run over real calibration data here, we are mimicking a + # calibration set of 30 different batches. Use as much calibration data as you want + for _ in range(30): val = sess.run(out, {inp: dumm_inp}) return val @@ -122,7 +124,7 @@ if "__main__" in __name__: outputs=["output"], max_batch_size=inp_dims[0], max_workspace_size_bytes=1 << 25, - precision_mode="INt8", # TRT Engine precision "FP32","FP16" or "INT8" + precision_mode="INT8", # TRT Engine precision "FP32","FP16" or "INT8" minimum_segment_size=2 # minimum number of nodes in an engine ) o4 = run_graph(fp16_graph, dummy_input) -- GitLab From e0fac18b63e80963d42cb1e39243d84ae86ae01a Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 2 Mar 2018 12:58:08 -0800 Subject: [PATCH 361/884] Automated g4 rollback of changelist 187582263 PiperOrigin-RevId: 187657654 --- .../grappler/optimizers/memory_optimizer.cc | 23 +++++++------ .../grappler/optimizers/memory_optimizer.h | 10 +++--- .../grappler/optimizers/meta_optimizer.cc | 4 +-- .../core/protobuf/rewriter_config.proto | 19 ++++++----- .../python/grappler/memory_optimizer_test.py | 32 ++++++++++++++++++- 5 files changed, 58 insertions(+), 30 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.cc b/tensorflow/core/grappler/optimizers/memory_optimizer.cc index 694139fa50..27e9d2c78d 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.cc @@ -413,7 +413,7 @@ void RecomputeSubgraph( } void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix, + const string& recomputation_targets_name_scope, GraphDef* graph, const GrapplerItem& item) { if (optimization_level != RewriterConfig::RECOMPUTATION_HEURISTICS && optimization_level != RewriterConfig::HEURISTICS && @@ -438,15 +438,14 @@ void RecomputationRewritingPass(RewriterConfig::MemOptType optimization_level, feeds.insert(NodeName(feed.first)); } std::function is_target = - [&recomputation_targets_name_prefix](const NodeDef& node) { - // Nodes whose inputs we may want to recompute. Typically targets will - // be gradients (recomputation_targets_name_prefix="gradients/"), - // although the prefix is configurable since gradients may be created - // in a name scope. - // TODO(allenl): Use a static schedule - // (grappler::EstimateEarliestExecutionTimes) to recompute only nodes - // whose outputs will sit around for a while. - return node.name().find(recomputation_targets_name_prefix) == 0; + [&recomputation_targets_name_scope](const NodeDef& node) { + // Nodes whose inputs we may want to recompute. 
This matches node names + // that contain recomputation_targets_name_scope as a name scope, + // meaning it either begins with or contains the name scope. + // Defaults to "gradients/" which will match any node names that begins + // with "gradients/" or contains "/gradients/". + return node.name().find(recomputation_targets_name_scope) == 0 || + node.name().find("/" + recomputation_targets_name_scope) != -1; }; if (optimization_level == RewriterConfig::RECOMPUTATION_HEURISTICS || @@ -1225,8 +1224,8 @@ Status MemoryOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, *optimized_graph = item.graph; RecomputationRewritingPass(optimization_level_, - recomputation_targets_name_prefix_, - optimized_graph, item); + recomputation_targets_name_scope_, optimized_graph, + item); GrapplerItem optimized_item(item, std::move(*optimized_graph)); std::unordered_set skip_list; diff --git a/tensorflow/core/grappler/optimizers/memory_optimizer.h b/tensorflow/core/grappler/optimizers/memory_optimizer.h index c3dd0c45c6..5c555a2674 100644 --- a/tensorflow/core/grappler/optimizers/memory_optimizer.h +++ b/tensorflow/core/grappler/optimizers/memory_optimizer.h @@ -27,14 +27,14 @@ class MemoryOptimizer : public GraphOptimizer { public: // optimization_level: Controls the level of autonomy for the memory // optimizer. See RewriterConfig::memory_optimization. - // recomputation_targets_name_prefix: Name prefix for potential outputs of + // recomputation_targets_name_scope: Name scope for potential outputs of // recomputations. See - // RewriterConfig::memory_optimizer_target_node_name_prefix. + // RewriterConfig::memory_optimizer_target_node_name_scope. 
explicit MemoryOptimizer( RewriterConfig::MemOptType optimization_level, - const string& recomputation_targets_name_prefix = "gradients/") + const string& recomputation_targets_name_scope = "gradients/") : optimization_level_(optimization_level), - recomputation_targets_name_prefix_(recomputation_targets_name_prefix) {} + recomputation_targets_name_scope_(recomputation_targets_name_scope) {} ~MemoryOptimizer() override {} string name() const override { return "memory_optimizer"; }; @@ -47,7 +47,7 @@ class MemoryOptimizer : public GraphOptimizer { private: RewriterConfig::MemOptType optimization_level_; - string recomputation_targets_name_prefix_; + string recomputation_targets_name_scope_; }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 72d7b94dc8..fff1e354f4 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -119,7 +119,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, std::unique_ptr(new LayoutOptimizer())); } if (cfg_.memory_optimization() != RewriterConfig::NO_MEM_OPT) { - if (cfg_.memory_optimizer_target_node_name_prefix().empty()) { + if (cfg_.memory_optimizer_target_node_name_scope().empty()) { optimizers.push_back(std::unique_ptr( // Use the default target node name prefix "gradients/" new MemoryOptimizer(cfg_.memory_optimization()))); @@ -127,7 +127,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back( std::unique_ptr(new MemoryOptimizer( cfg_.memory_optimization(), - cfg_.memory_optimizer_target_node_name_prefix()))); + cfg_.memory_optimizer_target_node_name_scope()))); } } if (cfg_.auto_parallel().enable()) { diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 9ebf217811..0ccf2149f2 100644 --- 
a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -78,16 +78,15 @@ message RewriterConfig { // effect on manually requested memory optimization passes in the optimizers // field. MemOptType memory_optimization = 4; - // The prefix for nodes which are valid outputs of recomputations. Inputs to - // nodes with this name prefix may be recomputed (subject either to manual - // annotation of those input nodes or to manual annotation and heuristics - // depending on memory_optimization), but the prefixed nodes themselves will - // not be recomputed. Typically this will be "gradients/", indicating that - // activations from the forward pass of a graph may be recomputed as inputs to - // gradients, but may be adjusted if gradients are inside a name scope or if - // inputs to non-gradients should be recomputed. Defaults to "gradients/" if - // empty or not set. - string memory_optimizer_target_node_name_prefix = 6; + // A node name scope for node names which are valid outputs of recomputations. + // Inputs to nodes that match this scope may be recomputed (subject either to + // manual annotation of those input nodes or to manual annotation and + // heuristics depending on memory_optimization), but the nodes themselves will + // not be recomputed. This matches any sub-scopes as well, meaning the scope + // can appear not just as a top-level scope. For example, if the value is + // "gradients/", the default, it will match node name "gradients/foo", + // "foo/gradients/bar", but not "foo_gradients/" + string memory_optimizer_target_node_name_scope = 6; // Configures AutoParallel optimization passes either through the // meta-optimizer or when manually specified through the optimizers field.
diff --git a/tensorflow/python/grappler/memory_optimizer_test.py b/tensorflow/python/grappler/memory_optimizer_test.py index 948911f099..4df959ce04 100644 --- a/tensorflow/python/grappler/memory_optimizer_test.py +++ b/tensorflow/python/grappler/memory_optimizer_test.py @@ -162,7 +162,8 @@ class MemoryOptimizerRecomputeTest(test.TestCase): arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, memory_optimization=rewriter_config_pb2.RewriterConfig. RECOMPUTATION_HEURISTICS, - memory_optimizer_target_node_name_prefix='optimizer/gradients/'), + # Checks that name scope "gradients/" also matches sub-scopes. + memory_optimizer_target_node_name_scope='gradients/'), original_metagraph) self.assertGreater( len(rewritten_graph_def.node), @@ -176,6 +177,35 @@ class MemoryOptimizerRecomputeTest(test.TestCase): len([node for node in rewritten_graph_def.node if 'Recomputed/' in node.name])) + def testRewritingNameScopedGradientNamesScope(self): + """Tests that rewriting is skipped when no node matches the name scope.""" + (original_metagraph, _, _, + _) = self._GetMetaGraph(optimizer_scope_name='foo/bar') + rewritten_graph_def = tf_optimizer.OptimizeGraph( + rewriter_config_pb2.RewriterConfig( + disable_model_pruning=True, + constant_folding=rewriter_config_pb2.RewriterConfig.OFF, + dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF, + layout_optimizer=rewriter_config_pb2.RewriterConfig.OFF, + arithmetic_optimization=rewriter_config_pb2.RewriterConfig.OFF, + memory_optimization=rewriter_config_pb2.RewriterConfig. + RECOMPUTATION_HEURISTICS, + # This should not match anything.
+ memory_optimizer_target_node_name_scope='r/gradients/'), + original_metagraph) + self.assertEqual( + len(rewritten_graph_def.node), len(original_metagraph.graph_def.node)) + self.assertEqual(0, + len([ + node for node in original_metagraph.graph_def.node + if 'Recomputed/' in node.name + ])) + self.assertEqual(0, + len([ + node for node in rewritten_graph_def.node + if 'Recomputed/' in node.name + ])) + def _GetMemoryOptimizerSessionConfig(self): rewrite_options = rewriter_config_pb2.RewriterConfig( disable_model_pruning=True, -- GitLab From 6da9a6a739ac9a49dcf85617ed7bccfe4bccff4c Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 13:03:42 -0800 Subject: [PATCH 362/884] Make tfe.Metrics Checkpointable Same principle as Layers: use add_variable to add a dependency on any variables created. I've ignored the global/local distinction, since it makes more sense for users to control saving by either adding a dependency on the Metric or not. PiperOrigin-RevId: 187658433 --- tensorflow/contrib/eager/python/BUILD | 1 + .../contrib/eager/python/metrics_impl.py | 23 ++++++++++------ .../contrib/eager/python/metrics_test.py | 27 +++++++++++++++++++ 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index e8c514c114..6fb8287030 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -117,6 +117,7 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:internal"], deps = [ + "//tensorflow/contrib/eager/python:checkpointable_utils", "//tensorflow/contrib/summary:summary_ops", "//tensorflow/python:array_ops", "//tensorflow/python:control_flow_ops", diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index 5571e77c70..a34c4f758a 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -30,12 +30,12 @@ 
from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import variable_scope - +from tensorflow.python.training import checkpointable _to_replace = re.compile("[^A-Za-z0-9.]") -class Metric(object): +class Metric(checkpointable.CheckpointableBase): """A metric holds state for aggregating statistics over an evaluation run. Example use with eager execution: @@ -254,14 +254,21 @@ class Metric(object): else: collections = [ops.GraphKeys.LOCAL_VARIABLES] collections += [ops.GraphKeys.METRIC_VARIABLES] - v = variable_scope.get_variable( - name, - shape, - dtype, - initializer, + # Variables are Checkpointable dependencies of Metrics regardless of the + # global/local distinction. Users can avoid saving variables by not adding a + # dependency on the Metric. + v = self._add_variable_with_custom_getter( + name=name, + shape=shape, + dtype=dtype, + initializer=initializer, trainable=False, collections=collections, - use_resource=True) + use_resource=True, + getter=variable_scope.get_variable, + # Raise duplicate variable exceptions from get_variable rather than + # Checkpointable. 
+ overwrite=True) self._vars.append(v) if context.in_eager_mode(): self._initial_values[v] = v.value() diff --git a/tensorflow/contrib/eager/python/metrics_test.py b/tensorflow/contrib/eager/python/metrics_test.py index c9106294dc..6b5450ba89 100644 --- a/tensorflow/contrib/eager/python/metrics_test.py +++ b/tensorflow/contrib/eager/python/metrics_test.py @@ -18,8 +18,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import tempfile +from tensorflow.contrib.eager.python import checkpointable_utils from tensorflow.contrib.eager.python import metrics from tensorflow.contrib.summary import summary_ops from tensorflow.contrib.summary import summary_test_util @@ -206,6 +208,31 @@ class MetricsTest(test.TestCase): self.assertAllEqual(m2.result().eval(), 2.0) self.assertAllEqual(m1.result().eval(), 1.0) + @test_util.run_in_graph_and_eager_modes() + def testSaveRestore(self): + checkpoint_directory = self.get_temp_dir() + checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") + mean = metrics.Mean() + checkpoint = checkpointable_utils.Checkpoint(mean=mean) + mean.build() + mean._built = True + self.evaluate(mean.init_variables()) + self.evaluate(mean(100.)) + self.evaluate(mean(200.)) + save_path = checkpoint.save(checkpoint_prefix) + self.evaluate(mean(1000.)) + checkpoint.restore(save_path).assert_consumed().run_restore_ops() + self.evaluate(mean(300.)) + self.assertAllEqual(200., self.evaluate(mean.value())) + + restore_mean = metrics.Mean() + restore_checkpoint = checkpointable_utils.Checkpoint(mean=restore_mean) + status = restore_checkpoint.restore(save_path) + restore_update = restore_mean(300.) 
+ status.assert_consumed().run_restore_ops() + self.evaluate(restore_update) + self.assertAllEqual(200., self.evaluate(restore_mean.value())) + self.assertEqual(3, self.evaluate(restore_mean.denom)) if __name__ == "__main__": test.main() -- GitLab From 628fe285dc3e54e7036e0eafb0f6b1ff27ab3f51 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 2 Mar 2018 13:18:22 -0800 Subject: [PATCH 363/884] Remove debug from config --- configure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.py b/configure.py index 2410cf7e07..081632e605 100644 --- a/configure.py +++ b/configure.py @@ -1043,7 +1043,7 @@ def set_tf_tensorrt_install_path(environ_cp): cuda_ver = convert_version_to_int(environ_cp['TF_CUDA_VERSION']) cudnn_ver = convert_version_to_int(environ_cp['TF_CUDNN_VERSION']) - nvinfer_pattern = re.compile('.*libnvinfer(?:_debug)?.so.?(.*)$') + nvinfer_pattern = re.compile('.*libnvinfer.so.?(.*)$') highest_ver = [0, None, None] for lib_file in possible_files: -- GitLab From 1e2c2f1cddd52ed86f8d5d7f10faa6498f13dded Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 13:32:35 -0800 Subject: [PATCH 364/884] Add /learning/tfx/ to the visibility group of tensorflow/compiler/jit. PiperOrigin-RevId: 187661883 --- tensorflow/compiler/jit/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index c7c9e9bd7a..955d12dc20 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -29,7 +29,10 @@ load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda_is_configured") # Target that bundles up the XLA CPU and GPU JIT devices. 
cc_library( name = "jit", - visibility = [":friends"], + visibility = [ + ":friends", + "//learning/tfx:__subpackages__", + ], deps = [ ":xla_cpu_device", ":xla_cpu_jit", -- GitLab From 4b038da7006c81e3e6cd542a7015d4a84d5c2385 Mon Sep 17 00:00:00 2001 From: Eli Bendersky Date: Fri, 2 Mar 2018 13:37:41 -0800 Subject: [PATCH 365/884] Make shape inference error messages more consistent. PiperOrigin-RevId: 187662562 --- .../compiler/xla/service/shape_inference.cc | 410 +++++++++--------- .../xla/service/shape_inference_test.cc | 59 ++- .../xla/tests/broadcast_simple_test.cc | 6 +- tensorflow/compiler/xla/tests/concat_test.cc | 2 +- tensorflow/compiler/xla/tests/map_test.cc | 2 +- 5 files changed, 236 insertions(+), 243 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 607a672025..c54cb3b48d 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -169,11 +169,11 @@ bool AllUnique(tensorflow::gtl::ArraySlice slice) { tensorflow::Status ExpectNotTupleOrOpaque(const Shape& shape, tensorflow::StringPiece op_type) { if (ShapeUtil::IsTuple(shape)) { - return InvalidArgument("Expected non-tuple argument for %s. Got: %s", + return InvalidArgument("Expected non-tuple argument for %s, but got %s.", op_type.ToString().c_str(), ShapeUtil::HumanString(shape).c_str()); } else if (ShapeUtil::IsOpaque(shape)) { - return InvalidArgument("Expected non-opaque argument for %s. 
Got: %s", + return InvalidArgument("Expected non-opaque argument for %s, but got %s.", op_type.ToString().c_str(), ShapeUtil::HumanString(shape).c_str()); } else { @@ -193,8 +193,7 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, const Shape& accumulator_shape = reducer_shape.result(); if (ShapeUtil::Rank(accumulator_shape) != 0) { - return Unimplemented( - "Reduction function currently must have rank-0 result."); + return InvalidArgument("Reduction function must have rank 0."); } // Check that the accumulator can be passed in as the first argument. @@ -235,8 +234,8 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, if (!ShapeUtil::CompatibleIgnoringFpPrecision(accumulator_shape, reducer_shape.parameters(1))) { return InvalidArgument( - "Reduction function's second parameter shape currently must " - "match the result shape. Got %s vs %s", + "Reduction function's second parameter shape must " + "match the result shape, but got %s vs %s.", ShapeUtil::HumanString(reducer_shape.parameters(1)).c_str(), ShapeUtil::HumanString(accumulator_shape).c_str()); } @@ -258,29 +257,29 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, for (int64 i = 0; i < window.dimensions_size(); ++i) { const auto& dim = window.dimensions(i); if (dim.size() <= 0) { - return InvalidArgument("Window has a non-positive dimension. Window: %s", + return InvalidArgument("Window %s has a non-positive dimension.", window.DebugString().c_str()); } if (dim.stride() <= 0) { - return InvalidArgument("Window has a non-positive stride. Window: %s", + return InvalidArgument("Window %s has a non-positive stride.", window.DebugString().c_str()); } if (!allow_negative_padding && dim.padding_low() < 0) { - return InvalidArgument("Window has a negative low padding. 
Window: %s", + return InvalidArgument("Window %s has a negative low padding.", window.DebugString().c_str()); } if (!allow_negative_padding && dim.padding_high() < 0) { - return InvalidArgument("Window has a negative high padding. Window: %s", + return InvalidArgument("Window %s has a negative high padding.", window.DebugString().c_str()); } if (dim.base_dilation() < 1) { return InvalidArgument( - "Window has a non-positive base area dilation factor. Window: %s", + "Window %s has a non-positive base area dilation factor.", window.DebugString().c_str()); } if (dim.window_dilation() < 1) { return InvalidArgument( - "Window has a non-positive window dilation factor. Window: %s", + "Window %s has a non-positive window dilation factor.", window.DebugString().c_str()); } @@ -320,8 +319,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case UNOP_CEIL: if (!ShapeUtil::ElementIsFloating(arg)) { return InvalidArgument( - "expected element type in shape to be floating for floor/ceil " - "operation; got %s", + "Expected element type in shape to be floating for floor/ceil " + "operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return arg; @@ -333,8 +332,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (!ShapeUtil::ElementIsFloating(arg) && !ShapeUtil::ElementIsComplex(arg)) { return InvalidArgument( - "expected element type in shape to be floating or complex for " - "sin/cos/exp/log/tanh operation; got %s", + "Expected element type in shape to be floating or complex for " + "sin/cos/exp/log/tanh operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return arg; @@ -342,8 +341,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case UNOP_IMAG: if (!ShapeUtil::ElementIsComplex(arg)) { return InvalidArgument( - "expected element type in shape to be complex for real/imag " - "operation; got %s", + "Expected element type in shape to be complex for real/imag " + "operation; got %s.", 
PrimitiveType_Name(arg.element_type()).c_str()); } return ShapeUtil::ChangeElementType(arg, F32); @@ -363,8 +362,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (arg.element_type() != PRED && !primitive_util::IsIntegralType(arg.element_type())) { return InvalidArgument( - "expected pred or an integral element type in argument to not " - "operation; got %s", + "Expected pred or an integral element type in argument to Not " + "operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return arg; @@ -372,8 +371,8 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, case UNOP_IS_FINITE: if (!ShapeUtil::ElementIsFloating(arg)) { return InvalidArgument( - "expected element type in shape to be floating point for IsFinite " - "operation; got %s", + "Expected element type in shape to be floating point for IsFinite " + "operation; got %s.", PrimitiveType_Name(arg.element_type()).c_str()); } return ShapeUtil::ChangeElementType(arg, PRED); @@ -389,10 +388,10 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, tensorflow::gtl::ArraySlice arg_shapes, const int64 dimension) { if (arg_shapes.empty()) { - return InvalidArgument("Concatenate expects at least one argument"); + return InvalidArgument("Concatenate expects at least one argument."); } if (dimension < 0 || dimension >= ShapeUtil::Rank(*arg_shapes[0])) { - return InvalidArgument("dimension to concatenate along out of bounds: %lld", + return InvalidArgument("Concatenate dimension out of bounds: %lld.", dimension); } const Shape* arg_shape = nullptr; @@ -408,14 +407,14 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (ShapeUtil::Rank(*arg_shape) != ShapeUtil::Rank(*shape)) { return InvalidArgument( "Cannot concatenate arrays with different ranks: %lld (%s) vs %lld " - "(%s)", + "(%s).", ShapeUtil::Rank(*arg_shape), ShapeUtil::HumanString(*arg_shape).c_str(), ShapeUtil::Rank(*shape), ShapeUtil::HumanString(*shape).c_str()); } if 
(!ShapeUtil::SameElementTypeIgnoringFpPrecision(*arg_shape, *shape)) { return InvalidArgument( - "cannot concatenate arrays with different element types: %s vs %s", + "Cannot concatenate arrays with different element types: %s vs %s.", PrimitiveType_Name(arg_shape->element_type()).c_str(), PrimitiveType_Name(shape->element_type()).c_str()); } @@ -428,9 +427,9 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, // concatenating. } return InvalidArgument( - "cannot concatenate arrays that differ in dimensions other than " + "Cannot concatenate arrays that differ in dimensions other than " "the one being concatenated (the other array dimensions must be " - "the same): %s vs %s in dimension %lld", + "the same): %s vs %s in dimension %lld.", ShapeUtil::HumanString(*arg_shape).c_str(), ShapeUtil::HumanString(*shape).c_str(), dimension); } @@ -452,7 +451,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, if (primitive_util::IsComplexType(old_element_type) && !primitive_util::IsComplexType(new_element_type)) { return Unimplemented( - "Unsupported conversion from complex to real type: %s => %s", + "Conversion from complex to real type %s => %s is not implemented.", ShapeUtil::HumanString(operand_shape).c_str(), PrimitiveType_Name(new_element_type).c_str()); } @@ -461,7 +460,7 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, // future, by recursing into the tuple elements to check all sub-conversions // are valid. For now we just reject them, though. 
return InvalidArgument( - "cannot convert from or to tuple type; requested conversion: %s => %s", + "Convert does not allow tuples, so cannot convert from %s to %s.", ShapeUtil::HumanString(operand_shape).c_str(), PrimitiveType_Name(new_element_type).c_str()); } @@ -474,24 +473,23 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, auto old_element_type = operand_shape.element_type(); if (primitive_util::IsComplexType(old_element_type) != primitive_util::IsComplexType(new_element_type)) { - return Unimplemented( - "Unsupported conversion between real and complex types: %s => %s", - ShapeUtil::HumanString(operand_shape).c_str(), - PrimitiveType_Name(new_element_type).c_str()); + return InvalidArgument("Conversion from complex to real type %s => %s.", + ShapeUtil::HumanString(operand_shape).c_str(), + PrimitiveType_Name(new_element_type).c_str()); } if (ShapeUtil::IsTuple(operand_shape) || new_element_type == TUPLE) { // Note: we may want to support tuple conversions via this operation in the // future, by recursing into the tuple elements to check all sub-conversions // are valid. For now we just reject them, though. 
return InvalidArgument( - "cannot convert from or to tuple type; requested conversion: %s => %s", + "Cannot convert from or to tuple type; requested conversion: %s => %s.", ShapeUtil::HumanString(operand_shape).c_str(), PrimitiveType_Name(new_element_type).c_str()); } if (primitive_util::BitWidth(old_element_type) != primitive_util::BitWidth(new_element_type)) { return InvalidArgument( - "cannot bitcast types with different bit-widths: %s => %s", + "Cannot bitcast types with different bit-widths: %s => %s.", PrimitiveType_Name(old_element_type).c_str(), PrimitiveType_Name(new_element_type).c_str()); } @@ -504,20 +502,20 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, const int mantissa_bits) { if (!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( - "expected element type in shape to be floating point for " - "ReducePrecision operation; got %s", + "Expected element type in shape to be floating point for " + "ReducePrecision operation; got %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } if (exponent_bits < 1) { // One exponent bit is necessary to distinguish 0 from infinity. Having // no exponent bits doesn't produce a sensible number, so we require at // least one. - return InvalidArgument("expected exponent_bits >= 1; got %d", + return InvalidArgument("Expected exponent_bits >= 1; got %d.", exponent_bits); } if (mantissa_bits < 0) { // A number with no mantissa bits is still meaningful, however. 
- return InvalidArgument("expected non-negative mantissa_bits; got %d", + return InvalidArgument("Expected non-negative mantissa_bits; got %d.", mantissa_bits); } return operand_shape; @@ -528,23 +526,23 @@ StatusOr InferWindowOutputShape(const Shape& base_shape, const PaddingConfig& padding_config) { if (ShapeUtil::IsTuple(operand_shape)) { return InvalidArgument( - "pad operation does not support tuple-shape operands"); + "Pad operation does not support tuple-shape operands."); } if (!ShapeUtil::IsScalar(padding_value_shape)) { return InvalidArgument( - "pad operation does not support non-scalar padding values"); + "Pad operation does not support non-scalar padding values."); } if (ShapeUtil::Rank(operand_shape) != padding_config.dimensions_size()) { return InvalidArgument( "The rank of the operand and the padding configuration do not match: " - "%s vs %s", + "%s vs %s.", ShapeUtil::HumanString(operand_shape).c_str(), padding_config.ShortDebugString().c_str()); } if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(operand_shape, padding_value_shape)) { return InvalidArgument( - "the element types of the operands to pad do not match"); + "The element types of the operands to Pad do not match."); } std::vector dimensions(ShapeUtil::Rank(operand_shape)); for (int64 i = 0; i < operand_shape.dimensions_size(); ++i) { @@ -605,7 +603,7 @@ Status ValidateDotDimensionNumbers( lhs_batch_dimensions) || !dims_in_range(ShapeUtil::Rank(rhs), rhs_contracting_dimensions, rhs_batch_dimensions)) { - return InvalidArgument("A dimension number is out of range in dot: %s", + return InvalidArgument("A dimension number is out of range in Dot: %s.", dimension_numbers.DebugString().c_str()); } @@ -623,7 +621,7 @@ Status ValidateDotDimensionNumbers( if (!dims_unique(lhs_contracting_dimensions, lhs_batch_dimensions) || !dims_unique(rhs_contracting_dimensions, rhs_batch_dimensions)) { - return InvalidArgument("A dimension number is not unique in dot: %s", + return InvalidArgument("A 
dimension number is not unique in Dot: %s.", dimension_numbers.DebugString().c_str()); } @@ -641,8 +639,7 @@ Status ValidateDotDimensionNumbers( rhs_non_contracting_non_batch_dims < 0 || rhs_non_contracting_non_batch_dims > 1) { return InvalidArgument( - "batch and contracting dimension number mismatch " - "with rank "); + "Batch and contracting dimension number mismatch with rank."); } // Check that batch dimension numbers are ordered before all others, and @@ -654,7 +651,7 @@ Status ValidateDotDimensionNumbers( !std::equal(batch_dim_numbers.begin(), batch_dim_numbers.end(), rhs_batch_dimensions.begin())) { return InvalidArgument( - "batch dimension numbers must precede non-batch dimensions and be" + "Batch dimension numbers must precede non-batch dimensions and be" "monotonically increasing."); } @@ -671,22 +668,22 @@ Status ValidateDotDimensionNumbers( auto fail = [lhs, rhs](const string& addendum) -> Status { string message = tensorflow::strings::Printf( - "cannot infer shape for dot operation: %s %s", + "Cannot infer shape for dot operation: %s %s.", ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str()); if (!addendum.empty()) { - message += ": " + addendum; + message += " " + addendum; } return InvalidArgument("%s", message.c_str()); }; // Check if both element types are the same. if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { - return fail("element types do not match"); + return fail("Element types do not match."); } if ((ShapeUtil::Rank(lhs) < 1) || (ShapeUtil::Rank(rhs) < 1)) { - return fail("dot only supports rank 1 or above."); + return fail("Dot only supports rank 1 or above."); } // Validate basic properties of dot dimension numbers. 
@@ -696,7 +693,7 @@ Status ValidateDotDimensionNumbers( if (dimension_numbers.lhs_contracting_dimensions_size() != dimension_numbers.rhs_contracting_dimensions_size() || dimension_numbers.lhs_contracting_dimensions_size() != 1) { - return fail("must specify one contracting dimension for both lhs and rhs."); + return fail("Must specify one contracting dimension for both lhs and rhs."); } // Check that contracting dimension sizes match. @@ -706,13 +703,13 @@ Status ValidateDotDimensionNumbers( dimension_numbers.rhs_contracting_dimensions(0); if (lhs.dimensions(lhs_contracting_dimension) != rhs.dimensions(rhs_contracting_dimension)) { - return fail("contracting dimension sizes do not match."); + return fail("Contracting dimension sizes do not match."); } // Check that number of batch dimensions match. if (dimension_numbers.lhs_batch_dimensions_size() != dimension_numbers.rhs_batch_dimensions_size()) { - return fail("must the same number of batch dimensions for lhs and rhs."); + return fail("Must the same number of batch dimensions for lhs and rhs."); } // Check that batch dimension numbers and sizes match. 
@@ -721,7 +718,7 @@ Status ValidateDotDimensionNumbers( dimension_numbers.rhs_batch_dimensions(i) || lhs.dimensions(dimension_numbers.lhs_batch_dimensions(i)) != rhs.dimensions(dimension_numbers.rhs_batch_dimensions(i))) { - return fail("batch dimension numbers and sizes must match for lhs/rhs."); + return fail("Batch dimension numbers and sizes must match for lhs/rhs."); } } @@ -770,10 +767,11 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } else if (rhs.dimensions(i) == 1) { output_dimensions[i] = lhs.dimensions(i); } else { - return InvalidArgument("binary op %s with incompatible shapes: %s and %s", - BinaryOperation_Name(operation).c_str(), - ShapeUtil::HumanString(lhs).c_str(), - ShapeUtil::HumanString(rhs).c_str()); + return InvalidArgument( + "Binary op %s with incompatible shapes: %s and %s.", + BinaryOperation_Name(operation).c_str(), + ShapeUtil::HumanString(lhs).c_str(), + ShapeUtil::HumanString(rhs).c_str()); } } return ShapeUtil::MakeShape(ShapeUtil::HigherPrecisionElementType(lhs, rhs), @@ -788,15 +786,15 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // Reject "magic" inference for binops on different shapes, requiring // the user to provide an explicit broadcast dimension in this case. // See b/25177275 for more details. 
- return InvalidArgument("automatic shape inference not supported: %s and %s", + return InvalidArgument("Automatic shape inference not supported: %s and %s", ShapeUtil::HumanString(smaller_shape).c_str(), ShapeUtil::HumanString(larger_shape).c_str()); } else if (broadcast_dimensions.size() != ShapeUtil::Rank(smaller_shape)) { return InvalidArgument( - "size of broadcast_dimensions has to match lower-rank operand's " + "Size of broadcast_dimensions has to match lower-rank operand's " "rank; " " lower-rank operand's rank is %lld, size of broadcast_dimensions is " - "%zu", + "%zu.", ShapeUtil::Rank(smaller_shape), broadcast_dimensions.size()); } @@ -846,13 +844,13 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( int64 dimension_to_match = broadcast_dimensions.at(i); if (dimension_to_match < 0) { return InvalidArgument( - "broadcast dimension number (%lld) cannot be negative", + "Broadcast dimension number (%lld) cannot be negative.", dimension_to_match); } if (dimension_to_match >= larger_shape.dimensions_size()) { return InvalidArgument( - "broadcast dimension number (%lld) too large; higher-rank " - "operand has rank %d", + "Broadcast dimension number (%lld) too large; higher-rank " + "operand has rank %d.", dimension_to_match, larger_shape.dimensions_size()); } int64 small_dimension_size = smaller_shape.dimensions(i); @@ -863,7 +861,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (small_dimension_size != large_dimension_size && small_dimension_size != 1 && large_dimension_size != 1) { return InvalidArgument( - "broadcast dimension %d mismatch: %lld != %lld; %s and %s", i, + "Broadcast dimension %d mismatch: %lld != %lld; %s and %s.", i, small_dimension_size, large_dimension_size, ShapeUtil::HumanString(smaller_shape).c_str(), ShapeUtil::HumanString(larger_shape).c_str()); @@ -872,7 +870,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // order. 
if (i > 0 && broadcast_dimensions.at(i - 1) >= dimension_to_match) { return InvalidArgument( - "broadcast dimensions order is wrong: %lld comes after %lld", + "Broadcast dimensions order is wrong: %lld comes after %lld.", dimension_to_match, broadcast_dimensions.at(i - 1)); } @@ -892,7 +890,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return InvalidArgument( - "binary op %s with different element types: %s and %s", + "Binary op %s with different element types: %s and %s.", BinaryOperation_Name(operation).c_str(), ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str()); @@ -904,8 +902,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!broadcast_dimensions.empty() && broadcast_dimensions != identity_dims) { return InvalidArgument( - "broadcast dimensions field must either be not set or be the " - "identity on binary operations with operands of the same rank"); + "Broadcast dimensions field must either be not set or be the " + "identity on binary operations with operands of the same rank."); } } @@ -979,8 +977,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( case BINOP_COMPLEX: { if (!ShapeUtil::ElementIsFloating(lhs)) { return InvalidArgument( - "expected element type in shape to be floating for complex compose " - "operation; got %s", + "Expected element type in shape to be floating for complex compose " + "operation; got %s.", PrimitiveType_Name(lhs.element_type()).c_str()); } TF_ASSIGN_OR_RETURN(const Shape& shape, @@ -989,7 +987,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (lhs.element_type() == F32 && rhs.element_type() == F32) { return ShapeUtil::ChangeElementType(shape, C64); } else { - return Unimplemented("complex component type not supported"); + return Unimplemented("Complex component type is not implemented."); } } case BINOP_AND: @@ -997,8 +995,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if 
(lhs.element_type() != PRED && !primitive_util::IsIntegralType(lhs.element_type())) { return InvalidArgument( - "expected pred or integral type in argument to and/or operation; " - "got %s", + "Expected pred or integral type in argument to and/or operation; " + "got %s.", PrimitiveType_Name(lhs.element_type()).c_str()); } return InferElementwiseBinaryOpShape(operation, lhs, rhs, @@ -1016,7 +1014,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } default: return Unimplemented( - "not yet implemented; infer binary op shape: %s; lhs: %s; rhs: %s", + "Binary op shape inference: %s; lhs: %s; rhs: %s is not implemented.", BinaryOperation_Name(operation).c_str(), lhs.ShortDebugString().c_str(), rhs.ShortDebugString().c_str()); } @@ -1041,7 +1039,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( case TRIOP_SELECT: return InferSelectShape(lhs, rhs, ehs); default: - return InvalidArgument("unknown operation %s", + return InvalidArgument("Unknown operation %s.", TernaryOperation_Name(operation).c_str()); } } @@ -1072,7 +1070,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return result; } default: - return InvalidArgument("unknown operation %s", + return InvalidArgument("Unknown operation %s.", VariadicOperation_Name(operation).c_str()); } } @@ -1082,7 +1080,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const ProgramShape& to_apply, tensorflow::gtl::ArraySlice dimensions) { if (arg_shapes.empty()) { - return InvalidArgument("Map expects at least one argument"); + return InvalidArgument("Map expects at least one argument."); } // All arguments must have the same shape. 
@@ -1113,7 +1111,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } return InvalidArgument( "Map operation requires all operands to have the same shape; got: " - "%s", + "%s.", Join(pieces, ", ").c_str()); } @@ -1122,7 +1120,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (dimensions.size() != arg_shape->dimensions_size()) { return InvalidArgument( "Map applied to a subset of dimensions currently not supported: " - "arg_dimension_size: %d, requested_map_dimensions_size: %zu", + "arg_dimension_size: %d, requested_map_dimensions_size: %zu.", arg_shape->dimensions_size(), dimensions.size()); } @@ -1130,7 +1128,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( for (int i = 0; i < dimensions.size(); ++i) { if (dimensions[i] != i) { return InvalidArgument( - "Map requires monotonically increasing dimension numbers, found: %s ", + "Map requires monotonically increasing dimension numbers; got: %s.", Join(dimensions, ", ").c_str()); } } @@ -1139,7 +1137,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (arg_shapes.size() != to_apply.parameters_size()) { return InvalidArgument( "Map applied function arity must match number of arguments; got: " - "arity: %d, arguments: %zu", + "arity: %d, arguments: %zu.", to_apply.parameters_size(), arg_shapes.size()); } @@ -1147,8 +1145,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& output_shape = to_apply.result(); if (!ShapeUtil::IsScalar(output_shape)) { return InvalidArgument( - "mapped computation's result has to be a scalar; " - "got: %s", + "Mapped computation's result has to be a scalar; got: %s.", ShapeUtil::HumanString(output_shape).c_str()); } @@ -1157,16 +1154,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::IsScalar(parameter_shape)) { return InvalidArgument( - "mapped computation's parameter has to be a scalar; " - "got parameter %d shape: %s", + "Mapped computation's parameter has to be a scalar; " + "got parameter %d 
shape: %s.", i, ShapeUtil::HumanString(parameter_shape).c_str()); } if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(parameter_shape, *arg_shape)) { return InvalidArgument( - "mapped computation's parameter type has to match argument element " - "type; got parameter %d shape: %s, argument shape: %s", + "Mapped computation's parameter type has to match argument element " + "type; got parameter %d shape: %s, argument shape: %s.", i, ShapeUtil::HumanString(parameter_shape).c_str(), ShapeUtil::HumanString(*arg_shape).c_str()); } @@ -1197,21 +1194,21 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected feature_index of batch-norm-training to be " "smaller than the rank of operand_shape; " - "got feature_index %lld, and rank %lld", + "got feature_index %lld, and rank %lld.", feature_index, ShapeUtil::Rank(operand_shape)); } if (feature_index < 0) { return InvalidArgument( "Expected feature_index of batch-norm-training to " - "be a non-negative number, got %lld", + "be a non-negative number, got %lld.", feature_index); } if (ShapeUtil::Rank(operand_shape) < 1) { return InvalidArgument( "Expected the rank of operand to " - "batch-norm-training to be at least 1; got %lld", + "batch-norm-training to be at least 1; got %lld.", ShapeUtil::Rank(operand_shape)); } @@ -1232,7 +1229,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( "The operand to batch-norm-training must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1241,7 +1238,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-training, " "but the shape of offset factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", 
PrimitiveType_Name(offset_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1251,7 +1248,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-training, " "but the shape of scale factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(scale_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1264,7 +1261,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of offset factor should be the same as feature count," "but the size of offset factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(offset_shape, 0), feature_count); } @@ -1272,7 +1269,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of scale factor should be the same as feature count," "but the size of scale factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(scale_shape, 0), feature_count); } @@ -1307,21 +1304,21 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected feature_index of batch-norm-inference to be " "smaller than the rank of operand_shape; " - "got feature_index %lld, and rank %lld", + "got feature_index %lld, and rank %lld.", feature_index, ShapeUtil::Rank(operand_shape)); } if (feature_index < 0) { return InvalidArgument( "Expected feature_index of batch-norm-inference to " - "be a non-negative number, got %lld", + "be a non-negative number, got %lld.", feature_index); } if (ShapeUtil::Rank(operand_shape) < 1) { return InvalidArgument( "Expected the rank of operand to " - "batch-norm-inference to be at least 1; got %lld", + "batch-norm-inference to be at least 1; got %lld.", ShapeUtil::Rank(operand_shape)); } @@ -1342,7 +1339,7 
@@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( "The operand to batch-norm-inference must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1352,7 +1349,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of offset factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(offset_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1363,7 +1360,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of scale factor is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(scale_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1374,7 +1371,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of mean is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1385,7 +1382,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "The inputs should have the same element type for " "batch-norm-inference, " "but the shape of variance is %s " - "and the shape of operand is %s", + "and the shape of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(variance_shape.element_type()).c_str()); } @@ -1398,7 +1395,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of offset factor 
should be the same as feature count," "but the size of offset factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(offset_shape, 0), feature_count); } @@ -1406,7 +1403,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of scale factor should be the same as feature count," "but the size of scale factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(scale_shape, 0), feature_count); } @@ -1414,7 +1411,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of mean should be the same as feature count," "but the size of mean is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(mean_shape, 0), feature_count); } @@ -1422,7 +1419,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of variance should be the same as feature count," "but the size of variance is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(variance_shape, 0), feature_count); } @@ -1455,7 +1452,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected feature_index of batch-norm-grad to be " "smaller than the rank of operand_shape; " - "got feature_index %lld, and rank %lld", + "got feature_index %lld, and rank %lld.", feature_index, ShapeUtil::Rank(operand_shape)); } @@ -1463,7 +1460,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected operand_shape of batch-norm-grad to have the same rank as" " output_grad_shape; got rank(oprand_shape) %lld, and" - " rank(output_grad_shape) %lld", + " rank(output_grad_shape) %lld.", ShapeUtil::Rank(operand_shape), ShapeUtil::Rank(output_grad_shape)); } @@ -1491,14 +1488,14 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if 
(!ShapeUtil::ElementIsFloating(operand_shape)) { return InvalidArgument( "The operand to batch-norm-grad must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(operand_shape.element_type()).c_str()); } if (!ShapeUtil::ElementIsFloating(output_grad_shape)) { return InvalidArgument( "The output_grad to batch-norm-grad must have a floating point " - "element type, but the shape is %s", + "element type, but the shape is %s.", PrimitiveType_Name(output_grad_shape.element_type()).c_str()); } @@ -1507,7 +1504,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of output_grad is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(output_grad_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1517,7 +1514,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of scale factor is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(scale_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1527,7 +1524,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but the element type of mean is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1537,7 +1534,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The inputs should have the same element type for batch-norm-grad, " "but 
the element type of mean is %s " - "and the element type of operand is %s", + "and the element type of operand is %s.", PrimitiveType_Name(mean_shape.element_type()).c_str(), PrimitiveType_Name(operand_shape.element_type()).c_str()); } @@ -1551,7 +1548,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of mean should be the same as feature count," "but the size of offset factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(mean_shape, 0), feature_count); } @@ -1559,7 +1556,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of scale factor should be the same as feature count," "but the size of scale factor is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(scale_shape, 0), feature_count); } @@ -1567,7 +1564,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The size of variance should be the same as feature count," "but the size of variance is %lld " - "and the feature count is %lld", + "and the feature count is %lld.", ShapeUtil::GetDimension(var_shape, 0), feature_count); } @@ -1578,7 +1575,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "The bounds of operand shape should be the same as output_grad's," "but the bound of operand_shape at dimension %lld is %lld " - "and the bound of output_grad_shape is %lld", + "and the bound of output_grad_shape is %lld.", i, ShapeUtil::GetDimension(operand_shape, i), ShapeUtil::GetDimension(output_grad_shape, i)); } @@ -1596,7 +1593,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return InvalidArgument( - "Convolution with different element types: %s and %s", + "Convolution with different element types: %s and %s.", ShapeUtil::HumanString(lhs).c_str(), 
ShapeUtil::HumanString(rhs).c_str()); } @@ -1612,21 +1609,19 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (window.dimensions_size() != num_spatial_dims) { return InvalidArgument( "Window must have same number of dimensions as dimension numbers.\n" - "Window: %s\nDimension numbers: %s", + "Window: %s\nDimension numbers: %s.", window.DebugString().c_str(), dnums.DebugString().c_str()); } const int num_dims = num_spatial_dims + 2; if (ShapeUtil::Rank(lhs) != num_dims) { return InvalidArgument( - "The LHS argument to a convolution should have rank %d.\n" - "lhs: %s", + "The LHS argument to a convolution should have rank %d; lhs: %s.", num_dims, ShapeUtil::HumanString(lhs).c_str()); } if (ShapeUtil::Rank(rhs) != num_dims) { return InvalidArgument( - "The RHS argument to a convolution should have rank %d.\n" - "lhs: %s", + "The RHS argument to a convolution should have rank %d; lhs: %s.", num_dims, ShapeUtil::HumanString(lhs).c_str()); } TF_DCHECK_OK(ShapeUtil::ValidateShapeWithOptionalLayout(lhs)); @@ -1663,26 +1658,26 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( !std::all_of(window_dnums.begin(), window_dnums.end(), in_range) || !std::all_of(output_dnums.begin(), output_dnums.end(), in_range)) { return InvalidArgument( - "A dimension number is out of range in convolution: %s", + "A dimension number is out of range in convolution: %s.", dnums.DebugString().c_str()); } if (input_dnums != expected_dnums) { return InvalidArgument( "Input dimensions of convolution must contain each dimension exactly " - "once: %s", + "once: %s.", dnums.DebugString().c_str()); } if (window_dnums != expected_dnums) { return InvalidArgument( "Window dimensions of convolution must contain each dimension exactly " - "once: %s", + "once: %s.", dnums.DebugString().c_str()); } if (output_dnums != expected_dnums) { return InvalidArgument( "Output dimensions of convolution must contain each dimension exactly " - "once: %s", + "once: %s.", dnums.DebugString().c_str()); } 
@@ -1706,7 +1701,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Expected LHS feature dimension (value %lld) to match RHS " "input feature dimension (value %lld); got (%s, %s)\n" - "Dimension numbers: {%s}", + "Dimension numbers: {%s}.", input_features, kernel_input_features, ShapeUtil::HumanString(lhs).c_str(), ShapeUtil::HumanString(rhs).c_str(), dnums.DebugString().c_str()); @@ -1720,7 +1715,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "Window dimensions do not match RHS shape:\n\t" "RHS shape: %s\n\t" "Window: {%s}\n\t" - "Dimension numbers: {%s}", + "Dimension numbers: {%s}.", ShapeUtil::HumanString(rhs).c_str(), window.ShortDebugString().c_str(), dnums.ShortDebugString().c_str()); } @@ -1748,8 +1743,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const tensorflow::gtl::ArraySlice fft_length) { const int64 fft_rank = fft_length.size(); if (fft_rank < 1 || fft_rank > 3) { - return InvalidArgument("FFT only supports ranks 1-3, but got %lld", - fft_rank); + return InvalidArgument("FFT only supports ranks 1-3; got %lld.", fft_rank); } #define RET_CHECK_RANK(x) \ if (x.dimensions_size() < fft_rank) { \ @@ -1762,7 +1756,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( case FFT: case IFFT: if (in.element_type() != C64) { - return InvalidArgument("%s requires C64 input type, found %s", + return InvalidArgument("%s requires C64 input type, found %s.", FftType_Name(fft_type).c_str(), PrimitiveType_Name(in.element_type()).c_str()); } @@ -1770,7 +1764,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return in; case RFFT: { if (in.element_type() != F32) { - return InvalidArgument("RFFT requires F32 input type, found %s", + return InvalidArgument("RFFT requires F32 input type, found %s.", PrimitiveType_Name(in.element_type()).c_str()); } RET_CHECK_RANK(in); @@ -1779,7 +1773,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( fft_length[i]) { return InvalidArgument( "RFFT 
requires innermost dimensions match fft_length but " - "dimension %lld is %lld and should be %lld", + "dimension %lld is %lld and should be %lld.", in.dimensions_size() - fft_rank + i, in.dimensions(in.dimensions_size() - fft_rank + i), fft_length[i]); @@ -1792,7 +1786,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } case IRFFT: { if (in.element_type() != C64) { - return InvalidArgument("IRFFT requires C64 input type, found %s", + return InvalidArgument("IRFFT requires C64 input type, found %s.", PrimitiveType_Name(in.element_type()).c_str()); } RET_CHECK_RANK(in); @@ -1802,7 +1796,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( fft_length[i]) { return InvalidArgument( "IRFFT requires all but one innermost dimensions match " - "fft_length, but dimension %lld is %lld and should be %lld", + "fft_length, but dimension %lld is %lld and should be %lld.", in.dimensions_size() - fft_rank + i, in.dimensions(in.dimensions_size() - fft_rank + i), fft_length[i]); @@ -1812,7 +1806,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( fft_length[fft_rank - 1] / 2 + 1) { return InvalidArgument( "IRFFT requires innermost dimension matches fft_length/2+1, but " - "dimension %d is %lld and should be %lld", + "dimension %d is %lld and should be %lld.", in.dimensions_size() - 1, in.dimensions(in.dimensions_size() - 1), fft_length[fft_rank - 1] / 2 + 1); } @@ -1850,8 +1844,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( for (int64 dimension : dimensions_to_reduce) { if (dimension >= ShapeUtil::Rank(arg) || dimension < 0) { return InvalidArgument( - "attempting to reduce out-of-bounds dimension %lld in shape %s", - dimension, ShapeUtil::HumanString(arg).c_str()); + "Reducing out-of-bounds dimension %lld in shape %s.", dimension, + ShapeUtil::HumanString(arg).c_str()); } } TF_RETURN_IF_ERROR( @@ -1891,30 +1885,30 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // Check if the select function has a proper shape of (T,T) -> PRED. 
if (select_shape.parameters_size() != 2) { return InvalidArgument( - "select function must take 2 parameters, but " + "Select function must take 2 parameters, but " "takes %d parameter(s).", select_shape.parameters_size()); } const Shape& select_result_shape = select_shape.result(); if (!ShapeUtil::Compatible(select_result_shape, ShapeUtil::MakeShape(PRED, {}))) { - return Unimplemented("select function must have rank-0 PRED result."); + return InvalidArgument("Select function must have rank-0 PRED result."); } const Shape& operand_element_shape = ShapeUtil::MakeShape(operand_shape.element_type(), {}); if (!ShapeUtil::CompatibleIgnoringFpPrecision(operand_element_shape, select_shape.parameters(0))) { return InvalidArgument( - "select function's first parameter shape currently must " - "match the operand element shape. Got %s vs %s", + "Select function's first parameter shape currently must " + "match the operand element shape, but got %s vs %s.", ShapeUtil::HumanString(select_shape.parameters(0)).c_str(), ShapeUtil::HumanString(operand_element_shape).c_str()); } if (!ShapeUtil::CompatibleIgnoringFpPrecision(operand_element_shape, select_shape.parameters(1))) { return InvalidArgument( - "select function's second parameter shape currently must " - "match the operand element shape. 
Got %s vs %s", + "Select function's second parameter shape currently must " + "match the operand element shape, but got %s vs %s.", ShapeUtil::HumanString(select_shape.parameters(1)).c_str(), ShapeUtil::HumanString(operand_element_shape).c_str()); } @@ -1931,8 +1925,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::CompatibleIgnoringFpPrecision(source_shape, window_result_shape)) { return InvalidArgument( - "source shape does not match the shape of window-reduced operand: " - "source(%s), window-reduced operand(%s)", + "Source shape does not match the shape of window-reduced operand: " + "source(%s), window-reduced operand(%s).", ShapeUtil::HumanString(source_shape).c_str(), ShapeUtil::HumanString(window_result_shape).c_str()); } @@ -1946,7 +1940,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( auto error = [&](const string& message) { return InvalidArgument( "%s in slice operation; argument shape: %s; starts: {%s}; limits: " - "{%s}; strides: {%s}", + "{%s}; strides: {%s}.", message.c_str(), ShapeUtil::HumanString(arg).c_str(), Join(starts, ",").c_str(), Join(limits, ",").c_str(), Join(strides, ",").c_str()); @@ -1969,7 +1963,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (starts.size() != ShapeUtil::Rank(arg)) { return InvalidArgument( - "slice index count does not match argument rank: %zu vs %lld", + "Slice index count does not match argument rank: %zu vs %lld.", starts.size(), ShapeUtil::Rank(arg)); } @@ -1979,7 +1973,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( int64 limit_index = limits[dimension]; int64 stride = strides[dimension]; if (start_index < 0) { - return InvalidArgument("negative start index to slice: %lld", + return InvalidArgument("Negative start index to slice: %lld.", start_index); } if (limit_index > arg.dimensions(dimension)) { @@ -1999,7 +1993,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( limit_index, start_index)); } if (stride <= 0) { - return 
InvalidArgument("stride (%lld) must be positive", stride); + return InvalidArgument("Stride (%lld) must be positive.", stride); } sizes.push_back((limit_index - start_index + stride - 1) / stride); } @@ -2023,20 +2017,20 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::Rank(start_indices_shape) != 1) { return InvalidArgument( - "dynamic slice start indices of rank %lld must be rank1.", + "Dynamic slice start indices of rank %lld must be rank1.", ShapeUtil::Rank(start_indices_shape)); } if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { return InvalidArgument( - "dynamic slice start indices must be of integral type."); + "Dynamic slice start indices must be of integral type."); } const int64 start_num_dims = start_indices_shape.dimensions(0); if (ShapeUtil::Rank(operand_shape) != start_num_dims) { return InvalidArgument( - "dynamic slice start number of dimensions %lld (%s) must match rank " - "%lld of slice input (%s)", + "Dynamic slice start number of dimensions %lld (%s) must match rank " + "%lld of slice input (%s).", start_num_dims, ShapeUtil::HumanString(start_indices_shape).c_str(), ShapeUtil::Rank(operand_shape), ShapeUtil::HumanString(operand_shape).c_str()); @@ -2044,7 +2038,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (slice_sizes.size() != ShapeUtil::Rank(operand_shape)) { return InvalidArgument( - "dynamic slice index count does not match argument rank: %zu vs %lld", + "Dynamic slice index count does not match argument rank: %zu vs %lld.", slice_sizes.size(), ShapeUtil::Rank(operand_shape)); } @@ -2052,12 +2046,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const int64 input_dim_size = operand_shape.dimensions(dim); const int64 slice_dim_size = slice_sizes[dim]; if (slice_dim_size < 0) { - return InvalidArgument("negative size index to dynamic slice: %lld", + return InvalidArgument("Negative size index to dynamic slice: %lld.", slice_dim_size); } if (slice_dim_size > input_dim_size) { 
return InvalidArgument( - "slice dim size %lld greater than dynamic slice dimension: %lld", + "Slice dim size %lld greater than dynamic slice dimension: %lld.", slice_dim_size, input_dim_size); } VLOG(2) << tensorflow::strings::Printf("slice_sizes[%lld] = %lld", dim, @@ -2086,20 +2080,20 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::Rank(start_indices_shape) != 1) { return InvalidArgument( - "dynamic update slice start indices of rank %lld must be rank1.", + "Dynamic update slice start indices of rank %lld must be rank1.", ShapeUtil::Rank(start_indices_shape)); } if (!ShapeUtil::ElementIsIntegral(start_indices_shape)) { return InvalidArgument( - "dynamic update slice start indices must be of integral type."); + "Dynamic update slice start indices must be of integral type."); } const int64 start_num_dims = start_indices_shape.dimensions(0); if (ShapeUtil::Rank(operand_shape) != start_num_dims) { return InvalidArgument( - "dynamic slice start number of dimensions %lld (%s) must match rank " - "%lld of slice input (%s)", + "Dynamic slice start number of dimensions %lld (%s) must match rank " + "%lld of slice input (%s).", start_num_dims, ShapeUtil::HumanString(start_indices_shape).c_str(), ShapeUtil::Rank(operand_shape), ShapeUtil::HumanString(operand_shape).c_str()); @@ -2107,16 +2101,16 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::Rank(update_shape) != ShapeUtil::Rank(operand_shape)) { return InvalidArgument( - "dynamic update slice update rank does not match argument rank: " - "%lld vs %lld", + "Dynamic update slice update rank does not match argument rank: " + "%lld vs %lld.", ShapeUtil::Rank(update_shape), ShapeUtil::Rank(operand_shape)); } if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(operand_shape, update_shape)) { return InvalidArgument( - "dynamic update slice update element type does not match argument. 
" - "operand.element_type: %s vs update.element_type: %s", + "Dynamic update slice update element type does not match argument. " + "operand.element_type: %s vs update.element_type: %s.", PrimitiveType_Name(operand_shape.element_type()).c_str(), PrimitiveType_Name(update_shape.element_type()).c_str()); } @@ -2126,12 +2120,12 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const int64 update_dim_size = update_shape.dimensions(dim); if (update_dim_size < 0) { return InvalidArgument( - "size index %lld to dynamic update slice must be >= 0", + "Size index %lld to dynamic update slice must be >= 0.", update_dim_size); } if (update_dim_size > input_dim_size) { return InvalidArgument( - "update dim size %lld greater than dynamic slice dimension: %lld", + "Update dim size %lld greater than dynamic slice dimension: %lld.", update_dim_size, input_dim_size); } VLOG(2) << tensorflow::strings::Printf("update_sizes[%lld] = %lld", dim, @@ -2151,7 +2145,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( for (int64 dimension : dimensions) { if (dimension >= ShapeUtil::Rank(operand_shape) || dimension < 0) { return InvalidArgument( - "one of the reverse dimensions (%lld) is out-of-bounds in shape %s", + "One of the reverse dimensions (%lld) is out-of-bounds in shape %s.", dimension, ShapeUtil::HumanString(operand_shape).c_str()); } } @@ -2162,14 +2156,14 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& arg, int64 index) { if (!ShapeUtil::IsTuple(arg)) { return InvalidArgument( - "cannot infer shape: attempting to index into non-tuple: %s", + "Cannot infer shape: attempting to index into non-tuple: %s.", ShapeUtil::HumanString(arg).c_str()); } if (index >= arg.tuple_shapes_size()) { return InvalidArgument( - "cannot infer shape: attempt to index out of tuple bounds: %lld " - ">= %d in shape %s", + "Cannot infer shape: attempt to index out of tuple bounds: %lld " + ">= %d in shape %s.", index, arg.tuple_shapes_size(), 
ShapeUtil::HumanString(arg).c_str()); } @@ -2181,17 +2175,17 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& init) { // Check the number of parameters for given computations. if (condition.parameters_size() != 1) { - return InvalidArgument("condition must take 1 arguments; got %d", + return InvalidArgument("Condition must take 1 arguments; got %d.", condition.parameters_size()); } if (body.parameters_size() != 1) { - return InvalidArgument("body must take 1 arguments; got %d", + return InvalidArgument("Body must take 1 arguments; got %d.", body.parameters_size()); } auto shape_string = [&]() { return tensorflow::strings::Printf( - "condition: %s; body: %s; init: %s", + "Condition: %s; body: %s; init: %s.", ShapeUtil::HumanString(condition).c_str(), ShapeUtil::HumanString(body).c_str(), ShapeUtil::HumanString(init).c_str()); @@ -2199,15 +2193,15 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( // Check the shapes of computation parameters and return types. 
if (!ShapeUtil::ShapeIs(condition.result(), PRED, {})) { - return InvalidArgument("condition must return a boolean; got %s", + return InvalidArgument("Condition must return a boolean; got %s.", shape_string().c_str()); } if (!ShapeUtil::Compatible(body.result(), condition.parameters(0)) || !ShapeUtil::Compatible(body.result(), body.parameters(0)) || !ShapeUtil::Compatible(body.result(), init)) { return InvalidArgument( - "the parameter of condition and body, the result of the body, and init " - "must all have the same shape; got %s", + "The parameter of condition and body, the result of the body, and init " + "must all have the same shape; got %s.", shape_string().c_str()); } @@ -2219,7 +2213,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const Shape& false_operand, const ProgramShape& true_computation, const ProgramShape& false_computation) { if (!ShapeUtil::ShapeIs(predicate, PRED, {})) { - return InvalidArgument("predicate must be a boolean; got %s.", + return InvalidArgument("Predicate must be a boolean; got %s.", ShapeUtil::HumanString(predicate).c_str()); } @@ -2302,8 +2296,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (ShapeUtil::ElementsIn(operand) != ShapeUtil::ElementsIn(inferred_shape)) { return InvalidArgument( - "reshape operation has mismatched element counts: from=%lld (%s) " - "to=%lld (%s)", + "Reshape operation has mismatched element counts: from=%lld (%s) " + "to=%lld (%s).", ShapeUtil::ElementsIn(operand), ShapeUtil::HumanString(operand).c_str(), ShapeUtil::ElementsIn(inferred_shape), ShapeUtil::HumanString(inferred_shape).c_str()); @@ -2351,7 +2345,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(max, "clamp max")); if (!ShapeUtil::SameElementTypeIgnoringFpPrecision(min, operand) || !ShapeUtil::SameElementTypeIgnoringFpPrecision(max, operand)) { - return InvalidArgument("clamp op with different operand types: %s, %s, %s", + return InvalidArgument("Clamp with 
different operand types: %s, %s, %s.", ShapeUtil::HumanString(min).c_str(), ShapeUtil::HumanString(operand).c_str(), ShapeUtil::HumanString(max).c_str()); @@ -2372,7 +2366,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } } return Unimplemented( - "not yet implemented: %s, %s %s", min.ShortDebugString().c_str(), + "%s, %s %s is not implemented.", min.ShortDebugString().c_str(), max.ShortDebugString().c_str(), operand.ShortDebugString().c_str()); } @@ -2391,13 +2385,13 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( } if (!compatible) { return InvalidArgument( - "operands to select must be the same shape; got %s and %s", + "Operands to select must be the same shape; got %s and %s.", ShapeUtil::HumanString(on_true).c_str(), ShapeUtil::HumanString(on_false).c_str()); } if (pred.element_type() != PRED) { return InvalidArgument( - "select's pred operand must have PRED element type; got %s", + "Select's pred operand must have PRED element type; got %s.", ShapeUtil::HumanString(pred).c_str()); } if (ShapeUtil::SameDimensions(pred, on_true) || ShapeUtil::Rank(pred) == 0) { @@ -2407,9 +2401,9 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return ShapeUtil::ChangeElementType( on_true, ShapeUtil::HigherPrecisionElementType(on_true, on_false)); } else { - return Unimplemented( - "select operation with non-scalar predicate with dimensionality " - " different from the other operands: %s", + return InvalidArgument( + "Select operation with non-scalar predicate with dimensionality " + " different from the other operands: %s.", ShapeUtil::HumanString(pred).c_str()); } } @@ -2427,7 +2421,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( return InvalidArgument( "Call applied function arity must match number of arguments; got: " "arity: %d, arguments: %zu; computation signature: %s; argument " - "shapes: [%s]", + "shapes: [%s].", to_apply.parameters_size(), arg_shapes.size(), computation_signature.c_str(), argument_shapes.c_str()); } @@ 
-2439,7 +2433,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( if (!ShapeUtil::Compatible(arg_shape, param_shape)) { return InvalidArgument( "Call parameter must match argument; got parameter %d shape: %s, " - "argument shape: %s", + "argument shape: %s.", i, ShapeUtil::HumanString(param_shape).c_str(), ShapeUtil::HumanString(arg_shape).c_str()); } @@ -2454,14 +2448,14 @@ static Status ValidateGatherDimensionNumbers( const GatherDimensionNumbers& dim_numbers) { if (!c_is_sorted(dim_numbers.output_window_dims())) { return InvalidArgument( - "Output window dimensions in gather op must be ascending; got: %s", + "Output window dimensions in gather op must be ascending; got: %s.", Join(dim_numbers.output_window_dims(), ", ").c_str()); } if (c_adjacent_find(dim_numbers.output_window_dims()) != dim_numbers.output_window_dims().end()) { return InvalidArgument( - "Output window dimensions in gather op must not repeat; got: %s", + "Output window dimensions in gather op must not repeat; got: %s.", Join(dim_numbers.output_window_dims(), ", ").c_str()); } @@ -2474,7 +2468,7 @@ static Status ValidateGatherDimensionNumbers( if (window_index < 0 || window_index >= output_shape_rank) { return InvalidArgument( "Window index %d in gather op is out of bounds; got %lld, but should " - "have been in [0,%lld)", + "have been in [0,%lld).", i, window_index, output_shape_rank); } } @@ -2496,7 +2490,7 @@ static Status ValidateGatherDimensionNumbers( gather_dim_to_input_dim >= input_shape.dimensions_size()) { return InvalidArgument( "Invalid gather_dims_to_operand_dims mapping; domain is [0, %d), " - "got: %d->%lld", + "got: %d->%lld.", input_shape.dimensions_size(), i, gather_dim_to_input_dim); } } @@ -2511,7 +2505,7 @@ static Status ValidateGatherDimensionNumbers( sorted_gather_dims_to_operand_dims.end()) { return InvalidArgument( "Repeated dimensions are not allowed in gather_dims_to_operand_dims; " - "got: %s", + "got: %s.", Join(dim_numbers.gather_dims_to_operand_dims(), ", 
").c_str()); } @@ -2519,7 +2513,7 @@ static Status ValidateGatherDimensionNumbers( if (elided_dim < 0 || elided_dim >= input_shape.dimensions_size()) { return InvalidArgument( "Invalid elided_window_dims set in gather op; valid range is [0, " - "%d), got: %lld", + "%d), got: %lld.", input_shape.dimensions_size(), elided_dim); } } @@ -2534,7 +2528,7 @@ static Status ValidateGatherDimensionNumbers( dim_numbers.elided_window_dims().end()) { return InvalidArgument( "Repeated dimensions not allowed in elided_window_dims in gather op; " - "got: %s", + "got: %s.", Join(dim_numbers.elided_window_dims(), ", ").c_str()); } @@ -2552,7 +2546,7 @@ static Status ValidateGatherDimensionNumbers( if (!ShapeUtil::ElementIsIntegral(gather_indices_shape)) { return InvalidArgument( - "Gather indices parameter must be an integral tensor; got %s", + "Gather indices parameter must be an integral tensor; got %s.", ShapeUtil::HumanString(gather_indices_shape).c_str()); } @@ -2586,7 +2580,7 @@ static Status ValidateGatherDimensionNumbers( if (window_bounds.size() != input_shape.dimensions_size()) { return InvalidArgument( "Gather op must have one window bound for every input dimension; got: " - "len(window_bounds)=%lu, input_shape.rank=%d", + "len(window_bounds)=%lu, input_shape.rank=%d.", window_bounds.size(), input_shape.dimensions_size()); } @@ -2596,7 +2590,7 @@ static Status ValidateGatherDimensionNumbers( return InvalidArgument( "All components of the window index in a gather op must either be a " "output window index or explicitly elided; got len(window_bounds)=%lu, " - "output_window_bounds=%s, elided_window_bounds=%s", + "output_window_bounds=%s, elided_window_bounds=%s.", window_bounds.size(), Join(gather_dim_numbers.output_window_dims(), ",").c_str(), Join(gather_dim_numbers.elided_window_dims(), ",").c_str()); @@ -2609,7 +2603,7 @@ static Status ValidateGatherDimensionNumbers( return InvalidArgument( "Window bound at index %d in gather op is out of range, must be " "within " - 
"[0, %lld), got %lld", + "[0, %lld), got %lld.", i, corresponding_input_bound + 1, window_bound); } } @@ -2618,7 +2612,7 @@ static Status ValidateGatherDimensionNumbers( if (window_bounds[gather_dim_numbers.elided_window_dims(i)] != 1) { return InvalidArgument( "Gather op can only elide window indices with bound 1, but bound is " - "%lld for index %lld at position %d", + "%lld for index %lld at position %d.", window_bounds[gather_dim_numbers.elided_window_dims(i)], gather_dim_numbers.elided_window_dims(i), i); } diff --git a/tensorflow/compiler/xla/service/shape_inference_test.cc b/tensorflow/compiler/xla/service/shape_inference_test.cc index 029d2b3b86..0e61994a78 100644 --- a/tensorflow/compiler/xla/service/shape_inference_test.cc +++ b/tensorflow/compiler/xla/service/shape_inference_test.cc @@ -135,7 +135,7 @@ TEST_F(ShapeInferenceTest, SelectBadShapes) { TernaryOperation::TRIOP_SELECT, pred_, matrix_64_48_, matrix_32_64_); ASSERT_FALSE(inferred_status_error1.ok()); ASSERT_THAT(inferred_status_error1.status().error_message(), - HasSubstr("operands to select must be the same shape")); + HasSubstr("Operands to select must be the same shape")); auto inferred_status_error2 = ShapeInference::InferTernaryOpShape( TernaryOperation::TRIOP_SELECT, s32_, matrix_64_48_, matrix_64_48_); @@ -340,7 +340,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSourceShape) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("source shape does not match")); + HasSubstr("Source shape does not match")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape1) { @@ -351,7 +351,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape1) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function must take 2 
parameters")); + HasSubstr("Select function must take 2 parameters")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape2) { @@ -362,7 +362,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape2) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function must have rank-0 PRED")); + HasSubstr("Select function must have rank-0 PRED")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape3) { @@ -373,7 +373,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape3) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function's first parameter")); + HasSubstr("Select function's first parameter")); } TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape4) { @@ -384,7 +384,7 @@ TEST_F(SelectAndScatterShapeInferenceTest, SelectAndScatterWrongSelectShape4) { init_value_shape_, scatter_program_shape_); ASSERT_FALSE(inferred_status_fail.ok()); ASSERT_THAT(inferred_status_fail.status().error_message(), - HasSubstr("select function's second parameter")); + HasSubstr("Select function's second parameter")); } TEST_F(ShapeInferenceTest, Convolve) { @@ -906,7 +906,7 @@ TEST_F(ShapeInferenceTest, ScalarDotVector) { ShapeInference::InferDotOpShape(f32_, vector_32_, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("dot only supports rank")); + HasSubstr("Dot only supports rank")); } // 3D 2D: error @@ -918,7 +918,7 @@ TEST_F(ShapeInferenceTest, DotWithRankHigherThanTwo) { ShapeUtil::MakeShape(F32, {32, 32, 32}), matrix_32_64_, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("batch and contracting dimension 
number mismatch")); + HasSubstr("Batch and contracting dimension number mismatch")); } // vector vector -> scalar @@ -1024,7 +1024,7 @@ TEST_F(ShapeInferenceTest, DotWithTwoContractingDimsFails) { ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("must specify one contracting dimension for both " + HasSubstr("Must specify one contracting dimension for both " "lhs and rhs")); } @@ -1044,7 +1044,7 @@ TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimSizesFails) { ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("batch dimension numbers and sizes must match")); + HasSubstr("Batch dimension numbers and sizes must match")); } // BatchMatMul with different batch dimension numbers fails. @@ -1063,7 +1063,7 @@ TEST_F(ShapeInferenceTest, DotWithMisatchedBatchDimNumbersFails) { ShapeInference::InferDotOpShape(lhs_shape, rhs_shape, dot_dnums); ASSERT_FALSE(inferred_status.ok()); ASSERT_THAT(inferred_status.status().error_message(), - HasSubstr("batch dimension numbers must precede non-batch")); + HasSubstr("Batch dimension numbers must precede non-batch")); } // BatchMatMul with out-of-range dimension numbers fails. 
@@ -1166,42 +1166,42 @@ TEST_F(ShapeInferenceTest, BinOpBroadcastBadDimension) { BinaryOperation::BINOP_ADD, tensor, vec8, {}); ASSERT_FALSE(inferred_status_error1.ok()); ASSERT_THAT(inferred_status_error1.status().error_message(), - HasSubstr("automatic")); + HasSubstr("Automatic")); // broadcast_dimension out of bounds for tensor's rank auto inferred_status_error2 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, vec8, {3}); ASSERT_FALSE(inferred_status_error2.ok()); ASSERT_THAT(inferred_status_error2.status().error_message(), - ContainsRegex("broadcast dimension number .* too large")); + ContainsRegex("Broadcast dimension number .* too large")); // broadcast_dimension doesn't match corresponding dimension auto inferred_status_error3 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, vec8, {0}); ASSERT_FALSE(inferred_status_error3.ok()); ASSERT_THAT(inferred_status_error3.status().error_message(), - HasSubstr("broadcast dimension 0 mismatch")); + HasSubstr("Broadcast dimension 0 mismatch")); // broadcast_dimensions list too long auto inferred_status_error4 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, matrix8_4, {0, 1, 2}); ASSERT_FALSE(inferred_status_error4.ok()); ASSERT_THAT(inferred_status_error4.status().error_message(), - HasSubstr("size of broadcast_dimensions has to match")); + HasSubstr("broadcast_dimensions has to match")); // there's a dimension above the rank of the tensor auto inferred_status_error5 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, matrix8_4, {3, 0}); ASSERT_FALSE(inferred_status_error5.ok()); ASSERT_THAT(inferred_status_error5.status().error_message(), - ContainsRegex("broadcast dimension number .* too large")); + ContainsRegex("dimension number .* too large")); // broadcasting dimensions don't match in this order auto inferred_status_error6 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor, matrix8_4, {2, 1}); 
ASSERT_FALSE(inferred_status_error6.ok()); ASSERT_THAT(inferred_status_error6.status().error_message(), - HasSubstr("broadcast dimension 0 mismatch")); + HasSubstr("dimension 0 mismatch")); // The following two tests make sure that broadcasting dimensions are listed // in a proper (strictly increasing) order, even if the lower-rank array @@ -1210,13 +1210,13 @@ TEST_F(ShapeInferenceTest, BinOpBroadcastBadDimension) { BinaryOperation::BINOP_ADD, tensor8_8_8, matrix8_8, {0, 0}); ASSERT_FALSE(inferred_status_error7.ok()); ASSERT_THAT(inferred_status_error7.status().error_message(), - HasSubstr("broadcast dimensions order is wrong")); + HasSubstr("dimensions order is wrong")); auto inferred_status_error8 = ShapeInference::InferBinaryOpShape( BinaryOperation::BINOP_ADD, tensor8_8_8, matrix8_8, {1, 0}); ASSERT_FALSE(inferred_status_error8.ok()); ASSERT_THAT(inferred_status_error8.status().error_message(), - HasSubstr("broadcast dimensions order is wrong")); + HasSubstr("dimensions order is wrong")); } // Tests for the while instruction with proper shapes. 
@@ -1242,7 +1242,7 @@ TEST_F(ShapeInferenceTest, WhileWithBadShapes) { ShapeInference::InferWhileShape(bad_shape_1, body, result_shape); ASSERT_FALSE(inferred_status_error1.ok()); ASSERT_THAT(inferred_status_error1.status().error_message(), - HasSubstr("condition must take 1 arguments")); + HasSubstr("Condition must take 1 arguments")); auto bad_shape_2 = ShapeUtil::MakeProgramShape({s32_, result_shape}, result_shape); @@ -1250,14 +1250,14 @@ TEST_F(ShapeInferenceTest, WhileWithBadShapes) { ShapeInference::InferWhileShape(cond, bad_shape_2, result_shape); ASSERT_FALSE(inferred_status_error2.ok()); ASSERT_THAT(inferred_status_error2.status().error_message(), - HasSubstr("body must take 1 arguments")); + HasSubstr("Body must take 1 arguments")); auto bad_shape_3 = ShapeUtil::MakeProgramShape({result_shape}, s32_); auto inferred_status_error3 = ShapeInference::InferWhileShape(bad_shape_3, body, result_shape); ASSERT_FALSE(inferred_status_error3.ok()); ASSERT_THAT(inferred_status_error3.status().error_message(), - HasSubstr("condition must return a boolean")); + HasSubstr("Condition must return a boolean")); auto bad_shape_4 = ShapeUtil::MakeProgramShape({result_shape}, vector_32_); auto inferred_status_error4 = @@ -1301,13 +1301,13 @@ TEST_F(ShapeInferenceTest, ConcatenateWithBadShapes) { ShapeInference::InferConcatOpShape({&vector_32_}, /*dimension=*/-1); ASSERT_FALSE(inferred_status_error2.ok()); ASSERT_THAT(inferred_status_error2.status().error_message(), - HasSubstr("dimension to concatenate along out of bounds: -1")); + HasSubstr("dimension out of bounds: -1")); auto inferred_status_error3 = ShapeInference::InferConcatOpShape({&vector_32_}, /*dimension=*/1); ASSERT_FALSE(inferred_status_error3.ok()); ASSERT_THAT(inferred_status_error3.status().error_message(), - HasSubstr("dimension to concatenate along out of bounds: 1")); + HasSubstr("dimension out of bounds: 1")); Shape tuple = ShapeUtil::MakeTupleShape({vector_32_}); auto inferred_status_error4 = 
ShapeInference::InferConcatOpShape( @@ -1315,21 +1315,20 @@ TEST_F(ShapeInferenceTest, ConcatenateWithBadShapes) { ASSERT_FALSE(inferred_status_error4.ok()); ASSERT_THAT( inferred_status_error4.status().error_message(), - HasSubstr("Expected non-tuple argument for operand of concatenation.")); + HasSubstr("Expected non-tuple argument for operand of concatenation")); const Shape vector_s32 = ShapeUtil::MakeShape(S32, {32}); auto inferred_status_error5 = ShapeInference::InferConcatOpShape( {&vector_32_, &vector_s32}, /*dimension=*/0); ASSERT_FALSE(inferred_status_error5.ok()); - ASSERT_THAT( - inferred_status_error5.status().error_message(), - HasSubstr("cannot concatenate arrays with different element types")); + ASSERT_THAT(inferred_status_error5.status().error_message(), + HasSubstr("concatenate arrays with different element types")); auto inferred_status_error6 = ShapeInference::InferConcatOpShape( {&matrix_32_48_, &matrix_32_64_}, /*dimension=*/0); ASSERT_FALSE(inferred_status_error6.ok()); ASSERT_THAT(inferred_status_error6.status().error_message(), - HasSubstr("cannot concatenate arrays that differ in " + HasSubstr("concatenate arrays that differ in " "dimensions other than the one being " "concatenated")); } @@ -1467,7 +1466,7 @@ TEST_F(ShapeInferenceTest, Conditional) { ShapeUtil::MakeProgramShape({vector_64_}, f32_)); EXPECT_FALSE(inferred_status_error0.ok()); EXPECT_THAT(inferred_status_error0.status().error_message(), - HasSubstr("predicate must be a boolean")); + HasSubstr("Predicate must be a boolean")); auto inferred_status_error1 = ShapeInference::InferConditionalShape( pred_, ShapeUtil::MakeTupleShape({f32_, vector_32_}), matrix_32_48_, diff --git a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc index 03f5e08315..97095f1cc4 100644 --- a/tensorflow/compiler/xla/tests/broadcast_simple_test.cc +++ b/tensorflow/compiler/xla/tests/broadcast_simple_test.cc @@ -662,7 +662,7 @@ 
XLA_TEST_F(BroadcastSimpleTest, InvalidBinaryAndDegenerateBroadcasting) { auto result_status = Execute(&b, {}); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("broadcast dimension 0 mismatch")); + HasSubstr("dimension 0 mismatch")); } XLA_TEST_F(BroadcastSimpleTest, InvalidInDimensionBroadcasting) { @@ -675,7 +675,7 @@ XLA_TEST_F(BroadcastSimpleTest, InvalidInDimensionBroadcasting) { auto result_status = Execute(&b, {}); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("binary op BINOP_ADD with incompatible shapes")); + HasSubstr("op BINOP_ADD with incompatible shapes")); } XLA_TEST_F(BroadcastSimpleTest, InvalidDegenerateBroadcasting) { @@ -688,7 +688,7 @@ XLA_TEST_F(BroadcastSimpleTest, InvalidDegenerateBroadcasting) { auto result_status = Execute(&b, {}); EXPECT_FALSE(result_status.ok()); EXPECT_THAT(result_status.status().error_message(), - HasSubstr("binary op BINOP_ADD with incompatible shapes")); + HasSubstr("op BINOP_ADD with incompatible shapes")); } } // namespace diff --git a/tensorflow/compiler/xla/tests/concat_test.cc b/tensorflow/compiler/xla/tests/concat_test.cc index 1bcad5a3f3..fb0e9c724a 100644 --- a/tensorflow/compiler/xla/tests/concat_test.cc +++ b/tensorflow/compiler/xla/tests/concat_test.cc @@ -75,7 +75,7 @@ XLA_TEST_F(ConcatTest, CannotConcatR0WithR0) { StatusOr computation_status = builder.Build(); ASSERT_FALSE(computation_status.ok()); EXPECT_THAT(computation_status.status().ToString(), - HasSubstr("dimension to concatenate along out of bounds: 0")); + HasSubstr("out of bounds: 0")); } XLA_TEST_F(ConcatTest, Concat_R1_L0_With_R1_L0) { diff --git a/tensorflow/compiler/xla/tests/map_test.cc b/tensorflow/compiler/xla/tests/map_test.cc index 2b0f7e6e80..0cd812fd1b 100644 --- a/tensorflow/compiler/xla/tests/map_test.cc +++ b/tensorflow/compiler/xla/tests/map_test.cc @@ -531,7 +531,7 @@ TEST_F(MapTest, MapOperantionWithBuildError) { 
ASSERT_TRUE(!computation_status.ok()); EXPECT_THAT( computation_status.status().ToString(), - ::testing::HasSubstr("error from: ErrorAdd: binary op BINOP_ADD with " + ::testing::HasSubstr("error from: ErrorAdd: Binary op BINOP_ADD with " "different element types: f32[] and u16[]")); } -- GitLab From 1a15d58c8204b145c545b27efdd0a1ca069cacdc Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 2 Mar 2018 14:00:07 -0800 Subject: [PATCH 366/884] [TF:XLA] Bump open source llvm revision to r326571 PiperOrigin-RevId: 187665541 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index ea8f42ab8d..1af246f9dc 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/9a6e78e4adc959d2825f7af35b4ed0e09394d840.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/193aea3782308c66a7a12f1c37520a1b4ff1dbd8.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/193aea3782308c66a7a12f1c37520a1b4ff1dbd8.tar.gz", ], - sha256 = "7990b4d446de971e0acc481942920452a182d2f87a8164bdc117fd9b9ace591d", - strip_prefix = "llvm-9a6e78e4adc959d2825f7af35b4ed0e09394d840", + sha256 = "2eda56deafb8da85bc23aa52fa1fb8c39da6a58c865e5216d0a0787bd09a09ed", + strip_prefix = "llvm-193aea3782308c66a7a12f1c37520a1b4ff1dbd8", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From d3ece65e340ca7cd00874c460cf9f3e631346921 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 14:33:39 -0800 Subject: [PATCH 367/884] Checkpointable: Have MultiRNNCell add its dependent cells as dependencies PiperOrigin-RevId: 187670464 --- .../contrib/rnn/python/kernel_tests/core_rnn_cell_test.py 
| 2 ++ tensorflow/python/ops/rnn_cell_impl.py | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py index 0e62b315b6..d41fc0b3ac 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_cell_test.py @@ -187,6 +187,8 @@ class RNNCellTest(test.TestCase): ], state_is_tuple=False) self.assertEqual(cell.dtype, None) + self.assertEqual("cell-0", cell._checkpoint_dependencies[0].name) + self.assertEqual("cell-1", cell._checkpoint_dependencies[1].name) g, out_m = cell(x, m) # Layer infers the input type. self.assertEqual(cell.dtype, dtype.name) diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index 923348ea44..bd7c731210 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -1187,6 +1187,10 @@ class MultiRNNCell(RNNCell): "cells must be a list or tuple, but saw: %s." % cells) self._cells = cells + for cell_number, cell in enumerate(self._cells): + # Add Checkpointable dependencies on these cells so their variables get + # saved with this object when using object-based saving. 
+ self._track_checkpointable(cell, name="cell-%d" % (cell_number,)) self._state_is_tuple = state_is_tuple if not state_is_tuple: if any(nest.is_sequence(c.state_size) for c in self._cells): -- GitLab From 2ac550f389f9641d689bac7b31554bdb9d59a18d Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 2 Mar 2018 14:38:08 -0800 Subject: [PATCH 368/884] Change consts --- .../contrib/tensorrt/convert/convert_graph.cc | 2 - .../contrib/tensorrt/convert/convert_nodes.cc | 112 +++++++++--------- .../tensorrt/resources/trt_int8_calibrator.cc | 15 +-- 3 files changed, 61 insertions(+), 68 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 44e9dda7b9..36145452be 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -71,8 +71,6 @@ bool IsTensorRTCandidate(const tensorflow::NodeDef& node_def) { "DepthwiseConv2dNative", "FusedBatchNorm", "FusedBatchNormV2", - //, "MatMul", - //"Reshape" // TODO(ben,jie): ... }; // LINT.ThenChange(//tensorflow/contrib/tensorrt/convert/convert_nodes.h) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 473115e4f5..d5652977be 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -319,7 +319,7 @@ void Reorder4(nvinfer1::DimsNCHW shape, const T* idata, } template -void Reorder2(nvinfer1::DimsHW shape, T const* idata, nvinfer1::DimsHW istrides, +void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, T* odata, nvinfer1::DimsHW ostrides) { for (int h = 0; h < shape.h(); ++h) { for (int w = 0; w < shape.w(); ++w) { @@ -330,7 +330,7 @@ void Reorder2(nvinfer1::DimsHW shape, T const* idata, nvinfer1::DimsHW istrides, } // TODO(jie): fail to tensorflow!! 
-void ReorderCKtoKC(TRT_ShapedWeights const& iweights, +void ReorderCKtoKC(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights) { int c = iweights.shape_.d[0]; int k = iweights.shape_.d[1]; @@ -360,20 +360,20 @@ void ReorderCKtoKC(TRT_ShapedWeights const& iweights, } void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, - TRT_ShapedWeights* oweights, int nbGroups) { + TRT_ShapedWeights* oweights, int num_groups) { CHECK_EQ(iweights.type_, oweights->type_); CHECK_EQ(iweights.size_bytes(), oweights->size_bytes()); int r = iweights.shape_.d[0]; int s = iweights.shape_.d[1]; // TRT requires GKcRS, while TF depthwise has RSCK // where c=1, C=G - VLOG(2) << "nbGroups: " << nbGroups; - int c = iweights.shape_.d[2] / nbGroups; + VLOG(2) << "num_groups: " << num_groups; + int c = iweights.shape_.d[2] / num_groups; VLOG(2) << "c" << iweights.shape_.d[2] << " then " << c; - int k = iweights.shape_.d[3] * nbGroups; + int k = iweights.shape_.d[3] * num_groups; VLOG(2) << "k" << iweights.shape_.d[3] << " then " << k; - oweights->shape_.d[0] = k / nbGroups; - oweights->shape_.d[1] = c * nbGroups; + oweights->shape_.d[0] = k / num_groups; + oweights->shape_.d[1] = c * num_groups; oweights->shape_.d[2] = r; oweights->shape_.d[3] = s; nvinfer1::DimsNCHW istrides = {1, k, s * k * c, c * k}; @@ -419,7 +419,7 @@ class Converter; using OpConverter = std::function const&, + const std::vector&, std::vector*)>; class Converter { @@ -764,7 +764,7 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l, tensorflow::Status ConstantFoldUnary( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { TRT_ShapedWeights weights_input = inputs.at(0).weights(); @@ -800,7 +800,7 @@ tensorflow::Status ConstantFoldUnary( // approach for constant folding tensorflow::Status ConstantFoldBinary( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& 
inputs, std::vector* outputs) { TRT_ShapedWeights weights_input_l = inputs.at(0).weights(); TRT_ShapedWeights weights_input_r = inputs.at(1).weights(); @@ -1000,12 +1000,12 @@ tensorflow::Status BinaryTensorOpWeight( enum class ConvolutionType { DEFAULT, DEPTHWISE_CONV }; tensorflow::Status ConvertConv2DHelper( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, std::vector* outputs, int group // group ==0 specifies depthwise conv ) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); @@ -1025,16 +1025,16 @@ tensorflow::Status ConvertConv2DHelper( // tensor after transpose (NCHW) auto tensor_dim = tensor->getDimensions(); - int nbGroups = group; - if (nbGroups == 0) // depthwise convolution - nbGroups = tensor_dim.d[0]; - VLOG(2) << "groups count: " << nbGroups; + int num_groups = group; + if (num_groups == 0) // depthwise convolution + num_groups = tensor_dim.d[0]; + VLOG(2) << "groups count: " << num_groups; TRT_ShapedWeights weights_rsck = inputs.at(1).weights(); TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_rsck); - ReorderRSCKToKCRS(weights_rsck, &weights, nbGroups); + ReorderRSCKToKCRS(weights_rsck, &weights, num_groups); TRT_ShapedWeights biases(weights.type_); - int noutput = weights.shape_.d[0] * nbGroups; + int noutput = weights.shape_.d[0] * num_groups; nvinfer1::DimsHW kernel_size; kernel_size.h() = weights.shape_.d[2]; kernel_size.w() = weights.shape_.d[3]; @@ -1087,7 +1087,7 @@ tensorflow::Status ConvertConv2DHelper( layer->setStride(stride); layer->setPadding({padding[0].first, padding[1].first}); layer->setName(node_def.name().c_str()); - layer->setNbGroups(nbGroups); + layer->setNbGroups(num_groups); nvinfer1::ITensor* output_tensor = layer->getOutput(0); auto dim_after = output_tensor->getDimensions(); @@ -1105,8 +1105,8 @@ 
tensorflow::Status ConvertConv2DHelper( } tensorflow::Status ConvertConv2DHelper( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector & inputs, std::vector* outputs, ConvolutionType type) { switch (type) { case ConvolutionType::DEFAULT: @@ -1119,7 +1119,7 @@ tensorflow::Status ConvertConv2DHelper( } tensorflow::Status BinaryTensorOpTensor( - Converter& ctx, tensorflow::NodeDef const& node_def, + Converter& ctx, const tensorflow::NodeDef& node_def, const nvinfer1::ITensor* tensor_l, const nvinfer1::ITensor* tensor_r, std::vector* outputs) { static const std::unordered_map ops{ @@ -1158,8 +1158,8 @@ tensorflow::Status BinaryTensorOpTensor( } tensorflow::Status ConvertPlaceholder( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, std::vector* outputs) { VLOG(2) << "Placeholder should have been replace already"; return tensorflow::errors::Unimplemented("cannot convert Placeholder op"); @@ -1181,16 +1181,16 @@ tensorflow::Status ConvertPlaceholder( } tensorflow::Status ConvertConv2D(Converter& ctx, - tensorflow::NodeDef const& node_def, - std::vector const& inputs, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, std::vector* outputs) { return ConvertConv2DHelper(ctx, node_def, inputs, outputs, ConvolutionType::DEFAULT); } tensorflow::Status ConvertConv2DDepthwise( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, std::vector* outputs) { return ConvertConv2DHelper(ctx, node_def, inputs, outputs, ConvolutionType::DEPTHWISE_CONV); @@ -1198,9 +1198,9 @@ tensorflow::Status ConvertConv2DDepthwise( tensorflow::Status ConvertPool(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const 
std::vector& inputs, std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); TFAttrs attrs(node_def); int h_index = 2; @@ -1282,9 +1282,9 @@ tensorflow::Status ConvertPool(Converter& ctx, tensorflow::Status ConvertActivation( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); nvinfer1::IActivationLayer* layer = ctx.network()->addActivation( *const_cast(tensor), nvinfer1::ActivationType::kRELU); nvinfer1::ITensor* output_tensor = layer->getOutput(0); @@ -1294,14 +1294,14 @@ tensorflow::Status ConvertActivation( tensorflow::Status ConvertScale(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) return tensorflow::errors::Unimplemented( "Only supports tensor op weight for now, at " + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); // TODO(jie): handle NHWC/NCHW transpose; TRT_ShapedWeights weights = inputs.at(1).weights(); @@ -1352,7 +1352,7 @@ tensorflow::Status ConvertScale(Converter& ctx, tensorflow::Status ConvertConst(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { const auto& weights_tensor = node_def.attr().at("value").tensor(); @@ -1540,7 +1540,7 @@ tensorflow::Status ConvertConst(Converter& ctx, tensorflow::Status ConvertIdentity( Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { 
outputs->push_back(inputs.at(0)); return tensorflow::Status::OK(); @@ -1548,7 +1548,7 @@ tensorflow::Status ConvertIdentity( tensorflow::Status ConvertBinary(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2) return tensorflow::errors::FailedPrecondition( @@ -1575,7 +1575,7 @@ tensorflow::Status ConvertBinary(Converter& ctx, tensorflow::Status ConvertUnary(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 1) return tensorflow::errors::FailedPrecondition( @@ -1593,7 +1593,7 @@ tensorflow::Status ConvertUnary(Converter& ctx, tensorflow::Status ConvertReduce(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1601,7 +1601,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1688,7 +1688,7 @@ tensorflow::Status ConvertReduce(Converter& ctx, tensorflow::Status ConvertPad(Converter& ctx, const tensorflow::NodeDef& node_def, - std::vector const& inputs, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1696,7 +1696,7 @@ tensorflow::Status ConvertPad(Converter& ctx, "Input expects tensor and weights, at" + node_def.name()); // Implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* 
tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // Restore implicit batch dimension int nb_dims = dims.nbDims + 1; @@ -1873,8 +1873,8 @@ tensorflow::Status ConvertConcat(Converter& ctx, } tensorflow::Status ConvertFusedBatchNorm( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, std::vector* outputs) { TFAttrs attrs(node_def); float epsilon = attrs.get("epsilon"); @@ -1959,10 +1959,10 @@ tensorflow::Status ConvertFusedBatchNorm( } tensorflow::Status ConvertMatMul(Converter& ctx, - tensorflow::NodeDef const& node_def, - std::vector const& inputs, + const tensorflow::NodeDef& node_def, + const std::vector& inputs, std::vector* outputs) { - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor * tensor = inputs.at(0).tensor(); // TODO(jie): transpose! TFAttrs attrs(node_def); @@ -1987,8 +1987,8 @@ tensorflow::Status ConvertMatMul(Converter& ctx, } tensorflow::Status ConvertReshape( - Converter& ctx, tensorflow::NodeDef const& node_def, - std::vector const& inputs, + Converter& ctx, const tensorflow::NodeDef& node_def, + const std::vector& inputs, std::vector* outputs) { if (inputs.size() != 2 || !inputs.at(0).is_tensor() || !inputs.at(1).is_weights()) @@ -1996,7 +1996,7 @@ tensorflow::Status ConvertReshape( "Input expects tensor and weights, at" + node_def.name()); // implement tensor binaryOp weight [channel wise] for now; - nvinfer1::ITensor const* tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); auto dims = tensor->getDimensions(); // restore implicit batch dimension @@ -2282,7 +2282,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { VLOG(2) << "BUILDING 5"; std::vector input_names; std::vector input_dtypes; - for (std::pair const& input : s.input_inds) { + for (const std::pair& input : s.input_inds) { VLOG(2) << "parsing 
input!!!!!"; int node_id = input.first; int output_idx = input.second; @@ -2346,7 +2346,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { VLOG(2) << "finished sorting"; for (const tensorflow::Node* node : order) { - tensorflow::NodeDef const& node_def = node->def(); + const tensorflow::NodeDef& node_def = node->def(); VLOG(2) << "converting node: " << node_def.name() << " , " << node_def.op(); TF_RETURN_IF_ERROR(converter.convert_node(node_def)); } @@ -2357,7 +2357,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { std::vector output_names; std::vector output_dtypes; int trt_engine_op_output_idx = 0; - for (std::pair const& output : s.output_inds) { + for (const std::pair& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); @@ -2589,7 +2589,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( std::vector output_names; std::vector output_dtypes; int trt_engine_op_output_idx = 0; - for (std::pair const& output : s.output_inds) { + for (const std::pair& output : s.output_inds) { int node_id = output.first; int output_idx = output.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index f15772058f..84ff115193 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -43,16 +43,11 @@ TRTInt8Calibrator::TRTInt8Calibrator( bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, const cudaStream_t stream) { - // TODO(aaroey): make sure that in future PR: - // 1. the mutex_lock is outside of the loop - // 2. wait() is used instead of wait_for() - // 3. done_ is to be protected by the mutex - // 4. 
the first batch is not missed if (done_) return false; - tensorflow::mutex_lock l(cond_mtx_); + tensorflow::mutex_lock lock(cond_mtx_); while ((calib_running_ || batch_is_set_) && !done_) { // wait while calibration is running - cond_.wait(l); + cond_.wait(lock); if (done_) return false; } CHECK(!calib_running_ && !batch_is_set_); @@ -83,11 +78,11 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, int num_bindings) { - tensorflow::mutex_lock l(cond_mtx_); + tensorflow::mutex_lock lock(cond_mtx_); calib_running_ = false; cond_.notify_all(); while ((!batch_is_set_ && !done_)) { // wait until new batch arrives - cond_.wait(l); + cond_.wait(lock); } if (done_) { return false; @@ -111,7 +106,7 @@ const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { return nullptr; } void TRTInt8Calibrator::setDone() { - tensorflow::mutex_lock l(cond_mtx_); + tensorflow::mutex_lock lock(cond_mtx_); done_ = true; cond_.notify_all(); } -- GitLab From 16f74956eb75511f1bf47a62a998ed9a434a8249 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 14:55:26 -0800 Subject: [PATCH 369/884] Add a small helper which is useful for quicker debugging. 
PiperOrigin-RevId: 187673654 --- tensorflow/contrib/py2tf/pyct/transformer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/transformer.py b/tensorflow/contrib/py2tf/pyct/transformer.py index 877d52af01..57016bb4ce 100644 --- a/tensorflow/contrib/py2tf/pyct/transformer.py +++ b/tensorflow/contrib/py2tf/pyct/transformer.py @@ -44,6 +44,12 @@ class Base(gast.NodeTransformer): self._col_offset = 0 self.context = context + def debug_print(self, node): + """Helper method useful for debugging.""" + if __debug__: + print(pretty_printer.fmt(node)) + return node + def visit(self, node): source_code = self.context.source_code source_file = self.context.source_file -- GitLab From 809c84dc3a6252efab2b366f167135ed7826dee7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 15:06:13 -0800 Subject: [PATCH 370/884] Begin a library for statistical testing of samplers. So far, it consists of one-sample and two-sample equality-of-means assertions, and power analysis and experimental design for those, because that's what was needed for testing the LKJ distribution. If this API shape proves viable, more to come. PiperOrigin-RevId: 187675337 --- tensorflow/contrib/distributions/BUILD | 13 + .../kernel_tests/statistical_testing_test.py | 166 ++++ .../python/ops/statistical_testing.py | 728 ++++++++++++++++++ 3 files changed, 907 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/statistical_testing.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index ed79ef70f8..1b4877c57f 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -474,6 +474,19 @@ cuda_py_test( tags = ["nomsan"], # disable to avoid false positives from scipy. 
) +cuda_py_test( + name = "statistical_testing_test", + size = "medium", + srcs = [ + "python/kernel_tests/statistical_testing_test.py", + ], + additional_deps = [ + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:client_testlib", + ], +) + cuda_py_test( name = "vector_sinh_arcsinh_diag_test", size = "medium", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py new file mode 100644 index 0000000000..3548ac1807 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/statistical_testing_test.py @@ -0,0 +1,166 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for the statistical testing library.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import statistical_testing as st +from tensorflow.python.framework import errors +from tensorflow.python.ops import check_ops +from tensorflow.python.platform import test + + +class StatisticalTestingTest(test.TestCase): + + def test_dkwm_design_mean_one_sample_soundness(self): + numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] + with self.test_session() as sess: + for ff in rates: + for fp in rates: + sufficient_n = st.min_num_samples_for_dkwm_mean_test( + numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm( + sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp) + sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + + def test_dkwm_design_mean_two_sample_soundness(self): + numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10] + rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.] 
+ with self.test_session() as sess: + for ff in rates: + for fp in rates: + (sufficient_n1, + sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test( + numbers, 0., 1., 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample + detectable_d = d_fn( + sufficient_n1, 0., 1., sufficient_n2, 0., 1., + false_fail_rate=ff, false_pass_rate=fp) + sess.run(check_ops.assert_less_equal(detectable_d, numbers)) + + def test_true_mean_confidence_interval_by_dkwm_one_sample(self): + rng = np.random.RandomState(seed=0) + + num_samples = 5000 + # 5000 samples is chosen to be enough to find discrepancies of + # size 0.1 or more with assurance 1e-6, as confirmed here: + with self.test_session() as sess: + d = st.min_discrepancy_of_true_means_detectable_by_dkwm( + num_samples, 0., 1., false_fail_rate=1e-6, false_pass_rate=1e-6) + d = sess.run(d) + self.assertLess(d, 0.1) + + # Test that the confidence interval computed for the mean includes + # 0.5 and excludes 0.4 and 0.6. + with self.test_session() as sess: + samples = rng.uniform(size=num_samples).astype(np.float32) + (low, high) = st.true_mean_confidence_interval_by_dkwm( + samples, 0., 1., error_rate=1e-6) + low, high = sess.run([low, high]) + self.assertGreater(low, 0.4) + self.assertLess(low, 0.5) + self.assertGreater(high, 0.5) + self.assertLess(high, 0.6) + + def test_dkwm_mean_one_sample_assertion(self): + rng = np.random.RandomState(seed=0) + num_samples = 5000 + + # Test that the test assertion agrees that the mean of the standard + # uniform distribution is 0.5. + samples = rng.uniform(size=num_samples).astype(np.float32) + with self.test_session() as sess: + sess.run(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.5, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not 0.4. 
+ with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.4, false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is not 0.6. + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm( + samples, 0., 1., 0.6, false_fail_rate=1e-6)) + + def test_dkwm_mean_two_sample_assertion(self): + rng = np.random.RandomState(seed=0) + num_samples = 15000 + + # 15000 samples is chosen to be enough to find discrepancies of + # size 0.1 or more with assurance 1e-6, as confirmed here: + with self.test_session() as sess: + d = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( + num_samples, 0., 1., num_samples, 0., 1., + false_fail_rate=1e-6, false_pass_rate=1e-6) + d = sess.run(d) + self.assertLess(d, 0.1) + + # Test that the test assertion agrees that the standard + # uniform distribution has the same mean as itself. + samples1 = rng.uniform(size=num_samples).astype(np.float32) + samples2 = rng.uniform(size=num_samples).astype(np.float32) + with self.test_session() as sess: + sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., samples2, 0., 1., false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is different from the mean of beta(2, 1). + beta_high_samples = rng.beta(2, 1, size=num_samples).astype(np.float32) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., + beta_high_samples, 0., 1., + false_fail_rate=1e-6)) + + # Test that the test assertion confirms that the mean of the + # standard uniform distribution is different from the mean of beta(1, 2). 
+ beta_low_samples = rng.beta(1, 2, size=num_samples).astype(np.float32) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.assert_true_mean_equal_by_dkwm_two_sample( + samples1, 0., 1., + beta_low_samples, 0., 1., + false_fail_rate=1e-6)) + + def test_dkwm_argument_validity_checking(self): + rng = np.random.RandomState(seed=0) + samples = rng.uniform(size=5000).astype(np.float32) + + # Test that the test library complains if the given samples fall + # outside the purported bounds. + with self.test_session() as sess: + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.true_mean_confidence_interval_by_dkwm( + samples, 0., 0.5, error_rate=0.5)) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(st.true_mean_confidence_interval_by_dkwm( + samples, 0.5, 1., error_rate=0.5)) + + # But doesn't complain if they don't. + op = st.true_mean_confidence_interval_by_dkwm( + samples, 0., 1., error_rate=0.5) + _ = sess.run(op) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/statistical_testing.py b/tensorflow/contrib/distributions/python/ops/statistical_testing.py new file mode 100644 index 0000000000..d66c34cc1a --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/statistical_testing.py @@ -0,0 +1,728 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Statistical test assertions calibrated for their error rates.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import itertools + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import gen_math_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops + +__all__ = [ + "true_mean_confidence_interval_by_dkwm", + "assert_true_mean_equal_by_dkwm", + "min_discrepancy_of_true_means_detectable_by_dkwm", + "min_num_samples_for_dkwm_mean_test", + "assert_true_mean_equal_by_dkwm_two_sample", + "min_discrepancy_of_true_means_detectable_by_dkwm_two_sample", + "min_num_samples_for_dkwm_mean_two_sample_test", +] + + +def _batch_sort_vector(x, ascending=True, name=None): + with ops.name_scope(name, "sort_each_row", [x]): + x = ops.convert_to_tensor(x, name="x") + n = array_ops.shape(x)[-1] + if ascending: + y, _ = nn_ops.top_k(-x, k=n, sorted=True) + y = -y + else: + y, _ = nn_ops.top_k(x, k=n, sorted=True) + y.set_shape(x.shape) + return y + + +def _do_maximum_mean(samples, envelope, high, name=None): + """Common code between maximum_mean and minimum_mean.""" + with ops.name_scope(name, "do_maximum_mean", [samples, envelope, high]): + n = array_ops.rank(samples) + # Move the batch dimension of `samples` to the rightmost position, + # where the _batch_sort_vector function wants it. + perm = array_ops.concat([math_ops.range(1, n), [0]], axis=0) + samples = array_ops.transpose(samples, perm) + + samples = _batch_sort_vector(samples) + batch_shape = array_ops.shape(samples)[:-1] + n = array_ops.shape(samples)[-1] + step = 1. 
/ math_ops.cast(n, dtype=samples.dtype.base_dtype) + + def _loop_body(iter_, total, to_skip): + total = array_ops.where( + step <= to_skip, + total, + array_ops.where( + to_skip > 0., + total + (step - to_skip) * samples[..., iter_], + total + step * samples[..., iter_])) + to_skip = array_ops.where(step <= to_skip, to_skip - step, 0.) + return [iter_ + 1, total, to_skip] + + _, total, _ = control_flow_ops.while_loop( + cond=lambda iter_, *args: iter_ < n, + body=_loop_body, + loop_vars=[ + 0, + array_ops.zeros(batch_shape, dtype=samples.dtype.base_dtype), + envelope, # to_skip + ]) + + return total + envelope * high + + +def _maximum_mean(samples, envelope, high, name=None): + """Returns a stochastic upper bound on the mean of a scalar distribution. + + The idea is that if the true CDF is within an `eps`-envelope of the + empirical CDF of the samples, and the support is bounded above, then + the mean is bounded above as well. In symbols, + + ```none + sup_x(|F_n(x) - F(x)|) < eps + ``` + + The 0th dimension of `samples` is interpreted as independent and + identically distributed samples. The remaining dimensions are + broadcast together with `envelope` and `high`, and operated on + separately. + + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `envelope` and `high`. + envelope: Floating-point tensor of sizes of admissible CDF + envelopes (i.e., the `eps` above). + high: Floating-point tensor of upper bounds on the distributions' + supports. + name: A name for this operation (optional). + + Returns: + bound: Floating-point tensor of upper bounds on the true means. + + Raises: + InvalidArgumentError: If some `sample` is found to be larger than + the corresponding `high`. 
+ """ + with ops.name_scope(name, "maximum_mean", [samples, envelope, high]): + samples = ops.convert_to_tensor(samples, name="samples") + envelope = ops.convert_to_tensor(envelope, name="envelope") + high = ops.convert_to_tensor(high, name="high") + + xmax = math_ops.reduce_max(samples, axis=[-1]) + msg = "Given sample maximum value exceeds expectations" + check_op = check_ops.assert_less_equal(xmax, high, message=msg) + with ops.control_dependencies([check_op]): + return array_ops.identity(_do_maximum_mean(samples, envelope, high)) + + +def _minimum_mean(samples, envelope, low, name=None): + """Returns a stochastic lower bound on the mean of a scalar distribution. + + The idea is that if the true CDF is within an `eps`-envelope of the + empirical CDF of the samples, and the support is bounded below, then + the mean is bounded below as well. In symbols, + + ```none + sup_x(|F_n(x) - F(x)|) < eps + ``` + + The 0th dimension of `samples` is interpreted as independent and + identically distributed samples. The remaining dimensions are + broadcast together with `envelope` and `low`, and operated on + separately. + + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `envelope` and `low`. + envelope: Floating-point tensor of sizes of admissible CDF + envelopes (i.e., the `eps` above). + low: Floating-point tensor of lower bounds on the distributions' + supports. + name: A name for this operation (optional). + + Returns: + bound: Floating-point tensor of lower bounds on the true means. + + Raises: + InvalidArgumentError: If some `sample` is found to be smaller than + the corresponding `low`. 
+ """ + with ops.name_scope(name, "minimum_mean", [samples, envelope, low]): + samples = ops.convert_to_tensor(samples, name="samples") + envelope = ops.convert_to_tensor(envelope, name="envelope") + low = ops.convert_to_tensor(low, name="low") + + xmin = math_ops.reduce_min(samples, axis=[-1]) + msg = "Given sample minimum value falls below expectations" + check_op = check_ops.assert_greater_equal(xmin, low, message=msg) + with ops.control_dependencies([check_op]): + return - _do_maximum_mean(-samples, envelope, -low) + + +def _dkwm_cdf_envelope(n, error_rate, name=None): + """Computes the CDF envelope that the DKWM inequality licenses. + + The [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval) + gives a stochastic bound on the distance between the true cumulative + distribution function (CDF) of any distribution and its empirical + CDF. To wit, for `n` iid samples from any distribution with CDF F, + + ```none + P(sup_x |F_n(x) - F(x)| > eps) < 2exp(-2n eps^2) + ``` + + This function computes the envelope size `eps` as a function of the + number of samples `n` and the desired limit on the left-hand + probability above. + + Args: + n: Tensor of numbers of samples drawn. + error_rate: Floating-point tensor of admissible rates of mistakes. + name: A name for this operation (optional). + + Returns: + eps: Tensor of maximum distances the true CDF can be from the + empirical CDF. This scales as `O(sqrt(-log(error_rate)))` and + as `O(1 / sqrt(n))`. The shape is the broadcast of `n` and + `error_rate`. + """ + with ops.name_scope(name, "dkwm_cdf_envelope", [n, error_rate]): + n = math_ops.cast(n, dtype=error_rate.dtype) + return math_ops.sqrt(-gen_math_ops.log(error_rate / 2.) / (2. * n)) + + +def _check_shape_dominates(tensor, tensors): + """Check that broadcasting `tensor` against `tensors` does not expand it. + + Why? 
Because I want to be very sure that the samples tensor is not + accidentally enlarged by broadcasting against tensors that are + supposed to be describing the distribution(s) sampled from, lest the + sample counts end up inflated. + + Args: + tensor: A Tensor whose shape is to be protected against broadcasting. + tensors: A list of Tensors to check + + Returns: + tensor: `tf.identity(tensor)` with control dependencies attached; + be sure to use that downstream. + """ + def check(t): + target = array_ops.shape(tensor)[1:] + result = array_ops.broadcast_dynamic_shape(target, array_ops.shape(t)) + # This rank check ensures that I don't get a wrong answer from the + # _shapes_ broadcasting against each other. + gt = check_ops.assert_greater(array_ops.rank(target), array_ops.rank(t)) + eq = check_ops.assert_equal(target, result) + return gt, eq + checks = list(itertools.chain(*[check(t) for t in tensors])) + with ops.control_dependencies(checks): + return array_ops.identity(array_ops.identity(tensor)) + + +def true_mean_confidence_interval_by_dkwm( + samples, low, high, error_rate=1e-6, name=None): + """Computes a confidence interval for the mean of a scalar distribution. + + In batch mode, computes confidence intervals for all distributions + in the batch (which need not be identically distributed). + + Relies on the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + The probability (over the randomness of drawing the given samples) + that any true mean is outside the corresponding returned interval is + no more than the given `error_rate`. The size of the intervals + scales as + `O(1 / sqrt(#samples))`, as `O(high - low)`, and as `O(-log(error_rate))`. + + Note that `error_rate` is a total error rate for all the confidence + intervals in the batch. As such, if the batch is nontrivial, the + error rate is not broadcast but divided (evenly) among the batch + members. 
+ + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `low` and `high`. + low: Floating-point tensor of lower bounds on the distributions' + supports. + high: Floating-point tensor of upper bounds on the distributions' + supports. + error_rate: *Scalar* admissible total rate of mistakes. + name: A name for this operation (optional). + + Returns: + low: A floating-point tensor of stochastic lower bounds on the true means. + high: A floating-point tensor of stochastic upper bounds on the true means. + """ + with ops.name_scope( + name, "true_mean_confidence_interval_by_dkwm", + [samples, low, high, error_rate]): + samples = ops.convert_to_tensor(samples, name="samples") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + error_rate = ops.convert_to_tensor(error_rate, name="error_rate") + samples = _check_shape_dominates(samples, [low, high]) + check_ops.assert_scalar(error_rate) # Static shape + error_rate = _itemwise_error_rate(error_rate, [low, high], samples) + n = array_ops.shape(samples)[0] + envelope = _dkwm_cdf_envelope(n, error_rate) + min_mean = _minimum_mean(samples, envelope, low) + max_mean = _maximum_mean(samples, envelope, high) + return min_mean, max_mean + + +def _itemwise_error_rate( + total_error_rate, param_tensors, sample_tensor=None, name=None): + with ops.name_scope( + name, "itemwise_error_rate", + [total_error_rate, param_tensors, sample_tensor]): + result_shape = [1] + for p_tensor in param_tensors: + result_shape = array_ops.broadcast_dynamic_shape( + array_ops.shape(p_tensor), result_shape) + if sample_tensor is not None: + result_shape = array_ops.broadcast_dynamic_shape( + array_ops.shape(sample_tensor)[1:], result_shape) + num_items = math_ops.reduce_prod(result_shape) + return total_error_rate / math_ops.cast( + num_items, dtype=total_error_rate.dtype) + + 
+def assert_true_mean_equal_by_dkwm( + samples, low, high, expected, false_fail_rate=1e-6, name=None): + """Asserts the mean of the given distribution is as expected. + + More precisely, fails if there is enough evidence (using the + [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval)) + that the true mean of some distribution from which the given samples are + drawn is _not_ the given expected mean with statistical significance + `false_fail_rate` or stronger, otherwise passes. If you also want to + check that you are gathering enough evidence that a pass is not + spurious, see `min_num_samples_for_dkwm_mean_test` and + `min_discrepancy_of_true_means_detectable_by_dkwm`. + + Note that `false_fail_rate` is a total false failure rate for all + the assertions in the batch. As such, if the batch is nontrivial, + the assertion will insist on stronger evidence to fail any one member. + + Args: + samples: Floating-point tensor of samples from the distribution(s) + of interest. Entries are assumed IID across the 0th dimension. + The other dimensions must broadcast with `low` and `high`. + low: Floating-point tensor of lower bounds on the distributions' + supports. + high: Floating-point tensor of upper bounds on the distributions' + supports. + expected: Floating-point tensor of expected true means. + false_fail_rate: *Scalar* admissible total rate of mistakes. + name: A name for this operation (optional). + + Returns: + check: Op that raises `InvalidArgumentError` if any expected mean is + outside the corresponding confidence interval. 
+ """ + with ops.name_scope( + name, "assert_true_mean_equal_by_dkwm", + [samples, low, high, expected, false_fail_rate]): + samples = ops.convert_to_tensor(samples, name="samples") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + expected = ops.convert_to_tensor(expected, name="expected") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + samples = _check_shape_dominates(samples, [low, high, expected]) + min_mean, max_mean = true_mean_confidence_interval_by_dkwm( + samples, low, high, error_rate=false_fail_rate) + less_op = check_ops.assert_less( + min_mean, expected, message="Mean confidence interval too high") + with ops.control_dependencies([less_op]): + return check_ops.assert_greater( + max_mean, expected, message="Mean confidence interval too low") + + +def min_discrepancy_of_true_means_detectable_by_dkwm( + n, low, high, false_fail_rate, false_pass_rate, name=None): + """Returns the minimum mean discrepancy that a DKWM-based test can detect. + + DKWM is the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + Note that `false_fail_rate` is a total false failure rate for all + the tests in the batch. As such, if the batch is nontrivial, each + member will demand more samples. The `false_pass_rate` is also + interpreted as a total, but is treated asymmetrically: If each test + in the batch detects its corresponding discrepancy with probability + at least `1 - false_pass_rate`, then running all those tests and + failing if any one fails will jointly detect all those discrepancies + with the same `false_pass_rate`. + + Args: + n: Tensor of numbers of samples to be drawn from the distributions + of interest. + low: Floating-point tensor of lower bounds on the distributions' + supports. + high: Floating-point tensor of upper bounds on the distributions' + supports. 
+ false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + discr: Tensor of lower bounds on the distances between true + means detectable by a DKWM-based test. + + For each batch member `i`, of `K` total, drawing `n[i]` samples from + some scalar distribution supported on `[low[i], high[i]]` is enough + to detect a difference in means of size `discr[i]` or more. + Specifically, we guarantee that (a) if the true mean is the expected + mean, `assert_true_mean_equal_by_dkwm` will fail with probability at + most `false_fail_rate / K` (which amounts to `false_fail_rate` if + applied to the whole batch at once), and (b) if the true mean + differs from the expected mean by at least `discr[i]`, + `assert_true_mean_equal_by_dkwm` will pass with probability at most + `false_pass_rate`. + + The detectable discrepancy scales as + + - `O(high[i] - low[i])`, + - `O(1 / sqrt(n[i]))`, + - `O(-log(false_fail_rate/K))`, and + - `O(-log(false_pass_rate))`. + """ + with ops.name_scope( + name, "min_discrepancy_of_true_means_detectable_by_dkwm", + [n, low, high, false_fail_rate, false_pass_rate]): + n = ops.convert_to_tensor(n, name="n") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + # Algorithm: Assume a true CDF F. The DKWM inequality gives a + # stochastic bound on how far the observed empirical CDF F_n can be. + # Then, using the DKWM inequality again gives a stochastic bound on + # the farthest candidate true CDF F' that + # true_mean_confidence_interval_by_dkwm might consider. At worst, these + # errors may go in the same direction, so the distance between F and + # F' is bounded by the sum. 
+ # On batching: false fail rates sum, so I need to reduce + # the input to account for the batching. False pass rates + # max, so I don't. + sampling_envelope = _dkwm_cdf_envelope(n, false_pass_rate) + false_fail_rate = _itemwise_error_rate(false_fail_rate, [n, low, high]) + analysis_envelope = _dkwm_cdf_envelope(n, false_fail_rate) + return (high - low) * (sampling_envelope + analysis_envelope) + + +def min_num_samples_for_dkwm_mean_test( + discrepancy, low, high, + false_fail_rate=1e-6, false_pass_rate=1e-6, name=None): + """Returns how many samples suffice for a one-sample DKWM mean test. + + To wit, returns an upper bound on the number of samples necessary to + guarantee detecting a mean difference of at least the given + `discrepancy`, with the given `false_fail_rate` and `false_pass_rate`, + using the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval) + on a scalar distribution supported on `[low, high]`. + + Args: + discrepancy: Floating-point tensor of desired upper limits on mean + differences that may go undetected with probability higher than + `1 - false_pass_rate`. + low: Tensor of lower bounds on the distributions' support. + high: Tensor of upper bounds on the distributions' support. + false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + n: Tensor of numbers of samples to be drawn from the distributions + of interest. + + The `discrepancy`, `low`, and `high` tensors must have + broadcast-compatible shapes. + + For each batch member `i`, of `K` total, drawing `n[i]` samples from + some scalar distribution supported on `[low[i], high[i]]` is enough + to detect a difference in means of size `discrepancy[i]` or more. 
+ Specifically, we guarantee that (a) if the true mean is the expected + mean, `assert_true_mean_equal_by_dkwm` will fail with probability at + most `false_fail_rate / K` (which amounts to `false_fail_rate` if + applied to the whole batch at once), and (b) if the true mean + differs from the expected mean by at least `discrepancy[i]`, + `assert_true_mean_equal_by_dkwm` will pass with probability at most + `false_pass_rate`. + + The required number of samples scales + as `O((high[i] - low[i])**2)`, `O(-log(false_fail_rate/K))`, + `O(-log(false_pass_rate))`, and `O(1 / discrepancy[i]**2)`. + """ + with ops.name_scope( + name, "min_num_samples_for_dkwm_mean_test", + [low, high, false_fail_rate, false_pass_rate, discrepancy]): + discrepancy = ops.convert_to_tensor( + discrepancy, name="discrepancy") + low = ops.convert_to_tensor(low, name="low") + high = ops.convert_to_tensor(high, name="high") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + # Could choose to cleverly allocate envelopes, but this is sound. + envelope1 = discrepancy / (2. * (high - low)) + envelope2 = envelope1 + false_fail_rate = _itemwise_error_rate( + false_fail_rate, [low, high, discrepancy]) + n1 = -math_ops.log(false_fail_rate / 2.) / (2. * envelope1**2) + n2 = -math_ops.log(false_pass_rate / 2.) / (2. * envelope2**2) + return math_ops.maximum(n1, n2) + + +def assert_true_mean_equal_by_dkwm_two_sample( + samples1, low1, high1, samples2, low2, high2, + false_fail_rate=1e-6, name=None): + """Asserts the means of the given distributions are equal. 
+ + More precisely, fails if there is enough evidence (using the + [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval)) + that the means of the distributions from which the given samples are + drawn are _not_ equal with statistical significance `false_fail_rate` + or stronger, otherwise passes. If you also want to check that you + are gathering enough evidence that a pass is not spurious, see + `min_num_samples_for_dkwm_mean_two_sample_test` and + `min_discrepancy_of_true_means_detectable_by_dkwm_two_sample`. + + Note that `false_fail_rate` is a total false failure rate for all + the assertions in the batch. As such, if the batch is nontrivial, + the assertion will insist on stronger evidence to fail any one member. + + Args: + samples1: Floating-point tensor of samples from the + distribution(s) A. Entries are assumed IID across the 0th + dimension. The other dimensions must broadcast with `low1`, + `high1`, `low2`, and `high2`. + low1: Floating-point tensor of lower bounds on the supports of the + distributions A. + high1: Floating-point tensor of upper bounds on the supports of + the distributions A. + samples2: Floating-point tensor of samples from the + distribution(s) B. Entries are assumed IID across the 0th + dimension. The other dimensions must broadcast with `low1`, + `high1`, `low2`, and `high2`. + low2: Floating-point tensor of lower bounds on the supports of the + distributions B. + high2: Floating-point tensor of upper bounds on the supports of + the distributions B. + false_fail_rate: *Scalar* admissible total rate of mistakes. + name: A name for this operation (optional). + + Returns: + check: Op that raises `InvalidArgumentError` if any pair of confidence + intervals for corresponding true means do not overlap. 
+ """ + with ops.name_scope( + name, "assert_true_mean_equal_by_dkwm_two_sample", + [samples1, low1, high1, samples2, low2, high2, false_fail_rate]): + samples1 = ops.convert_to_tensor(samples1, name="samples1") + low1 = ops.convert_to_tensor(low1, name="low1") + high1 = ops.convert_to_tensor(high1, name="high1") + samples2 = ops.convert_to_tensor(samples2, name="samples2") + low2 = ops.convert_to_tensor(low2, name="low2") + high2 = ops.convert_to_tensor(high2, name="high2") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + samples1 = _check_shape_dominates(samples1, [low1, high1]) + samples2 = _check_shape_dominates(samples2, [low2, high2]) + compatible_samples = check_ops.assert_equal( + array_ops.shape(samples1)[1:], array_ops.shape(samples2)[1:]) + with ops.control_dependencies([compatible_samples]): + # Could in principle play games with cleverly allocating + # significance instead of the even split below. It may be possible + # to get tighter intervals, in order to obtain a higher power test. + # Any allocation strategy that depends only on the support bounds + # and sample counts should be valid; however, because the intervals + # scale as O(-log(false_fail_rate)), there doesn't seem to be much + # room to win. + min_mean_1, max_mean_1 = true_mean_confidence_interval_by_dkwm( + samples1, low1, high1, false_fail_rate / 2.) + min_mean_2, max_mean_2 = true_mean_confidence_interval_by_dkwm( + samples2, low2, high2, false_fail_rate / 2.) + # I want to assert + # not (max_mean_1 < min_mean_2 or min_mean_1 > max_mean_2), + # but I think I only have and-combination of asserts, so use DeMorgan. 
+ clause1_op = check_ops.assert_greater_equal(max_mean_1, min_mean_2) + with ops.control_dependencies([clause1_op]): + return check_ops.assert_less_equal(min_mean_1, max_mean_2) + + +def min_discrepancy_of_true_means_detectable_by_dkwm_two_sample( + n1, low1, high1, n2, low2, high2, + false_fail_rate, false_pass_rate, name=None): + """Returns the minimum mean discrepancy for a two-sample DKWM-based test. + + DKWM is the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + Note that `false_fail_rate` is a total false failure rate for all + the tests in the batch. As such, if the batch is nontrivial, each + member will demand more samples. The `false_pass_rate` is also + interpreted as a total, but is treated asymmetrically: If each test + in the batch detects its corresponding discrepancy with probability + at least `1 - false_pass_rate`, then running all those tests and + failing if any one fails will jointly detect all those discrepancies + with the same `false_pass_rate`. + + Args: + n1: Tensor of numbers of samples to be drawn from the distributions A. + low1: Floating-point tensor of lower bounds on the supports of the + distributions A. + high1: Floating-point tensor of upper bounds on the supports of + the distributions A. + n2: Tensor of numbers of samples to be drawn from the distributions B. + low2: Floating-point tensor of lower bounds on the supports of the + distributions B. + high2: Floating-point tensor of upper bounds on the supports of + the distributions B. + false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + discr: Tensor of lower bounds on the distances between true means + detectable by a two-sample DKWM-based test. 
+ + For each batch member `i`, of `K` total, drawing `n1[i]` samples + from scalar distribution A supported on `[low1[i], high1[i]]` and `n2[i]` + samples from scalar distribution B supported on `[low2[i], high2[i]]` + is enough to detect a difference in their true means of size + `discr[i]` or more. Specifically, we guarantee that (a) if their + true means are equal, `assert_true_mean_equal_by_dkwm_two_sample` + will fail with probability at most `false_fail_rate/K` (which + amounts to `false_fail_rate` if applied to the whole batch at once), + and (b) if their true means differ by at least `discr[i]`, + `assert_true_mean_equal_by_dkwm_two_sample` will pass with + probability at most `false_pass_rate`. + + The detectable discrepancy scales as + + - `O(high1[i] - low1[i])`, `O(high2[i] - low2[i])`, + - `O(1 / sqrt(n1[i]))`, `O(1 / sqrt(n2[i]))`, + - `O(-log(false_fail_rate/K))`, and + - `O(-log(false_pass_rate))`. + """ + with ops.name_scope( + name, "min_discrepancy_of_true_means_detectable_by_dkwm_two_sample", + [n1, low1, high1, n2, low2, high2, false_fail_rate, false_pass_rate]): + n1 = ops.convert_to_tensor(n1, name="n1") + low1 = ops.convert_to_tensor(low1, name="low1") + high1 = ops.convert_to_tensor(high1, name="high1") + n2 = ops.convert_to_tensor(n2, name="n2") + low2 = ops.convert_to_tensor(low2, name="low2") + high2 = ops.convert_to_tensor(high2, name="high2") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + det_disc1 = min_discrepancy_of_true_means_detectable_by_dkwm( + n1, low1, high1, false_fail_rate / 2., false_pass_rate / 2.) + det_disc2 = min_discrepancy_of_true_means_detectable_by_dkwm( + n2, low2, high2, false_fail_rate / 2., false_pass_rate / 2.) 
+ return det_disc1 + det_disc2 + + +def min_num_samples_for_dkwm_mean_two_sample_test( + discrepancy, low1, high1, low2, high2, + false_fail_rate=1e-6, false_pass_rate=1e-6, name=None): + """Returns how many samples suffice for a two-sample DKWM mean test. + + DKWM is the [Dvoretzky-Kiefer-Wolfowitz-Massart inequality] + (https://en.wikipedia.org/wiki/CDF-based_nonparametric_confidence_interval). + + Args: + discrepancy: Floating-point tensor of desired upper limits on mean + differences that may go undetected with probability higher than + `1 - false_pass_rate`. + low1: Floating-point tensor of lower bounds on the supports of the + distributions A. + high1: Floating-point tensor of upper bounds on the supports of + the distributions A. + low2: Floating-point tensor of lower bounds on the supports of the + distributions B. + high2: Floating-point tensor of upper bounds on the supports of + the distributions B. + false_fail_rate: *Scalar* admissible total rate of false failures. + false_pass_rate: *Scalar* admissible rate of false passes. + name: A name for this operation (optional). + + Returns: + n1: Tensor of numbers of samples to be drawn from the distributions A. + n2: Tensor of numbers of samples to be drawn from the distributions B. + + For each batch member `i`, of `K` total, drawing `n1[i]` samples + from scalar distribution A supported on `[low1[i], high1[i]]` and `n2[i]` + samples from scalar distribution B supported on `[low2[i], high2[i]]` + is enough to detect a difference in their true means of size + `discrepancy[i]` or more. Specifically, we guarantee that (a) if their + true means are equal, `assert_true_mean_equal_by_dkwm_two_sample` + will fail with probability at most `false_fail_rate/K` (which + amounts to `false_fail_rate` if applied to the whole batch at once), + and (b) if their true means differ by at least `discrepancy[i]`, + `assert_true_mean_equal_by_dkwm_two_sample` will pass with + probability at most `false_pass_rate`. 
+ + The required number of samples scales as + + - `O((high1[i] - low1[i])**2)`, `O((high2[i] - low2[i])**2)`, + - `O(-log(false_fail_rate/K))`, + - `O(-log(false_pass_rate))`, and + - `O(1 / discrepancy[i]**2)`. + """ + with ops.name_scope( + name, "min_num_samples_for_dkwm_mean_two_sample_test", + [low1, high1, low2, high2, + false_fail_rate, false_pass_rate, discrepancy]): + discrepancy = ops.convert_to_tensor(discrepancy, name="discrepancy") + low1 = ops.convert_to_tensor(low1, name="low1") + high1 = ops.convert_to_tensor(high1, name="high1") + low2 = ops.convert_to_tensor(low2, name="low2") + high2 = ops.convert_to_tensor(high2, name="high2") + false_fail_rate = ops.convert_to_tensor( + false_fail_rate, name="false_fail_rate") + false_pass_rate = ops.convert_to_tensor( + false_pass_rate, name="false_pass_rate") + # Could choose to cleverly allocate discrepancy tolerances and + # failure probabilities, but this is sound. + n1 = min_num_samples_for_dkwm_mean_test( + discrepancy / 2., low1, high1, + false_fail_rate / 2., false_pass_rate / 2.) + n2 = min_num_samples_for_dkwm_mean_test( + discrepancy / 2., low2, high2, + false_fail_rate / 2., false_pass_rate / 2.) 
+ return n1, n2 -- GitLab From 6d014ecbd63fec208742b327b94c39afd4953fb8 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 2 Mar 2018 15:11:13 -0800 Subject: [PATCH 371/884] ReadVariableOp in C for eager (only for the fastpath) PiperOrigin-RevId: 187676012 --- tensorflow/python/eager/benchmarks_test.py | 21 + tensorflow/python/eager/pywrap_tfe.h | 7 + tensorflow/python/eager/pywrap_tfe_src.cc | 460 ++++++++++++------ tensorflow/python/eager/pywrap_tfe_test.py | 31 ++ .../python/ops/resource_variable_ops.py | 4 + tensorflow/python/pywrap_tfe.i | 1 + 6 files changed, 377 insertions(+), 147 deletions(-) diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 527a919ab0..551d5647dd 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -275,6 +275,16 @@ class MicroBenchmarks(test.Benchmark): def _benchmark_read_variable(self, m, num_iters): self._run(m.value, num_iters) + def _benchmark_matmul_read_variable(self, m, num_iters): + self._benchmark_gen_math_ops_matmul( + m, transpose_b=False, num_iters=num_iters) + + def _benchmark_matmul_read_variable_with_tape(self, m, num_iters): + with backprop.GradientTape() as tape: + tape.watch(m) + self._benchmark_gen_math_ops_matmul( + m, transpose_b=False, num_iters=num_iters) + def _benchmark_read_variable_with_tape(self, m, num_iters): with backprop.GradientTape() as tape: tape.watch(m) @@ -416,6 +426,17 @@ class MicroBenchmarks(test.Benchmark): self._benchmark_defun_matmul( m, transpose_b=True, num_iters=self._num_iters_100_by_784) + def benchmark_matmul_read_variable_op_2_by_2_CPU(self): + with context.device(CPU): + m = resource_variable_ops.ResourceVariable(self._m_2_by_2) + self._benchmark_matmul_read_variable(m, num_iters=self._num_iters_2_by_2) + + def benchmark_matmul_read_variable_op_with_tape_2_by_2_CPU(self): + with context.device(CPU): + m = resource_variable_ops.ResourceVariable(self._m_2_by_2) + 
self._benchmark_matmul_read_variable_with_tape( + m, num_iters=self._num_iters_2_by_2) + def benchmark_read_variable_op_2_by_2_CPU(self): with context.device(CPU): m = resource_variable_ops.ResourceVariable(self._m_2_by_2) diff --git a/tensorflow/python/eager/pywrap_tfe.h b/tensorflow/python/eager/pywrap_tfe.h index b1b4a6b214..32d731d0f6 100644 --- a/tensorflow/python/eager/pywrap_tfe.h +++ b/tensorflow/python/eager/pywrap_tfe.h @@ -51,6 +51,13 @@ void TFE_Py_Execute(TFE_Context* ctx, const char* device_name, // This function is not thread-safe. PyObject* TFE_Py_RegisterExceptionClass(PyObject* e); +// Registers e as the type of the ResourceVariable class. +// Returns Py_None if registration succeeds, else throws a TypeError and returns +// NULL. +// +// This function is not thread-safe. +PyObject* TFE_Py_RegisterResourceVariableType(PyObject* e); + // Registers e as the Exception to be raised when the conditions of // TFE_Py_FastPathExecute_C have not been met. When this exception is set, it // is a signal to the calling code that it should fall back to the safer (and diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 42d97dfe3f..27c9d05081 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -38,6 +38,23 @@ using tensorflow::strings::Printf; namespace { +struct FastPathOpExecInfo { + TFE_Context* ctx; + const char* device_name; + // The op def of the main op being executed. + const tensorflow::OpDef* op_def; + + bool run_callbacks; + bool run_post_exec_callbacks; + bool run_gradient_callback; + + // The op name of the main op being executed. + PyObject* name; + // The op type name of the main op being executed. 
+ PyObject* op_name; + PyObject* callbacks; +}; + #define PARSE_VALUE(fn_name, type, check_fn, parse_fn) \ bool fn_name(const string& key, PyObject* py_value, TF_Status* status, \ type* value) { \ @@ -120,6 +137,11 @@ bool ParseTypeValue(const string& key, PyObject* py_value, TF_Status* status, PyObject* py_type_enum = PyObject_GetAttrString(py_value, "_type_enum"); if (py_type_enum == nullptr) { + TF_SetStatus( + status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat("Expecting a DType.dtype for attr ", key, + ", got ", py_value->ob_type->tp_name) + .c_str()); return false; } @@ -580,6 +602,8 @@ PyObject* fallback_exception_class = nullptr; // Python function that returns a backward_function. PyObject* backward_function_getter = nullptr; +PyTypeObject* resource_variable_type = nullptr; + tensorflow::mutex _uid_mutex(tensorflow::LINKER_INITIALIZED); tensorflow::int64 _uid GUARDED_BY(_uid_mutex) = 0; @@ -628,11 +652,28 @@ PyObject* TFE_Py_RegisterExceptionClass(PyObject* e) { "TFE_Py_RegisterExceptionClass: " "Registered class should be subclass of Exception."); return nullptr; - } else { - Py_INCREF(e); - exception_class = e; - Py_RETURN_NONE; } + + Py_INCREF(e); + exception_class = e; + Py_RETURN_NONE; +} + +PyObject* TFE_Py_RegisterResourceVariableType(PyObject* e) { + if (!PyType_Check(e)) { + PyErr_SetString( + PyExc_TypeError, + "TFE_Py_RegisterResourceVariableType: Need to register a type."); + return nullptr; + } + + if (resource_variable_type != nullptr) { + Py_DECREF(resource_variable_type); + } + + Py_INCREF(e); + resource_variable_type = reinterpret_cast(e); + Py_RETURN_NONE; } PyObject* TFE_Py_RegisterFallbackExceptionClass(PyObject* e) { @@ -1375,8 +1416,12 @@ PyObject* GetPythonObjectFromString(const char* s) { #endif } -bool CheckEagerTensors(PyObject* seq, int start_index, - const tensorflow::OpDef& op_def) { +bool CheckResourceVariable(PyObject* item) { + return PyObject_TypeCheck(item, resource_variable_type); +} + +bool 
CheckInputsOk(PyObject* seq, int start_index, + const tensorflow::OpDef& op_def) { for (int i = 0; i < op_def.input_arg_size(); i++) { PyObject* item = PyTuple_GET_ITEM(seq, i + start_index); if (!op_def.input_arg(i).number_attr().empty() || @@ -1384,9 +1429,13 @@ bool CheckEagerTensors(PyObject* seq, int start_index, // This item should be a list input. if (!PyList_Check(item)) return false; for (Py_ssize_t j = 0; j < PyList_Size(item); j++) { - if (!EagerTensor_CheckExact(PyList_GET_ITEM(item, j))) return false; + PyObject* inner_item = PyList_GET_ITEM(item, j); + if (!EagerTensor_CheckExact(inner_item) && + !CheckResourceVariable(inner_item)) { + return false; + } } - } else if (!EagerTensor_CheckExact(item)) { + } else if (!EagerTensor_CheckExact(item) && !CheckResourceVariable(item)) { return false; } } @@ -1394,71 +1443,6 @@ bool CheckEagerTensors(PyObject* seq, int start_index, return true; } -// Adds input and type attr to the op, and to the list of flattened -// inputs/attrs. -bool AddInputToOp(PyObject* input, const tensorflow::OpDef::ArgDef* input_arg, - std::vector* flattened_attrs, - std::vector* flattened_inputs, TFE_Op* op, - TF_Status* status) { - TFE_TensorHandle* input_handle = EagerTensor_Handle(input); - if (input_arg != nullptr && !input_arg->type_attr().empty()) { - auto dtype = TFE_TensorHandleDataType(input_handle); - TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); - if (flattened_attrs != nullptr) { - flattened_attrs->push_back( - GetPythonObjectFromString(input_arg->type_attr().data())); - flattened_attrs->push_back(PyLong_FromLong(dtype)); - } - } - - if (flattened_inputs != nullptr) { - flattened_inputs->push_back(input); - } - TFE_OpAddInput(op, input_handle, status); - if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { - return false; - } - return true; -} - -const tensorflow::OpDef* GetOpDef(PyObject* py_op_name) { - const char* op_name = TFE_GetPythonString(py_op_name); - if (op_name == nullptr) { - 
PyErr_SetString(PyExc_TypeError, - Printf("expected a string for op_name, got %s instead", - py_op_name->ob_type->tp_name) - .c_str()); - return nullptr; - } - - const tensorflow::OpRegistrationData* op_reg_data = nullptr; - const tensorflow::Status lookup_status = - tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); - if (MaybeRaiseExceptionFromStatus(lookup_status, nullptr)) { - return nullptr; - } - return &op_reg_data->op_def; -} - -const char* GetDeviceName(PyObject* py_device_name) { - if (py_device_name != Py_None) { - return TFE_GetPythonString(py_device_name); - } - return nullptr; -} - -bool RaiseIfNotPyList(PyObject* list, const string& attr_name) { - if (!PyList_Check(list)) { - PyErr_SetString(PyExc_TypeError, - Printf("expected a list for attr %s, got %s instead", - attr_name.data(), list->ob_type->tp_name) - .data()); - - return false; - } - return true; -} - bool OpDoesntRequireOutput(const string& op_name) { static tensorflow::gtl::FlatSet* ops_that_dont_require_outputs = new tensorflow::gtl::FlatSet({ @@ -1583,7 +1567,6 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, break; } } - if (!should_record) Py_RETURN_NONE; string c_op_name = TFE_GetPythonString(op_name); @@ -1617,50 +1600,212 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs, Py_RETURN_NONE; } -bool RunCallbacks(bool run_gradient_callback, bool run_post_exec_callbacks, - const tensorflow::OpDef* op_def, PyObject* args, - const std::vector& flattened_inputs, - const std::vector& flattened_attrs, - PyObject* flattened_result, PyObject* op_name, PyObject* name, - PyObject* callbacks) { - tensorflow::Safe_PyObjectPtr inputs = - tensorflow::make_safe(PyTuple_New(flattened_inputs.size())); +void MaybeWatchVariable(PyObject* input) { + DCHECK(CheckResourceVariable(input)); + DCHECK(PyObject_HasAttrString(input, "_trainable")); + + tensorflow::Safe_PyObjectPtr trainable( + PyObject_GetAttrString(input, "_trainable")); 
+ if (trainable.get() == Py_False) return; + TFE_Py_TapeSetWatchVariable(input); +} + +bool ReadVariableOp(const FastPathOpExecInfo& parent_op_exec_info, + PyObject* input, tensorflow::Safe_PyObjectPtr* output, + TF_Status* status) { + MaybeWatchVariable(input); + + TFE_Op* op = TFE_NewOp(parent_op_exec_info.ctx, "ReadVariableOp", status); + auto cleaner = tensorflow::gtl::MakeCleanup([op] { TFE_DeleteOp(op); }); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Set dtype + DCHECK(PyObject_HasAttrString(input, "_dtype")); + tensorflow::Safe_PyObjectPtr dtype(PyObject_GetAttrString(input, "_dtype")); + int value; + if (!ParseTypeValue("_dtype", dtype.get(), status, &value)) { + return false; + } + TFE_OpSetAttrType(op, "dtype", static_cast(value)); + + TFE_OpSetDevice(op, parent_op_exec_info.device_name, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Get handle + tensorflow::Safe_PyObjectPtr handle(PyObject_GetAttrString(input, "_handle")); + if (!EagerTensor_CheckExact(handle.get())) return false; + TFE_OpAddInput(op, EagerTensor_Handle(handle.get()), status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + int num_retvals = 1; + TFE_TensorHandle* output_handle; + TFE_Execute(op, &output_handle, &num_retvals, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) return false; + + // Always create the py object (and correctly DECREF it) from the returned + // value, else the data will leak. + output->reset(EagerTensorFromHandle(output_handle)); + + // TODO(nareshmodi): Should we run post exec callbacks here? + if (parent_op_exec_info.run_gradient_callback) { + tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(1)); + PyTuple_SET_ITEM(inputs.get(), 0, handle.release()); + + tensorflow::Safe_PyObjectPtr outputs(PyTuple_New(1)); + Py_INCREF(output->get()); // stay alive after since tuple steals. 
+ PyTuple_SET_ITEM(outputs.get(), 0, output->get()); + + if (!RecordGradient(GetPythonObjectFromString("ReadVariableOp"), + inputs.get(), Py_None, outputs.get(), Py_None)) { + return false; + } + } + + return true; +} + +// Supports only 2 cases at the moment: +// i) input is an EagerTensor +// ii) input is a ResourceVariable - in this case, the is_variable param is set +// to true. +bool ConvertToTensor(const FastPathOpExecInfo& op_exec_info, PyObject* input, + tensorflow::Safe_PyObjectPtr* output_handle, + TF_Status* status) { + if (CheckResourceVariable(input)) { + return ReadVariableOp(op_exec_info, input, output_handle, status); + } + + Py_INCREF(input); + output_handle->reset(input); + + return true; +} + +// Adds input and type attr to the op, and to the list of flattened +// inputs/attrs. +bool AddInputToOp(const FastPathOpExecInfo& op_exec_info, PyObject* input, + const tensorflow::OpDef::ArgDef* input_arg, + std::vector* flattened_attrs, + std::vector* flattened_inputs, + TFE_Op* op, TF_Status* status) { + // py_eager_tensor's ownership is transferred to flattened_inputs if it is + // required, else the object is destroyed and DECREF'd when the object goes + // out of scope in this function. 
+ tensorflow::Safe_PyObjectPtr py_eager_tensor = nullptr; + + if (!ConvertToTensor(op_exec_info, input, &py_eager_tensor, status)) { + return false; + } + + TFE_TensorHandle* input_handle = EagerTensor_Handle(py_eager_tensor.get()); + + if (input_arg != nullptr && !input_arg->type_attr().empty()) { + auto dtype = TFE_TensorHandleDataType(input_handle); + TFE_OpSetAttrType(op, input_arg->type_attr().data(), dtype); + if (flattened_attrs != nullptr) { + flattened_attrs->emplace_back( + GetPythonObjectFromString(input_arg->type_attr().data())); + flattened_attrs->emplace_back(PyLong_FromLong(dtype)); + } + } + + if (flattened_inputs != nullptr) { + flattened_inputs->emplace_back(std::move(py_eager_tensor)); + } + + TFE_OpAddInput(op, input_handle, status); + if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { + return false; + } + + return true; +} + +const tensorflow::OpDef* GetOpDef(PyObject* py_op_name) { + const char* op_name = TFE_GetPythonString(py_op_name); + if (op_name == nullptr) { + PyErr_SetString(PyExc_TypeError, + Printf("expected a string for op_name, got %s instead", + py_op_name->ob_type->tp_name) + .c_str()); + return nullptr; + } + + const tensorflow::OpRegistrationData* op_reg_data = nullptr; + const tensorflow::Status lookup_status = + tensorflow::OpRegistry::Global()->LookUp(op_name, &op_reg_data); + if (MaybeRaiseExceptionFromStatus(lookup_status, nullptr)) { + return nullptr; + } + return &op_reg_data->op_def; +} + +const char* GetDeviceName(PyObject* py_device_name) { + if (py_device_name != Py_None) { + return TFE_GetPythonString(py_device_name); + } + return nullptr; +} + +bool RaiseIfNotPyList(PyObject* list, const string& attr_name) { + if (!PyList_Check(list)) { + PyErr_SetString(PyExc_TypeError, + Printf("expected a list for attr %s, got %s instead", + attr_name.data(), list->ob_type->tp_name) + .data()); + + return false; + } + return true; +} + +bool RunCallbacks( + const FastPathOpExecInfo& op_exec_info, PyObject* args, + const 
std::vector& flattened_inputs, + const std::vector& flattened_attrs, + PyObject* flattened_result) { + if (!op_exec_info.run_callbacks) return true; + + tensorflow::Safe_PyObjectPtr inputs(PyTuple_New(flattened_inputs.size())); for (int i = 0; i < flattened_inputs.size(); i++) { - PyObject* input = flattened_inputs[i]; + PyObject* input = flattened_inputs[i].get(); Py_INCREF(input); PyTuple_SET_ITEM(inputs.get(), i, input); } int num_non_inferred_attrs = PyTuple_GET_SIZE(args) - - op_def->input_arg_size() - + op_exec_info.op_def->input_arg_size() - kFastPathExecuteInputStartIndex; int num_attrs = flattened_attrs.size() + num_non_inferred_attrs; - tensorflow::Safe_PyObjectPtr attrs = - tensorflow::make_safe(PyTuple_New(num_attrs)); + tensorflow::Safe_PyObjectPtr attrs(PyTuple_New(num_attrs)); for (int i = 0; i < num_non_inferred_attrs; i++) { - auto* attr = PyTuple_GET_ITEM( - args, kFastPathExecuteInputStartIndex + op_def->input_arg_size() + i); + auto* attr = + PyTuple_GET_ITEM(args, kFastPathExecuteInputStartIndex + + op_exec_info.op_def->input_arg_size() + i); Py_INCREF(attr); PyTuple_SET_ITEM(attrs.get(), i, attr); } for (int i = num_non_inferred_attrs; i < num_attrs; i++) { - // Not INCREFing anything in flattened_attrs as each of those is a new - // reference, so allow the attrs tuple to steal the reference. 
- PyTuple_SET_ITEM(attrs.get(), i, - flattened_attrs.at(i - num_non_inferred_attrs)); + PyObject* attr_or_name = + flattened_attrs.at(i - num_non_inferred_attrs).get(); + Py_INCREF(attr_or_name); + PyTuple_SET_ITEM(attrs.get(), i, attr_or_name); } - if (run_gradient_callback) { - RecordGradient(op_name, inputs.get(), attrs.get(), flattened_result, name); + if (op_exec_info.run_gradient_callback) { + if (!RecordGradient(op_exec_info.op_name, inputs.get(), attrs.get(), + flattened_result, op_exec_info.name)) { + return false; + } } - if (run_post_exec_callbacks) { - tensorflow::Safe_PyObjectPtr callback_args = tensorflow::make_safe( - Py_BuildValue("OOOOO", op_name, inputs.get(), attrs.get(), - flattened_result, name)); - for (Py_ssize_t i = 0; i < PyList_Size(callbacks); i++) { - PyObject* callback_fn = PyList_GET_ITEM(callbacks, i); + if (op_exec_info.run_post_exec_callbacks) { + tensorflow::Safe_PyObjectPtr callback_args( + Py_BuildValue("OOOOO", op_exec_info.op_name, inputs.get(), attrs.get(), + flattened_result, op_exec_info.name)); + for (Py_ssize_t i = 0; i < PyList_Size(op_exec_info.callbacks); i++) { + PyObject* callback_fn = PyList_GET_ITEM(op_exec_info.callbacks, i); if (!PyCallable_Check(callback_fn)) { PyErr_SetString( PyExc_TypeError, @@ -1695,14 +1840,30 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - TFE_Context* ctx = reinterpret_cast( + FastPathOpExecInfo op_exec_info; + + op_exec_info.ctx = reinterpret_cast( PyCapsule_GetPointer(PyTuple_GET_ITEM(args, 0), nullptr)); - const char* device_name = GetDeviceName(PyTuple_GET_ITEM(args, 1)); - PyObject* op_name = PyTuple_GET_ITEM(args, 2); - const tensorflow::OpDef* op_def = GetOpDef(op_name); - if (op_def == nullptr) return nullptr; - PyObject* name = PyTuple_GET_ITEM(args, 3); - PyObject* callbacks = PyTuple_GET_ITEM(args, 4); + op_exec_info.device_name = GetDeviceName(PyTuple_GET_ITEM(args, 1)); + op_exec_info.op_name = PyTuple_GET_ITEM(args, 2); + 
op_exec_info.op_def = GetOpDef(op_exec_info.op_name); + if (op_exec_info.op_def == nullptr) return nullptr; + op_exec_info.name = PyTuple_GET_ITEM(args, 3); + op_exec_info.callbacks = PyTuple_GET_ITEM(args, 4); + + const tensorflow::OpDef* op_def = op_exec_info.op_def; + + // TODO(nareshmodi): Add a benchmark for the fast-path with gradient callbacks + // (similar to benchmark_tf_gradient_function_*). Also consider using an + // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks + // point out problems with heap allocs. + op_exec_info.run_gradient_callback = + !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); + op_exec_info.run_post_exec_callbacks = + op_exec_info.callbacks != Py_None && + PyList_Size(op_exec_info.callbacks) > 0; + op_exec_info.run_callbacks = op_exec_info.run_gradient_callback || + op_exec_info.run_post_exec_callbacks; if (args_size < kFastPathExecuteInputStartIndex + op_def->input_arg_size()) { PyErr_SetString( @@ -1715,7 +1876,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - if (!CheckEagerTensors(args, kFastPathExecuteInputStartIndex, *op_def)) { + if (!CheckInputsOk(args, kFastPathExecuteInputStartIndex, *op_def)) { RaiseFallbackException( "This function does not handle the case of the path where " "all inputs are not already EagerTensors."); @@ -1723,7 +1884,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { } TF_Status* status = TF_NewStatus(); - TFE_Op* op = TFE_NewOp(ctx, op_def->name().c_str(), status); + TFE_Op* op = TFE_NewOp(op_exec_info.ctx, op_def->name().c_str(), status); auto cleaner = tensorflow::gtl::MakeCleanup([status, op] { TF_DeleteStatus(status); TFE_DeleteOp(op); @@ -1750,8 +1911,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { // OpRegistrationData. 
for (const auto& attr : op_def->attr()) { if (attr_name == attr.name()) { - SetOpAttrWithDefaults(ctx, op, attr, attr_name.data(), py_attr_value, - &attr_list_sizes, status); + SetOpAttrWithDefaults(op_exec_info.ctx, op, attr, attr_name.data(), + py_attr_value, &attr_list_sizes, status); if (TF_GetCode(status) != TF_OK) { RaiseFallbackException(TF_Message(status)); @@ -1763,33 +1924,28 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { } } - TFE_OpSetDevice(op, device_name, status); + TFE_OpSetDevice(op, op_exec_info.device_name, status); if (MaybeRaiseExceptionFromTFStatus(status, nullptr)) { return nullptr; } - // TODO(nareshmodi): Add a benchmark for the fast-path with gradient callbacks - // (similar to benchmark_tf_gradient_function_*). Also consider using an - // InlinedVector for flattened_attrs and flattened_inputs if the benchmarks - // point out problems with heap allocs. - bool run_gradient_callback = - !*ThreadTapeIsStopped() && !GetTapeSet()->empty(); - bool run_post_exec_callbacks = - callbacks != Py_None && PyList_Size(callbacks) > 0; - bool run_callbacks = run_gradient_callback || run_post_exec_callbacks; // Flat attrs and inputs as required by the record_gradient call. The attrs // here only contain inferred attrs (non-inferred attrs are added directly // from the input args). - // All items in flattened_attrs contain new references. - // All items in flattened_inputs contain borrowed references. + // All items in flattened_attrs and flattened_inputs contain + // Safe_PyObjectPtr - any time something steals a reference to this, it must + // INCREF. // TODO(nareshmodi): figure out why PyList_New/PyList_Append don't work // directly. 
- std::unique_ptr> flattened_attrs = nullptr; - std::unique_ptr> flattened_inputs = nullptr; + std::unique_ptr> flattened_attrs = + nullptr; + std::unique_ptr> flattened_inputs = + nullptr; - if (run_callbacks) { - flattened_attrs.reset(new std::vector); - flattened_inputs.reset(new std::vector); + // TODO(nareshmodi): Encapsulate callbacks information into a struct. + if (op_exec_info.run_callbacks) { + flattened_attrs.reset(new std::vector); + flattened_inputs.reset(new std::vector); } // Add inferred attrs and inputs. @@ -1809,16 +1965,16 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_ssize_t len = PyList_Size(input); TFE_OpSetAttrInt(op, input_arg.number_attr().data(), len); - if (run_callbacks) { - flattened_attrs->push_back( + if (op_exec_info.run_callbacks) { + flattened_attrs->emplace_back( GetPythonObjectFromString(input_arg.number_attr().data())); - flattened_attrs->push_back(PyLong_FromLong(len)); + flattened_attrs->emplace_back(PyLong_FromLong(len)); } attr_list_sizes[input_arg.number_attr()] = len; if (len > 0) { // First item adds the type attr. - if (!AddInputToOp(PyList_GET_ITEM(input, 0), &input_arg, + if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, 0), &input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; @@ -1826,7 +1982,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { for (Py_ssize_t j = 1; j < len; j++) { // Since the list is homogeneous, we don't need to re-add the attr. 
- if (!AddInputToOp(PyList_GET_ITEM(input, j), nullptr /* input_arg */, + if (!AddInputToOp(op_exec_info, PyList_GET_ITEM(input, j), + nullptr /* input_arg */, nullptr /* flattened_attrs */, flattened_inputs.get(), op, status)) { return nullptr; @@ -1840,12 +1997,20 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_ssize_t len = PyList_Size(input); tensorflow::gtl::InlinedVector attr_value(len); PyObject* py_attr_value = nullptr; - if (run_callbacks) { + if (op_exec_info.run_callbacks) { py_attr_value = PyTuple_New(len); } for (Py_ssize_t j = 0; j < len; j++) { PyObject* py_input = PyList_GET_ITEM(input, j); - TFE_TensorHandle* input_handle = EagerTensor_Handle(py_input); + tensorflow::Safe_PyObjectPtr py_eager_tensor; + if (!ConvertToTensor(op_exec_info, py_input, &py_eager_tensor, + status)) { + return nullptr; + } + + TFE_TensorHandle* input_handle = + EagerTensor_Handle(py_eager_tensor.get()); + attr_value[j] = TFE_TensorHandleDataType(input_handle); TFE_OpAddInput(op, input_handle, status); @@ -1853,22 +2018,23 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - if (run_callbacks) { - flattened_inputs->push_back(py_input); + if (op_exec_info.run_callbacks) { + flattened_inputs->emplace_back(std::move(py_eager_tensor)); PyTuple_SET_ITEM(py_attr_value, j, PyLong_FromLong(attr_value[j])); } } - if (run_callbacks) { - flattened_attrs->push_back(GetPythonObjectFromString(attr_name.data())); - flattened_attrs->push_back(py_attr_value); + if (op_exec_info.run_callbacks) { + flattened_attrs->emplace_back( + GetPythonObjectFromString(attr_name.data())); + flattened_attrs->emplace_back(py_attr_value); } TFE_OpSetAttrTypeList(op, attr_name.data(), attr_value.data(), attr_value.size()); attr_list_sizes[attr_name] = len; } else { // The item is a single item. 
- if (!AddInputToOp(input, &input_arg, flattened_attrs.get(), + if (!AddInputToOp(op_exec_info, input, &input_arg, flattened_attrs.get(), flattened_inputs.get(), op, status)) { return nullptr; } @@ -1892,12 +2058,14 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { Py_BEGIN_ALLOW_THREADS; TFE_Execute(op, retvals.data(), &num_retvals, status); Py_END_ALLOW_THREADS; + if (TF_GetCode(status) != TF_OK) { // Augment the status with the op_name for easier debugging similar to // TFE_Py_Execute. TF_SetStatus(status, TF_GetCode(status), - tensorflow::strings::StrCat(TF_Message(status), " [Op:", - TFE_GetPythonString(op_name), "]") + tensorflow::strings::StrCat( + TF_Message(status), + " [Op:", TFE_GetPythonString(op_exec_info.op_name), "]") .c_str()); MaybeRaiseExceptionFromTFStatus(status, nullptr); @@ -1909,10 +2077,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { PyList_SET_ITEM(flat_result, i, EagerTensorFromHandle(retvals[i])); } - if (run_callbacks && - !RunCallbacks(run_gradient_callback, run_post_exec_callbacks, op_def, - args, *flattened_inputs, *flattened_attrs, flat_result, - op_name, name, callbacks)) { + if (!RunCallbacks(op_exec_info, args, *flattened_inputs, *flattened_attrs, + flat_result)) { return nullptr; } diff --git a/tensorflow/python/eager/pywrap_tfe_test.py b/tensorflow/python/eager/pywrap_tfe_test.py index 418ed75178..46c5601f47 100644 --- a/tensorflow/python/eager/pywrap_tfe_test.py +++ b/tensorflow/python/eager/pywrap_tfe_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops +from tensorflow.python.ops import resource_variable_ops class Tests(test.TestCase): @@ -53,6 +54,21 @@ class Tests(test.TestCase): ctx._handle, ctx.device_name, "MatMul", None, None, a_100_by_784, b_100_by_784, "transpose_a", False, "transpose_b", True)) + 
@test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created + def testFastpathExecute_ResourceVariableMatMulCorrectResponse(self): + ctx = context.context() + a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) + m = resource_variable_ops.ResourceVariable(a_2_by_2) + x = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, m, m, "transpose_a", + False, "transpose_b", False) + y = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, a_2_by_2, a_2_by_2, + "transpose_a", False, "transpose_b", False) + + self.assertAllEqual(x, y) + @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created def testFastpathExecute_TapeWrite(self): @@ -67,6 +83,21 @@ class Tests(test.TestCase): self.assertAllEqual(dz_dy.numpy(), constant_op.constant(4.0, shape=[2, 2]).numpy()) + @test_util.assert_no_new_tensors + @test_util.assert_no_garbage_created + def testFastpathExecute_ResourceVariableTapeWrite(self): + ctx = context.context() + with backprop.GradientTape(persistent=True) as tape: + a_2_by_2 = constant_op.constant(1.0, shape=[2, 2]) + m = resource_variable_ops.ResourceVariable(a_2_by_2) + tape.watch(m) + z = pywrap_tensorflow.TFE_Py_FastPathExecute( + ctx._handle, ctx.device_name, "MatMul", None, None, m, m, + "transpose_a", False, "transpose_b", False) + dz_dy = tape.gradient(z, [m])[0] + self.assertAllEqual(dz_dy.numpy(), + constant_op.constant(4.0, shape=[2, 2]).numpy()) + # Tests homogeneous list op @test_util.assert_no_new_tensors @test_util.assert_no_garbage_created diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index cbac3c686d..6c5d692e82 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -21,6 +21,7 @@ from __future__ import print_function from tensorflow.core.framework import attr_value_pb2 from tensorflow.core.framework import variable_pb2 
+from tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import context from tensorflow.python.eager import tape from tensorflow.python.framework import dtypes @@ -932,6 +933,9 @@ class ResourceVariable(variables.Variable): "Tensor object.") +pywrap_tensorflow.TFE_Py_RegisterResourceVariableType(ResourceVariable) + + def _dense_var_to_tensor(var, dtype=None, name=None, as_ref=False): return var._dense_var_to_tensor(dtype=dtype, name=name, as_ref=as_ref) # pylint: disable=protected-access diff --git a/tensorflow/python/pywrap_tfe.i b/tensorflow/python/pywrap_tfe.i index 7ab0db5268..b481ddf5d4 100644 --- a/tensorflow/python/pywrap_tfe.i +++ b/tensorflow/python/pywrap_tfe.i @@ -31,6 +31,7 @@ limitations under the License. %rename("%s") TFE_Py_RegisterExceptionClass; %rename("%s") TFE_Py_RegisterBackwardFunctionGetter; %rename("%s") TFE_Py_RegisterFallbackExceptionClass; +%rename("%s") TFE_Py_RegisterResourceVariableType; %rename("%s") TFE_Py_Execute; %rename("%s") TFE_Py_FastPathExecute; %rename("%s") TFE_Py_RecordGradient; -- GitLab From 9a45b6bdf0246477754f50fab357e568051bed4f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 15:24:33 -0800 Subject: [PATCH 372/884] Adds setUseNNAPI to Interpreter.java, to enable developers turn on & off NNAPI. 
PiperOrigin-RevId: 187677765 --- .../java/org/tensorflow/lite/Interpreter.java | 10 +++- .../org/tensorflow/lite/InterpreterTest.java | 48 +++++++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java index b071cda5df..9e47e921a6 100644 --- a/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java +++ b/tensorflow/contrib/lite/java/src/main/java/org/tensorflow/lite/Interpreter.java @@ -167,7 +167,6 @@ public final class Interpreter implements AutoCloseable { return wrapper.getOutputIndex(opName); } - /** * Returns native inference timing. *

IllegalArgumentException will be thrown if the model is not initialized by the @@ -180,6 +179,15 @@ public final class Interpreter implements AutoCloseable { return wrapper.getLastNativeInferenceDurationNanoseconds(); } + /** Turns on/off Android NNAPI for hardware acceleration when it is available. */ + public void setUseNNAPI(boolean useNNAPI) { + if (wrapper != null) { + wrapper.setUseNNAPI(useNNAPI); + } else { + throw new IllegalStateException("NativeInterpreterWrapper has already been closed."); + } + } + /** Release resources associated with the {@code Interpreter}. */ @Override public void close() { diff --git a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java index 424b3de6c9..61d6c35ec8 100644 --- a/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java +++ b/tensorflow/contrib/lite/java/src/test/java/org/tensorflow/lite/InterpreterTest.java @@ -218,4 +218,52 @@ public final class InterpreterTest { int index = interpreter.getOutputIndex("MobilenetV1/Predictions/Softmax"); assertThat(index).isEqualTo(0); } + + @Test + public void testTurnOffNNAPI() throws Exception { + Path path = MODEL_FILE.toPath(); + FileChannel fileChannel = + (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)); + MappedByteBuffer mappedByteBuffer = + fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + Interpreter interpreter = new Interpreter(mappedByteBuffer); + interpreter.setUseNNAPI(true); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + interpreter.run(fourD, parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 
23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.setUseNNAPI(false); + interpreter.run(fourD, parsedOutputs); + outputOneD = parsedOutputs[0][0][0]; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.close(); + fileChannel.close(); + } + + @Test + public void testTurnOnNNAPI() throws Exception { + Path path = MODEL_FILE.toPath(); + FileChannel fileChannel = + (FileChannel) Files.newByteChannel(path, EnumSet.of(StandardOpenOption.READ)); + MappedByteBuffer mappedByteBuffer = + fileChannel.map(FileChannel.MapMode.READ_ONLY, 0, fileChannel.size()); + Interpreter interpreter = new Interpreter(mappedByteBuffer); + interpreter.setUseNNAPI(true); + float[] oneD = {1.23f, 6.54f, 7.81f}; + float[][] twoD = {oneD, oneD, oneD, oneD, oneD, oneD, oneD, oneD}; + float[][][] threeD = {twoD, twoD, twoD, twoD, twoD, twoD, twoD, twoD}; + float[][][][] fourD = {threeD, threeD}; + float[][][][] parsedOutputs = new float[2][8][8][3]; + interpreter.run(fourD, parsedOutputs); + float[] outputOneD = parsedOutputs[0][0][0]; + float[] expected = {3.69f, 19.62f, 23.43f}; + assertThat(outputOneD).usingTolerance(0.1f).containsExactly(expected).inOrder(); + interpreter.close(); + fileChannel.close(); + } } -- GitLab From e8e4e5b99b721dcd79e0d1a9a7fe6bfb990744ba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 15:25:41 -0800 Subject: [PATCH 373/884] Fix some compiler warnings in MKL build. 
PiperOrigin-RevId: 187677893 --- tensorflow/core/BUILD | 6 ++- .../core/common_runtime/mkl_cpu_allocator.cc | 27 ++++++++++ .../core/common_runtime/mkl_cpu_allocator.h | 4 +- tensorflow/core/graph/mkl_graph_util.h | 4 +- tensorflow/core/graph/mkl_layout_pass.cc | 13 ++--- tensorflow/core/kernels/BUILD | 50 +++++++++---------- tensorflow/core/kernels/mkl_concat_op.cc | 12 +++-- .../core/kernels/mkl_conv_grad_bias_ops.cc | 5 +- tensorflow/core/kernels/mkl_conv_ops.cc | 6 ++- .../core/kernels/mkl_fused_batch_norm_op.cc | 1 - tensorflow/core/kernels/mkl_lrn_op.cc | 9 ++-- tensorflow/core/kernels/mkl_maxpooling_op.cc | 7 +-- tensorflow/core/kernels/mkl_relu_op.cc | 1 - tensorflow/core/ops/nn_ops.cc | 8 +-- 14 files changed, 93 insertions(+), 60 deletions(-) create mode 100644 tensorflow/core/common_runtime/mkl_cpu_allocator.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 3271825251..3a436ff680 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1920,7 +1920,7 @@ tf_cuda_library( ) + if_mkl( [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ], ), alwayslink = 1, @@ -2135,6 +2135,7 @@ tf_cuda_library( "common_runtime/graph_runner.cc", "common_runtime/local_device.cc", "common_runtime/memory_types.cc", + "common_runtime/mkl_cpu_allocator.cc", "common_runtime/optimization_registry.cc", "common_runtime/parallel_concat_optimizer.cc", "common_runtime/placer.cc", @@ -2174,6 +2175,7 @@ tf_cuda_library( ] + if_mkl( [ "//third_party/mkl:intel_binary_blob", + "@mkl_dnn", ], ), alwayslink = 1, @@ -2218,7 +2220,7 @@ tf_cuda_library( ] + if_mkl( [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ], ) + tf_additional_core_deps() + if_static([":core_cpu_impl"]), alwayslink = 1, diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.cc b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc new file mode 100644 index 0000000000..43a909466e --- /dev/null +++ 
b/tensorflow/core/common_runtime/mkl_cpu_allocator.cc @@ -0,0 +1,27 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifdef INTEL_MKL + +#include "tensorflow/core/common_runtime/mkl_cpu_allocator.h" + +namespace tensorflow { + +constexpr const char* MklCPUAllocator::kMaxLimitStr; +constexpr const size_t MklCPUAllocator::kDefaultMaxLimit; + +} // namespace tensorflow + +#endif // INTEL_MKL diff --git a/tensorflow/core/common_runtime/mkl_cpu_allocator.h b/tensorflow/core/common_runtime/mkl_cpu_allocator.h index fb092424bf..55c8411ad0 100644 --- a/tensorflow/core/common_runtime/mkl_cpu_allocator.h +++ b/tensorflow/core/common_runtime/mkl_cpu_allocator.h @@ -53,7 +53,7 @@ class MklCPUAllocator : public VisitableAllocator { static constexpr const char* kMaxLimitStr = "TF_MKL_ALLOC_MAX_BYTES"; /// Default upper limit on allocator size - 64GB - static const size_t kDefaultMaxLimit = 64LL << 30; + static constexpr size_t kDefaultMaxLimit = 64LL << 30; MklCPUAllocator() { TF_CHECK_OK(Initialize()); } @@ -158,7 +158,7 @@ class MklCPUAllocator : public VisitableAllocator { static constexpr const char* kName = "mklcpu"; /// The alignment that we need for the allocations - static const size_t kAlignment = 64; + static constexpr const size_t kAlignment = 64; VisitableAllocator* allocator_; // owned by this class }; diff --git 
a/tensorflow/core/graph/mkl_graph_util.h b/tensorflow/core/graph/mkl_graph_util.h index 1b99d54e8e..5f51d6083b 100644 --- a/tensorflow/core/graph/mkl_graph_util.h +++ b/tensorflow/core/graph/mkl_graph_util.h @@ -90,7 +90,7 @@ inline string GetMklOpName(const string& name) { // @input: name of the op // @input: T datatype to be used for checking op // @return: true if opname is registered as Mkl op; false otherwise -static inline bool IsMklOp(const std::string& op_name, DataType T) { +static inline bool IsMklOp(const string& op_name, DataType T) { string kernel = KernelsRegisteredForOp(op_name); bool result = kernel.find(kMklOpLabelPattern) != string::npos && (T == DT_FLOAT); @@ -104,7 +104,7 @@ static inline bool IsMklOp(const std::string& op_name, DataType T) { // @input: T datatype to be used for checking op // @return: true if opname is registered as element-wise Mkl op; // false otherwise -static inline bool IsMklElementWiseOp(const std::string& op_name, DataType T) { +static inline bool IsMklElementWiseOp(const string& op_name, DataType T) { if (!IsMklOp(op_name, T)) { return false; } diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 7d3be15299..02038c5d77 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +// TODO(intel): Improve error handling in this file; instead of CHECK failing +// all over the place, we should log an error and execute the original graph. 
#ifdef INTEL_MKL #include @@ -1030,8 +1032,7 @@ void MklLayoutRewritePass::GetDummyMklTensorNode(std::unique_ptr* g, TensorProto proto; proto.set_dtype(dt); uint8 zero[8] = {0, 0, 0, 0, 0, 0, 0, 0}; - proto.set_tensor_content(const_cast(static_cast(&zero)), - 8); + proto.set_tensor_content(string(reinterpret_cast(zero), 8)); TensorShape dummy_shape({8}); dummy_shape.AsProto(proto.mutable_tensor_shape()); TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") @@ -1144,7 +1145,8 @@ int MklLayoutRewritePass::SetUpContiguousInputs( // For that let's first find filter node that is 2nd input (slot 1) // of BackpropInput. Node* filter_node = nullptr; - old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, &filter_node); + TF_CHECK_OK(old_node->input_node(kConv2DBackpropInputFilterInputSlotIdx, + &filter_node)); CHECK_NOTNULL(filter_node); // Now check which nodes receive from filter_node. Filter feeds as @@ -1323,8 +1325,7 @@ void MklLayoutRewritePass::GetDummyWorkspaceTensorNode( TensorProto proto; proto.set_dtype(dt); float zero[1] = {0}; - proto.set_tensor_content(const_cast(static_cast(&zero)), - 4); + proto.set_tensor_content(string(reinterpret_cast(&zero), 4)); TensorShape dummy_shape({1}); dummy_shape.AsProto(proto.mutable_tensor_shape()); TF_CHECK_OK(NodeBuilder((*g)->NewName("DMT"), "Const") @@ -1829,7 +1830,7 @@ Status MklLayoutRewritePass::MergeNode(std::unique_ptr* g, Node* succ, // Create node. Node* new_node; - nb.Finalize(&**g, &new_node); + TF_CHECK_OK(nb.Finalize(&**g, &new_node)); CHECK_NOTNULL(new_node); // Set the Mkl layer label for this op. 
diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index feacee5d63..52be90ea1f 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -879,7 +879,7 @@ tf_kernel_library( hdrs = ["transpose_op.h"], deps = ARRAY_DEPS + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ]), ) @@ -2810,7 +2810,7 @@ tf_kernel_library( "//conditions:default": [], }) + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", + "@mkl_dnn", ]) + if_cuda([ "//tensorflow/core/platform/default/build_config:cublas_plugin", ]), @@ -5850,10 +5850,9 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5867,10 +5866,9 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5898,6 +5896,7 @@ tf_mkl_kernel_library( ], hdrs = ["mkl_pooling_ops_common.h"], deps = [ + ":bounds_check", ":ops_util", "//tensorflow/core:core_cpu", "//tensorflow/core:framework", @@ -5919,10 +5918,10 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ + "//third_party/eigen3", "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5936,19 +5935,18 @@ tf_mkl_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_ops_op_lib", - ] + if_mkl([ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( name = "mkl_fused_batch_norm_op", srcs = ["mkl_fused_batch_norm_op.cc"], - deps = NN_DEPS + if_mkl([ + deps 
= NN_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5962,10 +5960,10 @@ tf_mkl_kernel_library( tf_mkl_kernel_library( name = "mkl_concat_op", prefix = "mkl_concat_op", - deps = ARRAY_DEPS + if_mkl([ + deps = ARRAY_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( @@ -5979,19 +5977,19 @@ tf_mkl_kernel_library( tf_mkl_kernel_library( name = "mkl_identity_op", prefix = "mkl_identity_op", - deps = ARRAY_DEPS + if_mkl([ + deps = ARRAY_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( name = "mkl_lrn_op", prefix = "mkl_lrn_op", - deps = NN_DEPS + if_mkl([ + deps = NN_DEPS + [ "//third_party/mkl:intel_binary_blob", - "@mkl_dnn//:mkl_dnn", - ]), + "@mkl_dnn", + ], ) tf_mkl_kernel_library( diff --git a/tensorflow/core/kernels/mkl_concat_op.cc b/tensorflow/core/kernels/mkl_concat_op.cc index f1f267e849..aa3ea890b0 100644 --- a/tensorflow/core/kernels/mkl_concat_op.cc +++ b/tensorflow/core/kernels/mkl_concat_op.cc @@ -519,9 +519,11 @@ class MklConcatOp : public OpKernel { mkl_tensor_tf_shape.AddDim( SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension())); int tf_output_index = 0; - context->allocate_output( + // TODO(jktomer): replace this with OP_REQUIRES_OK and clean up this file + // to propagate the status up the call stack. 
+ TF_CHECK_OK(context->allocate_output( GetTensorMetaDataIndex(tf_output_index, context->num_outputs()), - mkl_tensor_tf_shape, &mkl_tensor); + mkl_tensor_tf_shape, &mkl_tensor)); mkl_tensor_mkl_shape.SerializeMklShape( mkl_tensor->flat().data(), mkl_tensor->flat().size() * sizeof(uint8)); @@ -549,9 +551,11 @@ class MklConcatOp : public OpKernel { mkl_tensor_tf_shape.AddDim( SIZE_OF_MKL_SERIAL_DATA(mkl_tensor_mkl_shape.GetDimension())); int tf_output_index = 0; - context->allocate_output( + // TODO(jktomer): replace this with OP_REQUIRES_OK and clean up this file + // to propagate the status up the call stack. + TF_CHECK_OK(context->allocate_output( GetTensorMetaDataIndex(tf_output_index, context->num_outputs()), - mkl_tensor_tf_shape, &mkl_tensor); + mkl_tensor_tf_shape, &mkl_tensor)); mkl_tensor_mkl_shape.SerializeMklShape( mkl_tensor->flat().data(), mkl_tensor->flat().size() * sizeof(uint8)); diff --git a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc index 25c2573741..d23027a54d 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_bias_ops.cc @@ -79,8 +79,9 @@ class MklConv2DCustomBackpropBiasOp : public OpKernel { } else if (data_format_ == FORMAT_NHWC || data_format_ == FORMAT_NCHW) { mkl_context.c_size = GetTensorDim(input, data_format_, 'C'); } else { - errors::InvalidArgument("Unknown format ", - " Format must be either NCHW or NHWC. "); + context->CtxFailure(errors::InvalidArgument( + "Unknown format ", " Format must be either NCHW or NHWC. 
")); + return; } TensorShape output_shape{mkl_context.c_size}; diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 2953426d58..1440da8f82 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -294,8 +294,10 @@ class MklConv2DOp : public OpKernel { mkl_filter_output_mkl_shape.SetMklLayout(mkl_context.prim_fwd, dnnResourceFilter); - size_t filter_sizes[4] = {filter.dim_size(0), filter.dim_size(1), - filter.dim_size(2), filter.dim_size(3)}; + size_t filter_sizes[4] = {static_cast(filter.dim_size(0)), + static_cast(filter.dim_size(1)), + static_cast(filter.dim_size(2)), + static_cast(filter.dim_size(3))}; mkl_filter_output_mkl_shape.SetTfLayout(filter.dims(), filter_sizes, mkl_context.filter_strides); diff --git a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc index 8313224d7f..9b2146aca3 100644 --- a/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc +++ b/tensorflow/core/kernels/mkl_fused_batch_norm_op.cc @@ -262,7 +262,6 @@ class MklFusedBatchNormOp : public OpKernel { } void MklCreateInputLayout(OpKernelContext* context) { - const Tensor& input = MklGetInput(context, 0); bool input_in_mkl_format = mkl_shape_input_shape.IsMklTensor(); if (input_in_mkl_format) { mkl_lt_input = diff --git a/tensorflow/core/kernels/mkl_lrn_op.cc b/tensorflow/core/kernels/mkl_lrn_op.cc index 5f0a12a1fb..282012c719 100644 --- a/tensorflow/core/kernels/mkl_lrn_op.cc +++ b/tensorflow/core/kernels/mkl_lrn_op.cc @@ -88,7 +88,8 @@ class MklLRNOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); workspace_enabled_ = false; - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -357,7 +358,8 @@ class 
MklLRNGradOp : public OpKernel { OP_REQUIRES_OK(context, context->GetAttr("alpha", &alpha_)); OP_REQUIRES_OK(context, context->GetAttr("beta", &beta_)); workspace_enabled_ = false; - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -535,7 +537,6 @@ class MklLRNGradOp : public OpKernel { Tensor* mkl_tmp_outimage_buf_tensor) { const Tensor& in_grads = MklGetInput(context, 0); const Tensor& in_image = MklGetInput(context, 1); - const Tensor& out_image = MklGetInput(context, 2); const Tensor& workspace = MklGetInput( context, 3); /*Worskpsace is enabled, get the buffer to the workspace */ @@ -544,8 +545,6 @@ class MklLRNGradOp : public OpKernel { static_cast(in_grads.flat().data())); void* user_fwd_input = const_cast( static_cast(in_image.flat().data())); - void* user_fwd_output = const_cast( - static_cast(out_image.flat().data())); void* workspace_buffer = const_cast( static_cast(workspace.flat().data())); diff --git a/tensorflow/core/kernels/mkl_maxpooling_op.cc b/tensorflow/core/kernels/mkl_maxpooling_op.cc index 14607f26e0..ea537524b1 100644 --- a/tensorflow/core/kernels/mkl_maxpooling_op.cc +++ b/tensorflow/core/kernels/mkl_maxpooling_op.cc @@ -69,7 +69,8 @@ class MklMaxPoolingOp : public OpKernel { // We may not get this attribute for this node if it does not go through // graph rewrite pass. So we do not check for error while retrieving this // attribute value. 
- context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { @@ -118,7 +119,6 @@ class MklMaxPoolingOp : public OpKernel { mkl_out_shape); Tensor* workspace_tensor; - void* workspace_buf = nullptr; TensorShape workspace_shape; mkl_workspace_shape.SetMklTensor(false); @@ -226,7 +226,8 @@ class MklMaxPoolingGradOp : public OpKernel { // We may not get this attribute for this node if it does not go through // graph rewrite pass. So we do not check for error while retrieving this // attribute value. - context->GetAttr("workspace_enabled", &workspace_enabled_); + OP_REQUIRES_OK(context, + context->GetAttr("workspace_enabled", &workspace_enabled_)); } void Compute(OpKernelContext* context) override { diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 51db3991e2..f006954c03 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -25,7 +25,6 @@ limitations under the License. #include "mkl_dnn.h" #include "mkl_dnn_types.h" -#include "tensorflow/core/platform/default/logging.h" #include "tensorflow/core/util/mkl_util.h" #ifndef INTEL_MKL_ML diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 67481fd202..910fbaca9e 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -2007,10 +2007,10 @@ REGISTER_OP("_MklFusedBatchNorm") TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 4, &x)); bool is_training; - c->GetAttr("is_training", &is_training); + TF_RETURN_IF_ERROR(c->GetAttr("is_training", &is_training)); int number_inputs = (is_training) ? 3 : 5; string data_format; - c->GetAttr("data_format", &data_format); + TF_RETURN_IF_ERROR(c->GetAttr("data_format", &data_format)); DimensionHandle channel_dim = (data_format == "NHWC") ? 
c->Dim(x, 3) : c->Dim(x, 1); @@ -2076,8 +2076,8 @@ REGISTER_OP("_MklFusedBatchNormGrad") bool is_training; string data_format; - c->GetAttr("is_training", &is_training); - c->GetAttr("data_format", &data_format); + TF_RETURN_IF_ERROR(c->GetAttr("is_training", &is_training)); + TF_RETURN_IF_ERROR(c->GetAttr("data_format", &data_format)); DimensionHandle channel_dim = (data_format == "NHWC") ? c->Dim(y_backprop, 3) : c->Dim(y_backprop, 1); -- GitLab From 32d44ae7ded94a435559cdd4c7e224ea07e7c03f Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Fri, 2 Mar 2018 15:45:25 -0800 Subject: [PATCH 374/884] Int8, FP16 and new ops support --- .../contrib/tensorrt/convert/convert_nodes.cc | 43 +++++++------------ .../contrib/tensorrt/python/trt_convert.py | 12 ++---- .../contrib/tensorrt/test/test_tftrt.py | 4 +- 3 files changed, 21 insertions(+), 38 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index d5652977be..2c79d28678 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -329,7 +329,7 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides, } } -// TODO(jie): fail to tensorflow!! +// TODO(jie): fallback to tensorflow!! 
void ReorderCKtoKC(const TRT_ShapedWeights& iweights, TRT_ShapedWeights* oweights) { int c = iweights.shape_.d[0]; @@ -355,7 +355,8 @@ void ReorderCKtoKC(const TRT_ShapedWeights& iweights, break; } default: - LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; + LOG(FATAL) << "Unsupported type in reorder expected fp32 or fp16 but got " + << DataTypeString(iweights.type_); } } @@ -396,7 +397,8 @@ void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights, } default: - LOG(FATAL) << "!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"; + LOG(FATAL) << "Unsupported type, expected fp32 or fp16 but got " + << DataTypeString(iweights.type_); } } @@ -414,7 +416,6 @@ inline std::shared_ptr infer_object(T* obj) { return std::shared_ptr(obj, InferDeleter()); } -// Logger for GIE info/warning/errors class Converter; using OpConverter = @@ -455,7 +456,7 @@ class Converter { if (trt_tensors_.count(name)) { inputs.push_back(trt_tensors_.at(name)); } else { - LOG(FATAL) << "input: " << name << "not availabled for node at, " + LOG(FATAL) << "input: " << name << " not availabled for node at, " << node_def.name(); } } @@ -474,7 +475,6 @@ class Converter { TRT_ShapedWeights weights(type, nullptr, shape); // TODO(jie): check weights size_bytes. 
0 means type error weight_store_->store_.push_back(std::vector(weights.size_bytes())); - // temp_bufs_.push_back(std::vector(weights.size_bytes())); weights.SetValues(weight_store_->store_.back().data()); return weights; } @@ -778,13 +778,12 @@ tensorflow::Status ConstantFoldUnary( CHECK_EQ(weights_input.type_, TFAttrs(node_def).get("T")); - // Maybe I should do a switch LambdaFactory unary_op; if (node_def.op() == "Rsqrt") { // Compute rsqrt unary_op.op = LambdaFactory::OP_CATEGORY::RSQRT; auto ret = UnaryCompute(weights_input, &weights_output, unary_op); - // PAss the output + // Pass the output if (ret == tensorflow::Status::OK()) { outputs->push_back(TRT_TensorOrWeights(weights_output)); } @@ -796,7 +795,7 @@ tensorflow::Status ConstantFoldUnary( } // TODO(jie,ben) broadcast is needed yet not implemented -// Let's get the simple stuff working first. Maybe we should fall bakc to TF +// Let's get the simple stuff working first. Maybe we should fall back to TF // approach for constant folding tensorflow::Status ConstantFoldBinary( Converter& ctx, const tensorflow::NodeDef& node_def, @@ -843,7 +842,6 @@ tensorflow::Status ConstantFoldBinary( // Allocate output weights TRT_ShapedWeights weights_output = ctx.get_temp_weights(dtype, output_shape); - // Maybe I should do a switch LambdaFactory binary_op; if (node_def.op() == "Sub") { binary_op.op = LambdaFactory::OP_CATEGORY::SUB; @@ -1106,7 +1104,7 @@ tensorflow::Status ConvertConv2DHelper( tensorflow::Status ConvertConv2DHelper( Converter& ctx, const tensorflow::NodeDef& node_def, - const std::vector & inputs, + const std::vector& inputs, std::vector* outputs, ConvolutionType type) { switch (type) { case ConvolutionType::DEFAULT: @@ -1125,8 +1123,6 @@ tensorflow::Status BinaryTensorOpTensor( static const std::unordered_map ops{ {"Add", nvinfer1::ElementWiseOperation::kSUM}, {"Mul", nvinfer1::ElementWiseOperation::kPROD}, - // {"max", nvinfer1::ElementWiseOperation::kMAX}, - // {"min", 
nvinfer1::ElementWiseOperation::kMIN}, {"Sub", nvinfer1::ElementWiseOperation::kSUB}, {"Div", nvinfer1::ElementWiseOperation::kDIV}, }; @@ -1426,12 +1422,6 @@ tensorflow::Status ConvertConst(Converter& ctx, memcpy(dst, tensor_data.data(), lenData); // store into weight store weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } - // LOG(INFO) << " add: " << weights_tensor.float_val().data(); - // LOG(INFO) << " value: " << (*weights_tensor.float_val().data()); - - // weights = ctx.get_temp_weights(dtype, scalar_shape); - // std::memcpy(const_cast(weights.values), - // weights_tensor.float_val().data(), weights.size_bytes()); } else if (!weights_tensor.int_val().empty()) { VLOG(2) << "int!!!" << node_def.name(); nvinfer1::Dims scalar_shape; @@ -1905,8 +1895,9 @@ tensorflow::Status ConvertFusedBatchNorm( if ((scale_weights.type_ != tensorflow::DataType::DT_FLOAT) && (scale_weights.type_ != tensorflow::DataType::DT_HALF)) { return tensorflow::errors::Unimplemented( - "only float32 weights data type is supported, at " + node_def.name() + - " " + tensorflow::DataTypeString(scale_weights.type_)); + "only float32 or float16 weight data type is supported, for node " + + node_def.name() + " got " + + tensorflow::DataTypeString(scale_weights.type_)); } if (scale_weights.type_ == tensorflow::DT_FLOAT) { for (size_t i = 0; i < nweight; ++i) { @@ -1962,11 +1953,10 @@ tensorflow::Status ConvertMatMul(Converter& ctx, const tensorflow::NodeDef& node_def, const std::vector& inputs, std::vector* outputs) { - const nvinfer1::ITensor * tensor = inputs.at(0).tensor(); + const nvinfer1::ITensor* tensor = inputs.at(0).tensor(); // TODO(jie): transpose! 
TFAttrs attrs(node_def); - // bool transpose_w = bool(attrs->at("transpose_b")->i()); // tensor after transpose (NCHW) auto tensor_dim = tensor->getDimensions(); @@ -2160,7 +2150,8 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( calib_res->thr_->join(); delete calib_res->thr_; if (!calib_res->engine_) { - LOG(FATAL) << "Calibration failed!, engine is nullptr"; + LOG(FATAL) << "Calibration failed!, engine is nullptr. Did you run " + "calibration graph?"; } auto weight_rmgr = trt_rm->getManager("WeightStore"); TF_CHECK_OK(weight_rmgr->Delete( @@ -2228,7 +2219,6 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { std::list order; for (tensorflow::Node* node : order_vec) { if (s.subgraph_node_ids.count(node->id())) { - // order.push_back(node); order.push_front(node); // we want topological order to contstruct the // network layer by layer } @@ -2290,7 +2280,6 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto node_name = node->name(); input_names.push_back(node_name); // insert original node name without port // TODO(jie): alternative :) - // tensorflow::DataType tf_dtype = node->output_type(output_idx); if (!s.graph_properties.HasOutputProperties(node_name)) return tensorflow::errors::Internal("failed to find input node: " + node_name); @@ -2627,7 +2616,6 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } VLOG(2) << "Finished output"; - // TODO(jie): static_id is not thread safe. 
// Build the engine trt_builder->setMaxBatchSize(s.max_batch_size); @@ -2639,7 +2627,6 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( VLOG(0) << "Using FP16 precision mode"; } LOG(INFO) << "starting build engine"; - // TODO(ben,jie): half2 and int8 mode support string engine_plan_string; { auto trt_engine = diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index d1f9f8acb9..44983d332b 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -60,8 +60,9 @@ def create_inference_graph(input_graph_def, "INT8": 2} if precision_mode.upper() not in supported_precision_modes: raise ValueError(("precision mode '{}' is not supported." - "It should be one of {}").format(precision_mode, - "{'FP32', 'FP16', 'INT8'}")) + "It should be one of {}" + ).format(precision_mode, + "{'FP32', 'FP16', 'INT8'}")) mode = supported_precision_modes[precision_mode.upper()] def py2bytes(inp): return inp @@ -119,11 +120,6 @@ def create_inference_graph(input_graph_def, def calib_graph_to_infer_graph(calibration_graph_def): """Convert an existing calibration graph containing calibration data to inference graph""" - def py2bytes(inp): - return inp - - def py3bytes(inp): - return inp.encode("utf-8", errors="surrogateescape") def py2string(inp): return inp @@ -147,7 +143,7 @@ def calib_graph_to_infer_graph(calibration_graph_def): msg = status.split(";") if len(msg) == 1: raise RuntimeError("Status message is malformed {}".format(status)) - raise _impl._make_specific_exception(None,None,";".join(msg[1:]), int(msg[0])) + raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), int(msg[0])) output_graph_def = graph_pb2.GraphDef() output_graph_def.ParseFromString(output_graph_def_string) del output_graph_def_string #save some memory diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index 
385a9f72af..ac3a0272b0 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -89,9 +89,9 @@ def run_calibration(gdef, dumm_inp): out = out.outputs[0] with csess.Session( config=cpb2.ConfigProto(gpu_options=gpu_options), graph=g) as sess: - # run over real calibration data here, we are mimicking a + # run over real calibration data here, we are mimicking a # calibration set of 30 different batches. Use as much calibration data as you want - for _ in range(30): + for _ in range(30): val = sess.run(out, {inp: dumm_inp}) return val -- GitLab From 737d2e73c82abe35ae76bd7d17793243f3dc9dd5 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 2 Mar 2018 15:52:32 -0800 Subject: [PATCH 375/884] Exit with failure if a free gpu is not found by parallel_gpu_execute. If TF_GPU_COUNT was a value greater than the actual number of GPUs, it was possible for tests to just pass without running when running under parallel_gpu_execute.sh. PiperOrigin-RevId: 187681032 --- tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh index cfeaebdbf5..d0816c92b7 100755 --- a/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh +++ b/tensorflow/tools/ci_build/gpu_build/parallel_gpu_execute.sh @@ -54,3 +54,6 @@ for i in `seq 0 $((TF_GPU_COUNT-1))`; do fi done +echo "Cannot find a free GPU to run the test $* on, exiting with failure..." +exit 1 + -- GitLab From c12f0c5f84699835f9b8111299febf9fc7aba343 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Fri, 2 Mar 2018 16:05:05 -0800 Subject: [PATCH 376/884] eager/examples: Use tf.keras.Model in RNN examples. Some notable differences between tf.keras.Model and tfe.Network: - tf.keras.Model doesn't have a track_layer() method. It tracks Layer and Checkpointable valued attributes automatically. 
For list and other complex structures, __setattr__ performs the role of tfe.Network.track_layer() - tf.keras.Model accepts a single positional argument. Thus either all arguments must be packaged into a single list/tuple (as in rnn_ptb.py) or be provided as keyword arguments (as in rnn_colorbot.py). PiperOrigin-RevId: 187682716 --- .../examples/rnn_colorbot/rnn_colorbot.py | 40 ++++++++++-------- .../eager/python/examples/rnn_ptb/rnn_ptb.py | 42 ++++++++++++------- 2 files changed, 50 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py index aa87b94e7b..29f0232454 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py +++ b/tensorflow/contrib/eager/python/examples/rnn_colorbot/rnn_colorbot.py @@ -109,7 +109,7 @@ def load_dataset(data_dir, url, batch_size): # pylint: disable=not-callable -class RNNColorbot(tfe.Network): +class RNNColorbot(tf.keras.Model): """Multi-layer (LSTM) RNN that regresses on real-valued vector labels. """ @@ -127,23 +127,20 @@ class RNNColorbot(tfe.Network): self.label_dimension = label_dimension self.keep_prob = keep_prob - # Note the calls to `track_layer` below; these calls register the layers as - # network components that house trainable variables. - self.cells = [ - self.track_layer(tf.nn.rnn_cell.BasicLSTMCell(size)) - for size in rnn_cell_sizes - ] - self.relu = self.track_layer( - tf.layers.Dense(label_dimension, activation=tf.nn.relu, name="relu")) + self.cells = self._add_cells( + [tf.nn.rnn_cell.BasicLSTMCell(size) for size in rnn_cell_sizes]) + self.relu = tf.layers.Dense( + label_dimension, activation=tf.nn.relu, name="relu") - def call(self, chars, sequence_length, training=False): + def call(self, inputs, training=False): """Implements the RNN logic and prediction generation. 
Args: - chars: a Tensor of dimension [batch_size, time_steps, 256] holding a - batch of one-hot encoded color names - sequence_length: a Tensor of dimension [batch_size] holding the length - of each character sequence (i.e., color name) + inputs: A tuple (chars, sequence_length), where chars is a batch of + one-hot encoded color names represented as a Tensor with dimensions + [batch_size, time_steps, 256] and sequence_length holds the length + of each character sequence (color name) as a Tensor with dimension + [batch_size]. training: whether the invocation is happening during training Returns: @@ -151,6 +148,7 @@ class RNNColorbot(tfe.Network): passing chars through a multi-layer RNN and applying a ReLU to the final hidden state. """ + (chars, sequence_length) = inputs # Transpose the first and second dimensions so that chars is of shape # [time_steps, batch_size, dimension]. chars = tf.transpose(chars, [1, 0, 2]) @@ -181,6 +179,14 @@ class RNNColorbot(tfe.Network): hidden_states = tf.gather_nd(chars, indices) return self.relu(hidden_states) + def _add_cells(self, cells): + # "Magic" required for keras.Model classes to track all the variables in + # a list of tf.layers.Layer objects. + # TODO(ashankar): Figure out API so user code doesn't have to do this. 
+ for i, c in enumerate(cells): + setattr(self, "cell-%d" % i, c) + return cells + def loss(labels, predictions): """Computes mean squared loss.""" @@ -191,7 +197,7 @@ def test(model, eval_data): """Computes the average loss on eval_data, which should be a Dataset.""" avg_loss = tfe.metrics.Mean("loss") for (labels, chars, sequence_length) in tfe.Iterator(eval_data): - predictions = model(chars, sequence_length, training=False) + predictions = model((chars, sequence_length), training=False) avg_loss(loss(labels, predictions)) print("eval/loss: %.6f\n" % avg_loss.result()) with tf.contrib.summary.always_record_summaries(): @@ -204,7 +210,7 @@ def train_one_epoch(model, optimizer, train_data, log_interval=10): tf.train.get_or_create_global_step() def model_loss(labels, chars, sequence_length): - predictions = model(chars, sequence_length, training=True) + predictions = model((chars, sequence_length), training=True) loss_value = loss(labels, predictions) tf.contrib.summary.scalar("loss", loss_value) return loss_value @@ -277,7 +283,7 @@ def main(_): (chars, length) = (tf.identity(chars), tf.identity(length)) chars = tf.expand_dims(chars, 0) length = tf.expand_dims(length, 0) - preds = tf.unstack(model(chars, length, training=False)[0]) + preds = tf.unstack(model((chars, length), training=False)[0]) # Predictions cannot be negative, as they are generated by a ReLU layer; # they may, however, be greater than 1. diff --git a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py index 5c5c59c877..69cd16d12c 100644 --- a/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py +++ b/tensorflow/contrib/eager/python/examples/rnn_ptb/rnn_ptb.py @@ -39,21 +39,23 @@ from tensorflow.contrib.cudnn_rnn.python.layers import cudnn_rnn from tensorflow.contrib.eager.python import tfe -class RNN(tfe.Network): +class RNN(tf.keras.Model): """A static RNN. - Similar to tf.nn.static_rnn, implemented as a tf.layer.Layer. 
+ Similar to tf.nn.static_rnn, implemented as a class. """ def __init__(self, hidden_dim, num_layers, keep_ratio): super(RNN, self).__init__() self.keep_ratio = keep_ratio - for _ in range(num_layers): - self.track_layer(tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim)) + self.cells = self._add_cells([ + tf.nn.rnn_cell.BasicLSTMCell(num_units=hidden_dim) + for _ in range(num_layers) + ]) def call(self, input_seq, training): batch_size = int(input_seq.shape[1]) - for c in self.layers: + for c in self.cells: state = c.zero_state(batch_size, tf.float32) outputs = [] input_seq = tf.unstack(input_seq, num=int(input_seq.shape[0]), axis=0) @@ -64,7 +66,19 @@ class RNN(tfe.Network): input_seq = tf.stack(outputs, axis=0) if training: input_seq = tf.nn.dropout(input_seq, self.keep_ratio) - return input_seq, None + # Returning a list instead of a single tensor so that the line: + # y = self.rnn(y, ...)[0] + # in PTBModel.call works for both this RNN and CudnnLSTM (which returns a + # tuple (output, output_states). + return [input_seq] + + def _add_cells(self, cells): + # "Magic" required for keras.Model classes to track all the variables in + # a list of tf.layers.Layer objects. + # TODO(ashankar): Figure out API so user code doesn't have to do this. + for i, c in enumerate(cells): + setattr(self, "cell-%d" % i, c) + return cells class Embedding(tf.layers.Layer): @@ -87,7 +101,8 @@ class Embedding(tf.layers.Layer): return tf.nn.embedding_lookup(self.embedding, x) -class PTBModel(tfe.Network): +# pylint: disable=not-callable +class PTBModel(tf.keras.Model): """LSTM for word language modeling. 
Model described in: @@ -109,19 +124,16 @@ class PTBModel(tfe.Network): self.keep_ratio = 1 - dropout_ratio self.use_cudnn_rnn = use_cudnn_rnn - self.embedding = self.track_layer(Embedding(vocab_size, embedding_dim)) + self.embedding = Embedding(vocab_size, embedding_dim) if self.use_cudnn_rnn: self.rnn = cudnn_rnn.CudnnLSTM( num_layers, hidden_dim, dropout=dropout_ratio) else: self.rnn = RNN(hidden_dim, num_layers, self.keep_ratio) - self.track_layer(self.rnn) - self.linear = self.track_layer( - tf.layers.Dense( - vocab_size, - kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1))) + self.linear = tf.layers.Dense( + vocab_size, kernel_initializer=tf.random_uniform_initializer(-0.1, 0.1)) self._output_shape = [-1, embedding_dim] def call(self, input_seq, training): @@ -136,7 +148,7 @@ class PTBModel(tfe.Network): y = self.embedding(input_seq) if training: y = tf.nn.dropout(y, self.keep_ratio) - y, _ = self.rnn(y, training=training) + y = self.rnn(y, training=training)[0] return self.linear(tf.reshape(y, self._output_shape)) @@ -148,7 +160,7 @@ def clip_gradients(grads_and_vars, clip_ratio): def loss_fn(model, inputs, targets, training): labels = tf.reshape(targets, [-1]) - outputs = model(inputs, training) + outputs = model(inputs, training=training) return tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=outputs)) -- GitLab From 284dac189dcae46c77f1ec70055b13e69c31e4c0 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 16:06:24 -0800 Subject: [PATCH 377/884] Checkpointable: Fix CPU/GPU device placement issues Restore ops go on the CPU, then the value gets copied to whichever device it needs to be on. This I need to do manually for restores passed as initial_values; for regular save/restore it's done by the SaveableObjects for variables. Also explicitly places some counters on the CPU. Adds a GPU-using test for Checkpointable usage. 
PiperOrigin-RevId: 187683050 --- tensorflow/contrib/eager/python/BUILD | 7 ++- .../eager/python/checkpointable_utils.py | 50 +++++++++++-------- .../eager/python/checkpointable_utils_test.py | 2 +- tensorflow/python/BUILD | 1 + tensorflow/python/training/checkpointable.py | 18 ++++--- 5 files changed, 45 insertions(+), 33 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 6fb8287030..7fde53476d 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -243,13 +243,13 @@ py_library( ], ) -py_test( +cuda_py_test( name = "checkpointable_utils_test", srcs = ["checkpointable_utils_test.py"], - srcs_version = "PY2AND3", - deps = [ + additional_deps = [ ":checkpointable_utils", ":network", + "@six_archive//:six", "//tensorflow/python:constant_op", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", @@ -265,7 +265,6 @@ py_test( "//tensorflow/python/eager:context", "//tensorflow/python/eager:test", "//tensorflow/python/keras", - "@six_archive//:six", ], ) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index 89cd543f77..cd742991af 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -549,7 +549,8 @@ class CheckpointableSaver(object): # `Checkpointable` objects save themselves. 
self._root_checkpointable_ref = root_checkpointable if context.in_graph_mode(): - self._file_prefix_placeholder = constant_op.constant("model") + with ops.device("/cpu:0"): + self._file_prefix_placeholder = constant_op.constant("model") else: self._file_prefix_placeholder = None @@ -601,14 +602,16 @@ class CheckpointableSaver(object): if session is None: session = ops.get_default_session() if self._object_graph_feed_tensor is None: - self._object_graph_feed_tensor = constant_op.constant( - "", dtype=dtypes.string) + with ops.device("/cpu:0"): + self._object_graph_feed_tensor = constant_op.constant( + "", dtype=dtypes.string) object_graph_tensor = self._object_graph_feed_tensor feed_additions = {object_graph_tensor: graph_proto.SerializeToString()} else: session = None - object_graph_tensor = constant_op.constant( - graph_proto.SerializeToString(), dtype=dtypes.string) + with ops.device("/cpu:0"): + object_graph_tensor = constant_op.constant( + graph_proto.SerializeToString(), dtype=dtypes.string) feed_additions = None assert _OBJECT_GRAPH_PROTO_KEY not in named_variables named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable( @@ -627,12 +630,13 @@ class CheckpointableSaver(object): self._last_save_object_graph = graph_proto else: saver = self._last_save_saver - save_path = saver.save( - sess=_SessionWithFeedDictAdditions( - session=session, feed_additions=feed_additions), - save_path=file_prefix, - write_meta_graph=False, - global_step=checkpoint_number) + with ops.device("/cpu:0"): + save_path = saver.save( + sess=_SessionWithFeedDictAdditions( + session=session, feed_additions=feed_additions), + save_path=file_prefix, + write_meta_graph=False, + global_step=checkpoint_number) return save_path def _global_variable_names(self): @@ -718,16 +722,18 @@ class CheckpointableSaver(object): file_prefix_feed_dict = {self._file_prefix_placeholder: save_path} else: session = None - file_prefix_tensor = constant_op.constant(save_path) + with ops.device("/cpu:0"): + 
file_prefix_tensor = constant_op.constant(save_path) file_prefix_feed_dict = None try: if not in_graph_mode or self._object_graph_restore_tensor is None: - object_graph_string, = io_ops.restore_v2( - prefix=file_prefix_tensor, - tensor_names=[_OBJECT_GRAPH_PROTO_KEY], - shape_and_slices=[""], - dtypes=[dtypes.string], - name="object_graph_proto_read") + with ops.device("/cpu:0"): + object_graph_string, = io_ops.restore_v2( + prefix=file_prefix_tensor, + tensor_names=[_OBJECT_GRAPH_PROTO_KEY], + shape_and_slices=[""], + dtypes=[dtypes.string], + name="object_graph_proto_read") if in_graph_mode: self._object_graph_restore_tensor = object_graph_string if in_graph_mode: @@ -826,8 +832,9 @@ class Checkpoint(core_checkpointable.Checkpointable): """Create a save counter if it does not yet exist.""" if self._save_counter is None: # Initialized to 0 and incremented before saving. - self._save_counter = add_variable( - self, name="save_counter", initializer=0, dtype=dtypes.int64) + with ops.device("/cpu:0"): + self._save_counter = add_variable( + self, name="save_counter", initializer=0, dtype=dtypes.int64) @property def save_counter(self): @@ -852,7 +859,8 @@ class Checkpoint(core_checkpointable.Checkpointable): # needs to be initialized before assign_add. This is only an issue if # restore() has not been called first. 
session.run(self.save_counter.initializer) - assign_op = self.save_counter.assign_add(1) + with ops.colocate_with(self.save_counter): + assign_op = self.save_counter.assign_add(1) if in_graph_mode: session.run(assign_op) return self._saver.save( diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index c9db2bcafc..9ec89edce8 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -387,7 +387,7 @@ class CheckpointingTests(test.TestCase): checkpoint_prefix = os.path.join(checkpoint_directory, "ckpt") for training_continuation in range(3): with ops.Graph().as_default(), self.test_session( - graph=ops.get_default_graph()): + graph=ops.get_default_graph()), test_util.device(use_gpu=True): model = MyModel() optimizer = adam.AdamOptimizer(0.001) root = checkpointable_utils.Checkpoint( diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index f282abb0a5..db17a3fe02 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -2882,6 +2882,7 @@ py_library( srcs = ["training/checkpointable.py"], srcs_version = "PY2AND3", deps = [ + ":array_ops", ":dtypes", ":io_ops_gen", ":ops", diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index 02c3aebda8..92e8ff3308 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -22,6 +22,7 @@ import collections from tensorflow.python.eager import context from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_io_ops as io_ops from tensorflow.python.util import nest @@ -181,13 +182,16 @@ class _CheckpointPosition(object): dtype = self._checkpoint.dtype_map[checkpoint_key] base_type = dtype.base_dtype with ops.init_scope(): - 
value, = io_ops.restore_v2( - prefix=self._checkpoint.save_path, - tensor_names=[checkpoint_key], - shape_and_slices=[""], - dtypes=[base_type], - name="%s_checkpoint_read" % (serialized_tensor.name,)) - value_tensors[serialized_tensor.name] = value + with ops.device("/cpu:0"): + # Run the restore itself on the CPU. + value, = io_ops.restore_v2( + prefix=self._checkpoint.save_path, + tensor_names=[checkpoint_key], + shape_and_slices=[""], + dtypes=[base_type], + name="%s_checkpoint_read" % (serialized_tensor.name,)) + # Copy the value to the current device if necessary. + value_tensors[serialized_tensor.name] = array_ops.identity(value) return value_tensors def restore_ops(self): -- GitLab From 4df167ac55346357afd612d15674c7556e21ab00 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 16:25:21 -0800 Subject: [PATCH 378/884] Loop optimizer: Convert StackPush nodes to Identity instead of eliminating them completely. Move loop optimizer to run before dependency optimizer so identity nodes will be pruned. 
PiperOrigin-RevId: 187685669 --- tensorflow/core/grappler/optimizers/BUILD | 1 + .../grappler/optimizers/loop_optimizer.cc | 91 ++++++++++--------- .../optimizers/loop_optimizer_test.cc | 74 ++++++++++----- .../grappler/optimizers/meta_optimizer.cc | 22 ++--- tensorflow/core/grappler/utils.cc | 8 +- tensorflow/core/grappler/utils.h | 1 + 6 files changed, 117 insertions(+), 80 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 037438ee75..7ec137373b 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -521,6 +521,7 @@ cc_library( ], visibility = ["//visibility:public"], deps = [ + ":constant_folding", ":graph_optimizer", "//tensorflow/core:framework", "//tensorflow/core:lib", diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index cc226c01db..9e427001d5 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -18,10 +18,12 @@ limitations under the License. 
#include #include +#include "tensorflow/core/framework/attr_value.pb.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/grappler/costs/graph_properties.h" #include "tensorflow/core/grappler/grappler_item.h" #include "tensorflow/core/grappler/op_types.h" +#include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" @@ -31,55 +33,60 @@ namespace tensorflow { namespace grappler { namespace { +std::vector GetStackPushNodesToConvert(const SimpleGraphView& graph_view, + int stack_node_idx) { + VLOG(1) << "Stack node: " << graph_view.graph()->node(stack_node_idx).name(); + const std::unordered_set op_types_to_traverse( + {"Stack", "StackV2", "Enter", "RefEnter", "Switch", "RefSwitch", + "Identity", "RefIdentity"}); + std::vector nodes_to_convert; + std::set fanout; + graph_view.DepthFirstSearch(op_types_to_traverse, stack_node_idx, &fanout); + for (int fanout_idx : fanout) { + const NodeDef& fanout_node = graph_view.graph()->node(fanout_idx); + VLOG(1) << "Fanout " << fanout_idx << " : " << fanout_node.name(); + if (IsStackPushOp(fanout_node)) { + nodes_to_convert.push_back(fanout_idx); + } else if (IsStackOp(fanout_node) || IsStackCloseOp(fanout_node) || + op_types_to_traverse.find(fanout_node.op()) != + op_types_to_traverse.end()) { + continue; + } else { + // The node is either a StackPop node or something unexpected behind which + // may hide a StackPop node, so we leave the graph alone. 
+ nodes_to_convert.clear(); + break; + } + } + return nodes_to_convert; +} + Status RemoveStackOps(const GraphDef& graph, GraphDef* optimized_graph) { + *optimized_graph = graph; + NodeMap node_map(optimized_graph); SimpleGraphView graph_view; TF_RETURN_IF_ERROR(graph_view.Initialize(graph)); - const std::unordered_set op_types_to_traverse( - {"Stack", "StackV2", "Enter", "Switch", "RefSwitch", "Identity"}); - std::set nodes_to_delete; for (int node_idx = 0; node_idx < graph.node_size(); ++node_idx) { - const NodeDef& node = graph.node(node_idx); - if (IsStackOp(node)) { - std::set nodes_found; - graph_view.DepthFirstSearch(op_types_to_traverse, node_idx, &nodes_found); - bool found_pop = false; - bool found_unexpected = false; - for (int found_idx : nodes_found) { - const NodeDef& node = graph.node(found_idx); - if (IsStackPushOp(node) || IsStackOp(node) || IsStackCloseOp(node)) { - continue; - } else if (IsStackPopOp(node)) { - found_pop = true; - } else { - // Don't modify the graph if we found an unexpected op. There may be - // a pop hiding behind it. - found_unexpected = true; + if (IsStackOp(graph.node(node_idx))) { + for (int push_node_idx : + GetStackPushNodesToConvert(graph_view, node_idx)) { + // We found push nodes without corresponding pops. Convert them to + // Identity passing the data through and add a control dependency from + // the op supplying the handle. 
+ NodeDef* push_node = optimized_graph->mutable_node(push_node_idx); + VLOG(1) << "Converting " << push_node_idx << " : " + << push_node->DebugString(); + if (push_node->attr().count("swap_memory") != 0) { + push_node->mutable_attr()->erase("swap_memory"); } + push_node->set_op("Identity"); + push_node->mutable_input()->SwapElements(0, 1); + const string ctrl_dep = ConstantFolding::AddControlDependency( + push_node->input(1), optimized_graph, &node_map); + push_node->set_input(1, ctrl_dep); + VLOG(1) << "After converting: " << push_node->DebugString(); } - if (!found_unexpected && !found_pop) { - VLOG(1) << "Found stack node with no pop: " << node.DebugString(); - // Remove all pushes. - for (int found_idx : nodes_found) { - const NodeDef& node = graph.node(found_idx); - if (IsStackPushOp(node)) { - nodes_to_delete.insert(found_idx); - } - } - } - } - } - - *optimized_graph = graph; - if (!nodes_to_delete.empty()) { - int last = optimized_graph->node_size() - 1; - for (auto it = nodes_to_delete.rbegin(); it != nodes_to_delete.rend(); - ++it) { - const int node_to_delete = *it; - optimized_graph->mutable_node()->SwapElements(node_to_delete, last); - --last; } - optimized_graph->mutable_node()->DeleteSubrange(last + 1, - nodes_to_delete.size()); } return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index bb2ee6b02b..cc9dd22b9e 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -59,34 +59,46 @@ TEST_F(LoopOptimizerTest, NoOp) { namespace { NodeDef* AddNode(const string& name, const string& op, - const std::vector& inputs, GraphDef* graph) { + const std::vector& inputs, + const std::vector>& attributes, + GraphDef* graph) { NodeDef* node = graph->add_node(); node->set_name(name); node->set_op(op); for (const string& input : inputs) { node->add_input(input); } + for (auto attr : 
attributes) { + (*node->mutable_attr())[attr.first] = attr.second; + } return node; } } // namespace TEST_F(LoopOptimizerTest, RemovePush_NoOp) { GrapplerItem item; + AttrValue frame_name; + frame_name.set_s("foo"); + AttrValue type; + type.set_type(DT_RESOURCE); GraphDef& graph = item.graph; + AddNode("c", "Const", {}, {}, &graph); // Stack with corresponding push/pop. - AddNode("stack1", "StackV2", {}, &graph); - AddNode("push1", "StackPushV2", {"stack1"}, &graph); - AddNode("pop1", "StackPopV2", {"stack1"}, &graph); + AddNode("stack1", "StackV2", {}, {}, &graph); + AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); + AddNode("pop1", "StackPopV2", {"stack1"}, {}, &graph); // Stack with corresponding push/pop behind Enter. - AddNode("stack2", "StackV2", {}, &graph); - AddNode("push_enter", "Enter", {"stack1"}, &graph); - AddNode("push2", "StackPushV2", {"push_enter"}, &graph); - AddNode("pop_enter", "Enter", {"stack1"}, &graph); - AddNode("pop2", "StackPopV2", {"pop_enter"}, &graph); + AddNode("stack2", "StackV2", {}, {}, &graph); + AddNode("push_enter", "Enter", {"stack2"}, + {{"T", type}, {"frame_name", frame_name}}, &graph); + AddNode("push2", "StackPushV2", {"push_enter", "c"}, {}, &graph); + AddNode("pop_enter", "Enter", {"stack2"}, + {{"T", type}, {"frame_name", frame_name}}, &graph); + AddNode("pop2", "StackPopV2", {"pop_enter"}, {}, &graph); // Stack with unexpected op type in fanout of Stack. 
- AddNode("stack3", "StackV2", {}, &graph); - AddNode("push3", "StackPushV2", {"stack3"}, &graph); - AddNode("stop", "StopGradient", {"stack3"}, &graph); + AddNode("stack3", "StackV2", {}, {}, &graph); + AddNode("push3", "StackPushV2", {"stack3", "c"}, {}, &graph); + AddNode("stop", "StopGradient", {"stack3"}, {}, &graph); LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); @@ -97,23 +109,39 @@ TEST_F(LoopOptimizerTest, RemovePush_NoOp) { TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { GrapplerItem item; GraphDef& graph = item.graph; - AddNode("stack1", "StackV2", {}, &graph); - AddNode("push1", "StackPushV2", {"stack1"}, &graph); - AddNode("stack2", "StackV2", {}, &graph); - AddNode("push_enter", "Enter", {"stack2"}, &graph); - AddNode("push2", "StackPushV2", {"push_enter"}, &graph); + AttrValue frame_name; + frame_name.set_s("foo"); + AttrValue type; + type.set_type(DT_RESOURCE); + AddNode("c", "Const", {}, {}, &graph); + AddNode("stack1", "StackV2", {}, {}, &graph); + AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); + AddNode("stack2", "StackV2", {}, {}, &graph); + AddNode("push_enter", "Enter", {"stack2"}, + {{"T", type}, {"frame_name", frame_name}}, &graph); + AddNode("push2", "StackPushV2", {"push_enter", "c"}, {}, &graph); LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(3, output.node_size()); - int found = 0; + EXPECT_EQ(6, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { - if (output.node(i).name() == "stack1") ++found; - if (output.node(i).name() == "push_enter") ++found; - if (output.node(i).name() == "stack2") ++found; + const NodeDef& node = output.node(i); + if (node.name() == "push1") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("c", node.input(0)); + EXPECT_EQ("^stack1", node.input(1)); + } else if (node.name() == "push2") { + 
EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("c", node.input(0)); + EXPECT_EQ("^push_enter", node.input(1)); + } else { + const NodeDef& orig_node = item.graph.node(i); + EXPECT_EQ(orig_node.ShortDebugString(), node.ShortDebugString()); + } } - EXPECT_EQ(3, found); } } // namespace diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index fff1e354f4..6fa8c03548 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -77,13 +77,13 @@ std::unique_ptr MetaOptimizer::NewOptimizer( graph_optimizer.reset( new AutoParallel(cfg_.auto_parallel().num_replicas())); } + if (optimizer == "loop") { + graph_optimizer.reset(new LoopOptimizer(cfg_.loop_optimization())); + } if (optimizer == "dependency") { graph_optimizer.reset( new DependencyOptimizer(cfg_.dependency_optimization())); } - if (optimizer == "loop") { - graph_optimizer.reset(new LoopOptimizer(cfg_.loop_optimization())); - } return graph_optimizer; } @@ -106,14 +106,14 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, optimizers.push_back(std::unique_ptr( new ArithmeticOptimizer(cfg_.arithmetic_optimization()))); } - if (cfg_.dependency_optimization() != RewriterConfig::OFF) { - optimizers.push_back(std::unique_ptr( - new DependencyOptimizer(cfg_.dependency_optimization()))); - } if (cfg_.loop_optimization() == RewriterConfig::ON) { optimizers.push_back(std::unique_ptr( new LoopOptimizer(cfg_.loop_optimization()))); } + if (cfg_.dependency_optimization() != RewriterConfig::OFF) { + optimizers.push_back(std::unique_ptr( + new DependencyOptimizer(cfg_.dependency_optimization()))); + } if (cfg_.layout_optimizer() != RewriterConfig::OFF) { optimizers.push_back( std::unique_ptr(new LayoutOptimizer())); @@ -136,8 +136,8 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, } } else { const 
std::set available_optimizers = { - "pruning", "function", "constfold", "layout", "memory", - "autoparallel", "arithmetic", "dependency", "loop"}; + "pruning", "function", "constfold", "layout", "memory", + "autoparallel", "arithmetic", "loop", "dependency"}; std::vector custom_optimizer_names; for (const auto& optimizer_name : cfg_.optimizers()) { if (available_optimizers.find(optimizer_name) != @@ -233,9 +233,9 @@ bool MetaOptimizerEnabled(const RewriterConfig& cfg) { cfg.layout_optimizer() != RewriterConfig::OFF || cfg.function_optimization() == RewriterConfig::ON || cfg.constant_folding() != RewriterConfig::OFF || - cfg.dependency_optimization() != RewriterConfig::OFF || - cfg.loop_optimization() == RewriterConfig::ON || cfg.arithmetic_optimization() != RewriterConfig::OFF || + cfg.loop_optimization() == RewriterConfig::ON || + cfg.dependency_optimization() != RewriterConfig::OFF || cfg.auto_parallel().enable() || cfg.memory_optimization() != RewriterConfig::NO_MEM_OPT || !cfg.optimizers().empty(); diff --git a/tensorflow/core/grappler/utils.cc b/tensorflow/core/grappler/utils.cc index a611a93086..eb1f882ff1 100644 --- a/tensorflow/core/grappler/utils.cc +++ b/tensorflow/core/grappler/utils.cc @@ -398,12 +398,12 @@ Status SimpleGraphView::Initialize(const GraphDef& graph, bool dedup_inputs, void SimpleGraphView::DepthFirstSearch( const std::unordered_set& op_types_to_traverse, int node_idx, std::set* nodes_found) const { - const NodeDef& node = graph_->node(node_idx); - if (op_types_to_traverse.find(node.op()) == op_types_to_traverse.end()) { - nodes_found->insert(node_idx); + if (nodes_found->find(node_idx) != nodes_found->end()) { return; } - if (nodes_found->find(node_idx) != nodes_found->end()) { + nodes_found->insert(node_idx); + const string& op_type = graph_->node(node_idx).op(); + if (op_types_to_traverse.find(op_type) == op_types_to_traverse.end()) { return; } for (auto output_idx : this->outputs(node_idx)) { diff --git 
a/tensorflow/core/grappler/utils.h b/tensorflow/core/grappler/utils.h index 1b91a57154..fbd38c1531 100644 --- a/tensorflow/core/grappler/utils.h +++ b/tensorflow/core/grappler/utils.h @@ -178,6 +178,7 @@ class SimpleGraphView { Status Initialize(const GraphDef& graph, bool dedup_inputs, bool dedup_outputs); + const GraphDef* graph() const { return graph_; } inline int num_nodes() const { return index_to_name_.size(); } inline const int index(const string& node_name) const { const auto& it = name_to_index_.find(node_name); -- GitLab From 1bbb03eb59fcb3a4b52c45d0063dcc9875206910 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 2 Mar 2018 16:58:11 -0800 Subject: [PATCH 379/884] Don't throw errors if non-Checkpointable objects are passed to MultiRNNCell PiperOrigin-RevId: 187689371 --- tensorflow/python/ops/rnn_cell_impl.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index bd7c731210..3ae1d1184d 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -46,6 +46,7 @@ from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variable_scope as vs from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.training import checkpointable from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export @@ -1190,7 +1191,9 @@ class MultiRNNCell(RNNCell): for cell_number, cell in enumerate(self._cells): # Add Checkpointable dependencies on these cells so their variables get # saved with this object when using object-based saving. - self._track_checkpointable(cell, name="cell-%d" % (cell_number,)) + if isinstance(cell, checkpointable.CheckpointableBase): + # TODO(allenl): Track down non-Checkpointable callers. 
+ self._track_checkpointable(cell, name="cell-%d" % (cell_number,)) self._state_is_tuple = state_is_tuple if not state_is_tuple: if any(nest.is_sequence(c.state_size) for c in self._cells): -- GitLab From 0c92f574d18cd01134bb9f7a5a679866a0f92f7e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 2 Mar 2018 17:18:00 -0800 Subject: [PATCH 380/884] Properly handle the case of functions with no inputs PiperOrigin-RevId: 187691555 --- .../grappler/optimizers/function_optimizer.cc | 12 +++++- .../optimizers/function_optimizer_test.cc | 34 +++++++++++++++++ .../core/grappler/utils/functions_test.cc | 37 +++++++++++++++++++ 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 167e5a153a..4b830bcc6e 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -126,9 +126,17 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, GraphDef* optimized_graph) { std::unordered_map functions; for (const FunctionDef& func : item.graph.library().function()) { - if (func.attr().count("_noinline") == 0) { - functions[func.signature().name()] = &func; + // Don't inline functions marked as noinline + if (func.attr().count("_noinline") != 0) { + continue; } + // Can't create IdentityN nodes with no input or output: skip these + // functions for now. + if (func.signature().input_arg_size() == 0 || + func.signature().output_arg_size() == 0) { + continue; + } + functions[func.signature().name()] = &func; } // Nothing to do. 
diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 5072abaac7..8db9b7f77a 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -339,6 +339,40 @@ TEST_F(FunctionOptimizerTest, FunctionWithInputForwarding) { test::ExpectTensorEqual(tensors_expected[2], tensors[2]); } +TEST_F(FunctionOptimizerTest, FunctionWithoutInput) { + const Tensor kTwo = test::AsScalar(2); + FunctionDef func = FunctionDefHelper::Define( + // Name + "GenerateTwo", + // Args + {}, + // Return value + {"o: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"o"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}}); + + GrapplerItem item; + constexpr char device[] = "/device:CPU:0"; + item.graph = test::function::GDef( + {test::function::NDef("y", "GenerateTwo", {}, {}, device), + test::function::NDef("z", "Identity", {"y"}, {{"T", DT_FLOAT}}, device)}, + // FunctionLib + { + func, + }); + + FunctionOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + // For now we won't inline the function. 
+ EXPECT_EQ(item.graph.DebugString(), output.DebugString()); +} + } // namespace } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/functions_test.cc b/tensorflow/core/grappler/utils/functions_test.cc index 25ec50d478..6a7d766b1c 100644 --- a/tensorflow/core/grappler/utils/functions_test.cc +++ b/tensorflow/core/grappler/utils/functions_test.cc @@ -308,6 +308,43 @@ TEST_F(FunctionsTest, FromFunctionDefWithInputForwarding) { } } +TEST_F(FunctionsTest, FromFunctionDefWithoutInput) { + const Tensor kTwo = test::AsScalar(2); + FunctionDef func = FunctionDefHelper::Define( + // Name + "GenerateTwo", + // Args + {}, + // Return value + {"o: T"}, + // Attr def + {"T: {float, double}"}, + // Nodes + {{{"two"}, "Const", {}, {{"value", kTwo}, {"dtype", DT_INT64}}}, + {{"o"}, "Cast", {"two"}, {{"SrcT", DT_INT64}, {"DstT", "$T"}}}}); + + std::unordered_map func_attr; + func_attr["T"].set_type(DT_FLOAT); + FunctionDefLibrary library; + std::unique_ptr item = + GrapplerItemFromFunctionDef(func, func_attr, library); + + EXPECT_EQ(0, item->feed.size()); + EXPECT_EQ(1, item->fetch.size()); + EXPECT_EQ("o:0", item->fetch[0]); + + EXPECT_EQ(2, item->graph.node_size()); + const NodeDef &two = item->graph.node(0); + EXPECT_EQ("two", two.name()); + EXPECT_EQ(0, two.input_size()); + const NodeDef &cast = item->graph.node(1); + EXPECT_EQ("o", cast.name()); + EXPECT_EQ(1, cast.input_size()); + EXPECT_EQ("two:0", cast.input(0)); + + std::cout << item->graph.DebugString() << std::endl; +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 9886d918a2b160b95cf410516d61ec5d3174cc83 Mon Sep 17 00:00:00 2001 From: Yin Li Date: Tue, 14 Nov 2017 20:26:58 +0800 Subject: [PATCH 381/884] Fold batch norm with batch to space --- .../graph_transforms/fold_old_batch_norms.cc | 67 +++++++++++++ .../fold_old_batch_norms_test.cc | 95 +++++++++++++++++++ 2 files changed, 162 insertions(+) diff --git 
a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc index d89afe85c7..d86f65325b 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms.cc @@ -182,6 +182,36 @@ Status FuseBatchNormWithConv(const NodeMatch& match, return Status::OK(); } +Status FuseBatchNormWithBatchToSpace(const NodeMatch& match, + std::vector* new_nodes) { + // Calculate the scale and offset values to apply. + std::vector scale_values; + std::vector offset_values; + TF_RETURN_IF_ERROR( + GetScaleAndOffsetValues(match, &scale_values, &offset_values)); + + // Fuse conv weights, and set the final output node name as batch_norm_node. + const NodeDef& batch_norm_node = match.node; + const NodeMatch& batch_to_space_node_match = match.inputs[0]; + const NodeMatch& conv_node_match = batch_to_space_node_match.inputs[0]; + const NodeDef& batch_to_space_node = batch_to_space_node_match.node; + const NodeDef& conv_node = conv_node_match.node; + + string biasadd_name = conv_node.name() + "/biasadd"; + TF_RETURN_IF_ERROR( + FuseScaleOffsetToConvWeights(scale_values, offset_values, conv_node_match, + biasadd_name , new_nodes)); + + NodeDef new_batch_to_space_node = batch_to_space_node; + // reuse batch_norm node name + new_batch_to_space_node.set_name(batch_norm_node.name()); + new_batch_to_space_node.set_input(0, biasadd_name); + new_nodes->push_back(batch_to_space_node_match.inputs[1].node); + new_nodes->push_back(batch_to_space_node_match.inputs[2].node); + new_nodes->push_back(new_batch_to_space_node); + return Status::OK(); +} + Status FuseBatchNormWithConvConcat(const NodeMatch& match, std::vector* new_nodes) { // Calculate the scale and offset values to apply. 
@@ -284,6 +314,43 @@ Status FoldOldBatchNorms(const GraphDef& input_graph_def, current_graph_def = replaced_graph_def; } while (did_graph_change); + do { + did_graph_change = false; + GraphDef replaced_graph_def; + TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( + current_graph_def, // clang-format off + {"BatchNormWithGlobalNormalization|FusedBatchNorm", // batch_norm_node + { + {"BatchToSpaceND", // batch_to_space_node + { + {"Conv2D", // conv_node + { + {"*"}, // input_node + {"Const"}, // weights_node + } + }, + {"Const"}, // block_shape + {"Const"}, // crops + } + }, + {"Const"}, // mean_node + {"Const"}, // variance_node + {"Const"}, // beta_node + {"Const"}, // gamma_node + } + }, // clang-format on + [&did_graph_change](const NodeMatch& match, + const std::set& input_nodes, + const std::set& output_nodes, + std::vector* new_nodes) { + TF_RETURN_IF_ERROR(FuseBatchNormWithBatchToSpace(match, new_nodes)); + did_graph_change = true; + return Status::OK(); + }, + {}, &replaced_graph_def)); + current_graph_def = replaced_graph_def; + } while (did_graph_change); + do { did_graph_change = false; GraphDef replaced_graph_def; diff --git a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc index b30ba9ac8b..272410c693 100644 --- a/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc +++ b/tensorflow/tools/graph_transforms/fold_old_batch_norms_test.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/cc/ops/const_op.h" #include "tensorflow/cc/ops/image_ops.h" #include "tensorflow/cc/ops/nn_ops.h" +#include "tensorflow/cc/ops/array_ops.h" #include "tensorflow/cc/ops/sendrecv_ops.h" #include "tensorflow/cc/ops/standard_ops.h" #include "tensorflow/core/framework/tensor_testutil.h" @@ -298,6 +299,96 @@ class FoldOldBatchNormsTest : public ::testing::Test { } }; +void TestFoldFusedBatchNormsWithBatchToSpace() { + auto root = tensorflow::Scope::NewRootScope(); + using namespace ::tensorflow::ops; // NOLINT(build/namespaces) + + Tensor input_data(DT_FLOAT, TensorShape({2, 1, 3, 2})); + test::FillValues( + &input_data, {1.0f, 4.0f, 2.0f, 5.0f, 3.0f, 6.0f, -1.0f, -4.0f, -2.0f, + -5.0f, -3.0f, -6.0f}); + Output input_op = + Const(root.WithOpName("input_op"), Input::Initializer(input_data)); + + Tensor weights_data(DT_FLOAT, TensorShape({1, 2, 2, 2})); + test::FillValues(&weights_data, + {1.0f, 2.0f, 3.0f, 4.0f, 0.1f, 0.2f, 0.3f, 0.4f}); + Output weights_op = + Const(root.WithOpName("weights_op"), Input::Initializer(weights_data)); + + Output conv_op = Conv2D(root.WithOpName("conv_op"), input_op, weights_op, + {1, 1, 1, 1}, "VALID"); + + Tensor block_shape_data(DT_INT32, TensorShape({2})); + test::FillValues(&block_shape_data, {1, 2}); + Output block_shape_op = + Const(root.WithOpName("block_shape_op"), Input::Initializer(block_shape_data)); + + Tensor crops_data(DT_INT32, TensorShape({2, 2})); + test::FillValues(&crops_data, {0, 0, 0, 1}); + Output crops_op = + Const(root.WithOpName("crops_op"), Input::Initializer(crops_data)); + + Output batch_to_space_op = BatchToSpaceND(root.WithOpName("batch_to_space_op"), + conv_op, block_shape_op, crops_data); + + Tensor mean_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&mean_data, {10.0f, 20.0f}); + Output mean_op = + Const(root.WithOpName("mean_op"), Input::Initializer(mean_data)); + + Tensor variance_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&variance_data, {0.25f, 0.5f}); + Output 
variance_op = Const(root.WithOpName("variance_op"), + Input::Initializer(variance_data)); + + Tensor beta_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&beta_data, {0.1f, 0.6f}); + Output beta_op = + Const(root.WithOpName("beta_op"), Input::Initializer(beta_data)); + + Tensor gamma_data(DT_FLOAT, TensorShape({2})); + test::FillValues(&gamma_data, {1.0f, 2.0f}); + Output gamma_op = + Const(root.WithOpName("gamma_op"), Input::Initializer(gamma_data)); + + GraphDef original_graph_def; + TF_ASSERT_OK(root.ToGraphDef(&original_graph_def)); + + NodeDef batch_norm_node; + batch_norm_node.set_op("FusedBatchNorm"); + batch_norm_node.set_name("output"); + AddNodeInput("batch_to_space_op", &batch_norm_node); + AddNodeInput("gamma_op", &batch_norm_node); + AddNodeInput("beta_op", &batch_norm_node); + AddNodeInput("mean_op", &batch_norm_node); + AddNodeInput("variance_op", &batch_norm_node); + SetNodeAttr("T", DT_FLOAT, &batch_norm_node); + SetNodeAttr("epsilon", 0.00001f, &batch_norm_node); + SetNodeAttr("is_training", false, &batch_norm_node); + *(original_graph_def.mutable_node()->Add()) = batch_norm_node; + + std::unique_ptr original_session(NewSession(SessionOptions())); + TF_ASSERT_OK(original_session->Create(original_graph_def)); + std::vector original_outputs; + TF_ASSERT_OK(original_session->Run({}, {"output"}, {}, &original_outputs)); + + GraphDef fused_graph_def; + TF_ASSERT_OK(FoldOldBatchNorms(original_graph_def, {{}, {"output"}}, + &fused_graph_def)); + + std::unique_ptr fused_session(NewSession(SessionOptions())); + TF_ASSERT_OK(fused_session->Create(fused_graph_def)); + std::vector fused_outputs; + TF_ASSERT_OK(fused_session->Run({}, {"output"}, {}, &fused_outputs)); + + test::ExpectTensorNear(original_outputs[0], fused_outputs[0], 1e-5); + + for (const NodeDef& node : fused_graph_def.node()) { + EXPECT_NE("FusedBatchNormWithBatchToSpace", node.op()); + } +} + TEST_F(FoldOldBatchNormsTest, TestFoldOldBatchNorms) { TestFoldOldBatchNorms(); } @@ -315,5 
+406,9 @@ TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithConcat) { TestFoldFusedBatchNormsWithConcat(/*split=*/false); } +TEST_F(FoldOldBatchNormsTest, TestFoldFusedBatchNormsWithBatchToSpace) { + TestFoldFusedBatchNormsWithBatchToSpace(); +} + } // namespace graph_transforms } // namespace tensorflow -- GitLab From ab635a9b9691e36e42de000468c13e4f66272116 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 2 Mar 2018 18:33:21 -0800 Subject: [PATCH 382/884] Merged commit includes the following changes: 187697531 by andrewharp: Tweak whitespace for fft2d dep. -- 187696129 by A. Unique TensorFlower: Generalize support for logical expressions, comparison operators and multiple comparisons. -- 187692494 by vinuraja: * Adds a boolean attribute to ConfigureDistributedTPUOp for internal use. * Adds GraphRunner ctor which takes in the device to run the graph on. -- 187692129 by andrewharp: Audio utility classes for supporting MFCC and AudioSpectrogram operators -- PiperOrigin-RevId: 187697531 --- .../contrib/lite/kernels/internal/BUILD | 21 ++ .../contrib/lite/kernels/internal/mfcc.cc | 65 +++++ .../contrib/lite/kernels/internal/mfcc.h | 78 ++++++ .../contrib/lite/kernels/internal/mfcc_dct.cc | 78 ++++++ .../contrib/lite/kernels/internal/mfcc_dct.h | 43 +++ .../kernels/internal/mfcc_mel_filterbank.cc | 204 +++++++++++++++ .../kernels/internal/mfcc_mel_filterbank.h | 63 +++++ .../lite/kernels/internal/spectrogram.cc | 244 ++++++++++++++++++ .../lite/kernels/internal/spectrogram.h | 110 ++++++++ .../py2tf/converters/logical_expressions.py | 121 ++++++--- .../converters/logical_expressions_test.py | 4 +- tensorflow/contrib/py2tf/impl/conversion.py | 2 +- .../contrib/tpu/ops/tpu_configuration_ops.cc | 2 + .../core/common_runtime/graph_runner.cc | 25 +- tensorflow/core/common_runtime/graph_runner.h | 9 +- 15 files changed, 1018 insertions(+), 51 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc.cc create mode 100644 
tensorflow/contrib/lite/kernels/internal/mfcc.h create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_dct.h create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h create mode 100644 tensorflow/contrib/lite/kernels/internal/spectrogram.cc create mode 100644 tensorflow/contrib/lite/kernels/internal/spectrogram.h diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index 6ccad3b1ce..d5dd2cbf14 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -309,6 +309,27 @@ cc_library( ], ) +# Audio support classes imported directly from TensorFlow. +cc_library( + name = "audio_utils", + srcs = [ + "mfcc.cc", + "mfcc_dct.cc", + "mfcc_mel_filterbank.cc", + "spectrogram.cc", + ], + hdrs = [ + "mfcc.h", + "mfcc_dct.h", + "mfcc_mel_filterbank.h", + "spectrogram.h", + ], + deps = [ + "//third_party/fft2d:fft2d_headers", + "@fft2d", + ], +) + cc_library( name = "tensor_utils", srcs = [ diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc.cc b/tensorflow/contrib/lite/kernels/internal/mfcc.cc new file mode 100644 index 0000000000..eafe0c7afe --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc.cc @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include + +#include "tensorflow/contrib/lite/kernels/internal/mfcc.h" + +namespace tflite { +namespace internal { + +const double kDefaultUpperFrequencyLimit = 4000; +const double kDefaultLowerFrequencyLimit = 20; +const double kFilterbankFloor = 1e-12; +const int kDefaultFilterbankChannelCount = 40; +const int kDefaultDCTCoefficientCount = 13; + +Mfcc::Mfcc() + : initialized_(false), + lower_frequency_limit_(kDefaultLowerFrequencyLimit), + upper_frequency_limit_(kDefaultUpperFrequencyLimit), + filterbank_channel_count_(kDefaultFilterbankChannelCount), + dct_coefficient_count_(kDefaultDCTCoefficientCount) {} + +bool Mfcc::Initialize(int input_length, double input_sample_rate) { + bool initialized = mel_filterbank_.Initialize( + input_length, input_sample_rate, filterbank_channel_count_, + lower_frequency_limit_, upper_frequency_limit_); + initialized &= + dct_.Initialize(filterbank_channel_count_, dct_coefficient_count_); + initialized_ = initialized; + return initialized; +} + +void Mfcc::Compute(const std::vector& spectrogram_frame, + std::vector* output) const { + if (!initialized_) { + // LOG(ERROR) << "Mfcc not initialized."; + return; + } + std::vector working; + mel_filterbank_.Compute(spectrogram_frame, &working); + for (int i = 0; i < working.size(); ++i) { + double val = working[i]; + if (val < kFilterbankFloor) { + val = kFilterbankFloor; + } + working[i] = log(val); + } + dct_.Compute(working, output); +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc.h b/tensorflow/contrib/lite/kernels/internal/mfcc.h new file mode 100644 index 0000000000..d8500ecdcf --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc.h @@ -0,0 +1,78 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic class for computing MFCCs from spectrogram slices. + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_H_ + +#include + +#include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" +#include "tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h" + +namespace tflite { +namespace internal { + +class Mfcc { + public: + Mfcc(); + bool Initialize(int input_length, double input_sample_rate); + + // Input is a single squared-magnitude spectrogram frame. The input spectrum + // is converted to linear magnitude and weighted into bands using a + // triangular mel filterbank, and a discrete cosine transform (DCT) of the + // values is taken. Output is populated with the lowest dct_coefficient_count + // of these values. 
+ void Compute(const std::vector& spectrogram_frame, + std::vector* output) const; + + void set_upper_frequency_limit(double upper_frequency_limit) { + // CHECK(!initialized_) << "Set frequency limits before calling + // Initialize."; + upper_frequency_limit_ = upper_frequency_limit; + } + + void set_lower_frequency_limit(double lower_frequency_limit) { + // CHECK(!initialized_) << "Set frequency limits before calling + // Initialize."; + lower_frequency_limit_ = lower_frequency_limit; + } + + void set_filterbank_channel_count(int filterbank_channel_count) { + /// CHECK(!initialized_) << "Set channel count before calling Initialize."; + filterbank_channel_count_ = filterbank_channel_count; + } + + void set_dct_coefficient_count(int dct_coefficient_count) { + // CHECK(!initialized_) << "Set coefficient count before calling + // Initialize."; + dct_coefficient_count_ = dct_coefficient_count; + } + + private: + MfccMelFilterbank mel_filterbank_; + MfccDct dct_; + bool initialized_; + double lower_frequency_limit_; + double upper_frequency_limit_; + int filterbank_channel_count_; + int dct_coefficient_count_; +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc new file mode 100644 index 0000000000..b0b7d181bd --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.cc @@ -0,0 +1,78 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" + +#include + +namespace tflite { +namespace internal { + +MfccDct::MfccDct() : initialized_(false) {} + +bool MfccDct::Initialize(int input_length, int coefficient_count) { + coefficient_count_ = coefficient_count; + input_length_ = input_length; + + if (coefficient_count_ < 1) { + return false; + } + + if (input_length < 1) { + return false; + } + + if (coefficient_count_ > input_length_) { + return false; + } + + cosines_.resize(coefficient_count_); + double fnorm = sqrt(2.0 / input_length_); + // Some platforms don't have M_PI, so define a local constant here. + const double pi = atan(1) * 4; + double arg = pi / input_length_; + for (int i = 0; i < coefficient_count_; ++i) { + cosines_[i].resize(input_length_); + for (int j = 0; j < input_length_; ++j) { + cosines_[i][j] = fnorm * cos(i * arg * (j + 0.5)); + } + } + initialized_ = true; + return true; +} + +void MfccDct::Compute(const std::vector &input, + std::vector *output) const { + if (!initialized_) { + return; + } + + output->resize(coefficient_count_); + int length = input.size(); + if (length > input_length_) { + length = input_length_; + } + + for (int i = 0; i < coefficient_count_; ++i) { + double sum = 0.0; + for (int j = 0; j < length; ++j) { + sum += cosines_[i][j] * input[j]; + } + (*output)[i] = sum; + } +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_dct.h b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.h new file mode 100644 index 0000000000..a53f5cbd9b --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_dct.h @@ -0,0 +1,43 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic minimal DCT class for MFCC speech processing. + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_DCT_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_DCT_H_ + +#include + +namespace tflite { +namespace internal { + +class MfccDct { + public: + MfccDct(); + bool Initialize(int input_length, int coefficient_count); + void Compute(const std::vector& input, + std::vector* output) const; + + private: + bool initialized_; + int coefficient_count_; + int input_length_; + std::vector > cosines_; +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_DCT_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc new file mode 100644 index 0000000000..c3deb33d91 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.cc @@ -0,0 +1,204 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This code resamples the FFT bins, and smooths then with triangle-shaped +// weights to create a mel-frequency filter bank. For filter i centered at f_i, +// there is a triangular weighting of the FFT bins that extends from +// filter f_i-1 (with a value of zero at the left edge of the triangle) to f_i +// (where the filter value is 1) to f_i+1 (where the filter values returns to +// zero). + +// Note: this code fails if you ask for too many channels. The algorithm used +// here assumes that each FFT bin contributes to at most two channels: the +// right side of a triangle for channel i, and the left side of the triangle +// for channel i+1. If you ask for so many channels that some of the +// resulting mel triangle filters are smaller than a single FFT bin, these +// channels may end up with no contributing FFT bins. The resulting mel +// spectrum output will have some channels that are always zero. 
+ +#include "tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h" + +#include + +namespace tflite { +namespace internal { + +MfccMelFilterbank::MfccMelFilterbank() : initialized_(false) {} + +bool MfccMelFilterbank::Initialize(int input_length, double input_sample_rate, + int output_channel_count, + double lower_frequency_limit, + double upper_frequency_limit) { + num_channels_ = output_channel_count; + sample_rate_ = input_sample_rate; + input_length_ = input_length; + + if (num_channels_ < 1) { + // LOG(ERROR) << "Number of filterbank channels must be positive."; + return false; + } + + if (sample_rate_ <= 0) { + // LOG(ERROR) << "Sample rate must be positive."; + return false; + } + + if (input_length < 2) { + // LOG(ERROR) << "Input length must greater than 1."; + return false; + } + + if (lower_frequency_limit < 0) { + // LOG(ERROR) << "Lower frequency limit must be nonnegative."; + return false; + } + + if (upper_frequency_limit <= lower_frequency_limit) { + /// LOG(ERROR) << "Upper frequency limit must be greater than " + // << "lower frequency limit."; + return false; + } + + // An extra center frequency is computed at the top to get the upper + // limit on the high side of the final triangular filter. + center_frequencies_.resize(num_channels_ + 1); + const double mel_low = FreqToMel(lower_frequency_limit); + const double mel_hi = FreqToMel(upper_frequency_limit); + const double mel_span = mel_hi - mel_low; + const double mel_spacing = mel_span / static_cast(num_channels_ + 1); + for (int i = 0; i < num_channels_ + 1; ++i) { + center_frequencies_[i] = mel_low + (mel_spacing * (i + 1)); + } + + // Always exclude DC; emulate HTK. + const double hz_per_sbin = + 0.5 * sample_rate_ / static_cast(input_length_ - 1); + start_index_ = static_cast(1.5 + (lower_frequency_limit / hz_per_sbin)); + end_index_ = static_cast(upper_frequency_limit / hz_per_sbin); + + // Maps the input spectrum bin indices to filter bank channels/indices. 
For + // each FFT bin, band_mapper tells us which channel this bin contributes to + // on the right side of the triangle. Thus this bin also contributes to the + // left side of the next channel's triangle response. + band_mapper_.resize(input_length_); + int channel = 0; + for (int i = 0; i < input_length_; ++i) { + double melf = FreqToMel(i * hz_per_sbin); + if ((i < start_index_) || (i > end_index_)) { + band_mapper_[i] = -2; // Indicate an unused Fourier coefficient. + } else { + while ((center_frequencies_[channel] < melf) && + (channel < num_channels_)) { + ++channel; + } + band_mapper_[i] = channel - 1; // Can be == -1 + } + } + + // Create the weighting functions to taper the band edges. The contribution + // of any one FFT bin is based on its distance along the continuum between two + // mel-channel center frequencies. This bin contributes weights_[i] to the + // current channel and 1-weights_[i] to the next channel. + weights_.resize(input_length_); + for (int i = 0; i < input_length_; ++i) { + channel = band_mapper_[i]; + if ((i < start_index_) || (i > end_index_)) { + weights_[i] = 0.0; + } else { + if (channel >= 0) { + weights_[i] = + (center_frequencies_[channel + 1] - FreqToMel(i * hz_per_sbin)) / + (center_frequencies_[channel + 1] - center_frequencies_[channel]); + } else { + weights_[i] = (center_frequencies_[0] - FreqToMel(i * hz_per_sbin)) / + (center_frequencies_[0] - mel_low); + } + } + } + // Check the sum of FFT bin weights for every mel band to identify + // situations where the mel bands are so narrow that they don't get + // significant weight on enough (or any) FFT bins -- i.e., too many + // mel bands have been requested for the given FFT size. 
+ std::vector bad_channels; + for (int c = 0; c < num_channels_; ++c) { + float band_weights_sum = 0.0; + for (int i = 0; i < input_length_; ++i) { + if (band_mapper_[i] == c - 1) { + band_weights_sum += (1.0 - weights_[i]); + } else if (band_mapper_[i] == c) { + band_weights_sum += weights_[i]; + } + } + // The lowest mel channels have the fewest FFT bins and the lowest + // weights sum. But given that the target gain at the center frequency + // is 1.0, if the total sum of weights is 0.5, we're in bad shape. + if (band_weights_sum < 0.5) { + bad_channels.push_back(c); + } + } + if (!bad_channels.empty()) { + /* + LOG(ERROR) << "Missing " << bad_channels.size() << " bands " + << " starting at " << bad_channels[0] + << " in mel-frequency design. " + << "Perhaps too many channels or " + << "not enough frequency resolution in spectrum. (" + << "input_length: " << input_length + << " input_sample_rate: " << input_sample_rate + << " output_channel_count: " << output_channel_count + << " lower_frequency_limit: " << lower_frequency_limit + << " upper_frequency_limit: " << upper_frequency_limit; + */ + } + initialized_ = true; + return true; +} + +// Compute the mel spectrum from the squared-magnitude FFT input by taking the +// square root, then summing FFT magnitudes under triangular integration windows +// whose widths increase with frequency. +void MfccMelFilterbank::Compute(const std::vector &input, + std::vector *output) const { + if (!initialized_) { + // LOG(ERROR) << "Mel Filterbank not initialized."; + return; + } + + if (input.size() <= end_index_) { + // LOG(ERROR) << "Input too short to compute filterbank"; + return; + } + + // Ensure output is right length and reset all values. 
+ output->assign(num_channels_, 0.0); + + for (int i = start_index_; i <= end_index_; i++) { // For each FFT bin + double spec_val = sqrt(input[i]); + double weighted = spec_val * weights_[i]; + int channel = band_mapper_[i]; + if (channel >= 0) + (*output)[channel] += weighted; // Right side of triangle, downward slope + channel++; + if (channel < num_channels_) + (*output)[channel] += spec_val - weighted; // Left side of triangle + } +} + +double MfccMelFilterbank::FreqToMel(double freq) const { + return 1127.0 * log(1.0 + (freq / 700.0)); +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h new file mode 100644 index 0000000000..c1db28243e --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h @@ -0,0 +1,63 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Basic class for applying a mel-scale mapping to a power spectrum. + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_MEL_FILTERBANK_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_MEL_FILTERBANK_H_ + +#include + +namespace tflite { +namespace internal { + +class MfccMelFilterbank { + public: + MfccMelFilterbank(); + bool Initialize(int input_length, // Number of unique FFT bins fftsize/2+1. 
+ double input_sample_rate, int output_channel_count, + double lower_frequency_limit, double upper_frequency_limit); + + // Takes a squared-magnitude spectrogram slice as input, computes a + // triangular-mel-weighted linear-magnitude filterbank, and places the result + // in output. + void Compute(const std::vector& input, + std::vector* output) const; + + private: + double FreqToMel(double freq) const; + bool initialized_; + int num_channels_; + double sample_rate_; + int input_length_; + std::vector center_frequencies_; // In mel, for each mel channel. + + // Each FFT bin b contributes to two triangular mel channels, with + // proportion weights_[b] going into mel channel band_mapper_[b], and + // proportion (1 - weights_[b]) going into channel band_mapper_[b] + 1. + // Thus, weights_ contains the weighting applied to each FFT bin for the + // upper-half of the triangular band. + std::vector weights_; // Right-side weight for this fft bin. + + // FFT bin i contributes to the upper side of mel channel band_mapper_[i] + std::vector band_mapper_; + int start_index_; // Lowest FFT bin used to calculate mel spectrum. + int end_index_; // Highest FFT bin used to calculate mel spectrum. +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_MFCC_MEL_FILTERBANK_H_ diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc new file mode 100644 index 0000000000..66ca694dc4 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc @@ -0,0 +1,244 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/kernels/internal/spectrogram.h" + +#include + +#include "third_party/fft2d/fft.h" + +namespace tflite { +namespace internal { + +using std::complex; + +namespace { +// Returns the default Hann window function for the spectrogram. +void GetPeriodicHann(int window_length, std::vector* window) { + // Some platforms don't have M_PI, so define a local constant here. + const double pi = std::atan(1) * 4; + window->resize(window_length); + for (int i = 0; i < window_length; ++i) { + (*window)[i] = 0.5 - 0.5 * cos((2 * pi * i) / window_length); + } +} +} // namespace + +bool Spectrogram::Initialize(int window_length, int step_length) { + std::vector window; + GetPeriodicHann(window_length, &window); + return Initialize(window, step_length); +} + +inline int Log2Floor(uint n) { + if (n == 0) return -1; + int log = 0; + uint value = n; + for (int i = 4; i >= 0; --i) { + int shift = (1 << i); + uint x = value >> shift; + if (x != 0) { + value = x; + log += shift; + } + } + assert(value == 1); + return log; +} + +inline int Log2Ceiling(uint n) { + int floor = Log2Floor(n); + if (n == (n & ~(n - 1))) // zero or a power of two + return floor; + else + return floor + 1; +} + +inline uint NextPowerOfTwo(uint value) { + int exponent = Log2Ceiling(value); + // DCHECK_LT(exponent, std::numeric_limits::digits); + return 1 << exponent; +} + +bool Spectrogram::Initialize(const std::vector& window, + int step_length) { + window_length_ = window.size(); + window_ = window; // 
Copy window. + if (window_length_ < 2) { + // LOG(ERROR) << "Window length too short."; + initialized_ = false; + return false; + } + + step_length_ = step_length; + if (step_length_ < 1) { + // LOG(ERROR) << "Step length must be positive."; + initialized_ = false; + return false; + } + + fft_length_ = NextPowerOfTwo(window_length_); + // CHECK(fft_length_ >= window_length_); + output_frequency_channels_ = 1 + fft_length_ / 2; + + // Allocate 2 more than what rdft needs, so we can rationalize the layout. + fft_input_output_.assign(fft_length_ + 2, 0.0); + + int half_fft_length = fft_length_ / 2; + fft_double_working_area_.assign(half_fft_length, 0.0); + fft_integer_working_area_.assign(2 + static_cast(sqrt(half_fft_length)), + 0); + // Set flag element to ensure that the working areas are initialized + // on the first call to cdft. It's redundant given the assign above, + // but keep it as a reminder. + fft_integer_working_area_[0] = 0; + input_queue_.clear(); + samples_to_next_step_ = window_length_; + initialized_ = true; + return true; +} + +template +bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>* output) { + if (!initialized_) { + // LOG(ERROR) << "ComputeComplexSpectrogram() called before successful call + // " + // << "to Initialize()."; + return false; + } + // CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + // DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + auto& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // This will convert double to float if it needs to. 
+ spectrogram_slice[i] = complex( + fft_input_output_[2 * i], fft_input_output_[2 * i + 1]); + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, std::vector>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>*); +template bool Spectrogram::ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>*); + +template +bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, + std::vector>* output) { + if (!initialized_) { + // LOG(ERROR) << "ComputeSquaredMagnitudeSpectrogram() called before " + // << "successful call to Initialize()."; + return false; + } + // CHECK(output); + output->clear(); + int input_start = 0; + while (GetNextWindowOfSamples(input, &input_start)) { + // DCHECK_EQ(input_queue_.size(), window_length_); + ProcessCoreFFT(); // Processes input_queue_ to fft_input_output_. + // Add a new slice vector onto the output, to save new result to. + output->resize(output->size() + 1); + // Get a reference to the newly added slice to fill in. + auto& spectrogram_slice = output->back(); + spectrogram_slice.resize(output_frequency_channels_); + for (int i = 0; i < output_frequency_channels_; ++i) { + // Similar to the Complex case, except storing the norm. + // But the norm function is known to be a performance killer, + // so do it this way with explicit real and imagninary temps. + const double re = fft_input_output_[2 * i]; + const double im = fft_input_output_[2 * i + 1]; + // Which finally converts double to float if it needs to. 
+ spectrogram_slice[i] = re * re + im * im; + } + } + return true; +} +// Instantiate it four ways: +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); +template bool Spectrogram::ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, std::vector>*); + +// Return true if a full window of samples is prepared; manage the queue. +template +bool Spectrogram::GetNextWindowOfSamples(const std::vector& input, + int* input_start) { + auto input_it = input.begin() + *input_start; + int input_remaining = input.end() - input_it; + if (samples_to_next_step_ > input_remaining) { + // Copy in as many samples are left and return false, no full window. + input_queue_.insert(input_queue_.end(), input_it, input.end()); + *input_start += input_remaining; // Increases it to input.size(). + samples_to_next_step_ -= input_remaining; + return false; // Not enough for a full window. + } else { + // Copy just enough into queue to make a new window, then trim the + // front off the queue to make it window-sized. + input_queue_.insert(input_queue_.end(), input_it, + input_it + samples_to_next_step_); + *input_start += samples_to_next_step_; + input_queue_.erase( + input_queue_.begin(), + input_queue_.begin() + input_queue_.size() - window_length_); + // DCHECK_EQ(window_length_, input_queue_.size()); + samples_to_next_step_ = step_length_; // Be ready for next time. + return true; // Yes, input_queue_ now contains exactly a window-full. + } +} + +void Spectrogram::ProcessCoreFFT() { + for (int j = 0; j < window_length_; ++j) { + fft_input_output_[j] = input_queue_[j] * window_[j]; + } + // Zero-pad the rest of the input buffer. 
+ for (int j = window_length_; j < fft_length_; ++j) { + fft_input_output_[j] = 0.0; + } + const int kForwardFFT = 1; // 1 means forward; -1 reverse. + // This real FFT is a fair amount faster than using cdft here. + rdft(fft_length_, kForwardFFT, &fft_input_output_[0], + &fft_integer_working_area_[0], &fft_double_working_area_[0]); + // Make rdft result look like cdft result; + // unpack the last real value from the first position's imag slot. + fft_input_output_[fft_length_] = fft_input_output_[1]; + fft_input_output_[fft_length_ + 1] = 0; + fft_input_output_[1] = 0; +} + +} // namespace internal +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.h b/tensorflow/contrib/lite/kernels/internal/spectrogram.h new file mode 100644 index 0000000000..b77a68f7df --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.h @@ -0,0 +1,110 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Class for generating spectrogram slices from a waveform. +// Initialize() should be called before calls to other functions. Once +// Initialize() has been called and returned true, The Compute*() functions can +// be called repeatedly with sequential input data (ie. the first element of the +// next input vector directly follows the last element of the previous input +// vector). 
Whenever enough audio samples are buffered to produce a +// new frame, it will be placed in output. Output is cleared on each +// call to Compute*(). This class is thread-unsafe, and should only be +// called from one thread at a time. +// With the default parameters, the output of this class should be very +// close to the results of the following MATLAB code: +// overlap_samples = window_length_samples - step_samples; +// window = hann(window_length_samples, 'periodic'); +// S = abs(spectrogram(audio, window, overlap_samples)).^2; + +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_SPECTROGRAM_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_SPECTROGRAM_H_ + +#include +#include +#include + +#include "third_party/fft2d/fft.h" + +namespace tflite { +namespace internal { + +class Spectrogram { + public: + Spectrogram() : initialized_(false) {} + ~Spectrogram() {} + + // Initializes the class with a given window length and step length + // (both in samples). Internally a Hann window is used as the window + // function. Returns true on success, after which calls to Compute*() + // are possible. window_length must be greater than 1 and step + // length must be greater than 0. + bool Initialize(int window_length, int step_length); + + // Initialize with an explicit window instead of a length. + bool Initialize(const std::vector& window, int step_length); + + // Processes an arbitrary amount of audio data (contained in input) + // to yield complex spectrogram frames. After a successful call to + // Initialize(), Compute*() may be called repeatedly with new input data + // each time. The audio input is buffered internally, and the output + // vector is populated with as many temporally-ordered spectral slices + // as it is possible to generate from the input. The output is cleared + // on each call before the new frames (if any) are added. + // + // The template parameters can be float or double. 
+ template + bool ComputeComplexSpectrogram( + const std::vector& input, + std::vector>>* output); + + // This function works as the one above, but returns the power + // (the L2 norm, or the squared magnitude) of each complex value. + template + bool ComputeSquaredMagnitudeSpectrogram( + const std::vector& input, + std::vector>* output); + + // Return reference to the window function used internally. + const std::vector& GetWindow() const { return window_; } + + // Return the number of frequency channels in the spectrogram. + int output_frequency_channels() const { return output_frequency_channels_; } + + private: + template + bool GetNextWindowOfSamples(const std::vector& input, + int* input_start); + void ProcessCoreFFT(); + + int fft_length_; + int output_frequency_channels_; + int window_length_; + int step_length_; + bool initialized_; + int samples_to_next_step_; + + std::vector window_; + std::vector fft_input_output_; + std::deque input_queue_; + + // Working data areas for the FFT routines. + std::vector fft_integer_working_area_; + std::vector fft_double_working_area_; +}; + +} // namespace internal +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_SPECTROGRAM_H_ diff --git a/tensorflow/contrib/py2tf/converters/logical_expressions.py b/tensorflow/contrib/py2tf/converters/logical_expressions.py index df980d41c9..766aa11efd 100644 --- a/tensorflow/contrib/py2tf/converters/logical_expressions.py +++ b/tensorflow/contrib/py2tf/converters/logical_expressions.py @@ -23,52 +23,107 @@ from __future__ import print_function import gast -from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.py2tf.pyct import templates +from tensorflow.contrib.py2tf.pyct import transformer -class LogicalExpressionTransformer(gast.NodeTransformer): +# TODO(mdan): Properly extract boolean ops according to lazy eval rules. 
+# Note that this isn't completely safe either, because tensors may have control +# dependencies. +# Note that for loops that should be done after the loop was converted to +# tf.while_loop so that the expanded conditionals are properly scoped. + +# Used to signal that an operand is safe for non-lazy evaluation. +SAFE_BOOLEAN_OPERAND = 'SAFE_BOOLEAN_OPERAND' + + +class LogicalExpressionTransformer(transformer.Base): """Converts logical expressions to corresponding TF calls.""" - def __init__(self): + def __init__(self, context): + super(LogicalExpressionTransformer, self).__init__(context) # TODO(mdan): Look into replacing with bitwise operators instead. self.op_mapping = { - gast.And: 'tf.logical_and', - gast.Or: 'tf.logical_or', - gast.Not: 'tf.logical_not', - gast.Eq: 'tf.equal', + gast.And: 'logical_and', + gast.Eq: 'equal', + gast.Gt: 'greater', + gast.GtE: 'greater_equal', + gast.Lt: 'less', + gast.LtE: 'less_equal', + gast.Not: 'logical_not', + gast.NotEq: 'not_equal', + gast.Or: 'logical_or', + gast.USub: 'negative', } + def _expect_simple_symbol(self, operand): + if isinstance(operand, gast.Name): + return + if anno.hasanno(operand, SAFE_BOOLEAN_OPERAND): + return + raise NotImplementedError( + 'only simple local variables are supported in logical and compound ' + 'comparison expressions; for example, we support "a or b" but not ' + '"a.x or b"; for a workaround, assign the expression to a local ' + 'variable and use that instead, for example "tmp = a.x", "tmp or b"') + + def _matching_tf_op(self, operator): + op_type = type(operator) + mapped_op = self.op_mapping.get(op_type) + if not mapped_op: + raise NotImplementedError('operator %s is not yet supported' % op_type) + return mapped_op + + def _inline_tf_op(self, op_name, args): + template = """ + tf.op_name(args) + """ + replacement = templates.replace(template, op_name=op_name, args=args) + # It's a body with a single expression, we want its value. 
+ n = replacement[0].value + anno.setanno(n, SAFE_BOOLEAN_OPERAND, True) + return n + def visit_Compare(self, node): node = self.generic_visit(node) - if len(node.ops) > 1: - raise NotImplementedError() - cmp_type = type(node.ops[0]) - if cmp_type in self.op_mapping: - tf_function = parser.parse_str(self.op_mapping[cmp_type]).body[0].value - return gast.Call( - func=tf_function, args=[node.left, node.comparators[0]], keywords=[]) - return node + ops_and_comps = list(zip(node.ops, node.comparators)) + left = node.left + op_tree = None + + # Repeated comparisons are converted to conjunctions: + # a < b < c -> a < b and b < c + while ops_and_comps: + op, right = ops_and_comps.pop(0) + binary_comparison = self._inline_tf_op(self._matching_tf_op(op), + (left, right)) + if isinstance(left, gast.Name) and isinstance(right, gast.Name): + anno.setanno(binary_comparison, SAFE_BOOLEAN_OPERAND, True) + if op_tree: + self._expect_simple_symbol(right) + op_tree = self._inline_tf_op('logical_and', + (binary_comparison, op_tree)) + else: + op_tree = binary_comparison + left = right + assert op_tree is not None + return op_tree def visit_UnaryOp(self, node): node = self.generic_visit(node) - if isinstance(node.op, gast.Not): - tf_function = parser.parse_str(self.op_mapping[type( - node.op)]).body[0].value - node = gast.Call(func=tf_function, args=[node.operand], keywords=[]) - return node + return self._inline_tf_op(self._matching_tf_op(node.op), node.operand) def visit_BoolOp(self, node): - # TODO(mdan): A normalizer may be useful here. Use ANF? 
node = self.generic_visit(node) - tf_function = parser.parse_str(self.op_mapping[type(node.op)]).body[0].value - left = node.values[0] - for i in range(1, len(node.values)): - left = gast.Call( - func=tf_function, args=[left, node.values[i]], keywords=[]) - return left - - -def transform(node): - transformer = LogicalExpressionTransformer() - node = transformer.visit(node) - return node + node_values = node.values + right = node.values.pop() + self._expect_simple_symbol(right) + while node_values: + left = node_values.pop() + self._expect_simple_symbol(left) + right = self._inline_tf_op(self._matching_tf_op(node.op), (left, right)) + return right + + +def transform(node, context): + return LogicalExpressionTransformer(context).visit(node) diff --git a/tensorflow/contrib/py2tf/converters/logical_expressions_test.py b/tensorflow/contrib/py2tf/converters/logical_expressions_test.py index a28326c517..eb28c309a4 100644 --- a/tensorflow/contrib/py2tf/converters/logical_expressions_test.py +++ b/tensorflow/contrib/py2tf/converters/logical_expressions_test.py @@ -32,7 +32,7 @@ class GradientsFunctionTest(converter_test_base.TestCase): return a == b node = self.parse_and_analyze(test_fn, {}) - node = logical_expressions.transform(node) + node = logical_expressions.transform(node, self.ctx) with self.compiled(node, math_ops.equal) as result: with self.test_session() as sess: @@ -45,7 +45,7 @@ class GradientsFunctionTest(converter_test_base.TestCase): return (a or b) and (a or b or c) node = self.parse_and_analyze(test_fn, {}) - node = logical_expressions.transform(node) + node = logical_expressions.transform(node, self.ctx) with self.compiled(node, math_ops.logical_or, math_ops.logical_and) as result: diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index d95469ea53..c6f4988375 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -312,7 +312,7 @@ def 
node_to_graph(node, ctx, nocompile_decorators): # control_flow may create new symbols and change scopes. node = _static_analysis_pass(node, ctx) - node = logical_expressions.transform(node) + node = logical_expressions.transform(node, ctx) node = side_effect_guards.transform(node, ctx) node = name_scopes.transform(node, ctx) diff --git a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc index f8de8baa65..7bf5c21d0b 100644 --- a/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc +++ b/tensorflow/contrib/tpu/ops/tpu_configuration_ops.cc @@ -191,6 +191,7 @@ REGISTER_OP("ConfigureDistributedTPU") .Output("topology: string") .Attr("embedding_config: string = ''") .Attr("tpu_embedding_config: string = ''") + .Attr("is_global_init: bool = false") .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape) .Doc(R"doc( @@ -202,6 +203,7 @@ topology. tpu_embedding_config: Serialized tensorflow.tpu.TPUEmbeddingConfiguration that describes the embedding lookups of the program. embedding_config: Reserved. Do not use. +is_global_init: Reserved. Do not use. 
)doc"); REGISTER_OP("ShutdownDistributedTPU") diff --git a/tensorflow/core/common_runtime/graph_runner.cc b/tensorflow/core/common_runtime/graph_runner.cc index f1082a6003..1125d2a34a 100644 --- a/tensorflow/core/common_runtime/graph_runner.cc +++ b/tensorflow/core/common_runtime/graph_runner.cc @@ -97,7 +97,9 @@ class SimpleRendezvous : public Rendezvous { } // namespace -GraphRunner::GraphRunner(Env* env) : cpu_device_(GetCPUDevice(env)) {} +GraphRunner::GraphRunner(Env* env) + : device_deleter_(GetCPUDevice(env)), device_(device_deleter_.get()) {} +GraphRunner::GraphRunner(Device* device) : device_(device) {} GraphRunner::~GraphRunner() {} @@ -105,17 +107,18 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, const NamedTensorList& inputs, const std::vector& output_names, std::vector* outputs) { - if (cpu_device_ == nullptr) { + if (device_ == nullptr) { return errors::NotFound("Cannot find a device for GraphRunner."); } if (function_library && function_library->device() && - function_library->device()->device_type() != cpu_device_->device_type()) { - // We are running on a CPU but the function library is for a non-CPU device, - // so just ignore the function_library. + function_library->device()->device_type() != device_->device_type()) { + // Mismatch between function_library's device_type and device_'s + // device_type. // TODO(matthewmurray) Can we create a new FunctionLibraryRuntime that is - // identical to function_library except that it uses CPU? - VLOG(1) << "Cannot run on CPU device with a function library for a " + // identical to function_library except that it uses the given 'device_'? 
+ VLOG(1) << "Cannot run on: " << device_->device_type() + << " with a function library for a " << function_library->device()->device_type() << " device."; function_library = nullptr; } @@ -146,8 +149,7 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, subgraph::RewriteGraphMetadata metadata; TF_RETURN_IF_ERROR(subgraph::RewriteGraphForExecution( graph_to_run.get(), input_names, output_names, {} /* target nodes */, - cpu_device_->attributes(), false /* use_function_convention */, - &metadata)); + device_->attributes(), false /* use_function_convention */, &metadata)); // Create the local executor and the Rendezvous for fetching back the // constants. @@ -158,13 +160,12 @@ Status GraphRunner::Run(Graph* graph, FunctionLibraryRuntime* function_library, LocalExecutorParams params; // The ownership of the output tensors are bound to this device's lifetime. - params.device = cpu_device_.get(); + params.device = device_; params.function_library = function_library; const int producer = graph_to_run->versions().producer(); params.create_kernel = [this, producer](const NodeDef& ndef, OpKernel** kernel) { - return CreateNonCachedKernel(cpu_device_.get(), nullptr, ndef, producer, - kernel); + return CreateNonCachedKernel(device_, nullptr, ndef, producer, kernel); }; params.delete_kernel = [](OpKernel* kernel) { delete kernel; }; diff --git a/tensorflow/core/common_runtime/graph_runner.h b/tensorflow/core/common_runtime/graph_runner.h index 1e4ae77227..1c4b2b719c 100644 --- a/tensorflow/core/common_runtime/graph_runner.h +++ b/tensorflow/core/common_runtime/graph_runner.h @@ -36,12 +36,14 @@ namespace tensorflow { // This class is only meant for internal use where one needs to // partially evaluate inexpensive nodes in a graph, such as for shape // inference or for constant folding. Because of its limited, simple -// use-cases, it executes all computation on the CPU and is not meant -// to be particularly lightweight, fast, or efficient. 
+// use-cases, it executes all computation on the given device (CPU by default) +// and is not meant to be particularly lightweight, fast, or efficient. class GraphRunner { public: // REQUIRES: `env` is not nullptr. GraphRunner(Env* env); + // REQUIRES: 'device' is not nullptr. Not owned. + GraphRunner(Device* device); ~GraphRunner(); // Function semantics for `inputs`, `output_names` and `outputs` @@ -59,7 +61,8 @@ class GraphRunner { std::vector* outputs); private: - std::unique_ptr cpu_device_; + std::unique_ptr device_deleter_; + Device* const device_; }; } // namespace tensorflow -- GitLab From 05a264fdf55dcd9763d43804c71f35d8c160a5a5 Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Fri, 2 Mar 2018 19:18:49 -0800 Subject: [PATCH 383/884] tfdbg: Add link to TensorBoard Debugger Plugin from the CLI documentation RELNOTES: tfdbg: TensorFlow Debugger's graphical user interface (GUI), the [TensorBoard Debugger Plugin](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md), is now in alpha. PiperOrigin-RevId: 187700265 --- .../docs_src/programmers_guide/debugger.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/debugger.md b/tensorflow/docs_src/programmers_guide/debugger.md index c8fdae6f60..5fb1c2da88 100644 --- a/tensorflow/docs_src/programmers_guide/debugger.md +++ b/tensorflow/docs_src/programmers_guide/debugger.md @@ -23,8 +23,13 @@ debuggers such as Python's `pdb` due to TensorFlow's computation-graph paradigm. > installed using `pip install .whl`, however curses on Windows > may not work as reliably as curses on Linux or Mac. -This tutorial demonstrates how to use the **tfdbg** command-line interface -(CLI) to debug the appearance of [`nan`s](https://en.wikipedia.org/wiki/NaN) +> NOTE: This guide focuses on the command-line interface (CLI) of tfdbg. 
For +> a guide on how to use the graphical user interface (GUI) of tfdbg, i.e., the +> **TensorBoard Debugger Plugin**, please visit +> [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). + +This tutorial demonstrates how to use the **tfdbg** CLI to debug the appearance +of [`nan`s](https://en.wikipedia.org/wiki/NaN) and [`inf`s](https://en.wikipedia.org/wiki/Infinity), a frequently-encountered type of bug in TensorFlow model development. The following example is for users who use the low-level @@ -806,3 +811,13 @@ sess.run(b) the constant-folding would not occur and `tfdbg` should show the intermediate tensor dumps. + +**Q**: Is there a GUI for tfdbg? + +**A**: Yes, the **TensorBoard Debugger Plugin** is the GUI of tfdbg. + It offers features such as inspection of the computation graph, + real-time visualization of tensor values, continuation to tensor + and conditional breakpoints, and tying tensors to their + graph-construction source code, all in the browser environment. + To get started, please visit + [its README](https://github.com/tensorflow/tensorboard/blob/master/tensorboard/plugins/debugger/README.md). -- GitLab From c645201fa9861dc9e0555a693a04e503ed40d01a Mon Sep 17 00:00:00 2001 From: Michael Case Date: Sat, 3 Mar 2018 10:04:35 -0800 Subject: [PATCH 384/884] Internal Change. PiperOrigin-RevId: 187738384 --- .../tools/integration_tests/gcs_smoke_test/{BUILD.bazel => BUILD} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tensorflow/tools/integration_tests/gcs_smoke_test/{BUILD.bazel => BUILD} (100%) diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel b/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD similarity index 100% rename from tensorflow/tools/integration_tests/gcs_smoke_test/BUILD.bazel rename to tensorflow/tools/integration_tests/gcs_smoke_test/BUILD -- GitLab From 421077f6ec9af420c9f11d6cff15ef6e0b21104d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Sat, 3 Mar 2018 14:26:21 -0800 Subject: [PATCH 385/884] Will open source this part of code. PiperOrigin-RevId: 187747019 --- tensorflow/contrib/framework/__init__.py | 2 + tensorflow/python/kernel_tests/BUILD | 3 + .../python/kernel_tests/init_ops_test.py | 79 +++++++++++++++++++ tensorflow/python/ops/init_ops.py | 58 +++++++++++++- 4 files changed, 141 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 8063250091..21f9651318 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -71,6 +71,7 @@ See the @{$python/contrib.framework} guide. @@model_variable @@variable @@VariableDeviceChooser +@@convolutional_delta_orthogonal @@zero_initializer @@load_checkpoint @@ -111,6 +112,7 @@ from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec from tensorflow.python.framework.tensor_spec import TensorSpec +from tensorflow.python.ops.init_ops import convolutional_delta_orthogonal from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = ['nest'] diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 0f13e8bba5..23b79a24c0 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1571,12 +1571,15 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", + "//tensorflow/python:layers", "//tensorflow/python:framework", "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:init_ops", + "//tensorflow/python:linalg_ops", "//tensorflow/python:math_ops", "//tensorflow/python:nn_ops", "//tensorflow/python:partitioned_variables", + "//tensorflow/python:random_ops", "//tensorflow/python:variable_scope", 
"//tensorflow/python:variables", ], diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index 19a7d2f9d5..c1755985ee 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -25,10 +25,13 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed +from tensorflow.python.layers import convolutional from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops +from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import partitioned_variables +from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -571,6 +574,82 @@ class OrthogonalInitializerTest(test.TestCase): np.dot(t, t.T), np.eye(t.shape[0]), rtol=tol, atol=tol) +class ConvolutionDeltaOrthogonalInitializerTest(test.TestCase): + + def testInitializerIdentical(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + init2 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + self.assertTrue(identicaltest(self, init1, init2, (3, 3, 10, 10))) + + def testInitializerDifferent(self): + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + init2 = init_ops.convolutional_delta_orthogonal(seed=2, dtype=dtype) + self.assertFalse(identicaltest(self, init1, init2, (3, 3, 10, 10))) + + def testDuplicatedInitializer(self): + init = init_ops.convolutional_delta_orthogonal() + self.assertFalse(duplicated_initializer(self, init, 1, (3, 3, 10, 10))) + + def testInvalidDataType(self): + self.assertRaises( + ValueError, 
init_ops.convolutional_delta_orthogonal, + dtype=dtypes.string) + + def testInvalidShape(self): + init1 = init_ops.convolutional_delta_orthogonal() + with self.test_session(graph=ops.Graph(), use_gpu=True): + self.assertRaises(ValueError, init1, shape=[3, 3, 6, 5]) + + def testGain(self): + shape = (3, 3, 10, 10) + for dtype in [dtypes.float32, dtypes.float64]: + init1 = init_ops.convolutional_delta_orthogonal(seed=1, dtype=dtype) + init2 = init_ops.convolutional_delta_orthogonal(gain=3.14, + seed=1, dtype=dtype) + with self.test_session(graph=ops.Graph(), use_gpu=True): + t1 = init1(shape).eval() + with self.test_session(graph=ops.Graph(), use_gpu=True): + t2 = init2(shape).eval() + return np.allclose(t1, t2 / 3.14, rtol=1e-15, atol=1e-15) + + def testShapesValues(self): + for dtype in [dtypes.float32]: + for kernel_size in [[3], [8], [3, 5], [2, 4], [3, 3, 3], [2, 2, 2]]: + tol = 1e-2 + # Check orthogonality by computing the 2-norms of the inputs and ouputs. + if len(kernel_size) == 1: + shape = [4, 32, 64] + convolution = convolutional.conv1d + elif len(kernel_size) == 2: + convolution = convolutional.conv2d + shape = [4, 32, 32, 64] + else: + shape = [4, 16, 16, 16, 64] + convolution = convolutional.conv3d + inputs = random_ops.random_normal(shape, dtype=dtype) + inputs_2norm = linalg_ops.norm(inputs) + outputs = convolution( + inputs, padding="same", filters=128, + kernel_size=kernel_size, use_bias=False, + kernel_initializer=init_ops.convolutional_delta_orthogonal( + gain=3.14)) + outputs_shape = shape[0:-1] + [128] + outputs_2norm = linalg_ops.norm(outputs) + my_ops = variables.global_variables_initializer() + with self.test_session(use_gpu=True) as sess: + sess.run(my_ops) + # Check the shape of the outputs + t = outputs.eval() + self.assertAllEqual(t.shape, outputs_shape) + # Check isometry of the delta-orthogonal kernel. 
+ self.assertAllClose( + sess.run(inputs_2norm)/np.sqrt(np.prod(shape)), + sess.run(outputs_2norm)/(np.sqrt(np.prod(shape))*np.sqrt(3.14)), + rtol=tol, atol=tol) + + class IdentityInitializerTest(test.TestCase): def testInvalidDataType(self): diff --git a/tensorflow/python/ops/init_ops.py b/tensorflow/python/ops/init_ops.py index c7502d0fda..40ab22951b 100644 --- a/tensorflow/python/ops/init_ops.py +++ b/tensorflow/python/ops/init_ops.py @@ -542,6 +542,62 @@ class Orthogonal(Initializer): return {"gain": self.gain, "seed": self.seed, "dtype": self.dtype.name} +class ConvolutionDeltaOrthogonal(Initializer): + """Initializer that generates a delta orthogonal kernel for ConvNets. + + The shape of the tensor must have length 3, 4 or 5. The number of input + filters must not exceed the number of output filters. The center pixels of the + tensor form an orthogonal matrix. Other pixels are set to be zero. + + Args: + gain: multiplicative factor to apply to the orthogonal matrix. Default is 1. + The 2-norm of an input is multiplied by a factor of 'sqrt(gain)' after + applying this convolution. + dtype: The type of the output. + seed: A Python integer. Used to create random seeds. See + @{tf.set_random_seed} + for behavior. 
+ """ + + def __init__(self, gain=1.0, seed=None, dtype=dtypes.float32): + self.gain = gain + self.dtype = _assert_float_dtype(dtypes.as_dtype(dtype)) + self.seed = seed + + def __call__(self, shape, dtype=None, partition_info=None): + if dtype is None: + dtype = self.dtype + # Check the shape + if len(shape) < 3 or len(shape) > 5: + raise ValueError("The tensor to initialize must be at least " + "three-dimensional and at most five-dimensional") + + if shape[-2] > shape[-1]: + raise ValueError("In_filters cannot be greater than out_filters.") + + # Generate a random matrix + a = random_ops.random_normal([shape[-1], shape[-1]], + dtype=dtype, seed=self.seed) + # Compute the qr factorization + q, _ = linalg_ops.qr(a, full_matrices=False) + q = q[:shape[-2], :] + q *= math_ops.sqrt(math_ops.cast(self.gain, dtype=dtype)) + if len(shape) == 3: + weight = array_ops.scatter_nd([[(shape[0]-1)//2]], + array_ops.expand_dims(q, 0), shape) + elif len(shape) == 4: + weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2]], + array_ops.expand_dims(q, 0), shape) + else: + weight = array_ops.scatter_nd([[(shape[0]-1)//2, (shape[1]-1)//2, + (shape[2]-1)//2]], + array_ops.expand_dims(q, 0), shape) + return weight + + def get_config(self): + return {"gain": self.gain, "seed": self.seed, "dtype": self.dtype.name} + + @tf_export("keras.initializers.Identity", "initializers.identity") class Identity(Initializer): """Initializer that generates the identity matrix. @@ -586,7 +642,7 @@ uniform_unit_scaling_initializer = UniformUnitScaling variance_scaling_initializer = VarianceScaling orthogonal_initializer = Orthogonal identity_initializer = Identity - +convolutional_delta_orthogonal = ConvolutionDeltaOrthogonal # pylint: enable=invalid-name -- GitLab From f80aaf1a3cc8da73f862b0c7218f9d8d98d2cf7a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 3 Mar 2018 15:49:05 -0800 Subject: [PATCH 386/884] Internal change. 
PiperOrigin-RevId: 187749767 --- .../contrib/lite/kernels/internal/quantization_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.h b/tensorflow/contrib/lite/kernels/internal/quantization_util.h index ba06bc0975..b84d2f9ee1 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util.h +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.h @@ -12,8 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ -#define PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ #include @@ -63,4 +63,4 @@ int CalculateInputRadius(int input_integer_bits, int input_left_shift); } // namespace tflite -#endif // PHOTOS_VISION_LEARNING_TENSORFLOW_MINI_QUANTIZATION_UTIL_H_ +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ -- GitLab From 70bdb2959a8d10cd6357ba66d5273e6fc7aa0ac1 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Sat, 3 Mar 2018 18:31:07 -0800 Subject: [PATCH 387/884] Fix broken links in docs. 
PiperOrigin-RevId: 187755567 --- tensorflow/docs_src/install/install_sources.md | 3 +-- tensorflow/docs_src/install/install_windows.md | 3 +-- tensorflow/docs_src/mobile/android_build.md | 4 ++-- tensorflow/docs_src/mobile/optimizing.md | 4 ++-- tensorflow/docs_src/programmers_guide/faq.md | 3 +-- tensorflow/docs_src/programmers_guide/graphs.md | 5 ++--- tensorflow/docs_src/tutorials/layers.md | 3 +-- 7 files changed, 10 insertions(+), 15 deletions(-) diff --git a/tensorflow/docs_src/install/install_sources.md b/tensorflow/docs_src/install/install_sources.md index 8d83e9f119..acf0af0d9d 100644 --- a/tensorflow/docs_src/install/install_sources.md +++ b/tensorflow/docs_src/install/install_sources.md @@ -393,8 +393,7 @@ TensorFlow programs:

Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index dedf485f93..f0a30ee394 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -153,8 +153,7 @@ TensorFlow programs:
Hello, TensorFlow!
-If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with -TensorFlow}. +If you are new to TensorFlow, see @{$get_started/premade_estimators$Getting Started with TensorFlow}. If the system outputs an error message instead of a greeting, see [Common installation problems](#common_installation_problems). diff --git a/tensorflow/docs_src/mobile/android_build.md b/tensorflow/docs_src/mobile/android_build.md index b5a1d5d7d1..08a5fbe41c 100644 --- a/tensorflow/docs_src/mobile/android_build.md +++ b/tensorflow/docs_src/mobile/android_build.md @@ -90,8 +90,8 @@ using [ADB](https://developer.android.com/studio/command-line/adb.html). This requires some knowledge of build systems and Android developer tools, but we'll guide you through the basics here. -- First, follow our instructions for @{$install/install_sources$installing from - sources}. This will also guide you through installing Bazel and cloning the +- First, follow our instructions for @{$install/install_sources$installing from sources}. + This will also guide you through installing Bazel and cloning the TensorFlow code. - Download the Android [SDK](https://developer.android.com/studio/index.html) diff --git a/tensorflow/docs_src/mobile/optimizing.md b/tensorflow/docs_src/mobile/optimizing.md index 44cacff5db..ca9cb043e9 100644 --- a/tensorflow/docs_src/mobile/optimizing.md +++ b/tensorflow/docs_src/mobile/optimizing.md @@ -290,8 +290,8 @@ run it on a 64-bit ARM device: You can interpret the results in exactly the same way as the desktop version above. 
If you have any trouble figuring out what the right input and output -names and types are, take a look at the @{$mobile/prepare_models$Preparing -models} page for details about detecting these for your model, and look at the +names and types are, take a look at the @{$mobile/prepare_models$Preparing models} +page for details about detecting these for your model, and look at the `summarize_graph` tool which may give you helpful information. diff --git a/tensorflow/docs_src/programmers_guide/faq.md b/tensorflow/docs_src/programmers_guide/faq.md index 70931f2862..1548d43877 100644 --- a/tensorflow/docs_src/programmers_guide/faq.md +++ b/tensorflow/docs_src/programmers_guide/faq.md @@ -159,8 +159,7 @@ available. These operations allow you to build sophisticated @{$reading_data$input pipelines}, at the cost of making the TensorFlow computation somewhat more complicated. See the how-to documentation for -@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using -`QueueRunner` objects to drive queues and readers} +@{$reading_data#creating-threads-to-prefetch-using-queuerunner-objects$using `QueueRunner` objects to drive queues and readers} for more information on how to use them. ## Variables diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index 9049a5a9f3..ab2ce9af2e 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -210,9 +210,8 @@ with tf.device("/device:GPU:0"): # Operations created in this context will be pinned to the GPU. 
result = tf.matmul(weights, img) ``` - -If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed -configuration}, you might specify the job name and task ID to place variables on +If you are deploying TensorFlow in a @{$deploy/distributed$typical distributed configuration}, +you might specify the job name and task ID to place variables on a task in the parameter server job (`"/job:ps"`), and the other operations on task in the worker job (`"/job:worker"`): diff --git a/tensorflow/docs_src/tutorials/layers.md b/tensorflow/docs_src/tutorials/layers.md index 5111b16247..ee03f440c9 100644 --- a/tensorflow/docs_src/tutorials/layers.md +++ b/tensorflow/docs_src/tutorials/layers.md @@ -625,8 +625,7 @@ operation earlier when we generated the probabilities in `cnn_model_fn`. > Note: If you don't explicitly assign a name to an operation via the `name` > argument, TensorFlow will assign a default name. A couple easy ways to > discover the names applied to operations are to visualize your graph on -> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger -> (tfdbg)}. +> @{$graph_viz$TensorBoard}) or to enable the @{$debugger$TensorFlow Debugger (tfdbg)}. Next, we create the `LoggingTensorHook`, passing `tensors_to_log` to the `tensors` argument. We set `every_n_iter=50`, which specifies that probabilities -- GitLab From be63d928eef26d3ea52c31147d49f6ae4032ac39 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Sat, 3 Mar 2018 22:12:24 -0800 Subject: [PATCH 388/884] Fix nested bullets in docs. 
(Need 4 spaces indent) PiperOrigin-RevId: 187763978 --- tensorflow/docs_src/get_started/custom_estimators.md | 10 +++++----- tensorflow/docs_src/programmers_guide/datasets.md | 4 ++-- tensorflow/docs_src/programmers_guide/graphs.md | 8 ++++---- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/docs_src/get_started/custom_estimators.md b/tensorflow/docs_src/get_started/custom_estimators.md index 42a246678a..185917baae 100644 --- a/tensorflow/docs_src/get_started/custom_estimators.md +++ b/tensorflow/docs_src/get_started/custom_estimators.md @@ -164,9 +164,9 @@ To implement a typical model function, you must do the following: * [Define the model](#define_the_model). * Specify additional calculations for each of the [three different modes](#modes): - * [Predict](#predict) - * [Evaluate](#evaluate) - * [Train](#train) + * [Predict](#predict) + * [Evaluate](#evaluate) + * [Train](#train) ## Define the model @@ -546,8 +546,8 @@ In brief, here's what the three graphs tell you: * accuracy: The accuracy is recorded by the following two lines: - * `eval_metric_ops={'my_accuracy': accuracy})`, during evaluation. - * `tf.summary.scalar('accuracy', accuracy[1])`, during training. + * `eval_metric_ops={'my_accuracy': accuracy})`, during evaluation. + * `tf.summary.scalar('accuracy', accuracy[1])`, during training. These tensorboard graphs are one of the main reasons it's important to pass a `global_step` to your optimizer's `minimize` method. The model can't record diff --git a/tensorflow/docs_src/programmers_guide/datasets.md b/tensorflow/docs_src/programmers_guide/datasets.md index d38fbddfa1..9ccdbde627 100644 --- a/tensorflow/docs_src/programmers_guide/datasets.md +++ b/tensorflow/docs_src/programmers_guide/datasets.md @@ -18,11 +18,11 @@ The `tf.data` API introduces two new abstractions to TensorFlow: tensors representing the image data and a label. There are two distinct ways to create a dataset: - * Creating a **source** (e.g. 
`Dataset.from_tensor_slices()`) constructs a + * Creating a **source** (e.g. `Dataset.from_tensor_slices()`) constructs a dataset from one or more `tf.Tensor` objects. - * Applying a **transformation** (e.g. `Dataset.batch()`) constructs a dataset + * Applying a **transformation** (e.g. `Dataset.batch()`) constructs a dataset from one or more `tf.data.Dataset` objects. * A `tf.data.Iterator` provides the main way to extract elements from a diff --git a/tensorflow/docs_src/programmers_guide/graphs.md b/tensorflow/docs_src/programmers_guide/graphs.md index ab2ce9af2e..e69b717432 100644 --- a/tensorflow/docs_src/programmers_guide/graphs.md +++ b/tensorflow/docs_src/programmers_guide/graphs.md @@ -335,20 +335,20 @@ described below. controls the behavior of the session. For example, some of the configuration options include: - * `allow_soft_placement`. Set this to `True` to enable a "soft" device + * `allow_soft_placement`. Set this to `True` to enable a "soft" device placement algorithm, which ignores @{tf.device} annotations that attempt to place CPU-only operations on a GPU device, and places them on the CPU instead. - * `cluster_def`. When using distributed TensorFlow, this option allows you + * `cluster_def`. When using distributed TensorFlow, this option allows you to specify what machines to use in the computation, and provide a mapping between job names, task indices, and network addresses. See @{tf.train.ClusterSpec.as_cluster_def} for details. - * `graph_options.optimizer_options`. Provides control over the optimizations + * `graph_options.optimizer_options`. Provides control over the optimizations that TensorFlow performs on your graph before executing it. - * `gpu_options.allow_growth`. Set this to `True` to change the GPU memory + * `gpu_options.allow_growth`. Set this to `True` to change the GPU memory allocator so that it gradually increases the amount of memory allocated, rather than allocating most of the memory at startup. 
-- GitLab From 806d504bbae0a7133578e85ace8b4d5779ee748f Mon Sep 17 00:00:00 2001 From: Patrick Nguyen Date: Sun, 4 Mar 2018 13:47:57 -0800 Subject: [PATCH 389/884] Prevent accidental re-use of removed field. PiperOrigin-RevId: 187798953 --- tensorflow/core/framework/function.proto | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/framework/function.proto b/tensorflow/core/framework/function.proto index bd01e86da3..72e3c43831 100644 --- a/tensorflow/core/framework/function.proto +++ b/tensorflow/core/framework/function.proto @@ -30,7 +30,8 @@ message FunctionDef { // Attributes specific to this function definition. map attr = 5; - // NOTE: field id 2 deleted on Jan 11, 2016, GraphDef version 21. + // NOTE: field id 2 deleted on Jan 11, 2017, GraphDef version 21. + reserved 2; // In both of the following fields, there is the need to specify an // output that is used as either the input to another node (in -- GitLab From 3963f0dae63dfc0383a86168bb4595d27768c9f8 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Sun, 4 Mar 2018 21:51:29 -0800 Subject: [PATCH 390/884] Correct reporter name. 
(#17425) --- SECURITY.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/SECURITY.md b/SECURITY.md index fea24b2739..93b25cd3bb 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -233,7 +233,7 @@ v//Fw6ZeY+HmRDFdirjD7wXtIuER4vqCryIqR6Xe9X8oJXz9L/Jhslc= ### Known vulnerabilities -| Type | Versions affected | Reported by | Additional Information | -|-------------------|:-----------------:|--------------------|-----------------------------| -| out of bounds read| <=1.4 | TenCent Blade Team | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | +| Type | Versions affected | Reported by | Additional Information | +|-------------------|:-----------------:|-----------------------|-----------------------------| +| out of bounds read| <=1.4 | Blade Team of TenCent | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | -- GitLab From 2a4930b7fe3e725bacfda2ab80b17f731deecc50 Mon Sep 17 00:00:00 2001 From: Martin Wicke <577277+martinwicke@users.noreply.github.com> Date: Sun, 4 Mar 2018 22:27:44 -0800 Subject: [PATCH 391/884] Correct capitalization --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index 93b25cd3bb..9f252e6818 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -235,5 +235,5 @@ v//Fw6ZeY+HmRDFdirjD7wXtIuER4vqCryIqR6Xe9X8oJXz9L/Jhslc= | Type | Versions affected | Reported by | Additional Information | |-------------------|:-----------------:|-----------------------|-----------------------------| -| out of bounds read| <=1.4 | Blade Team of TenCent | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | +| out of bounds read| <=1.4 | Blade Team of Tencent | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | -- GitLab From c3206ba3f331f135e26156c72eaabdaa5c8c2883 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 5 Mar 2018 02:45:58 -0800 Subject: [PATCH 392/884] Adds checks to tf.nn.sparse_softmax_cross_entropy_with_logits to make sure that shapes for labels and logits (except last dimension) match. First, the static dimensions are checked, and only if the result is inconclusive a dynamic check is added. In sparse_softmax_cross_entropy_with_logits the input dimensions are flattened, which can lead to unexpected bugs if the order of dimensions does not match (e.g. if one is time-major and the other is batch-major). This prevents such mistakes. PiperOrigin-RevId: 187841750 --- .../python/estimator/canned/head_test.py | 7 ++- tensorflow/python/ops/nn_ops.py | 47 +++++++++++++------ 2 files changed, 39 insertions(+), 15 deletions(-) diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index a300f315c1..23158c76e7 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -300,7 +300,12 @@ class MultiClassHeadWithSoftmaxCrossEntropyLoss(test.TestCase): features = {'x': values_2x3} # Static shape. - with self.assertRaisesRegexp(ValueError, 'Dimensions must be equal'): + with self.assertRaisesRegexp( + ValueError, + r'Shape mismatch: The shape of labels \(received \(3,\)\) should equal ' + r'the shape of logits except for the last dimension ' + r'\(received \(2, 3\)\)\.' 
+ ): head.create_loss( features=features, mode=model_fn.ModeKeys.EVAL, diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index a0d500afce..852ab365bb 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -29,6 +29,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -2025,6 +2026,9 @@ def sparse_softmax_cross_entropy_with_logits( # Store label shape for result later. labels_static_shape = labels.get_shape() labels_shape = array_ops.shape(labels) + static_shapes_fully_defined = ( + labels_static_shape.is_fully_defined() and + logits.get_shape()[:-1].is_fully_defined()) if logits.get_shape().ndims is not None and logits.get_shape().ndims == 0: raise ValueError( "Logits cannot be scalars - received shape %s." % logits.get_shape()) @@ -2034,6 +2038,12 @@ def sparse_softmax_cross_entropy_with_logits( raise ValueError("Rank mismatch: Rank of labels (received %s) should " "equal rank of logits minus 1 (received %s)." % (labels_static_shape.ndims, logits.get_shape().ndims)) + if (static_shapes_fully_defined and + labels_static_shape != logits.get_shape()[:-1]): + raise ValueError("Shape mismatch: The shape of labels (received %s) " + "should equal the shape of logits except for the last " + "dimension (received %s)." % (labels_static_shape, + logits.get_shape())) # Check if no reshapes are required. if logits.get_shape().ndims == 2: cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( @@ -2043,20 +2053,29 @@ def sparse_softmax_cross_entropy_with_logits( else: return cost - # Reshape logits to 2 dim, labels to 1 dim. 
- num_classes = array_ops.shape(logits)[array_ops.rank(logits) - 1] - precise_logits = array_ops.reshape(precise_logits, [-1, num_classes]) - labels = array_ops.reshape(labels, [-1]) - # The second output tensor contains the gradients. We use it in - # _CrossEntropyGrad() in nn_grad but not here. - cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( - precise_logits, labels, name=name) - cost = array_ops.reshape(cost, labels_shape) - cost.set_shape(labels_static_shape) - if logits.dtype == dtypes.float16: - return math_ops.cast(cost, dtypes.float16) - else: - return cost + # Perform a check of the dynamic shapes if the static shapes are not fully + # defined. + shape_checks = [] + if not static_shapes_fully_defined: + shape_checks.append( + check_ops.assert_equal( + array_ops.shape(labels), + array_ops.shape(logits)[:-1])) + with ops.control_dependencies(shape_checks): + # Reshape logits to 2 dim, labels to 1 dim. + num_classes = array_ops.shape(logits)[array_ops.rank(logits) - 1] + precise_logits = array_ops.reshape(precise_logits, [-1, num_classes]) + labels = array_ops.reshape(labels, [-1]) + # The second output tensor contains the gradients. We use it in + # _CrossEntropyGrad() in nn_grad but not here. + cost, _ = gen_nn_ops.sparse_softmax_cross_entropy_with_logits( + precise_logits, labels, name=name) + cost = array_ops.reshape(cost, labels_shape) + cost.set_shape(labels_static_shape) + if logits.dtype == dtypes.float16: + return math_ops.cast(cost, dtypes.float16) + else: + return cost @tf_export("nn.avg_pool") -- GitLab From 386ce8080a4ab541bcade08121f679913e85720a Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 5 Mar 2018 05:10:40 -0800 Subject: [PATCH 393/884] [XLA] Minor comment fixes in instruction_fusion.cc. No functional change. 
PiperOrigin-RevId: 187852483 --- tensorflow/compiler/xla/service/instruction_fusion.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/instruction_fusion.cc b/tensorflow/compiler/xla/service/instruction_fusion.cc index f494748e17..d69ad80bdb 100644 --- a/tensorflow/compiler/xla/service/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/instruction_fusion.cc @@ -302,7 +302,7 @@ StatusOr InstructionFusion::Run(HloModule* module) { // Consider each operand of this instruction for fusion into this // instruction. We want to consider the operands in a particular order to - // avoid created duplicate instruction clones in the fusion instruction. + // avoid creating duplicate instruction clones in the fusion instruction. // For example, consider the following expression: // // A = ... @@ -377,7 +377,7 @@ StatusOr InstructionFusion::Run(HloModule* module) { changed = true; if (operand->user_count() == 0) { - // Operand is now dead. Remove from post order by setting it's + // Operand is now dead. Remove from post order by setting its // location to nullptr. post_order[FindOrDie(post_order_index, operand)] = nullptr; post_order_index.erase(operand); -- GitLab From d0713d3459d3b101d3fba4ac422fae7f2c1b07a5 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 5 Mar 2018 05:18:24 -0800 Subject: [PATCH 394/884] Automated g4 rollback of changelist 185073515 PiperOrigin-RevId: 187852929 --- tensorflow/contrib/bayesflow/BUILD | 2 +- .../kernel_tests/halton_sequence_test.py | 101 +++++++-- .../python/ops/halton_sequence_impl.py | 201 +++++++++++++----- 3 files changed, 234 insertions(+), 70 deletions(-) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 3592cff90b..5fdcbffb4d 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -190,7 +190,7 @@ cuda_py_test( cuda_py_test( name = "halton_sequence_test", - size = "small", + size = "medium", srcs = ["python/kernel_tests/halton_sequence_test.py"], additional_deps = [ ":bayesflow_py", diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py index 0a85862abf..6b42bca6f9 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py @@ -36,29 +36,35 @@ class HaltonSequenceTest(test.TestCase): def test_known_values_small_bases(self): with self.test_session(): - # The first five elements of the Halton sequence with base 2 and 3 + # The first five elements of the non-randomized Halton sequence + # with base 2 and 3. expected = np.array(((1. / 2, 1. / 3), (1. / 4, 2. / 3), (3. / 4, 1. / 9), (1. / 8, 4. / 9), (5. / 8, 7. 
/ 9)), dtype=np.float32) - sample = halton.sample(2, num_samples=5) + sample = halton.sample(2, num_results=5, randomized=False) self.assertAllClose(expected, sample.eval(), rtol=1e-6) - def test_sample_indices(self): + def test_sequence_indices(self): + """Tests access of sequence elements by index.""" with self.test_session(): dim = 5 indices = math_ops.range(10, dtype=dtypes.int32) - sample_direct = halton.sample(dim, num_samples=10) - sample_from_indices = halton.sample(dim, sample_indices=indices) + sample_direct = halton.sample(dim, num_results=10, randomized=False) + sample_from_indices = halton.sample(dim, sequence_indices=indices, + randomized=False) self.assertAllClose(sample_direct.eval(), sample_from_indices.eval(), rtol=1e-6) def test_dtypes_works_correctly(self): + """Tests that all supported dtypes work without error.""" with self.test_session(): dim = 3 - sample_float32 = halton.sample(dim, num_samples=10, dtype=dtypes.float32) - sample_float64 = halton.sample(dim, num_samples=10, dtype=dtypes.float64) + sample_float32 = halton.sample(dim, num_results=10, dtype=dtypes.float32, + seed=11) + sample_float64 = halton.sample(dim, num_results=10, dtype=dtypes.float64, + seed=21) self.assertEqual(sample_float32.eval().dtype, np.float32) self.assertEqual(sample_float64.eval().dtype, np.float64) @@ -79,7 +85,8 @@ class HaltonSequenceTest(test.TestCase): p = normal_lib.Normal(loc=mu_p, scale=sigma_p) q = normal_lib.Normal(loc=mu_q, scale=sigma_q) - cdf_sample = halton.sample(2, num_samples=n, dtype=dtypes.float64) + cdf_sample = halton.sample(2, num_results=n, dtype=dtypes.float64, + seed=1729) q_sample = q.quantile(cdf_sample) # Compute E_p[X]. @@ -90,7 +97,7 @@ class HaltonSequenceTest(test.TestCase): # Compute E_p[X^2]. 
e_x2 = mc.expectation_importance_sampler( f=math_ops.square, log_p=p.log_prob, sampling_dist_q=q, z=q_sample, - seed=42) + seed=1412) stddev = math_ops.sqrt(e_x2 - math_ops.square(e_x)) # Keep the tolerance levels the same as in monte_carlo_test.py. @@ -100,10 +107,10 @@ class HaltonSequenceTest(test.TestCase): def test_docstring_example(self): # Produce the first 1000 members of the Halton sequence in 3 dimensions. - num_samples = 1000 + num_results = 1000 dim = 3 with self.test_session(): - sample = halton.sample(dim, num_samples=num_samples) + sample = halton.sample(dim, num_results=num_results, randomized=False) # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional # hypercube. @@ -115,16 +122,76 @@ class HaltonSequenceTest(test.TestCase): # Produces a relative absolute error of 1.7%. self.assertAllClose(integral.eval(), true_value.eval(), rtol=0.02) - # Now skip the first 1000 samples and recompute the integral with the next - # thousand samples. The sample_indices argument can be used to do this. + # Now skip the first 1000 samples and recompute the integral with the next + # thousand samples. The sequence_indices argument can be used to do this. - sample_indices = math_ops.range(start=1000, limit=1000 + num_samples, - dtype=dtypes.int32) - sample_leaped = halton.sample(dim, sample_indices=sample_indices) + sequence_indices = math_ops.range(start=1000, limit=1000 + num_results, + dtype=dtypes.int32) + sample_leaped = halton.sample(dim, sequence_indices=sequence_indices, + randomized=False) integral_leaped = math_ops.reduce_mean( math_ops.reduce_prod(sample_leaped ** powers, axis=-1)) - self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.001) + self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.05) + + def test_randomized_qmc_basic(self): + """Tests the randomization of the Halton sequences.""" + # This test is identical to the example given in Owen (2017), Figure 5. 
+ + dim = 20 + num_results = 2000 + replica = 5 + + with self.test_session(): + sample = halton.sample(dim, num_results=num_results, seed=121117) + f = math_ops.reduce_mean(math_ops.reduce_sum(sample, axis=1) ** 2) + values = [f.eval() for _ in range(replica)] + self.assertAllClose(np.mean(values), 101.6667, atol=np.std(values) * 2) + + def test_partial_sum_func_qmc(self): + """Tests the QMC evaluation of (x_j + x_{j+1} ...+x_{n})^2. + + A good test of QMC is provided by the function: + + f(x_1,..x_n, x_{n+1}, ..., x_{n+m}) = (x_{n+1} + ... x_{n+m} - m / 2)^2 + + with the coordinates taking values in the unit interval. The mean and + variance of this function (with the uniform distribution over the + unit-hypercube) is exactly calculable: + + E[f] = m / 12, Var(f) = m (5m - 3) / 360 + + The purpose of the "shift" (if n > 0) in the coordinate dependence of the + function is to provide a test for Halton sequence which exhibit more + dependence in the higher axes. + + This test confirms that the mean squared error of RQMC estimation falls + as O(N^(2-e)) for any e>0. + """ + + n, m = 10, 10 + dim = n + m + num_results_lo, num_results_hi = 1000, 10000 + replica = 20 + true_mean = m / 12. + + def func_estimate(x): + return math_ops.reduce_mean( + (math_ops.reduce_sum(x[:, -m:], axis=-1) - m / 2.0) ** 2) + + with self.test_session(): + sample_lo = halton.sample(dim, num_results=num_results_lo, seed=1925) + sample_hi = halton.sample(dim, num_results=num_results_hi, seed=898128) + f_lo, f_hi = func_estimate(sample_lo), func_estimate(sample_hi) + + estimates = np.array([(f_lo.eval(), f_hi.eval()) for _ in range(replica)]) + var_lo, var_hi = np.mean((estimates - true_mean) ** 2, axis=0) + + # Expect that the variance scales as N^2 so var_hi / var_lo ~ k / 10^2 + # with k a fudge factor accounting for the residual N dependence + # of the QMC error and the sampling error.
+ log_rel_err = np.log(100 * var_hi / var_lo) + self.assertAllClose(log_rel_err, 0.0, atol=1.2) if __name__ == '__main__': diff --git a/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py b/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py index 8cabf18903..35962109bc 100644 --- a/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py @@ -26,8 +26,9 @@ import numpy as np from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import functional_ops from tensorflow.python.ops import math_ops - +from tensorflow.python.ops import random_ops __all__ = [ 'sample', @@ -39,32 +40,45 @@ __all__ = [ _MAX_DIMENSION = 1000 -def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): - r"""Returns a sample from the `m` dimensional Halton sequence. +def sample(dim, + num_results=None, + sequence_indices=None, + dtype=None, + randomized=True, + seed=None, + name=None): + r"""Returns a sample from the `dim` dimensional Halton sequence. Warning: The sequence elements take values only between 0 and 1. Care must be taken to appropriately transform the domain of a function if it differs from the unit cube before evaluating integrals using Halton samples. It is also - important to remember that quasi-random numbers are not a replacement for - pseudo-random numbers in every context. Quasi random numbers are completely - deterministic and typically have significant negative autocorrelation (unless - randomized). + important to remember that quasi-random numbers without randomization are not + a replacement for pseudo-random numbers in every context. Quasi random numbers + are completely deterministic and typically have significant negative + autocorrelation unless randomization is used. 
Computes the members of the low discrepancy Halton sequence in dimension - `dim`. The d-dimensional sequence takes values in the unit hypercube in d - dimensions. Currently, only dimensions up to 1000 are supported. The prime - base for the `k`-th axes is the k-th prime starting from 2. For example, - if dim = 3, then the bases will be [2, 3, 5] respectively and the first - element of the sequence will be: [0.5, 0.333, 0.2]. For a more complete - description of the Halton sequences see: + `dim`. The `dim`-dimensional sequence takes values in the unit hypercube in + `dim` dimensions. Currently, only dimensions up to 1000 are supported. The + prime base for the k-th axes is the k-th prime starting from 2. For example, + if `dim` = 3, then the bases will be [2, 3, 5] respectively and the first + element of the non-randomized sequence will be: [0.5, 0.333, 0.2]. For a more + complete description of the Halton sequences see: https://en.wikipedia.org/wiki/Halton_sequence. For low discrepancy sequences and their applications see: https://en.wikipedia.org/wiki/Low-discrepancy_sequence. - The user must supply either `num_samples` or `sample_indices` but not both. + If `randomized` is true, this function produces a scrambled version of the + Halton sequence introduced by Owen in arXiv:1706.02808. For the advantages of + randomization of low discrepancy sequences see: + https://en.wikipedia.org/wiki/Quasi-Monte_Carlo_method#Randomization_of_quasi-Monte_Carlo + + The number of samples produced is controlled by the `num_results` and + `sequence_indices` parameters. The user must supply either `num_results` or + `sequence_indices` but not both. The former is the number of samples to produce starting from the first - element. If `sample_indices` is given instead, the specified elements of - the sequence are generated. For example, sample_indices=tf.range(10) is + element. If `sequence_indices` is given instead, the specified elements of + the sequence are generated. 
For example, sequence_indices=tf.range(10) is equivalent to specifying n=10. Example Use: @@ -73,9 +87,9 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): bf = tf.contrib.bayesflow # Produce the first 1000 members of the Halton sequence in 3 dimensions. - num_samples = 1000 + num_results = 1000 dim = 3 - sample = bf.halton_sequence.sample(dim, num_samples=num_samples) + sample = bf.halton_sequence.sample(dim, num_results=num_results, seed=127) # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional # hypercube. @@ -89,12 +103,13 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): print ("Estimated: %f, True Value: %f" % values) # Now skip the first 1000 samples and recompute the integral with the next - # thousand samples. The sample_indices argument can be used to do this. + # thousand samples. The sequence_indices argument can be used to do this. - sample_indices = tf.range(start=1000, limit=1000 + num_samples, - dtype=tf.int32) - sample_leaped = halton.sample(dim, sample_indices=sample_indices) + sequence_indices = tf.range(start=1000, limit=1000 + num_results, + dtype=tf.int32) + sample_leaped = halton.sample(dim, sequence_indices=sequence_indices, + seed=111217) integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers, axis=-1)) @@ -107,51 +122,57 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): Args: dim: Positive Python `int` representing each sample's `event_size.` Must not be greater than 1000. - num_samples: (Optional) positive Python `int`. The number of samples to - generate. Either this parameter or sample_indices must be specified but + num_results: (Optional) positive Python `int`. The number of samples to + generate. Either this parameter or sequence_indices must be specified but not both. If this parameter is None, then the behaviour is determined by - the `sample_indices`. 
- sample_indices: (Optional) `Tensor` of dtype int32 and rank 1. The elements - of the sequence to compute specified by their position in the sequence. - The entries index into the Halton sequence starting with 0 and hence, - must be whole numbers. For example, sample_indices=[0, 5, 6] will produce - the first, sixth and seventh elements of the sequence. If this parameter - is None, then the `num_samples` parameter must be specified which gives - the number of desired samples starting from the first sample. + the `sequence_indices`. + sequence_indices: (Optional) `Tensor` of dtype int32 and rank 1. The + elements of the sequence to compute specified by their position in the + sequence. The entries index into the Halton sequence starting with 0 and + hence, must be whole numbers. For example, sequence_indices=[0, 5, 6] will + produce the first, sixth and seventh elements of the sequence. If this + parameter is None, then the `num_results` parameter must be specified + which gives the number of desired samples starting from the first sample. dtype: (Optional) The dtype of the sample. One of `float32` or `float64`. Default is `float32`. + randomized: (Optional) bool indicating whether to produce a randomized + Halton sequence. If True, applies the randomization described in + Owen (2017) [arXiv:1706.02808]. + seed: (Optional) Python integer to seed the random number generator. Only + used if `randomized` is True. If not supplied and `randomized` is True, + no seed is set. name: (Optional) Python `str` describing ops managed by this function. If not supplied the name of this function is used. Returns: halton_elements: Elements of the Halton sequence. 
`Tensor` of supplied dtype - and `shape` `[num_samples, dim]` if `num_samples` was specified or shape - `[s, dim]` where s is the size of `sample_indices` if `sample_indices` + and `shape` `[num_results, dim]` if `num_results` was specified or shape + `[s, dim]` where s is the size of `sequence_indices` if `sequence_indices` were specified. Raises: - ValueError: if both `sample_indices` and `num_samples` were specified or + ValueError: if both `sequence_indices` and `num_results` were specified or if dimension `dim` is less than 1 or greater than 1000. """ if dim < 1 or dim > _MAX_DIMENSION: raise ValueError( 'Dimension must be between 1 and {}. Supplied {}'.format(_MAX_DIMENSION, dim)) - if (num_samples is None) == (sample_indices is None): - raise ValueError('Either `num_samples` or `sample_indices` must be' + if (num_results is None) == (sequence_indices is None): + raise ValueError('Either `num_results` or `sequence_indices` must be' ' specified but not both.') dtype = dtype or dtypes.float32 if not dtype.is_floating: raise ValueError('dtype must be of `float`-type') - with ops.name_scope(name, 'sample', values=[sample_indices]): + with ops.name_scope(name, 'sample', values=[sequence_indices]): # Here and in the following, the shape layout is as follows: # [sample dimension, event dimension, coefficient dimension]. # The coefficient dimension is an intermediate axes which will hold the # weights of the starting integer when expressed in the (prime) base for # an event dimension. - indices = _get_indices(num_samples, sample_indices, dtype) + indices = _get_indices(num_results, sequence_indices, dtype) radixes = array_ops.constant(_PRIMES[0:dim], dtype=dtype, shape=[dim, 1]) max_sizes_by_axes = _base_expansion_size(math_ops.reduce_max(indices), @@ -170,17 +191,92 @@ def sample(dim, num_samples=None, sample_indices=None, dtype=None, name=None): # though we don't need it. 
We avoid this by setting the exponents for each # axes to 0 beyond the maximum value needed for that dimension. exponents_by_axes = array_ops.tile([math_ops.range(max_size)], [dim, 1]) - weight_mask = exponents_by_axes > max_sizes_by_axes + + # The mask is true for those coefficients that are irrelevant. + weight_mask = exponents_by_axes >= max_sizes_by_axes capped_exponents = array_ops.where( weight_mask, array_ops.zeros_like(exponents_by_axes), exponents_by_axes) weights = radixes ** capped_exponents + # The following computes the base b expansion of the indices. Suppose, + # x = a0 + a1*b + a2*b^2 + ... Then, performing a floor div of x with + # the vector (1, b, b^2, b^3, ...) will produce + # (a0 + s1 * b, a1 + s2 * b, ...) where s_i are coefficients we don't care + # about. Noting that all a_i < b by definition of place value expansion, + # we see that taking the elements mod b of the above vector produces the + # place value expansion coefficients. coeffs = math_ops.floor_div(indices, weights) coeffs *= 1 - math_ops.cast(weight_mask, dtype) - coeffs = (coeffs % radixes) / radixes - return math_ops.reduce_sum(coeffs / weights, axis=-1) + coeffs %= radixes + if not randomized: + coeffs /= radixes + return math_ops.reduce_sum(coeffs / weights, axis=-1) + coeffs = _randomize(coeffs, radixes, seed=seed) + # Remove the contribution from randomizing the trailing zero for the + # axes where max_size_by_axes < max_size. This will be accounted + # for separately below (using zero_correction). + coeffs *= 1 - math_ops.cast(weight_mask, dtype) + coeffs /= radixes + base_values = math_ops.reduce_sum(coeffs / weights, axis=-1) + + # The randomization used in Owen (2017) does not leave 0 invariant. While + # we have accounted for the randomization of the first `max_size_by_axes` + # coefficients, we still need to correct for the trailing zeros. Luckily, + # this is equivalent to adding a uniform random value scaled so the first + # `max_size_by_axes` coefficients are zero. 
The following statements perform + # this correction. + zero_correction = random_ops.random_uniform([dim, 1], seed=seed, + dtype=dtype) + zero_correction /= (radixes ** max_sizes_by_axes) + return base_values + array_ops.reshape(zero_correction, [-1]) + + +def _randomize(coeffs, radixes, seed=None): + """Applies the Owen randomization to the coefficients.""" + given_dtype = coeffs.dtype + coeffs = math_ops.to_int32(coeffs) + num_coeffs = array_ops.shape(coeffs)[-1] + radixes = array_ops.reshape(math_ops.to_int32(radixes), [-1]) + perms = _get_permutations(num_coeffs, radixes, seed=seed) + perms = array_ops.reshape(perms, [-1]) + radix_sum = math_ops.reduce_sum(radixes) + radix_offsets = array_ops.reshape(math_ops.cumsum(radixes, exclusive=True), + [-1, 1]) + offsets = radix_offsets + math_ops.range(num_coeffs) * radix_sum + permuted_coeffs = array_ops.gather(perms, coeffs + offsets) + return math_ops.cast(permuted_coeffs, dtype=given_dtype) + + +def _get_permutations(num_results, dims, seed=None): + """Uniform iid sample from the space of permutations. + + Draws a sample of size `num_results` from the group of permutations of degrees + specified by the `dims` tensor. These are packed together into one tensor + such that each row is one sample from each of the dimensions in `dims`. For + example, if dims = [2,3] and num_results = 2, the result is a tensor of shape + [2, 2 + 3] and the first row of the result might look like: + [1, 0, 2, 0, 1]. The first two elements are a permutation over 2 elements + while the next three are a permutation over 3 elements. + + Args: + num_results: A positive scalar `Tensor` of integral type. The number of + draws from the discrete uniform distribution over the permutation groups. + dims: A 1D `Tensor` of the same dtype as `num_results`. The degree of the + permutation groups from which to sample. + seed: (Optional) Python integer to seed the random number generator. 
+ Returns: + permutations: A `Tensor` of shape `[num_results, sum(dims)]` and the same + dtype as `dims`. + """ + sample_range = math_ops.range(num_results) + def generate_one(d): + fn = lambda _: random_ops.random_shuffle(math_ops.range(d), seed=seed) + return functional_ops.map_fn(fn, sample_range) + return array_ops.concat([generate_one(d) for d in array_ops.unstack(dims)], + axis=-1) -def _get_indices(n, sample_indices, dtype, name=None): + +def _get_indices(n, sequence_indices, dtype, name=None): """Generates starting points for the Halton sequence procedure. The k'th element of the sequence is generated starting from a positive integer @@ -191,10 +287,10 @@ def _get_indices(n, sample_indices, dtype, name=None): Args: n: Positive `int`. The number of samples to generate. If this - parameter is supplied, then `sample_indices` should be None. - sample_indices: `Tensor` of dtype int32 and rank 1. The entries + parameter is supplied, then `sequence_indices` should be None. + sequence_indices: `Tensor` of dtype int32 and rank 1. The entries index into the Halton sequence starting with 0 and hence, must be whole - numbers. For example, sample_indices=[0, 5, 6] will produce the first, + numbers. For example, sequence_indices=[0, 5, 6] will produce the first, sixth and seventh elements of the sequence. If this parameter is not None then `n` must be None. dtype: The dtype of the sample. One of `float32` or `float64`. @@ -204,14 +300,14 @@ def _get_indices(n, sample_indices, dtype, name=None): Returns: indices: `Tensor` of dtype `dtype` and shape = `[n, 1, 1]`. 
""" - with ops.name_scope(name, 'get_indices', [n, sample_indices]): - if sample_indices is None: - sample_indices = math_ops.range(n, dtype=dtype) + with ops.name_scope(name, '_get_indices', [n, sequence_indices]): + if sequence_indices is None: + sequence_indices = math_ops.range(n, dtype=dtype) else: - sample_indices = math_ops.cast(sample_indices, dtype) + sequence_indices = math_ops.cast(sequence_indices, dtype) # Shift the indices so they are 1 based. - indices = sample_indices + 1 + indices = sequence_indices + 1 # Reshape to make space for the event dimension and the place value # coefficients. @@ -222,7 +318,7 @@ def _base_expansion_size(num, bases): """Computes the number of terms in the place value expansion. Let num = a0 + a1 b + a2 b^2 + ... ak b^k be the place value expansion of - `num` in base b (ak <> 0). This function computes and returns `k` for each + `num` in base b (ak <> 0). This function computes and returns `k+1` for each base `b` specified in `bases`. This can be inferred from the base `b` logarithm of `num` as follows: @@ -261,4 +357,5 @@ def _primes_less_than(n): _PRIMES = _primes_less_than(7919+1) + assert len(_PRIMES) == _MAX_DIMENSION -- GitLab From 9423044b971615027c86128adaa2cf2cfacb290a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 06:51:33 -0800 Subject: [PATCH 395/884] Improve LinearValidOnShape. It actually only needs to check that the operation is a bitcast (ignoring element_type). So far, the check was more restrictive, which made this function always return false for a non-trivial reshape operation. However we still fail to make use of this less strict checking, because for reshapes inside a fusion node, we don't have a layout and can therefore not check if it is a bitcast or not. Also add a disabled test that will be enabled once the layout issue is fixed. 
PiperOrigin-RevId: 187860440 --- tensorflow/compiler/xla/service/llvm_ir/ir_array.cc | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index f3642cf0a1..9aa0ce507b 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -106,16 +106,13 @@ IrArray::IrArray(llvm::Value* base_ptr, const Shape& shape) } } -// Returns whether given linear index valid on given shape. +// Returns whether the given linear index is valid on the given shape. bool IrArray::Index::LinearValidOnShape(const Shape& a) const { - auto b = ShapeUtil::MakeShape(PRED /* irrelevant */, dims_); + auto b = ShapeUtil::MakeShape(a.element_type(), dims_); *b.mutable_layout() = layout_; return linear_ != nullptr && - ContainersEqual( - ShapeUtil::StripDegenerateDimensions(a).dimensions(), - ShapeUtil::StripDegenerateDimensions(b).dimensions()) && - LayoutUtil::Equal(ShapeUtil::StripDegenerateDimensions(a).layout(), - ShapeUtil::StripDegenerateDimensions(b).layout()); + ShapeUtil::ElementsIn(a) == ShapeUtil::ElementsIn(b) && + ShapeUtil::ReshapeIsBitcast(a, b); } IrArray::Index IrArray::Index::SourceIndexOfReshape( -- GitLab From 3a2e7635e69b5b1d1f510108d7a601edc570abc8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 07:43:58 -0800 Subject: [PATCH 396/884] Internal change. 
PiperOrigin-RevId: 187865303 --- tensorflow/contrib/lite/kernels/test_util.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/test_util.h b/tensorflow/contrib/lite/kernels/test_util.h index 7d476ba1ea..a9064d54e7 100644 --- a/tensorflow/contrib/lite/kernels/test_util.h +++ b/tensorflow/contrib/lite/kernels/test_util.h @@ -39,10 +39,10 @@ inline std::vector Quantize(const std::vector& data, float scale, int32_t zero_point) { std::vector q; for (float f : data) { - q.push_back(std::max( + q.push_back(static_cast(std::max( std::numeric_limits::min(), - std::min(std::numeric_limits::max(), - static_cast(std::round(zero_point + (f / scale)))))); + std::min(std::numeric_limits::max(), + std::round(zero_point + (f / scale)))))); } return q; } -- GitLab From 5e53ba5a33ee116179bc4ac4f09be76811eb3960 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 09:01:22 -0800 Subject: [PATCH 397/884] Fix a case in SparseSegmentReduction ops with missing segment IDs, where all segment IDs are empty. Added a test for this case. PiperOrigin-RevId: 187873356 --- .../core/kernels/segment_reduction_ops.cc | 7 ++++++- .../segment_reduction_ops_test.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/segment_reduction_ops.cc b/tensorflow/core/kernels/segment_reduction_ops.cc index 27b8081eb8..bbf8696531 100644 --- a/tensorflow/core/kernels/segment_reduction_ops.cc +++ b/tensorflow/core/kernels/segment_reduction_ops.cc @@ -616,7 +616,12 @@ class SparseSegmentReductionOpBase : public OpKernel { // we need to explicitly set missing indices to the default value. 
Tensor* output = nullptr; OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); - if (num_indices == 0) return; + if (num_indices == 0) { + if (output_rows > 0) { + output->flat_outer_dims().setConstant(default_value_); + } + return; + } OP_REQUIRES(context, output_rows > 0, errors::InvalidArgument("segment ids must be >= 0")); auto output_flat = output->flat_outer_dims(); diff --git a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py index 5a54f448d0..239a48d273 100644 --- a/tensorflow/python/kernel_tests/segment_reduction_ops_test.py +++ b/tensorflow/python/kernel_tests/segment_reduction_ops_test.py @@ -507,6 +507,25 @@ class SparseSegmentReductionOpTest(SparseSegmentReductionHelper): tf_ans = s.eval() self.assertAllClose(np_ans, tf_ans) + def testWithEmptySegments(self): + tf_x = constant_op.constant([], shape=[0, 4], dtype=dtypes_lib.float32) + ops_list = [ + math_ops.sparse_segment_sum_with_num_segments, + math_ops.sparse_segment_mean_with_num_segments + ] + segment_indices = [] + tf_indices = [] + num_segments = 5 + with self.test_session(use_gpu=False): + for tf_op in ops_list: + s = tf_op( + data=tf_x, + indices=tf_indices, + segment_ids=segment_indices, + num_segments=num_segments) + tf_ans = s.eval() + self.assertAllClose(np.zeros([5, 4]), tf_ans) + def testSegmentIdsGreaterThanZero(self): tf_x, np_x = self._input([10, 4], dtype=dtypes_lib.float32) ops_list = [(np.add, None, math_ops.sparse_segment_sum), ( -- GitLab From b0ee6b63b865d15ff722a74bbc89805e5e12c024 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Mon, 5 Mar 2018 09:18:24 -0800 Subject: [PATCH 398/884] Change the default ps_ops to STANDARD_PS_OPS PiperOrigin-RevId: 187875797 --- tensorflow/python/training/device_setter.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tensorflow/python/training/device_setter.py b/tensorflow/python/training/device_setter.py index 
0e824d89e9..d31c375b4c 100644 --- a/tensorflow/python/training/device_setter.py +++ b/tensorflow/python/training/device_setter.py @@ -179,8 +179,7 @@ def replica_device_setter(ps_tasks=0, ps_device="/job:ps", than overriding them. cluster: `ClusterDef` proto or `ClusterSpec`. ps_ops: List of strings representing `Operation` types that need to be - placed on `ps` devices. If `None`, defaults to - `["Variable", "VariableV2", "VarHandleOp"]`. + placed on `ps` devices. If `None`, defaults to `STANDARD_PS_OPS`. ps_strategy: A callable invoked for every ps `Operation` (i.e. matched by `ps_ops`), that takes the `Operation` and returns the ps task index to use. If `None`, defaults to a round-robin strategy across all `ps` @@ -210,7 +209,7 @@ def replica_device_setter(ps_tasks=0, ps_device="/job:ps", if ps_ops is None: # TODO(sherrym): Variables in the LOCAL_VARIABLES collection should not be # placed in the parameter server. - ps_ops = ["Variable", "VariableV2", "VarHandleOp"] + ps_ops = list(STANDARD_PS_OPS) if not merge_devices: logging.warning( -- GitLab From f547b77cd8aac0a2142e8f4bf80107fc52a4ef05 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 5 Mar 2018 09:51:38 -0800 Subject: [PATCH 399/884] [XLA:GPU] Add some VLOGs to FusionMerger. Also use c_any_of and friends instead of std::any_of &c, and make some minor whitespace fixes in comments. No functional change. 
PiperOrigin-RevId: 187880113 --- .../compiler/xla/service/gpu/fusion_merger.cc | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index c137fbc97e..91a916f67c 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -45,6 +45,7 @@ void MaybeResolveTupleElements(HloInstruction* instruction, // Returns the bytes read by fusion parameter 'param', by returning the byte // size of 'param' shape (or the cumulative byte sizes of all leaf tuple // elements if 'param' is tuple-shaped). +// // In the special case where all users of 'param' (or all users of a leaf // tuple element if 'param' is tuple-shaped) are Slice instructions, the size // of each slice instruction is accumulated instead, to give a more accurate @@ -63,11 +64,10 @@ double CalculateBytesReadByFusionParameter(HloInstruction* param) { // Slice for a more accurate estimate of bytes read. double bytes = 0.0; for (auto& instruction : instructions) { - if (std::all_of(instruction->users().begin(), instruction->users().end(), - [](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kSlice || - instruction->opcode() == HloOpcode::kDynamicSlice; - })) { + if (c_all_of(instruction->users(), [](const HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kSlice || + instruction->opcode() == HloOpcode::kDynamicSlice; + })) { // All users are slice: accumulate bytes of all user slice instructions. for (auto& user : instruction->users()) { bytes += ShapeUtil::ByteSizeOf(user->shape()); @@ -199,6 +199,7 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { ++total_visited_; // Skip 'fusion' instruction if there are no users into which we can merge. 
if (fusion->users().empty()) { + VLOG(3) << "Not merging " << fusion->name() << ": Has no users."; ++num_fail_no_users_; return Status::OK(); } @@ -208,24 +209,26 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Input fusion instructions need to be rooted at a particular HLO (e.g. // kReduce), so they shouldn't be further fused either. if (fusion->fusion_kind() != HloInstruction::FusionKind::kLoop) { + VLOG(3) << "Not merging " << fusion->name() << ": Is not loop fusion."; ++num_fail_not_loop_fusion_; return Status::OK(); } // Skip multiple output fusion. It's not yet supported. if (fusion->IsMultiOutputFusion()) { + VLOG(3) << "Not merging " << fusion->name() << ": Is multi-output fusion."; ++num_fail_not_loop_fusion_; return Status::OK(); } // Skip 'fusion' instruction if we cannot merge into all of its users. // Merging into all users enables the removal of 'fusion' from the // computation. - if (!std::all_of(fusion->users().begin(), fusion->users().end(), - [](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kFusion && - instruction->fusion_kind() == - HloInstruction::FusionKind::kLoop; - })) { + if (!c_all_of(fusion->users(), [](const HloInstruction* instruction) { + return instruction->opcode() == HloOpcode::kFusion && + instruction->fusion_kind() == HloInstruction::FusionKind::kLoop; + })) { + VLOG(3) << "Not merging " << fusion->name() + << ": Some of its users are not loop/input fusion kernels."; ++num_fail_merge_all_users_; return Status::OK(); } @@ -233,18 +236,17 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Skip 'fusion' instruction if any of its fused instructions are expensive. // This is done to avoid the duplication of expensive instructions, which // would occur if 'fusion' were merged into multiple users. 
+ // // If 'fusion' has just one user, then an earlier fusion pass chose not to // fuse this producer/comsumer pair (likely because of expensive instruction // re-use by the consumer), and so we honor that choice here as well. - if (!std::all_of(fusion->fused_instructions().begin(), - fusion->fused_instructions().end(), - [](const HloInstruction* instruction) { - if (instruction->opcode() != HloOpcode::kParameter && - GpuInstructionFusion::IsExpensive(*instruction)) { - return false; - } - return true; - })) { + if (c_any_of(fusion->fused_instructions(), + [](const HloInstruction* instruction) { + return instruction->opcode() != HloOpcode::kParameter && + GpuInstructionFusion::IsExpensive(*instruction); + })) { + VLOG(3) << "Not merging " << fusion->name() + << ": Contains one or more expensive instructions."; ++num_fail_expensive_fused_instruction_; return Status::OK(); } @@ -253,6 +255,8 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // exceeds the threshold value. if (CalculateFlopsToBytesRatio(fusion) > FusionMerger::GetThresholdFlopsToBytesRatio()) { + VLOG(3) << "Not merging " << fusion->name() + << ": flops-to-bytes ratio is not favorable."; ++num_fail_flops_to_byte_ratio_; return Status::OK(); } @@ -265,6 +269,9 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { const double merged_to_current_bytes_ratio = merged_bytes_transferred / std::max(1.0, current_bytes_transferred); if (merged_to_current_bytes_ratio > 1.10) { + VLOG(3) << "Not merging " << fusion->name() + << ": merged-to-current-bytes-ratio of " + << merged_to_current_bytes_ratio << " is not favorable."; ++num_fail_net_bytes_transferred_ratio_; return Status::OK(); } -- GitLab From f09e7f9ebad85b3395628381777cba3e71f768a5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 10:07:27 -0800 Subject: [PATCH 400/884] Exposes poisson_regression_head in tf.contrib.estimator. 
PiperOrigin-RevId: 187882494 --- tensorflow/contrib/estimator/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/estimator/__init__.py b/tensorflow/contrib/estimator/__init__.py index 0f75b77050..6b9f9575b6 100644 --- a/tensorflow/contrib/estimator/__init__.py +++ b/tensorflow/contrib/estimator/__init__.py @@ -39,6 +39,7 @@ _allowed_symbols = [ 'multi_class_head', 'multi_head', 'multi_label_head', + 'poisson_regression_head', 'regression_head', 'DNNEstimator', 'DNNLinearCombinedEstimator', -- GitLab From 602f54c065eb9513ef3bb8557887d106637f96e5 Mon Sep 17 00:00:00 2001 From: David Soergel Date: Mon, 5 Mar 2018 10:11:20 -0800 Subject: [PATCH 401/884] Make SavedModel builder validation accept signatures involving sparse tensors. PiperOrigin-RevId: 187883080 --- tensorflow/python/saved_model/builder_impl.py | 11 +-- .../python/saved_model/saved_model_test.py | 72 +++++++++++++++---- 2 files changed, 67 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/saved_model/builder_impl.py b/tensorflow/python/saved_model/builder_impl.py index 7347da7536..3447d917e9 100644 --- a/tensorflow/python/saved_model/builder_impl.py +++ b/tensorflow/python/saved_model/builder_impl.py @@ -193,7 +193,8 @@ class SavedModelBuilder(object): def _validate_tensor_info(self, tensor_info): """Validates the `TensorInfo` proto. - Checks if the `name` and `dtype` fields exist and are non-empty. + Checks if the `encoding` (`name` or `coo_sparse`) and `dtype` fields exist + and are non-empty. Args: tensor_info: `TensorInfo` protocol buffer to validate. 
@@ -206,10 +207,12 @@ class SavedModelBuilder(object): raise AssertionError( "All TensorInfo protos used in the SignatureDefs must have the name " "and dtype fields set.") - if not tensor_info.name: + if tensor_info.WhichOneof("encoding") is None: + # TODO(soergel) validate each of the fields of coo_sparse raise AssertionError( - "All TensorInfo protos used in the SignatureDefs must have the name " - "field set: %s" % tensor_info) + "All TensorInfo protos used in the SignatureDefs must have one of " + "the 'encoding' fields (e.g., name or coo_sparse) set: %s" + % tensor_info) if tensor_info.dtype is types_pb2.DT_INVALID: raise AssertionError( "All TensorInfo protos used in the SignatureDefs must have the dtype " diff --git a/tensorflow/python/saved_model/saved_model_test.py b/tensorflow/python/saved_model/saved_model_test.py index d9d3168825..804255375e 100644 --- a/tensorflow/python/saved_model/saved_model_test.py +++ b/tensorflow/python/saved_model/saved_model_test.py @@ -94,7 +94,7 @@ class SavedModelTest(test.TestCase): self.assertEqual(expected_asset_file_name, asset.filename) self.assertEqual(expected_asset_tensor_name, asset.tensor_info.name) - def _validate_inputs_tensor_info(self, builder, tensor_info): + def _validate_inputs_tensor_info_fail(self, builder, tensor_info): with self.test_session(graph=ops.Graph()) as sess: self._init_and_validate_variable(sess, "v", 42) @@ -107,7 +107,18 @@ class SavedModelTest(test.TestCase): sess, ["foo"], signature_def_map={"foo_key": foo_signature}) - def _validate_outputs_tensor_info(self, builder, tensor_info): + def _validate_inputs_tensor_info_accept(self, builder, tensor_info): + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + foo_signature = signature_def_utils.build_signature_def({ + "foo_inputs": tensor_info + }, dict(), "foo") + builder.add_meta_graph_and_variables( + sess, ["foo"], + signature_def_map={"foo_key": foo_signature}) + + def 
_validate_outputs_tensor_info_fail(self, builder, tensor_info): with self.test_session(graph=ops.Graph()) as sess: self._init_and_validate_variable(sess, "v", 42) @@ -119,6 +130,16 @@ class SavedModelTest(test.TestCase): sess, ["foo"], signature_def_map={"foo_key": foo_signature}) + def _validate_outputs_tensor_info_accept(self, builder, tensor_info): + with self.test_session(graph=ops.Graph()) as sess: + self._init_and_validate_variable(sess, "v", 42) + + foo_signature = signature_def_utils.build_signature_def( + dict(), {"foo_outputs": tensor_info}, "foo") + builder.add_meta_graph_and_variables( + sess, ["foo"], + signature_def_map={"foo_key": foo_signature}) + def testMaybeSavedModelDir(self): base_path = test.test_src_dir_path("/python/saved_model") self.assertFalse(loader.maybe_saved_model_directory(base_path)) @@ -538,23 +559,50 @@ class SavedModelTest(test.TestCase): self.assertEqual("bar", bar_signature["bar_key"].method_name) self.assertEqual("foo_new", bar_signature["foo_key"].method_name) - def testSignatureDefValidation(self): - export_dir = self._get_export_dir("test_signature_def_validation") + def testSignatureDefValidationFails(self): + export_dir = self._get_export_dir("test_signature_def_validation_fail") builder = saved_model_builder.SavedModelBuilder(export_dir) - tensor_without_name = meta_graph_pb2.TensorInfo() - tensor_without_name.dtype = types_pb2.DT_FLOAT - self._validate_inputs_tensor_info(builder, tensor_without_name) - self._validate_outputs_tensor_info(builder, tensor_without_name) + tensor_without_encoding = meta_graph_pb2.TensorInfo() + tensor_without_encoding.dtype = types_pb2.DT_FLOAT + self._validate_inputs_tensor_info_fail(builder, tensor_without_encoding) + self._validate_outputs_tensor_info_fail(builder, tensor_without_encoding) tensor_without_dtype = meta_graph_pb2.TensorInfo() tensor_without_dtype.name = "x" - self._validate_inputs_tensor_info(builder, tensor_without_dtype) - self._validate_outputs_tensor_info(builder, 
tensor_without_dtype) + self._validate_inputs_tensor_info_fail(builder, tensor_without_dtype) + self._validate_outputs_tensor_info_fail(builder, tensor_without_dtype) tensor_empty = meta_graph_pb2.TensorInfo() - self._validate_inputs_tensor_info(builder, tensor_empty) - self._validate_outputs_tensor_info(builder, tensor_empty) + self._validate_inputs_tensor_info_fail(builder, tensor_empty) + self._validate_outputs_tensor_info_fail(builder, tensor_empty) + + def testSignatureDefValidationSucceedsWithName(self): + tensor_with_name = meta_graph_pb2.TensorInfo() + tensor_with_name.name = "foo" + tensor_with_name.dtype = types_pb2.DT_FLOAT + + export_dir = self._get_export_dir("test_signature_def_validation_name_1") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_inputs_tensor_info_accept(builder, tensor_with_name) + + export_dir = self._get_export_dir("test_signature_def_validation_name_2") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_outputs_tensor_info_accept(builder, tensor_with_name) + + def testSignatureDefValidationSucceedsWithCoo(self): + tensor_with_coo = meta_graph_pb2.TensorInfo() + # TODO(soergel) test validation of each of the fields of coo_sparse + tensor_with_coo.coo_sparse.values_tensor_name = "foo" + tensor_with_coo.dtype = types_pb2.DT_FLOAT + + export_dir = self._get_export_dir("test_signature_def_validation_coo_1") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_inputs_tensor_info_accept(builder, tensor_with_coo) + + export_dir = self._get_export_dir("test_signature_def_validation_coo_2") + builder = saved_model_builder.SavedModelBuilder(export_dir) + self._validate_outputs_tensor_info_accept(builder, tensor_with_coo) def testAssets(self): export_dir = self._get_export_dir("test_assets") -- GitLab From 9f9bd5c71e5cc94d16e8295386445961880744ae Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 5 Mar 2018 10:47:24 -0800 Subject: [PATCH 402/884] Fix documentation of image size for inception-v3 (299 * 299) PiperOrigin-RevId: 187889122 --- tensorflow/contrib/lite/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/README.md b/tensorflow/contrib/lite/README.md index 00e93d2c4f..df8c1c623c 100644 --- a/tensorflow/contrib/lite/README.md +++ b/tensorflow/contrib/lite/README.md @@ -91,7 +91,7 @@ Currently, we only support building the Android demo app within a Python 2 environment (due to a Bazel bug). ### More about the demo -The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used (229 * 229 for Inception-v3). The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch. 224 * 224 (299 * 299) is the width and height of the image. 3 bytes represents three colors of a pixel. This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. Both models have 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The model file must be downloaded and bundled within the assets directory of the app. +The demo is resizing each camera image frame to (224 width * 224 height) to match the quantized Mobilenet model being used (299 * 299 for Inception-v3). The resized image is converted into a ByteBuffer row by row of size 1 * 224 * 224 * 3 bytes, where 1 is the number of images in a batch. 224 * 224 (299 * 299) is the width and height of the image. 3 bytes represents three colors of a pixel. 
This demo uses the TensorFlow Lite Java inference API for models which take a single input and provide a single output. This outputs a two-dimensional array, with the first dimension being the category index and the second dimension being the confidence of classification. Both models have 1001 unique categories and the app sorts the probabilities of all the categories and displays the top three. The model file must be downloaded and bundled within the assets directory of the app. # iOS Demo App -- GitLab From 8382cbabf2a15f22d22a291fc47776113e6ec77c Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 5 Mar 2018 11:10:42 -0800 Subject: [PATCH 403/884] [XLA:GPU] Allow merging into input fusion nodes in FusionMerger. Seems to have been an oversight. "Input fusion" means that the *output* of the fusion node is the "real hero". The inputs aren't special; we can fuse more stuff in. PiperOrigin-RevId: 187892975 --- tensorflow/compiler/xla/service/gpu/BUILD | 2 + .../compiler/xla/service/gpu/fusion_merger.cc | 7 ++-- .../xla/service/gpu/fusion_merger_test.cc | 41 +++++++++++++++++++ .../xla/service/gpu/ir_emitter_unnested.cc | 7 ++++ 4 files changed, 54 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index 334efff1e6..cecbc25192 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -437,8 +437,10 @@ tf_cc_test( ":fusion_merger", ":instruction_fusion", "//tensorflow/compiler/xla:test_helpers", + "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc index 91a916f67c..3cd30b754c 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger.cc +++ 
b/tensorflow/compiler/xla/service/gpu/fusion_merger.cc @@ -223,9 +223,10 @@ Status FusionInstructionMerger::HandleFusion(HloInstruction* fusion) { // Skip 'fusion' instruction if we cannot merge into all of its users. // Merging into all users enables the removal of 'fusion' from the // computation. - if (!c_all_of(fusion->users(), [](const HloInstruction* instruction) { - return instruction->opcode() == HloOpcode::kFusion && - instruction->fusion_kind() == HloInstruction::FusionKind::kLoop; + if (!c_all_of(fusion->users(), [](const HloInstruction* user) { + return user->opcode() == HloOpcode::kFusion && + (user->fusion_kind() == HloInstruction::FusionKind::kLoop || + user->fusion_kind() == HloInstruction::FusionKind::kInput); })) { VLOG(3) << "Not merging " << fusion->name() << ": Some of its users are not loop/input fusion kernels."; diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index deef5966b8..c0def27525 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -16,13 +16,17 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/gpu/fusion_merger.h" #include "tensorflow/compiler/xla/service/gpu/instruction_fusion.h" +#include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace xla { namespace gpu { namespace { +namespace op = xla::testing::opcode_matchers; + class FusionMergerTest : public HloTestBase { protected: FusionMergerTest() : module_(CreateNewModule()) {} @@ -459,6 +463,43 @@ TEST_F(FusionMergerTest, BytesTransferredThresholdNotExeceeded) { EXPECT_TRUE(FusionMerger().Run(module_.get()).ValueOrDie()); } +// Check that we're willing to merge f1_computation into f2_computation, even +// though f2 is an input fusion node. +TEST_F(FusionMergerTest, WillMergeIntoInputFusion) { + const char* const kModule = R"( + HloModule m + + f1_computation { + f1_p0 = f32[10]{0} parameter(0) + ROOT f1_root = f32[10]{0} add(f1_p0, f1_p0) + } + + add_computation { + add_lhs = f32[] parameter(0) + add_rhs = f32[] parameter(1) + ROOT add_root = f32[] add(add_lhs, add_rhs) + } + + f2_computation { + f2_p0 = f32[10]{0} parameter(0) + f2_mul = f32[10]{0} multiply(f2_p0, f2_p0) + f2_zero = f32[] constant(0) + ROOT f2_root = f32[] reduce(f2_mul, f2_zero), dimensions={0}, + to_apply=add_computation + } + + ENTRY entry { + p0 = f32[10]{0} parameter(0) + f1 = f32[10]{0} fusion(p0), kind=kLoop, calls=f1_computation + ROOT f2 = f32[] fusion(f1), kind=kInput, calls=f2_computation + } + )"; + auto module = tools::Parse(kModule).ValueOrDie(); + EXPECT_TRUE(FusionMerger().Run(module.get()).ValueOrDie()); + EXPECT_THAT(module->entry_computation()->root_instruction(), + op::Fusion(op::Parameter())); +} + } // namespace } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 
30c88c0a5d..065b3a0e31 100644 --- a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -535,6 +535,13 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { // If no operand has a compatible shape, prefer an operand that has // the same rank at least. for (const HloInstruction* operand : operands) { + // Skip tuple-shaped operands; calling ShapeUtil::Rank on a + // tuple-shaped Shape is illegal. Perhaps more correct would be to + // recurse into them, but TODO(kramerb): Remove this code after + // assigning layouts to fusion nodes. + if (ShapeUtil::IsTuple(operand->shape())) { + continue; + } if (ShapeUtil::Rank(*input_shape) == ShapeUtil::Rank(operand->shape())) { // Do not use CopyLayoutBetweenShapes because input_shape and -- GitLab From d93b843330593375907a554985c1f8ed77dae204 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 11:20:28 -0800 Subject: [PATCH 404/884] [XLA] Allocate and track memory in replicas separately. 
PiperOrigin-RevId: 187894473 --- .../xla/service/allocation_tracker.cc | 148 +++++++---- .../compiler/xla/service/allocation_tracker.h | 44 +++- tensorflow/compiler/xla/service/service.cc | 240 +++++++++--------- tensorflow/compiler/xla/service/service.h | 20 +- 4 files changed, 267 insertions(+), 185 deletions(-) diff --git a/tensorflow/compiler/xla/service/allocation_tracker.cc b/tensorflow/compiler/xla/service/allocation_tracker.cc index 7a75c02531..4f819a743c 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.cc +++ b/tensorflow/compiler/xla/service/allocation_tracker.cc @@ -34,40 +34,54 @@ StatusOr AllocationTracker::Register( std::unique_ptr shaped_buffer, const string& tag) { tensorflow::mutex_lock lock(mutex_); VLOG(2) << "Register"; - return RegisterInternal(std::move(shaped_buffer), tag); + std::vector> replicated_buffers; + replicated_buffers.emplace_back(std::move(shaped_buffer)); + return RegisterInternal(std::move(replicated_buffers), tag); +} + +StatusOr AllocationTracker::RegisterReplicatedBuffers( + std::vector> replicated_buffers, + const string& tag) { + tensorflow::mutex_lock lock(mutex_); + VLOG(2) << "RegisterReplicatedBuffers"; + return RegisterInternal(std::move(replicated_buffers), tag); } StatusOr AllocationTracker::RegisterInternal( - std::unique_ptr shaped_buffer, const string& tag) { + std::vector> replicated_buffers, + const string& tag) { VLOG(2) << "RegisterInternal(" - << "tag: \"" << tag << "\" " - << "shaped_buffer: " << *shaped_buffer; - if (shaped_buffer->platform() != backend_->platform()) { - return InvalidArgument( - "AllocationTracker for platform %s cannot register buffer from " - "platform %s", - backend_->platform()->Name().c_str(), - shaped_buffer->platform()->Name().c_str()); + << "tag: \"" << tag << "\" with " << replicated_buffers.size() + << " shaped_buffers."; + for (const auto& shaped_buffer : replicated_buffers) { + VLOG(2) << "shaped_buffer:" << *shaped_buffer; + if (shaped_buffer->platform() != 
backend_->platform()) { + return InvalidArgument( + "AllocationTracker for platform %s cannot register buffer from " + "platform %s", + backend_->platform()->Name().c_str(), + shaped_buffer->platform()->Name().c_str()); + } } int64 handle = next_handle_++; - std::vector shape_indices; - ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(), - [this, &shape_indices](const Shape& /*subshape*/, - const ShapeIndex& index) { - shape_indices.push_back(index); - }); - for (const ShapeIndex& index : shape_indices) { - AddAllocationOrIncrementRefCount(shaped_buffer->buffer(index), - shaped_buffer->device_ordinal()); + for (auto& shaped_buffer : replicated_buffers) { + std::vector shape_indices; + ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(), + [this, &shape_indices](const Shape& /*subshape*/, + const ShapeIndex& index) { + shape_indices.push_back(index); + }); + for (const ShapeIndex& index : shape_indices) { + AddAllocationOrIncrementRefCount(shaped_buffer->buffer(index), + shaped_buffer->device_ordinal()); + } + handle_to_shaped_buffers_[handle].emplace_back(std::move(shaped_buffer)); } + GlobalDataHandle result; result.set_handle(handle); - - handle_to_shaped_buffer_[handle] = std::move(shaped_buffer); - VLOG(2) << "handle: " << handle; - return result; } @@ -75,23 +89,35 @@ tensorflow::Status AllocationTracker::Unregister(const GlobalDataHandle& data) { tensorflow::mutex_lock lock(mutex_); VLOG(2) << "Unregister(" << "handle: " << data.handle() << ")"; - TF_ASSIGN_OR_RETURN(ShapedBuffer * shaped_buffer, ResolveInternal(data)); - std::vector shape_indices; - ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(), - [this, &shape_indices](const Shape& /*subshape*/, - const ShapeIndex& index) { - shape_indices.push_back(index); - }); - for (const ShapeIndex& index : shape_indices) { - TF_RETURN_IF_ERROR(DecrementRefCount(shaped_buffer->buffer(index), - shaped_buffer->device_ordinal())); + TF_ASSIGN_OR_RETURN(std::vector replicated_buffers, + 
ResolveInternal(data)); + for (const auto& shaped_buffer : replicated_buffers) { + std::vector shape_indices; + ShapeUtil::ForEachSubshape(shaped_buffer->on_device_shape(), + [this, &shape_indices](const Shape& /*subshape*/, + const ShapeIndex& index) { + shape_indices.push_back(index); + }); + for (const ShapeIndex& index : shape_indices) { + TF_RETURN_IF_ERROR(DecrementRefCount(shaped_buffer->buffer(index), + shaped_buffer->device_ordinal())); + } } + return Reset(data); +} - // Keep a nullptr as a tombstone for unregistered handles. This enables better - // error messages. That is, "handle has been deallocated" versus "handle does - // not exist". - handle_to_shaped_buffer_.at(data.handle()).reset(); - +Status AllocationTracker::Reset(const GlobalDataHandle& data) { + // Keep a nullptr as a tombstone for unregistered handles. This enables + // better error messages. That is, "handle has been deallocated" versus + // "handle does not exist". + auto it = handle_to_shaped_buffers_.find(data.handle()); + if (it == handle_to_shaped_buffers_.end()) { + return NotFound("no allocation record for global data handle: %lld", + data.handle()); + } + for (auto& shaped_buffer : it->second) { + shaped_buffer.reset(); + } return tensorflow::Status::OK(); } @@ -99,7 +125,11 @@ StatusOr> AllocationTracker::DeconstructTuple( const GlobalDataHandle& data) { tensorflow::mutex_lock lock(mutex_); - TF_ASSIGN_OR_RETURN(ShapedBuffer * shaped_buffer, ResolveInternal(data)); + TF_ASSIGN_OR_RETURN(std::vector replicated_buffers, + ResolveInternal(data)); + // We only need to care about replica id 0 here, since the GlobalDataHandle is + // the same for all buffers across replicas. 
+ const ShapedBuffer* shaped_buffer = replicated_buffers[0]; if (!ShapeUtil::IsTuple(shaped_buffer->on_host_shape())) { return InvalidArgument("global data handle %lld is not a tuple", data.handle()); @@ -122,37 +152,55 @@ StatusOr> AllocationTracker::DeconstructTuple( shaped_buffer->platform(), shaped_buffer->device_ordinal()); element_buffer->set_buffer(shaped_buffer->buffer(/*index=*/{i}), /*index=*/{}); + std::vector> replicated_buffers; + replicated_buffers.emplace_back(std::move(element_buffer)); TF_ASSIGN_OR_RETURN( GlobalDataHandle element_handle, - RegisterInternal(std::move(element_buffer), "deconstructed tuple")); + RegisterInternal(std::move(replicated_buffers), "deconstructed tuple")); element_handles.push_back(element_handle); } return std::move(element_handles); } -StatusOr AllocationTracker::Resolve( +StatusOr> AllocationTracker::Resolve( const GlobalDataHandle& data) { tensorflow::mutex_lock lock(mutex_); return AllocationTracker::ResolveInternal(data); } -StatusOr AllocationTracker::ResolveInternal( +StatusOr AllocationTracker::ResolveForReplica( + const GlobalDataHandle& data, int replica_id) { + tensorflow::mutex_lock lock(mutex_); + TF_ASSIGN_OR_RETURN(std::vector replicated_buffers, + ResolveInternal(data)); + if (replica_id >= replicated_buffers.size()) { + return InvalidArgument( + "Requesting buffer for replica %d, but found buffers only for %lu " + "replicas.", + replica_id, replicated_buffers.size()); + } + return replicated_buffers[replica_id]; +} + +StatusOr> AllocationTracker::ResolveInternal( const GlobalDataHandle& data) { VLOG(2) << "resolve:" << data.handle(); - auto it = handle_to_shaped_buffer_.find(data.handle()); - if (it == handle_to_shaped_buffer_.end()) { + auto it = handle_to_shaped_buffers_.find(data.handle()); + if (it == handle_to_shaped_buffers_.end()) { return NotFound("no allocation record for global data handle: %lld", data.handle()); } - ShapedBuffer* shaped_buffer = it->second.get(); - - if (shaped_buffer == 
nullptr) { - return InvalidArgument("global data handle %lld was previously deallocated", - data.handle()); + std::vector replicated_buffers; + for (const auto& shaped_buffer : it->second) { + if (shaped_buffer == nullptr) { + return InvalidArgument( + "global data handle %lld was previously deallocated", data.handle()); + } + replicated_buffers.push_back(shaped_buffer.get()); } - return shaped_buffer; + return replicated_buffers; } void AllocationTracker::AddAllocationOrIncrementRefCount( diff --git a/tensorflow/compiler/xla/service/allocation_tracker.h b/tensorflow/compiler/xla/service/allocation_tracker.h index 807af86949..038aee8541 100644 --- a/tensorflow/compiler/xla/service/allocation_tracker.h +++ b/tensorflow/compiler/xla/service/allocation_tracker.h @@ -43,10 +43,17 @@ class AllocationTracker { AllocationTracker(Backend* backend) : backend_(backend), next_handle_(1) {} // Registers a shaped buffer of device memory, and returns a corresponding - // handle that can be used for talking to XLA clients. + // handle that can be used for talking to XLA clients. The given shaped buffer + // will be treated as the buffer corresponding to the only replica. StatusOr Register( std::unique_ptr shaped_buffer, const string& tag); + // Registers a vector of shaped buffers of device memory, one per replica, and + // returns a corresponding handle that can be used for talking to XLA clients. + StatusOr RegisterReplicatedBuffers( + std::vector> replicated_buffers, + const string& tag); + // Unregister the allocation for the given data handle. Status Unregister(const GlobalDataHandle& data); @@ -54,9 +61,17 @@ class AllocationTracker { StatusOr> DeconstructTuple( const GlobalDataHandle& Data); - // Resolve a handle from an XLA client to a shaped buffer, or provide an error - // status to say whether it was not found (or found, but found deallocated). 
- StatusOr Resolve(const GlobalDataHandle& data); + // Resolve a handle from an XLA client to a vector of shaped buffers, one per + // replica, or provide an error status to say whether any of those buffers + // were not found (or found, but found deallocated). + StatusOr> Resolve( + const GlobalDataHandle& data); + + // Resolves a handle from an XLA client and replica id to a shaped buffer, or + // provides an error status to say whether it was not found (or found, but + // found deallocated). + StatusOr ResolveForReplica(const GlobalDataHandle& data, + int replica_id); private: // Data structure encapsulating single memory allocation on the device. @@ -74,13 +89,17 @@ class AllocationTracker { // Internal helper which resolves the given GlobalDataHandle to a // ShapedBuffer. - StatusOr ResolveInternal(const GlobalDataHandle& data) - EXCLUSIVE_LOCKS_REQUIRED(mutex_); + StatusOr> ResolveInternal( + const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_); - // Internal helper which registers a shaped buffer. + // Internal helper which registers a vector of shaped buffers, one per + // replica. StatusOr RegisterInternal( - std::unique_ptr shaped_buffer, const string& tag) - EXCLUSIVE_LOCKS_REQUIRED(mutex_); + std::vector> replicated_buffers, + const string& tag) EXCLUSIVE_LOCKS_REQUIRED(mutex_); + + // Resets the shaped buffers corresponding to the given handle. + Status Reset(const GlobalDataHandle& data) EXCLUSIVE_LOCKS_REQUIRED(mutex_); // Adds the given device address to the allocation tracker, or if it already // exists, then increment it's reference count. @@ -111,9 +130,10 @@ class AllocationTracker { tensorflow::gtl::FlatMap opaque_to_allocation_map_ GUARDED_BY(mutex_); - // A map from data handle to ShapedBuffer. - tensorflow::gtl::FlatMap> - handle_to_shaped_buffer_ GUARDED_BY(mutex_); + // A map from data handle to a vector of shaped buffers that represent the + // buffers for different replicas. 
+ tensorflow::gtl::FlatMap>> + handle_to_shaped_buffers_ GUARDED_BY(mutex_); TF_DISALLOW_COPY_AND_ASSIGN(AllocationTracker); }; diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 43d0f60598..25c2fe97e4 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -232,10 +232,14 @@ tensorflow::Status Service::ValidateResultShapeWithLayout( return ShapeUtil::ValidateShape(shape_with_layout); } -StatusOr> Service::ResolveAndValidateArguments( +StatusOr>> +Service::ResolveAndValidateArguments( tensorflow::gtl::ArraySlice arguments, - int device_ordinal) { - std::vector shaped_buffers; + tensorflow::gtl::ArraySlice + stream_executors) { + CHECK_EQ(options_.number_of_replicas(), stream_executors.size()); + std::vector> replicated_arguments; + replicated_arguments.resize(options_.number_of_replicas()); for (size_t i = 0; i < arguments.size(); ++i) { auto buffer_status = allocation_tracker_.Resolve(*arguments[i]); if (!buffer_status.ok()) { @@ -243,22 +247,25 @@ StatusOr> Service::ResolveAndValidateArguments( StrCat(buffer_status.status().error_message(), ", ", "failed to resolve allocation for parameter ", i)); } - const ShapedBuffer* shaped_buffer = buffer_status.ValueOrDie(); - - // Verify allocation is same platform and device as the execution. 
- if (shaped_buffer->platform() != execute_backend_->platform() || - shaped_buffer->device_ordinal() != device_ordinal) { - return InvalidArgument( - "argument %lu is on device %s:%d but computation will be executed " - "on device %s", - i, shaped_buffer->platform()->Name().c_str(), - shaped_buffer->device_ordinal(), - execute_backend_->device_name(device_ordinal).c_str()); + auto replicated_buffers = buffer_status.ValueOrDie(); + CHECK_EQ(options_.number_of_replicas(), replicated_buffers.size()); + for (int replica = 0; replica < options_.number_of_replicas(); ++replica) { + const ShapedBuffer* shaped_buffer = replicated_buffers[replica]; + int replica_device_ordinal = stream_executors[replica]->device_ordinal(); + // Verify allocation is same platform and device as the execution. + if (shaped_buffer->platform() != execute_backend_->platform() || + shaped_buffer->device_ordinal() != replica_device_ordinal) { + return InvalidArgument( + "argument %lu is on device %s:%d but computation will be executed " + "on device %s", + i, shaped_buffer->platform()->Name().c_str(), + shaped_buffer->device_ordinal(), + execute_backend_->device_name(replica_device_ordinal).c_str()); + } + replicated_arguments[replica].push_back(shaped_buffer); } - - shaped_buffers.push_back(shaped_buffer); } - return shaped_buffers; + return replicated_arguments; } StatusOr> Service::CreateModuleConfig( @@ -490,7 +497,8 @@ StatusOr> Service::BuildAndCacheExecutable( StatusOr> Service::ExecuteParallelAndRegisterResult( tensorflow::gtl::ArraySlice executables, - tensorflow::gtl::ArraySlice> arguments, + tensorflow::gtl::ArraySlice>> + arguments, Backend* backend, tensorflow::gtl::ArraySlice device_handles, tensorflow::gtl::ArraySlice result_tags, ExecutionProfile* profile) { @@ -513,6 +521,8 @@ Service::ExecuteParallelAndRegisterResult( for (int64 i = 0; i < executables.size(); i++) { // Stream executors for the replicas of the current computation. 
TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*backend, device_handles[i])); + CHECK_EQ(replicas.size(), arguments[i].size()); + std::vector> result_buffers; for (int64 replica = 0; replica < replicas.size(); ++replica) { TF_ASSIGN_OR_RETURN(Pool::SmartPtr stream, backend->BorrowStream(replicas[replica])); @@ -545,23 +555,20 @@ Service::ExecuteParallelAndRegisterResult( backend->StreamBorrower()); // Asynchronously launch the computation. - TF_ASSIGN_OR_RETURN( - std::unique_ptr result, - executables[i]->ExecuteAsyncOnStream(&run_options, arguments[i])); + TF_ASSIGN_OR_RETURN(std::unique_ptr result, + executables[i]->ExecuteAsyncOnStream( + &run_options, arguments[i][replica])); if (replica == 0 && profile != nullptr) { streams.back()->ThenStopTimer(timers.back().get()); } - // All replicas share the same device address for the result allocation, - // so only one of the replicas need to register the result handle. - if (replica == 0) { - TF_ASSIGN_OR_RETURN( - GlobalDataHandle handle, - allocation_tracker_.Register(std::move(result), result_tags[i])); - result_handles.push_back(handle); - } + result_buffers.emplace_back(std::move(result)); } + TF_ASSIGN_OR_RETURN(GlobalDataHandle handle, + allocation_tracker_.RegisterReplicatedBuffers( + std::move(result_buffers), result_tags[i])); + result_handles.push_back(handle); } // Wait for all executions to complete. @@ -627,9 +634,9 @@ Service::ExecuteParallelAndRegisterResult( StatusOr Service::ExecuteAndRegisterResult( Executable* executable, - const tensorflow::gtl::ArraySlice arguments, - Backend* backend, perftools::gputools::StreamExecutor* executor, - const string& result_tag, ExecutionProfile* profile) { + const tensorflow::gtl::ArraySlice> + arguments, + Backend* backend, const string& result_tag, ExecutionProfile* profile) { // Set up streams. 
std::vector::SmartPtr> streams; @@ -662,21 +669,26 @@ StatusOr Service::ExecuteAndRegisterResult( backend->inter_op_thread_pool()); } - std::unique_ptr result; if (options_.number_of_replicas() == 1) { - TF_ASSIGN_OR_RETURN(result, executable->ExecuteOnStreamWrapper( - &run_options[0], profile, arguments)); - } else { - // TODO(b/69985541): Support profiling also on this path. - std::vector> - repeated_arguments(options_.number_of_replicas(), arguments); - - TF_ASSIGN_OR_RETURN(auto results, executable->ExecuteOnStreams( - run_options, repeated_arguments)); - TF_RET_CHECK(!results.empty()); - result = std::move(results[0]); + TF_ASSIGN_OR_RETURN( + auto result, executable->ExecuteOnStreamWrapper(&run_options[0], + profile, arguments[0])); + return allocation_tracker_.Register(std::move(result), result_tag); + } + + // TODO(b/69985541): Support profiling also on this path. + + std::vector> + replicated_arguments; + for (const auto& arg : arguments) { + replicated_arguments.emplace_back(arg); } - return allocation_tracker_.Register(std::move(result), result_tag); + + TF_ASSIGN_OR_RETURN(auto results, executable->ExecuteOnStreams( + run_options, replicated_arguments)); + TF_RET_CHECK(!results.empty()); + return allocation_tracker_.RegisterReplicatedBuffers(std::move(results), + result_tag); } tensorflow::Status Service::SetReturnValue(const SetReturnValueRequest* arg, @@ -690,7 +702,7 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, ExecuteParallelResponse* result) { VLOG(1) << "running execute-parallel request: " << arg->ShortDebugString(); - std::vector> all_arguments; + std::vector>> all_arguments; std::vector> all_executors; std::vector versioned_handles; std::vector> module_configs; @@ -718,6 +730,14 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, return FailedPrecondition( "device handles must be given to execute parallel computations"); } + if (arg->requests_size() > 1 && + 
execution_options.device_handles_size() > 1) { + return InvalidArgument( + "Parallel requests with multiple device handles is not supported. " + "Found %d parallel requests, with request %lld containing %d device " + "handles.", + arg->requests_size(), i, execution_options.device_handles_size()); + } std::vector executors; for (const auto& device_handle : execution_options.device_handles()) { TF_ASSIGN_OR_RETURN(auto replicas, @@ -747,22 +767,26 @@ tensorflow::Status Service::ExecuteParallel(const ExecuteParallelRequest* arg, // In the case of partitioned computations, assume all arguments go on the // zeroth core. TF_ASSIGN_OR_RETURN( - std::vector arguments, - ResolveAndValidateArguments(request.arguments(), - executors[0]->device_ordinal())); + auto replicas, + Replicas(*execute_backend_, execution_options.device_handles(0))); + TF_ASSIGN_OR_RETURN( + std::vector> replicated_arguments, + ResolveAndValidateArguments(request.arguments(), replicas)); // Create an HloModuleConfig object for the computation, given the shape of - // the program and the argument allocations. + // the program and the argument allocations. Here, we care only about the + // shapes of the arguments, so, it is sufficient to use the arguments of + // replica 0. TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, - CreateModuleConfig(*program_shape, arguments, + CreateModuleConfig(*program_shape, replicated_arguments.front(), request.execution_options(), *user_computation)); VLOG(3) << "ExecuteParallel created HloModuleConfig computation layout: " << module_config->entry_computation_layout().ToString(); // Adds to the vectors to build and execute the computations after the loop. 
- all_arguments.push_back(arguments); - all_arguments.insert(all_arguments.end(), executors.size() - 1, {}); + all_arguments.push_back(replicated_arguments); + all_arguments.insert(all_arguments.end(), executors.size() - 1, {{}}); versioned_handles.push_back(versioned_handle); module_configs.push_back(std::move(module_config)); computation_names.insert(computation_names.end(), executors.size(), @@ -861,15 +885,18 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, std::shared_ptr program_shape, user_computation->ComputeProgramShape(versioned_handle.version)); + TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*execute_backend_, + SingleComputationDeviceHandle())); TF_ASSIGN_OR_RETURN( - std::vector arguments, - ResolveAndValidateArguments(arg->arguments(), - execute_backend_->default_device_ordinal())); + std::vector> replicated_arguments, + ResolveAndValidateArguments(arg->arguments(), replicas)); + // Since we care only about the shapes of the arguments, it is sufficient to + // use the arguments of replica 0. 
TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, - CreateModuleConfig(*program_shape, arguments, arg->execution_options(), - *user_computation)); + CreateModuleConfig(*program_shape, replicated_arguments.front(), + arg->execution_options(), *user_computation)); VLOG(3) << "Execute created HloModuleConfig computation layout: " << module_config->entry_computation_layout().ToString(); @@ -885,20 +912,21 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, executable->session_module()->set_execution_platform( execute_backend_->platform()->Name()); TF_RETURN_IF_ERROR(RecordArguments( - arguments, execute_backend_->default_stream_executor(), + replicated_arguments.front(), + execute_backend_->default_stream_executor(), execute_backend_->transfer_manager(), executable->session_module())); } TF_ASSIGN_OR_RETURN( *result->mutable_output(), ExecuteAndRegisterResult( - executable.get(), arguments, execute_backend_.get(), - execute_backend_->default_stream_executor(), + executable.get(), replicated_arguments, execute_backend_.get(), "result of " + user_computation->name(), result->mutable_profile())); if (executable->dumping()) { - TF_ASSIGN_OR_RETURN(const ShapedBuffer* result_buffer, - allocation_tracker_.Resolve(result->output())); + TF_ASSIGN_OR_RETURN( + const ShapedBuffer* result_buffer, + allocation_tracker_.ResolveForReplica(result->output(), 0)); TF_RETURN_IF_ERROR(RecordResult( *result_buffer, execute_backend_->default_stream_executor(), execute_backend_->transfer_manager(), executable->session_module())); @@ -926,15 +954,17 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, std::shared_ptr program_shape, user_computation->ComputeProgramShape(versioned_handle.version)); + TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*execute_backend_, + SingleComputationDeviceHandle())); + TF_RET_CHECK(!replicas.empty()); TF_ASSIGN_OR_RETURN( - std::vector arguments, - ResolveAndValidateArguments(arg->arguments(), - 
execute_backend_->default_device_ordinal())); + std::vector> replicated_arguments, + ResolveAndValidateArguments(arg->arguments(), replicas)); TF_ASSIGN_OR_RETURN( std::unique_ptr module_config, - CreateModuleConfig(*program_shape, arguments, arg->execution_options(), - *user_computation)); + CreateModuleConfig(*program_shape, replicated_arguments.front(), + arg->execution_options(), *user_computation)); VLOG(3) << "ExecuteAsync created HloModuleConfig computation layout: " << module_config->entry_computation_layout().ToString(); @@ -947,21 +977,17 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, versioned_handle, std::move(module_config), execute_backend_.get(), execute_backend_->default_stream_executor(), &profile)); - TF_ASSIGN_OR_RETURN(auto replicas, Replicas(*execute_backend_, - SingleComputationDeviceHandle())); - TF_RET_CHECK(!replicas.empty()); - // Set up streams. std::vector::SmartPtr> streams; - for (se::StreamExecutor* executor : replicas) { TF_ASSIGN_OR_RETURN(Pool::SmartPtr stream, execute_backend_->BorrowStream(executor)); streams.push_back(std::move(stream)); } - std::unique_ptr result_buffer; - for (const Pool::SmartPtr& stream : streams) { + std::vector> result_buffers; + for (size_t i = 0; i < streams.size(); ++i) { + const auto& stream = streams[i]; ExecutableRunOptions options; options.set_stream(stream.get()); options.set_allocator(execute_backend_->memory_allocator()); @@ -972,20 +998,17 @@ tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, ServiceExecutableRunOptions service_options( options, execute_backend_->StreamBorrower()); - TF_ASSIGN_OR_RETURN( - std::unique_ptr this_result_buffer, - executable->ExecuteAsyncOnStream(&service_options, arguments)); + TF_ASSIGN_OR_RETURN(std::unique_ptr this_result_buffer, + executable->ExecuteAsyncOnStream( + &service_options, replicated_arguments[i])); - // Take the first result. 
- if (result_buffer == nullptr) { - result_buffer = std::move(this_result_buffer); - } + result_buffers.emplace_back(std::move(this_result_buffer)); } TF_ASSIGN_OR_RETURN( GlobalDataHandle output, - allocation_tracker_.Register(std::move(result_buffer), - "result of " + user_computation->name())); + allocation_tracker_.RegisterReplicatedBuffers( + std::move(result_buffers), "result of " + user_computation->name())); *result->mutable_execution() = execution_tracker_.Register( execute_backend_.get(), std::move(streams), profile, output); @@ -1013,7 +1036,7 @@ tensorflow::Status Service::WaitForExecution(const WaitForExecutionRequest* arg, tensorflow::Status Service::TransferToClient(const TransferToClientRequest* arg, TransferToClientResponse* result) { TF_ASSIGN_OR_RETURN(const ShapedBuffer* shaped_buffer, - allocation_tracker_.Resolve(arg->data())); + allocation_tracker_.ResolveForReplica(arg->data(), 0)); const Shape* return_shape; if (arg->has_shape_with_layout()) { @@ -1074,37 +1097,24 @@ tensorflow::Status Service::TransferToServer(const TransferToServerRequest* arg, replicas, Replicas(*execute_backend_, SingleComputationDeviceHandle())); } - // All memory allocation is done on the first replica. The allocations in all - // other replicas mirror the firsts'. - int master_device_ordinal = replicas[0]->device_ordinal(); - TF_ASSIGN_OR_RETURN( - std::unique_ptr shaped_buffer, - execute_backend_->transfer_manager()->AllocateShapedBuffer( - shape, execute_backend_->memory_allocator(), master_device_ordinal)); - - // Transfer the data to the replicas. + // Allocate memory in each replica and transfer the data to all replicas. + std::vector> replicated_buffers; for (se::StreamExecutor* executor : replicas) { - if (executor->device_ordinal() == master_device_ordinal) { - TF_RETURN_IF_ERROR( - execute_backend_->transfer_manager()->TransferLiteralToDevice( - executor, *literal, *shaped_buffer)); - } else { - // The replica is not the master. 
Create an cloned shaped buffer with - // the replica's device ordinal. This is required because - // TransferLiteralToDevice verifies that the device ordinal of the shaped - // buffer matches that of the executor. - std::unique_ptr clone = - CloneShapedBufferOnDevice(*shaped_buffer, executor->device_ordinal()); - TF_RETURN_IF_ERROR( - execute_backend_->transfer_manager()->TransferLiteralToDevice( - executor, *literal, *clone)); - } + TF_ASSIGN_OR_RETURN( + std::unique_ptr shaped_buffer, + execute_backend_->transfer_manager()->AllocateShapedBuffer( + shape, execute_backend_->memory_allocator(), + executor->device_ordinal())); + TF_RETURN_IF_ERROR( + execute_backend_->transfer_manager()->TransferLiteralToDevice( + executor, *literal, *shaped_buffer)); + replicated_buffers.emplace_back(std::move(shaped_buffer)); } - TF_ASSIGN_OR_RETURN( - *result->mutable_data(), - allocation_tracker_.Register(std::move(shaped_buffer), - StrCat("TransferToServer literal of shape ", - ShapeUtil::HumanString(shape)))); + TF_ASSIGN_OR_RETURN(*result->mutable_data(), + allocation_tracker_.RegisterReplicatedBuffers( + std::move(replicated_buffers), + StrCat("TransferToServer literal of shape ", + ShapeUtil::HumanString(shape)))); return tensorflow::Status::OK(); } @@ -1287,7 +1297,7 @@ tensorflow::Status Service::ComputeConstant(const ComputeConstantRequest* arg, tensorflow::Status Service::GetShape(const GetShapeRequest* arg, GetShapeResponse* result) { TF_ASSIGN_OR_RETURN(const ShapedBuffer* buffer, - allocation_tracker_.Resolve(arg->data())); + allocation_tracker_.ResolveForReplica(arg->data(), 0)); *result->mutable_shape() = buffer->on_host_shape(); return tensorflow::Status::OK(); } diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index 6ce2419711..e047df2648 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -265,11 +265,14 @@ class Service : public ServiceInterface { static 
StatusOr> CreateComputeConstantBackend(); // Resolves the given argument handles in the allocation tracker and returns - // the corresponding allocations. The function also verifies that each - // allocation matches the execution platform and device ordinal. - StatusOr> ResolveAndValidateArguments( + // the corresponding allocations for every replica. The function also verifies + // that each allocation matches the execution platform and device ordinal of + // the corresponding replica. + StatusOr>> + ResolveAndValidateArguments( tensorflow::gtl::ArraySlice arguments, - int device_ordinal); + tensorflow::gtl::ArraySlice + stream_executors); // Create a Hlo module config for the given program shape and arguments. // execution_options is optional; if not given a default is used. @@ -314,16 +317,17 @@ class Service : public ServiceInterface { // ExecutionProfile object which will be filled in with profile data. StatusOr ExecuteAndRegisterResult( Executable* executable, - const tensorflow::gtl::ArraySlice arguments, - Backend* backend, perftools::gputools::StreamExecutor* executor, - const string& result_tag, ExecutionProfile* profile); + const tensorflow::gtl::ArraySlice> + arguments, + Backend* backend, const string& result_tag, ExecutionProfile* profile); // Runs the given executables with the given arguments and register the result // from each executable in the allocation tracker. The handles of the result // from the tracker are returned. StatusOr> ExecuteParallelAndRegisterResult( tensorflow::gtl::ArraySlice executables, - tensorflow::gtl::ArraySlice> arguments, + tensorflow::gtl::ArraySlice>> + arguments, Backend* backend, tensorflow::gtl::ArraySlice device_handles, tensorflow::gtl::ArraySlice result_tags, -- GitLab From 864ddbc9db7611633c7320691353136b4ff557bb Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 5 Mar 2018 11:23:29 -0800 Subject: [PATCH 405/884] Extract the EvaluateConstantTensorForEdge method from ShapeRefiner. 
This change introduces a new stand-alone function, EvaluateConstantTensor, pulled from ShapeRefiner. ShapeRefiner now calls this new function and the old functions are removed. I'm still depending on shape_refiner_test.cc for test coverage. This is the first step towards making smart_cond better able to evaluate constant tensors. PiperOrigin-RevId: 187894976 --- tensorflow/core/BUILD | 2 + .../core/common_runtime/constant_folding.h | 2 + .../core/common_runtime/eval_const_tensor.cc | 358 ++++++++++++++++++ .../core/common_runtime/eval_const_tensor.h | 66 ++++ .../core/common_runtime/shape_refiner.cc | 299 +-------------- .../core/common_runtime/shape_refiner.h | 14 - 6 files changed, 434 insertions(+), 307 deletions(-) create mode 100644 tensorflow/core/common_runtime/eval_const_tensor.cc create mode 100644 tensorflow/core/common_runtime/eval_const_tensor.h diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 3a436ff680..445cf5bc8a 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -2039,6 +2039,7 @@ tf_cuda_library( CORE_CPU_BASE_HDRS = GRAPH_HDRS + [ "common_runtime/device.h", + "common_runtime/eval_const_tensor.h", "common_runtime/graph_runner.h", "common_runtime/shape_refiner.h", "framework/versions.h", @@ -2047,6 +2048,7 @@ CORE_CPU_BASE_HDRS = GRAPH_HDRS + [ tf_cuda_library( name = "core_cpu_base", srcs = [ + "common_runtime/eval_const_tensor.cc", "common_runtime/shape_refiner.cc", "common_runtime/shape_refiner.h", "framework/versions.h", diff --git a/tensorflow/core/common_runtime/constant_folding.h b/tensorflow/core/common_runtime/constant_folding.h index b1e1fb8319..84598880bb 100644 --- a/tensorflow/core/common_runtime/constant_folding.h +++ b/tensorflow/core/common_runtime/constant_folding.h @@ -22,6 +22,8 @@ limitations under the License. #include "tensorflow/core/graph/graph.h" #include "tensorflow/core/platform/env.h" +// TODO(skyewm): can this be combined with EvaluateConstantTensor? 
+ namespace tensorflow { // This generator type is used to generate a name for the newly folded node diff --git a/tensorflow/core/common_runtime/eval_const_tensor.cc b/tensorflow/core/common_runtime/eval_const_tensor.cc new file mode 100644 index 0000000000..6370bb5028 --- /dev/null +++ b/tensorflow/core/common_runtime/eval_const_tensor.cc @@ -0,0 +1,358 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/eval_const_tensor.h" + +#include + +#include "tensorflow/core/common_runtime/graph_runner.h" +#include "tensorflow/core/common_runtime/shape_refiner.h" +#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/kernels/bounds_check.h" + +namespace tensorflow { + +using shape_inference::InferenceContext; + +namespace { + +// Tries to infer tensor output based on the input shapes of the node. In some +// cases, the shapes of the inputs are sufficient for inferring the contents of +// the output tensor. For example, a Shape op with fully defined input shapes +// can have its output tensor inferred. 
+Status TryToInferTensorOutputFromInputShapes(const Edge& edge, + const ShapeRefiner& refiner, + Tensor* output, bool* success) { + *success = false; + const Node* node = edge.src(); + InferenceContext* c = refiner.GetContext(node); + if (c == nullptr) { + return errors::FailedPrecondition("Node does not have context."); + } + + if (node->type_string() == "Shape") { + // If input shapes to the shape op are fully defined, + // we can infer the shape op's output tensor. + bool fully_defined_inputs = c->FullyDefined(c->input(0)); + if (fully_defined_inputs) { + int input_rank = c->Rank(c->input(0)); + Tensor t(node->output_type(0), TensorShape({input_rank})); + if (node->output_type(0) == DT_INT32) { + auto flat = t.flat(); + for (int i = 0; i < input_rank; i++) { + int64 dimension = c->Value(c->Dim(c->input(0), i)); + if (!FastBoundsCheck(dimension, std::numeric_limits::max())) { + return errors::InvalidArgument( + "Shape has output type int32, but dimension exceeds maximum " + "int32 value"); + } + flat(i) = static_cast(dimension); + } + } else if (node->output_type(0) == DT_INT64) { + auto flat = t.flat(); + for (int i = 0; i < input_rank; i++) { + flat(i) = c->Value(c->Dim(c->input(0), i)); + } + } else { + return errors::FailedPrecondition( + "Shape has output type that is not int32 or int64"); + } + *output = t; + *success = true; + } + } else if (node->type_string() == "Rank") { + bool rank_known = c->RankKnown(c->input(0)); + if (rank_known) { + int32 input_rank = c->Rank(c->input(0)); + Tensor t(node->output_type(0), TensorShape({})); + t.flat()(0) = input_rank; + *output = t; + *success = true; + } + } else if (node->type_string() == "Size") { + bool fully_defined_inputs = c->FullyDefined(c->input(0)); + if (fully_defined_inputs) { + int32 rank = c->Rank(c->input(0)); + Tensor t(node->output_type(0), TensorShape({})); + int64 size = 1; + for (int i = 0; i < rank; i++) { + size *= c->Value(c->Dim(c->input(0), i)); + } + if (node->output_type(0) == DT_INT32) { 
+ if (!FastBoundsCheck(size, std::numeric_limits::max())) { + return errors::InvalidArgument( + "Size has output type int32, but size exceeds maximum int32 " + "value"); + } + t.flat()(0) = static_cast(size); + } else if (node->output_type(0) == DT_INT64) { + t.flat()(0) = size; + } else { + return errors::FailedPrecondition( + "Size has output type that is not int32 or int64"); + } + *output = t; + *success = true; + } + } + return Status::OK(); +} + +// Extracts the subgraph ending at 'target_node' that is statically computable +// and inserts into 'out_graph'. If statically computable, 'is_constant_graph' +// will be set to true. +Status ExtractConstantSubgraph( + const Node& target_node, const ShapeRefiner& refiner, + const std::unordered_map* cached_values, Graph* out_graph, + bool* is_constant_graph, + std::vector>* const_inputs) { + *is_constant_graph = false; + std::unordered_set const_inputs_added; + + if (target_node.op_def().is_stateful()) { + return Status::OK(); + } + + if (target_node.type_string() == "PlaceholderWithDefault") { + return Status::OK(); + } + + // TODO(skyewm): more of the filtering applied in input nodes below should be + // applied to target_node here + + // Identify the possibly constant subgraph by recursively iterating backwards + // through the inputs to 'target_node' until we either 1) find an already + // existing input to our subgraph 'const_inputs', 2) Discover our graph is not + // constant, or 3) Hit a root node. + + struct NodeAndRecursed { + Node* new_node = nullptr; + bool recursed = false; + }; + + std::map old_to_new_and_recursed; + Node* target_node_copy = out_graph->CopyNode(&target_node); + old_to_new_and_recursed[&target_node].new_node = target_node_copy; + old_to_new_and_recursed[&target_node].recursed = true; + + // Add the target node's inputs to seed the recursion. + std::deque edges_to_visit; + for (const Edge* e : target_node.in_edges()) { + // TODO(vrv): What do we do about control edges? 
Based on our + // definition of a constant graph, we should be free to ignore + // control edges since the order in which a constant graph is + // executed should be the same regardless of when nodes run: we + // should only need to recurse down data edges. + if (e->IsControlEdge()) continue; + edges_to_visit.push_back(e); + } + + *is_constant_graph = true; + + // Iterate over the set of edges to visit (backwards). + while (!edges_to_visit.empty()) { + const Edge* current_edge = edges_to_visit.front(); + edges_to_visit.pop_front(); + Node* current_node = current_edge->src(); + + // If the node is stateful, assume the graph is not constant. + if (current_node->op_def().is_stateful()) { + *is_constant_graph = false; + return Status::OK(); + } + + // During construction or import from GraphConstructor, back edges may not + // be filled in. Don't constant fold through merges at all for now. + if (IsMerge(current_node)) { + *is_constant_graph = false; + return Status::OK(); + } + + // Don't constant fold enter/exit currently either, as it's easy to end + // up with a partial frame. + if (IsEnter(current_node) || IsExit(current_node)) { + *is_constant_graph = false; + return Status::OK(); + } + + // Placeholders should never be constant folded because their outputs are + // fed by the user. Note that "Placeholder" nodes have no inputs so are + // handled below. + if (current_node->type_string() == "PlaceholderWithDefault") { + *is_constant_graph = false; + return Status::OK(); + } + + // If there is nothing more to recurse down, see if + // the generator node is a constant. + if (current_node->num_inputs() == 0) { + if (!current_node->IsConstant()) { + // Generator node is not a constant, so subgraph is not + // constant. + *is_constant_graph = false; + return Status::OK(); + } + } + + // Either the node is a constant, or the node is a potential + // intermediate node on the path from a constant. + // + // Add a copy of its node and a new edge to the new subgraph. 
+ + // Get or create the version of 'current_node' in the new graph. + Node* current_node_copy; + // This gets or creates the NodeAndRecursed entry for current_node. + NodeAndRecursed* node_and_recursed = &old_to_new_and_recursed[current_node]; + if (node_and_recursed->new_node == nullptr) { + // First time processing this node. + current_node_copy = out_graph->CopyNode(current_node); + // Track the mapping from the original node to the new one. + node_and_recursed->new_node = current_node_copy; + } else { + current_node_copy = node_and_recursed->new_node; + } + + // Add the edge to the destination node. + { + auto it = old_to_new_and_recursed.find(current_edge->dst()); + if (it == old_to_new_and_recursed.end()) { + return errors::Internal( + "Could not find mapping from old to new copy of destination node: ", + current_edge->dst()->name()); + } + Node* dst_copy = it->second.new_node; + + out_graph->AddEdge(current_node_copy, current_edge->src_output(), + dst_copy, current_edge->dst_input()); + } + + const string& output_tensor_name = + strings::StrCat(current_node->name(), ":", current_edge->src_output()); + + // Some tensor values can be inferred. For example, a shape op + // with input shapes fully defined can have its output tensor inferred. + Tensor tensor_inferred; + bool successfully_inferred_tensor = false; + TF_RETURN_IF_ERROR(TryToInferTensorOutputFromInputShapes( + *current_edge, refiner, &tensor_inferred, + &successfully_inferred_tensor)); + if (successfully_inferred_tensor) { + const_inputs->emplace_back(output_tensor_name, tensor_inferred); + const_inputs_added.insert(output_tensor_name); + continue; + } + + // If we have a copy of the input tensor materialized already, + // then add to the list of inputs to feed and do not recurse further. 
+ if (cached_values != nullptr) { + auto it = cached_values->find(output_tensor_name); + if (it != cached_values->end() && + const_inputs_added.count(output_tensor_name) == 0) { + const_inputs->emplace_back(output_tensor_name, it->second); + const_inputs_added.insert(output_tensor_name); + continue; + } + } + + // If this node's inputs have not been processed already, do so now. + if (!node_and_recursed->recursed) { + node_and_recursed->recursed = true; + for (const Edge* e : current_node->in_edges()) { + if (e->IsControlEdge()) continue; + edges_to_visit.push_back(e); + } + } + } + + return Status::OK(); +} + +} // namespace + +Status EvaluateConstantTensor(OutputTensor tensor, const ShapeRefiner& refiner, + const OpRegistryInterface& ops, + int32 graph_def_version, bool* evaluated, + Tensor* result, GraphRunner* graph_runner, + std::unordered_map* cached_values, + int64 max_cached_value_size, + bool disable_constant_propagation) { + *evaluated = false; + const Node* src = tensor.node; + + // Simple case: the source node is a constant + if (src->IsConstant()) { + if (result->FromProto(src->def().attr().at("value").tensor())) { + *evaluated = true; + return Status::OK(); + } + } + + if (disable_constant_propagation) { + return Status::OK(); + } + + bool is_constant_graph = false; + Graph subgraph(&ops); + auto versions = subgraph.versions(); + versions.set_producer(graph_def_version); + subgraph.set_versions(versions); + + std::vector> const_inputs; + TF_RETURN_IF_ERROR(ExtractConstantSubgraph(*src, refiner, cached_values, + &subgraph, &is_constant_graph, + &const_inputs)); + if (!is_constant_graph) { + return Status::OK(); + } + const string output_tensor_name = + strings::StrCat(src->name(), ":", tensor.index); + std::vector outputs; + + std::unique_ptr graph_runner_storage; + if (graph_runner == nullptr) { + // TODO(skyewm): Convert to std::make_unique when available. 
+ graph_runner_storage.reset(new GraphRunner(Env::Default())); + graph_runner = graph_runner_storage.get(); + } + + // NOTE; we should pass in a function library runtime if we want + // to support constant-expression evaluation on functions. + Status s = graph_runner->Run(&subgraph, nullptr /* function_library */, + const_inputs, {output_tensor_name}, &outputs); + + // If all kernels in the constant graph are not registered + // in the process, GraphRunner::Run may fail, in which case + // we cannot propagate constants, so this is best-effort. + if (s.ok()) { + *result = outputs[0]; + *evaluated = true; + + // We memoize (small) constants evaluated so far, so + // ExtractConstantSubgraph can avoid extracting the full + // subgraph. As we build up large graphs, this avoids + // repeated computation of the early parts of a constant + // graph. + if (cached_values != nullptr && + outputs[0].TotalBytes() <= max_cached_value_size) { + (*cached_values)[output_tensor_name] = outputs[0]; + } + } + return Status::OK(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/common_runtime/eval_const_tensor.h b/tensorflow/core/common_runtime/eval_const_tensor.h new file mode 100644 index 0000000000..fca5a23569 --- /dev/null +++ b/tensorflow/core/common_runtime/eval_const_tensor.h @@ -0,0 +1,66 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_COMMON_RUNTIME_EVAL_CONST_TENSOR_H_ +#define TENSORFLOW_CORE_COMMON_RUNTIME_EVAL_CONST_TENSOR_H_ + +#include "tensorflow/core/graph/graph.h" +#include "tensorflow/core/lib/core/status.h" + +// TODO(skyewm): can this be combined with ConstantFold? + +namespace tensorflow { + +class GraphRunner; +class OpRegistryInterface; +class ShapeRefiner; +class Tensor; + +// Attempts to evaluate `tensor`. This will only be possible if `tensor` doesn't +// depend on any graph inputs (this function is safe to call if this isn't the +// case though). +// +// If the evaluation is successful, `evaluated` will be set to true and +// `tensor`s value returned in `result`. Otherwise `evaluated` will be set to +// false. An error status is returned if something is wrong with the graph or +// input. Note that `evaluated` may set to false if Status::OK() is returned. +// +// Params: +// tensor - the tensor to be evaluated. +// refiner - used to fetch the InferenceContexts for nodes in the graph. +// ops - the OpRegistryInterface for the graph. +// graph_def_version - the producer version of the graph. +// evaluated - output param indicating whether evaluation was successful. +// result - output param containing the result if evaluated is true. +// graph_runner - optional. If not set, a GraphRunner will be created for +// evaluating tensor. This can be set to avoid creating a new GraphRunner +// for every call. +// cached_values - optional. This can be used to cache evaluated results +// across calls, to avoid evaluating the same parts of the graph multiple +// times. +// max_cached_value_size - optional. If `cached_values` is set, the maximum +// result size to cache. +// disable_constant_propagation - if true, only Const node values will be +// returned. 
+Status EvaluateConstantTensor( + OutputTensor tensor, const ShapeRefiner& refiner, + const OpRegistryInterface& ops, int32 graph_def_version, bool* evaluated, + Tensor* result, GraphRunner* graph_runner = nullptr, + std::unordered_map* cached_values = nullptr, + int64 max_cached_value_size = 1024, + bool disable_constant_propagation = false); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_COMMON_RUNTIME_EVAL_CONST_TENSOR_H_ diff --git a/tensorflow/core/common_runtime/shape_refiner.cc b/tensorflow/core/common_runtime/shape_refiner.cc index 2acaa31d32..cef50be3b1 100644 --- a/tensorflow/core/common_runtime/shape_refiner.cc +++ b/tensorflow/core/common_runtime/shape_refiner.cc @@ -19,6 +19,7 @@ limitations under the License. #include #include +#include "tensorflow/core/common_runtime/eval_const_tensor.h" #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/node_def.pb.h" #include "tensorflow/core/framework/tensor.h" @@ -407,301 +408,13 @@ Status ShapeRefiner::EvaluateConstantTensorForEdge(const Node* node, int dst_idx, bool* evaluated, Tensor* result) { *evaluated = false; - const Edge* input_edge; TF_RETURN_IF_ERROR(node->input_edge(dst_idx, &input_edge)); - - // Simple case: the source node is a constant - const Node* src = input_edge->src(); - if (src->IsConstant()) { - if (result->FromProto(src->def().attr().at("value").tensor())) { - *evaluated = true; - return Status::OK(); - } - } - - if (disable_constant_propagation_) { - return Status::OK(); - } - - bool is_constant_graph = false; - Graph subgraph(ops_registry_); - auto versions = subgraph.versions(); - versions.set_producer(graph_def_version_); - subgraph.set_versions(versions); - - // We identify the possibly constant subgraph to evaluate by - // recursively iterating backwards through the inputs to 'node' - // until we either 1) find an already existing input to our subgraph - // (filled in `const_inputs`), 2) Discover our graph is not constant, - // or 3) 
Hit a root node. - std::vector> const_inputs; - TF_RETURN_IF_ERROR(ExtractConstantSubgraph( - input_edge->src(), &subgraph, &is_constant_graph, &const_inputs)); - if (!is_constant_graph) { - return Status::OK(); - } - const string output_tensor_name = - strings::StrCat(input_edge->src()->name(), ":", input_edge->src_output()); - std::vector outputs; - - // NOTE; we should pass in a function library runtime if we want - // to support constant-expression evaluation on functions. - Status s = graph_runner_.Run(&subgraph, nullptr /* function_library */, - const_inputs, {output_tensor_name}, &outputs); - - // If all kernels in the constant graph are not registered - // in the process, GraphRunner::Run may fail, in which case - // we cannot propagate constants, so this is best-effort. - if (s.ok()) { - *result = outputs[0]; - *evaluated = true; - - // We memoize (small) constants evaluated so far, so - // ExtractConstantSubgraph can avoid extracting the full - // subgraph. As we build up large graphs, this avoids - // repeated computation of the early parts of a constant - // graph. - if (outputs[0].TotalBytes() <= kMaxTensorSize) { - const_tensor_map_[output_tensor_name] = outputs[0]; - } - } - return Status::OK(); -} - -Status ShapeRefiner::TryToInferTensorOutputFromInputShapes(const Edge* edge, - Tensor* output, - bool* success) { - *success = false; - const Node* node = edge->src(); - auto it = node_to_context_.find(node); - if (it == node_to_context_.end()) { - return errors::FailedPrecondition("Node does not have context."); - } - InferenceContext* c = it->second->get_context(); - - if (node->type_string() == "Shape") { - // If input shapes to the shape op are fully defined, - // we can infer the shape op's output tensor. 
- bool fully_defined_inputs = c->FullyDefined(c->input(0)); - if (fully_defined_inputs) { - int input_rank = c->Rank(c->input(0)); - Tensor t(node->output_type(0), TensorShape({input_rank})); - if (node->output_type(0) == DT_INT32) { - auto flat = t.flat(); - for (int i = 0; i < input_rank; i++) { - int64 dimension = c->Value(c->Dim(c->input(0), i)); - if (!FastBoundsCheck(dimension, std::numeric_limits::max())) { - return errors::FailedPrecondition( - "Shape has output type int32, but dimension exceeds maximum " - "int32 value"); - } - flat(i) = static_cast(dimension); - } - } else if (node->output_type(0) == DT_INT64) { - auto flat = t.flat(); - for (int i = 0; i < input_rank; i++) { - flat(i) = c->Value(c->Dim(c->input(0), i)); - } - } else { - return errors::FailedPrecondition( - "Shape has output type that is not int32 or int64"); - } - *output = t; - *success = true; - } - } else if (node->type_string() == "Rank") { - bool rank_known = c->RankKnown(c->input(0)); - if (rank_known) { - int32 input_rank = c->Rank(c->input(0)); - Tensor t(node->output_type(0), TensorShape({})); - t.flat()(0) = input_rank; - *output = t; - *success = true; - } - } else if (node->type_string() == "Size") { - bool fully_defined_inputs = c->FullyDefined(c->input(0)); - if (fully_defined_inputs) { - int32 rank = c->Rank(c->input(0)); - Tensor t(node->output_type(0), TensorShape({})); - int64 size = 1; - for (int i = 0; i < rank; i++) { - size *= c->Value(c->Dim(c->input(0), i)); - } - if (node->output_type(0) == DT_INT32) { - if (!FastBoundsCheck(size, std::numeric_limits::max())) { - return errors::FailedPrecondition( - "Size has output type int32, but size exceeds maximum int32 " - "value"); - } - t.flat()(0) = static_cast(size); - } else if (node->output_type(0) == DT_INT64) { - t.flat()(0) = size; - } else { - return errors::FailedPrecondition( - "Size has output type that is not int32 or int64"); - } - *output = t; - *success = true; - } - } - return Status::OK(); -} - -Status 
ShapeRefiner::ExtractConstantSubgraph( - Node* target_node, Graph* out_graph, bool* is_constant_graph, - std::vector>* const_inputs) { - *is_constant_graph = false; - std::unordered_set const_inputs_added; - - if (target_node->op_def().is_stateful()) { - return Status::OK(); - } - - if (target_node->type_string() == "PlaceholderWithDefault") { - return Status::OK(); - } - - // TODO(skyewm): more of the filtering applied in input nodes below should be - // applied to target_node here - - struct NodeAndRecursed { - Node* new_node = nullptr; - bool recursed = false; - }; - - std::map old_to_new_and_recursed; - Node* target_node_copy = out_graph->CopyNode(target_node); - old_to_new_and_recursed[target_node].new_node = target_node_copy; - old_to_new_and_recursed[target_node].recursed = true; - - // Add the target node's inputs to seed the recursion. - std::deque edges_to_visit; - for (const Edge* e : target_node->in_edges()) { - // TODO(vrv): What do we do about control edges? Based on our - // definition of a constant graph, we should be free to ignore - // control edges since the order in which a constant graph is - // executed should be the same regardless of when nodes run: we - // should only need to recurse down data edges. - if (e->IsControlEdge()) continue; - edges_to_visit.push_back(e); - } - - *is_constant_graph = true; - - // Iterate over the set of edges to visit (backwards). - while (!edges_to_visit.empty()) { - const Edge* current_edge = edges_to_visit.front(); - edges_to_visit.pop_front(); - Node* current_node = current_edge->src(); - - // If the node is stateful, assume the graph is not constant. - if (current_node->op_def().is_stateful()) { - *is_constant_graph = false; - return Status::OK(); - } - - // During construction or import from GraphConstructor, back edges may not - // be filled in. Don't constant fold through merges at all for now. 
- if (IsMerge(current_node)) { - *is_constant_graph = false; - return Status::OK(); - } - - // Don't constant fold enter/exit currently either, as it's easy to end - // up with a partial frame. - if (IsEnter(current_node) || IsExit(current_node)) { - *is_constant_graph = false; - return Status::OK(); - } - - // Placeholders should never be constant folded because their outputs are - // fed by the user. Note that "Placeholder" nodes have no inputs so are - // handled below. - if (current_node->type_string() == "PlaceholderWithDefault") { - *is_constant_graph = false; - return Status::OK(); - } - - // If there is nothing more to recurse down, see if - // the generator node is a constant. - if (current_node->num_inputs() == 0) { - if (!current_node->IsConstant()) { - // Generator node is not a constant, so subgraph is not - // constant. - *is_constant_graph = false; - return Status::OK(); - } - } - - // Either the node is a constant, or the node is a potential - // intermediate node on the path from a constant. - // - // Add a copy of its node and a new edge to the new subgraph. - - // Get or create the version of 'current_node' in the new graph. - Node* current_node_copy; - // This gets or creates the NodeAndRecursed entry for current_node. - NodeAndRecursed* node_and_recursed = &old_to_new_and_recursed[current_node]; - if (node_and_recursed->new_node == nullptr) { - // First time processing this node. - current_node_copy = out_graph->CopyNode(current_node); - // Track the mapping from the original node to the new one. - node_and_recursed->new_node = current_node_copy; - } else { - current_node_copy = node_and_recursed->new_node; - } - - // Add the edge to the destination node. 
- { - auto it = old_to_new_and_recursed.find(current_edge->dst()); - if (it == old_to_new_and_recursed.end()) { - return errors::Internal( - "Could not find mapping from old to new copy of destination node: ", - current_edge->dst()->name()); - } - Node* dst_copy = it->second.new_node; - - out_graph->AddEdge(current_node_copy, current_edge->src_output(), - dst_copy, current_edge->dst_input()); - } - - const string& output_tensor_name = - strings::StrCat(current_node->name(), ":", current_edge->src_output()); - - // Some tensor values can be inferred. For example, a shape op - // with input shapes fully defined can have its output tensor inferred. - Tensor tensor_inferred; - bool successfully_inferred_tensor = false; - TF_RETURN_IF_ERROR(TryToInferTensorOutputFromInputShapes( - current_edge, &tensor_inferred, &successfully_inferred_tensor)); - if (successfully_inferred_tensor) { - const_inputs->emplace_back(output_tensor_name, tensor_inferred); - const_inputs_added.insert(output_tensor_name); - continue; - } - - // If we have a copy of the input tensor materialized already, - // then add to the list of inputs to feed and do not recurse further. - auto it = const_tensor_map_.find(output_tensor_name); - if (it != const_tensor_map_.end() && - const_inputs_added.count(output_tensor_name) == 0) { - const_inputs->emplace_back(output_tensor_name, it->second); - const_inputs_added.insert(output_tensor_name); - continue; - } - - // If this node's inputs have not been processed already, do so now. 
- if (!node_and_recursed->recursed) { - node_and_recursed->recursed = true; - for (const Edge* e : current_node->in_edges()) { - if (e->IsControlEdge()) continue; - edges_to_visit.push_back(e); - } - } - } - - return Status::OK(); + OutputTensor tensor(input_edge->src(), input_edge->src_output()); + return EvaluateConstantTensor(tensor, *this, *ops_registry_, + graph_def_version_, evaluated, result, + &graph_runner_, &const_tensor_map_, + kMaxTensorSize, disable_constant_propagation_); } Status ShapeRefiner::ConstantPartialShape(InferenceContext* target_context, diff --git a/tensorflow/core/common_runtime/shape_refiner.h b/tensorflow/core/common_runtime/shape_refiner.h index 75eb5bf0d2..d49c4373f0 100644 --- a/tensorflow/core/common_runtime/shape_refiner.h +++ b/tensorflow/core/common_runtime/shape_refiner.h @@ -215,20 +215,6 @@ class ShapeRefiner { bool keep_nested_shapes, ExtendedInferenceContext* outer_context); - // Tries to infer tensor output based on the input shapes of the node. In some - // cases, the shapes of the inputs are sufficient for inferring the contents - // of the output tensor. For example, a Shape op with fully defined input - // shapes can have its output tensor inferred. - Status TryToInferTensorOutputFromInputShapes(const Edge* edge, Tensor* output, - bool* success); - - // Extracts the subgraph ending at 'node' that is statically - // computable and inserts into 'out_graph'. If statically computable, - // 'is_constant_graph' will be true. - Status ExtractConstantSubgraph( - Node* node, Graph* out_graph, bool* is_constant_graph, - std::vector>* const_inputs) TF_MUST_USE_RESULT; - Status EvaluateConstantTensorForEdge(const Node* node, int dst_idx, bool* evaluated, Tensor* result); -- GitLab From ca7598d24d2647de7a7dba7e06f1ac695a733b26 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Mon, 5 Mar 2018 11:28:17 -0800 Subject: [PATCH 406/884] Don't log an error if we can't set HTTP/2. 
PiperOrigin-RevId: 187895652 --- tensorflow/core/platform/cloud/curl_http_request.cc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 9bc06d56ae..b4e1193c21 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -142,10 +142,13 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env) TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L), "Disabling signals"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, - CURL_HTTP_VERSION_2_0), - "Setting HTTP version"); + // We don't log an error here because HTTP/2 support may not be built into + // cURL, and we'd spam the logs. + // + // TODO(jhseu): Enable HTTP/2. + CURLcodeToStatus(libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, + CURL_HTTP_VERSION_2_0)) + .IgnoreError(); // Set up the progress meter. 
TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( -- GitLab From 167887efd7721934dedc5fb9204f49eb49b6f168 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 5 Mar 2018 11:33:20 -0800 Subject: [PATCH 407/884] Shape function bug in tensor_list_stack PiperOrigin-RevId: 187896505 --- tensorflow/core/ops/list_ops.cc | 4 ++-- tensorflow/python/kernel_tests/list_ops_test.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/ops/list_ops.cc b/tensorflow/core/ops/list_ops.cc index 3487c955cb..0c16abd369 100644 --- a/tensorflow/core/ops/list_ops.cc +++ b/tensorflow/core/ops/list_ops.cc @@ -135,9 +135,9 @@ REGISTER_OP("TensorListStack") } shape_inference::ShapeHandle ignored; TF_RETURN_IF_ERROR(c->Merge(s, list_shape_type.shape, &ignored)); - if (!c->FullyDefined(s) || !c->FullyDefined(list_shape_type.shape)) { + if (!c->FullyDefined(list_shape_type.shape)) { return errors::InvalidArgument( - "Can only gather from a list with fully defined shapes."); + "Can only stack a list with fully defined shapes."); } s = list_shape_type.shape; } diff --git a/tensorflow/python/kernel_tests/list_ops_test.py b/tensorflow/python/kernel_tests/list_ops_test.py index 1577b7bc80..8040ea37a7 100644 --- a/tensorflow/python/kernel_tests/list_ops_test.py +++ b/tensorflow/python/kernel_tests/list_ops_test.py @@ -123,6 +123,16 @@ class ListOpsTest(test_util.TensorFlowTestCase): l_cpu, element_dtype=dtypes.float32)[1], 2.0) + def testGraphStack(self): + with context.graph_mode(), self.test_session(): + tl = list_ops.empty_tensor_list( + element_shape=constant_op.constant([1], dtype=dtypes.int32), + element_dtype=dtypes.int32) + tl = list_ops.tensor_list_push_back(tl, [1]) + self.assertAllEqual( + list_ops.tensor_list_stack(tl, element_dtype=dtypes.int32).eval(), + [[1]]) + def testSerialize(self): # pylint: disable=g-import-not-at-top try: -- GitLab From 41d6bd3a1dde0484eefd785b0e09cbf852accb26 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 5 Mar 2018 
11:57:04 -0800 Subject: [PATCH 408/884] [XLA] Whitelist send/recv in BF16 passes. PiperOrigin-RevId: 187899955 --- .../compiler/xla/service/bfloat16_conversion_folding.cc | 4 ++++ tensorflow/compiler/xla/service/bfloat16_propagation.cc | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc index cde990e176..432448e9bb 100644 --- a/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc +++ b/tensorflow/compiler/xla/service/bfloat16_conversion_folding.cc @@ -147,6 +147,10 @@ Status BFloat16ConversionFoldingVisitor::DefaultAction(HloInstruction* hlo) { hlo->opcode() == HloOpcode::kGetTupleElement || // hlo->opcode() == HloOpcode::kInfeed || // hlo->opcode() == HloOpcode::kOutfeed || // + hlo->opcode() == HloOpcode::kSend || // + hlo->opcode() == HloOpcode::kSendDone || // + hlo->opcode() == HloOpcode::kRecv || // + hlo->opcode() == HloOpcode::kRecvDone || // hlo->opcode() == HloOpcode::kConstant || // hlo->opcode() == HloOpcode::kParameter || // hlo->opcode() == HloOpcode::kFusion || // diff --git a/tensorflow/compiler/xla/service/bfloat16_propagation.cc b/tensorflow/compiler/xla/service/bfloat16_propagation.cc index 7708504dc9..531f36e8c5 100644 --- a/tensorflow/compiler/xla/service/bfloat16_propagation.cc +++ b/tensorflow/compiler/xla/service/bfloat16_propagation.cc @@ -229,6 +229,10 @@ void BFloat16Propagation::DetermineAndMutateInstructionPrecision( // or assumptions for them. 
if (hlo->opcode() == HloOpcode::kInfeed || // hlo->opcode() == HloOpcode::kOutfeed || // + hlo->opcode() == HloOpcode::kSend || // + hlo->opcode() == HloOpcode::kSendDone || // + hlo->opcode() == HloOpcode::kRecv || // + hlo->opcode() == HloOpcode::kRecvDone || // hlo->opcode() == HloOpcode::kCustomCall || // hlo->opcode() == HloOpcode::kCall || // hlo->opcode() == HloOpcode::kConditional || // -- GitLab From 576db294513a5d692048c65f5d7d19436d32bf3d Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Mon, 5 Mar 2018 12:01:37 -0800 Subject: [PATCH 409/884] Return ComputationLayout as a reference from the HLO module. PiperOrigin-RevId: 187900559 --- tensorflow/compiler/xla/service/hlo_module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_module.h b/tensorflow/compiler/xla/service/hlo_module.h index ca94118763..755bbd359f 100644 --- a/tensorflow/compiler/xla/service/hlo_module.h +++ b/tensorflow/compiler/xla/service/hlo_module.h @@ -103,7 +103,7 @@ class HloModule { return config_.mutable_entry_computation_layout(); } - ComputationLayout entry_computation_layout() const { + const ComputationLayout& entry_computation_layout() const { return config_.entry_computation_layout(); } -- GitLab From a2daab6537a63940fe66b9cc52d686d3a7e31910 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Mon, 5 Mar 2018 12:22:35 -0800 Subject: [PATCH 410/884] [XLA] Mark xla_internal_test_main as alwayslink. 
PiperOrigin-RevId: 187903623 --- tensorflow/compiler/xla/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 63f4a4430f..7c95b03a67 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -44,6 +44,7 @@ cc_library( "//tensorflow/core:lib", "//tensorflow/core:test", ], + alwayslink = True, ) cc_library( -- GitLab From 119795f5341737341b526814c6360b5679cd81d3 Mon Sep 17 00:00:00 2001 From: Priya Gupta Date: Mon, 5 Mar 2018 12:28:07 -0800 Subject: [PATCH 411/884] Make variable creator scope thread local (always). PiperOrigin-RevId: 187904394 --- tensorflow/python/framework/ops.py | 18 +++++++++++++----- tensorflow/python/ops/variable_scope.py | 2 +- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 0a85b153de..47d0beca90 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2780,7 +2780,6 @@ class Graph(object): c_api.SetRequireShapeInferenceFns(self._c_graph, False) else: self._scoped_c_graph = None - self._variable_creator_stack = [] # TODO(apassos) remove once the C API is used by default. def _use_c_api_hack(self): @@ -2821,17 +2820,26 @@ class Graph(object): # frozen, and this functionality is still not ready for public visibility. @tf_contextlib.contextmanager def _variable_creator_scope(self, creator): + # This step makes a copy of the existing stack, and it also initializes + # self._thread_local._variable_creator_stack if it doesn't exist yet. 
old = list(self._variable_creator_stack) - self._variable_creator_stack.append(creator) + self._thread_local._variable_creator_stack.append(creator) try: yield finally: - self._variable_creator_stack = old + self._thread_local._variable_creator_stack = old # Note: this method is private because the API of tf.Graph() is public and # frozen, and this functionality is still not ready for public visibility. - def _get_variable_creator_stack(self): - return list(self._variable_creator_stack) + @property + def _variable_creator_stack(self): + if not hasattr(self._thread_local, "_variable_creator_stack"): + self._thread_local._variable_creator_stack = [] + return list(self._thread_local._variable_creator_stack) + + @_variable_creator_stack.setter + def _variable_creator_stack(self, variable_creator_stack): + self._thread_local._variable_creator_stack = variable_creator_stack def _extract_stack(self): """A lightweight, extensible re-implementation of traceback.extract_stack. diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index 81565a6377..de4e44f60c 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -2145,7 +2145,7 @@ def variable(initial_value=None, constraint=None, use_resource=None): previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs) - for getter in ops.get_default_graph()._get_variable_creator_stack(): # pylint: disable=protected-access + for getter in ops.get_default_graph()._variable_creator_stack: # pylint: disable=protected-access previous_getter = _make_getter(getter, previous_getter) return previous_getter(initial_value=initial_value, trainable=trainable, -- GitLab From 9b57fba94a2ed41ebeea8e2c6d20e291bb26b411 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 5 Mar 2018 12:37:55 -0800 Subject: [PATCH 412/884] Fixes for PR comments --- configure.py | 2 +- .../contrib/tensorrt/convert/convert_graph.cc | 87 ++++++++++--------- 
.../contrib/tensorrt/convert/convert_graph.h | 5 ++ .../contrib/tensorrt/convert/convert_nodes.h | 36 ++++---- .../contrib/tensorrt/kernels/trt_engine_op.cc | 25 +++--- tensorflow/contrib/tensorrt/log/trt_logger.cc | 8 +- tensorflow/contrib/tensorrt/log/trt_logger.h | 1 + .../contrib/tensorrt/python/trt_convert.py | 1 - .../tensorrt/resources/trt_int8_calibrator.cc | 6 +- .../tensorrt/resources/trt_int8_calibrator.h | 3 +- tensorflow/contrib/tensorrt/trt_conversion.i | 6 +- 11 files changed, 93 insertions(+), 87 deletions(-) diff --git a/configure.py b/configure.py index 081632e605..7d61c2e5e3 100644 --- a/configure.py +++ b/configure.py @@ -1048,7 +1048,7 @@ def set_tf_tensorrt_install_path(environ_cp): for lib_file in possible_files: if is_compatible(lib_file, cuda_ver, cudnn_ver): - matches=nvinfer_pattern.search(lib_file) + matches = nvinfer_pattern.search(lib_file) if len(matches.groups()) == 0: continue ver_str = matches.group(1) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 36145452be..76a5d24214 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -131,23 +131,23 @@ std::unordered_map> BuildTensorNameMap( } return result; } - +// TODO(sami): convert references to pointers struct ConvertGraphParams { ConvertGraphParams( - tensorflow::Graph& graph_, const std::vector& output_names_, - const std::set& subgraph_node_ids_, size_t max_batch_size_, - size_t max_workspace_size_bytes_, - const tensorflow::grappler::GraphProperties& graph_properties_, - std::unordered_map>* output_edge_map_, - int precision_mode_) - : graph(graph_), - output_names(output_names_), - subgraph_node_ids(subgraph_node_ids_), - max_batch_size(max_batch_size_), - max_workspace_size_bytes(max_workspace_size_bytes_), - graph_properties(graph_properties_), - output_edge_map(output_edge_map_), - precision_mode(precision_mode_) {} + 
tensorflow::Graph& graph, const std::vector& output_names, + const std::set& subgraph_node_ids, size_t max_batch_size, + size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& graph_properties, + std::unordered_map>* output_edge_map, + int precision_mode) + : graph(graph), + output_names(output_names), + subgraph_node_ids(subgraph_node_ids), + max_batch_size(max_batch_size), + max_workspace_size_bytes(max_workspace_size_bytes), + graph_properties(graph_properties), + output_edge_map(output_edge_map), + precision_mode(precision_mode) {} tensorflow::Graph& graph; const std::vector& output_names; const std::set& subgraph_node_ids; @@ -162,36 +162,37 @@ struct ConvertGraphParams { tensorflow::EdgeSet subgraph_outgoing_edges; }; -tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams& p) { - GetSubGraphIncomingEdges(p.graph, p.subgraph_node_ids, - &p.subgraph_incoming_edges); - for (tensorflow::Edge const* edge : p.subgraph_incoming_edges) { - p.subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); +static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { + GetSubGraphIncomingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_incoming_edges); + for (const tensorflow::Edge* edge : p->subgraph_incoming_edges) { + p->subgraph_inputs.push_back({edge->src()->id(), edge->src_output()}); } - auto output_name_to_index_map = BuildTensorNameMap(p.output_names); + auto output_name_to_index_map = BuildTensorNameMap(p->output_names); std::set> subgraph_outputs_set; - for (int node_id : p.subgraph_node_ids) { - tensorflow::Node* node = p.graph.FindNodeId(node_id); + // Collect outputs referenced from output_names + for (int node_id : p->subgraph_node_ids) { + tensorflow::Node* node = p->graph.FindNodeId(node_id); if (output_name_to_index_map.count(node->name())) { for (int index : output_name_to_index_map.at(node->name())) { subgraph_outputs_set.insert({node_id, index}); } } } - GetSubGraphOutgoingEdges(p.graph, 
p.subgraph_node_ids, - &p.subgraph_outgoing_edges); - for (const tensorflow::Edge* edge : p.subgraph_outgoing_edges) { + GetSubGraphOutgoingEdges(p->graph, p->subgraph_node_ids, + &p->subgraph_outgoing_edges); + for (const tensorflow::Edge* edge : p->subgraph_outgoing_edges) { subgraph_outputs_set.insert({edge->src()->id(), edge->src_output()}); } - p.subgraph_outputs.reserve(subgraph_outputs_set.size()); - p.subgraph_outputs.insert(p.subgraph_outputs.begin(), - subgraph_outputs_set.begin(), - subgraph_outputs_set.end()); + p->subgraph_outputs.reserve(subgraph_outputs_set.size()); + p->subgraph_outputs.insert(p->subgraph_outputs.begin(), + subgraph_outputs_set.begin(), + subgraph_outputs_set.end()); return tensorflow::Status::OK(); }; tensorflow::Status GetCalibNode(ConvertGraphParams* params) { - FillSubGraphEdgeSets(*params); + FillSubGraphEdgeSets(params); tensorflow::NodeDef trt_node_def; SubGraphParams s(params->graph, params->subgraph_node_ids, params->subgraph_inputs, params->subgraph_outputs, @@ -219,7 +220,7 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { } tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { - FillSubGraphEdgeSets(*params); + FillSubGraphEdgeSets(params); tensorflow::NodeDef trt_node_def; SubGraphParams s(params->graph, params->subgraph_node_ids, @@ -296,19 +297,19 @@ tensorflow::Status ConvertCalibGraphToInferGraph( TF_RETURN_IF_ERROR(tensorflow::ConvertGraphDefToGraph( tensorflow::GraphConstructorOptions(), graph_def, &graph)); // get calib nodes - std::vector calibNodes; + std::vector calib_nodes; for (auto node : graph.op_nodes()) { if (node->type_string() == "TRTCalibOp") { VLOG(1) << "Found Calib Node"; - calibNodes.push_back(node); + calib_nodes.push_back(node); } } - VLOG(0) << "Num Calib nodes in graph= " << calibNodes.size(); - if (calibNodes.size() == 0) + VLOG(0) << "Num Calib nodes in graph= " << calib_nodes.size(); + if (calib_nodes.size() == 0) return 
tensorflow::errors::FailedPrecondition( "Graph doesn't contain any calibration nodes!." " Please generate calibration graph and run calibration first"); - for (auto n : calibNodes) { + for (auto n : calib_nodes) { TF_RETURN_IF_ERROR( tensorrt::convert::ConvertCalibrationNodeToEngineNode(graph, n)); } @@ -320,23 +321,23 @@ tensorflow::Status ConvertGraphDefToTensorRT( const tensorflow::GraphDef& graph_def, const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, - int precision_mode = 0, int minimum_segment_size = 3) { + int precision_mode = FP32MODE, int minimum_segment_size = 3) { // optimization pass tensorflow::grappler::GrapplerItem item; item.fetch = output_names; tensorflow::GraphDef gdef; - // layout optimization + // Layout optimization item.graph = graph_def; tensorflow::grappler::LayoutOptimizer optimizer; - tensorflow::grappler::Cluster* gCluster; + tensorflow::grappler::Cluster* cluster; // virtual cluster tensorflow::DeviceProperties device_properties; device_properties.set_type("GPU"); device_properties.mutable_environment()->insert({"architecture", "6"}); - gCluster = + cluster = new tensorflow::grappler::VirtualCluster({{"/GPU:0", device_properties}}); // single machine @@ -345,7 +346,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( VLOG(2) << "cpu_cores: " << num_cpu_cores; VLOG(2) << "gpus: " << num_gpus; - TF_RETURN_IF_ERROR(optimizer.Optimize(gCluster, item, &gdef)); + TF_RETURN_IF_ERROR(optimizer.Optimize(cluster, item, &gdef)); // constant folding item.graph = gdef; @@ -400,7 +401,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, max_mem_per_engine, static_graph_properties, &output_edge_map, precision_mode); - if (precision_mode == 2) { + if (precision_mode == FP16MODE) { TF_RETURN_IF_ERROR(GetCalibNode(&p)); } else { tensorflow::Status status = ConvertSubGraphToTensorRT(&p); diff --git 
a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 8401791f76..90bd3c4a17 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -27,6 +27,11 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { +const int FP32MODE = 0; +const int FP16MODE = 1; +const int INT8MODE = 2; +// This method converts an already generated calibration graph which was used in +// calibration runs to an inference graph tensorflow::Status ConvertCalibGraphToInferGraph( const tensorflow::GraphDef& graph_def, tensorflow::GraphDef* new_graph_def); diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 48fe51a954..02aef35ced 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -36,23 +36,23 @@ namespace convert { struct SubGraphParams { SubGraphParams( - tensorflow::Graph& graph_, const std::set& subgraph_node_ids_, - const std::vector>& input_inds_, - const std::vector>& output_inds_, - size_t max_batch_size_, size_t max_workspace_size_bytes_, - const tensorflow::grappler::GraphProperties& graph_properties_, - std::unordered_map>* output_edge_map_, - tensorflow::NodeDef* trt_node_, int precision_mode_ = 0) - : graph(graph_), - subgraph_node_ids(subgraph_node_ids_), - input_inds(input_inds_), - output_inds(output_inds_), - max_batch_size(max_batch_size_), - max_workspace_size_bytes(max_workspace_size_bytes_), - graph_properties(graph_properties_), - output_edge_map(output_edge_map_), - trt_node(trt_node_), - precision_mode(precision_mode_) {} + tensorflow::Graph& graph, const std::set& subgraph_node_ids, + const std::vector>& input_inds, + const std::vector>& output_inds, + size_t max_batch_size, size_t max_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& 
graph_properties, + std::unordered_map>* output_edge_map, + tensorflow::NodeDef* trt_node, int precision_mode_ = 0) + : graph(graph), + subgraph_node_ids(subgraph_node_ids), + input_inds(input_inds), + output_inds(output_inds), + max_batch_size(max_batch_size), + max_workspace_size_bytes(max_workspace_size_bytes), + graph_properties(graph_properties), + output_edge_map(output_edge_map), + trt_node(trt_node), + precision_mode(precision_mode) {} tensorflow::Graph& graph; const std::set& subgraph_node_ids; @@ -65,7 +65,7 @@ struct SubGraphParams { tensorflow::NodeDef* trt_node; const int precision_mode; }; - +// TODO(sami): Replace references with const reference or pointers tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); tensorflow::Status InjectCalibrationNode(SubGraphParams& params); tensorflow::Status ConvertCalibrationNodeToEngineNode(tensorflow::Graph& graph, diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 03f80dd506..24ebf75264 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -24,7 +24,7 @@ limitations under the License. #include "cuda/include/cuda_runtime_api.h" namespace tensorflow { -static ::tensorflow::tensorrt::Logger gLogger; +static ::tensorflow::tensorrt::Logger logger; using IRuntime = nvinfer1::IRuntime; using Dims = nvinfer1::Dims; @@ -40,22 +40,23 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("input_nodes", &input_nodes_)); OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_)); - // TODO(samikama) runtime should be taken from a resourcemanager as well. 
- // Only engine should be in the op and context and runtime should be taken - // from resourcemanager - // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same - // gpu where the input/output is also located. + // gpu where the input/output is also located. int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; cudaSetDevice(gpu_id); int device; cudaGetDevice(&device); if (gpu_id != device) LOG(FATAL) << "set device failed!"; - IRuntime* infer = nvinfer1::createInferRuntime(gLogger); + // TODO(samikama) runtime should be taken from a resourcemanager as well. + // Only engine should be in the op and context and runtime should be taken + // from resourcemanager + + IRuntime* infer = nvinfer1::createInferRuntime(logger); trt_engine_ptr_.reset(infer->deserializeCudaEngine( serialized_engine.c_str(), serialized_engine.size(), nullptr)); trt_execution_context_ptr_.reset(trt_engine_ptr_->createExecutionContext()); + // Runtime is safe to delete after engine creation infer->destroy(); } @@ -65,7 +66,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { size_t binding_index; int num_batch = 0; - bool valid = true; for (int i = 0; i < context->num_inputs(); i++) { // Grab the input tensor binding_index = trt_engine_ptr_->getBindingIndex(input_nodes_[i].c_str()); @@ -74,15 +74,14 @@ void TRTEngineOp::Compute(OpKernelContext* context) { const TensorShape& input_shape = input_tensor.shape(); if (i == 0) { num_batch = input_shape.dim_size(0); - if (num_batch > trt_engine_ptr_->getMaxBatchSize()) + if (num_batch > trt_engine_ptr_->getMaxBatchSize()) { LOG(FATAL) << "input tensor batch larger than max_batch_size: " << trt_engine_ptr_->getMaxBatchSize(); + } } else if (num_batch != input_shape.dim_size(0)) { - valid = false; + LOG(FATAL) << "input data inconsistent batch size"; break; } - // int64 input_shape.dim_size(int d) - // int input_shape.dims() switch (trt_engine_ptr_->getBindingDataType(binding_index)) { case 
nvinfer1::DataType::kFLOAT: buffers[binding_index] = (void*)(input_tensor.flat().data()); @@ -96,7 +95,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { } } - if (!valid) LOG(FATAL) << "input data inconsistent batch size"; for (int i = 0; i < static_cast(output_nodes_.size()); i++) { // This is bad that we have to reallocate output buffer every run. // Create an output tensor @@ -144,7 +142,6 @@ void TRTEngineOp::Compute(OpKernelContext* context) { *stream, nullptr); VLOG(2) << "enqueue returns: " << ret; // sync should be done by TF. - // cudaStreamSynchronize(*stream); } REGISTER_KERNEL_BUILDER(Name("TRTEngineOp").Device(DEVICE_GPU), TRTEngineOp); diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc index 7add8cb8b3..83ae5db1d9 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.cc +++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc @@ -27,19 +27,19 @@ void Logger::log(Severity severity, const char* msg) { // Suppress info-level messages switch (severity) { case Severity::kINFO: { // Mark TRT info messages as debug! - VLOG(2) << msg; + VLOG(2) << name_ << " " <& data, const cudaStream_t stream) { - if (done_) return false; tensorflow::mutex_lock lock(cond_mtx_); while ((calib_running_ || batch_is_set_) && !done_) { // wait while calibration is running cond_.wait(lock); - if (done_) return false; } + if (done_) return false; CHECK(!calib_running_ && !batch_is_set_); VLOG(1) << "Set Batch Waiting finished"; for (const auto it : data) { @@ -62,6 +61,8 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, // TODO(aaroey): we should not use sync copy on default stream. Make sure // stream->ThenMemcpy() is used in future PRs. + // TODO(sami,aaroey): Need to figureout a way to ensure synchronization + // between stream, perhaps using a tensor? 
auto status = cudaMemcpyAsync(d.first, it.second, d.second, cudaMemcpyDeviceToDevice, stream); if (status != cudaSuccess) { @@ -69,6 +70,7 @@ bool TRTInt8Calibrator::setBatch(const std::unordered_map& data, << "' failed with " << status; } } + // TODO(Sami, aaorey): Find an alternative way! cudaStreamSynchronize( stream); // we have to wait for the stream before returning! batch_is_set_ = true; diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index cab9c7e43b..aaf93ef733 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -40,7 +40,8 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { int getBatchSize() const override; bool getBatch(void* bindings[], const char* names[], int num_bindings) override; - bool setBatch(const std::unordered_map& data,const cudaStream_t stream); + bool setBatch(const std::unordered_map& data, + const cudaStream_t stream); void setDone(); const void* readCalibrationCache(std::size_t& length) override; void writeCalibrationCache(const void* ptr, std::size_t length) override; diff --git a/tensorflow/contrib/tensorrt/trt_conversion.i b/tensorflow/contrib/tensorrt/trt_conversion.i index 09e58e8ce9..46480e99a1 100644 --- a/tensorflow/contrib/tensorrt/trt_conversion.i +++ b/tensorflow/contrib/tensorrt/trt_conversion.i @@ -151,13 +151,13 @@ std::pair calib_convert(string graph_def_string // const tenso tensorflow::GraphDef outGraph; tensorflow::Status conversion_status = tensorflow::tensorrt::convert::ConvertCalibGraphToInferGraph(graph_def, - &outGraph); + &outGraph); if (!conversion_status.ok()) { auto retCode = (int)conversion_status.code(); char buff[2000]; snprintf(buff, 2000, "%d;%s", retCode, conversion_status.error_message().c_str()); - out_status=buff; + out_status = buff; return std::pair{out_status, ""}; } string result; @@ -165,7 +165,7 @@ 
std::pair calib_convert(string graph_def_string // const tenso out_status = "InvalidArgument;Couldn't serialize output as a GraphDef"; return std::pair{out_status, ""}; } - out_status="OK;All good!"; + out_status = "OK;All good!"; return std::pair{out_status, result}; #else // Returns FAILED_PRECONDITION. -- GitLab From 39e04e5d02cb98db90f1052e328c3c73718c8603 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 12:46:30 -0800 Subject: [PATCH 413/884] Simplify softmax_centered implementation. This also resolves a bug with softmax_centered.inverse not working on inputs with partially known. PiperOrigin-RevId: 187907026 --- .../bijectors/softmax_centered_test.py | 28 +++++++++++++++++++ .../python/ops/bijectors/softmax_centered.py | 25 ++--------------- 2 files changed, 31 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py index 62e3869db0..4a7679daad 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/softmax_centered_test.py @@ -21,7 +21,9 @@ from __future__ import print_function import numpy as np from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import SoftmaxCentered +from tensorflow.python.framework import dtypes from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops from tensorflow.python.ops.distributions.bijector_test_util import assert_bijective_and_finite from tensorflow.python.platform import test @@ -76,6 +78,32 @@ class SoftmaxCenteredBijectorTest(test.TestCase): atol=0., rtol=1e-7) + def testBijectorUnknownShape(self): + with self.test_session(): + softmax = SoftmaxCentered(event_ndims=1) + self.assertEqual("softmax_centered", softmax.name) + x = 
array_ops.placeholder(shape=[2, None], dtype=dtypes.float32) + real_x = np.log([[2., 3, 4], [4., 8, 12]]) + y = array_ops.placeholder(shape=[2, None], dtype=dtypes.float32) + real_y = [[0.2, 0.3, 0.4, 0.1], [0.16, 0.32, 0.48, 0.04]] + self.assertAllClose(real_y, softmax.forward(x).eval( + feed_dict={x: real_x})) + self.assertAllClose(real_x, softmax.inverse(y).eval( + feed_dict={y: real_y})) + self.assertAllClose( + -np.sum(np.log(real_y), axis=1), + softmax.inverse_log_det_jacobian(y).eval( + feed_dict={y: real_y}), + atol=0., + rtol=1e-7) + self.assertAllClose( + -softmax.inverse_log_det_jacobian(y).eval( + feed_dict={y: real_y}), + softmax.forward_log_det_jacobian(x).eval( + feed_dict={x: real_x}), + atol=0., + rtol=1e-7) + def testShapeGetters(self): with self.test_session(): for x, y, b in ((tensor_shape.TensorShape([]), diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py index a9dcce6c52..24add40445 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/softmax_centered.py @@ -18,8 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import numpy as np - from tensorflow.contrib.distributions.python.ops import distribution_util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -161,33 +159,16 @@ class SoftmaxCentered(bijector.Bijector): # x[i] = log(exp(x[i])) - log(y[end]) - log(normalization) # = log(exp(x[i])/normalization) - log(y[end]) # = log(y[i]) - log(y[end]) - shape = (np.asarray(y.shape.as_list(), dtype=np.int32) - if y.shape.is_fully_defined() - else array_ops.shape(y, name="shape")) - ndims = distribution_util.prefer_static_rank(y) # Do this first to make sure CSE catches that it'll happen again in # _inverse_log_det_jacobian. 
x = math_ops.log(y) - # We now extract the last coordinate of the rightmost dimension. - # Our trick is to slice from [0,0,...,shape[-1]-1] to shape[:-1]+[1]. - begin = array_ops.one_hot(indices=ndims-1, - depth=ndims, - on_value=shape[-1]-np.array(1, dtype=shape.dtype), - dtype=shape.dtype) - size = array_ops.concat([shape[:-1], np.asarray([1], dtype=shape.dtype)], 0) - log_normalization = -array_ops.strided_slice(x, begin, begin + size) - - # Here we slice out all but the last coordinate; see above for idea. - begin = array_ops.zeros_like(shape) - size = array_ops.concat([shape[:-1], [shape[-1] - 1]], 0) - x = array_ops.strided_slice(x, begin, begin + size) - - x += log_normalization + log_normalization = (-x[..., -1])[..., array_ops.newaxis] + x = x[..., :-1] + log_normalization if self._static_event_ndims == 0: - x = array_ops.squeeze(x, squeeze_dims=[ndims-1]) + x = array_ops.squeeze(x, squeeze_dims=-1) # Set shape hints. if y.shape.ndims is not None: -- GitLab From 84c9f71b20309029d5816091c27968564e775c70 Mon Sep 17 00:00:00 2001 From: Yuanzhong Xu Date: Mon, 5 Mar 2018 12:54:27 -0800 Subject: [PATCH 414/884] [XLA] Fix BF16 normalizer for CrossReplicaSum. 1. It may produce incorrect result when mixed precision is not supported and BF16 is not support only for a particular operand. Then the pass may introduce new mixed precision for an all-BF16 CRS. This is unlikely in practical settings, but removing this constraint can enable auto-generating corner case tests using this pass. 2. A cycle can be introduced in the tuple-shaped output output. This wasn't caught by the test because the DFS happened to succeed. Now add verifier explicitly. 
PiperOrigin-RevId: 187908099 --- tensorflow/compiler/xla/service/BUILD | 1 + .../xla/service/bfloat16_normalization.cc | 63 ++++++++++++------- .../service/bfloat16_normalization_test.cc | 12 +++- 3 files changed, 52 insertions(+), 24 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index d71790fb2d..6f52703683 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -106,6 +106,7 @@ tf_cc_test( ":bfloat16_normalization", ":bfloat16_support", ":hlo", + ":hlo_verifier", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla:test", diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization.cc b/tensorflow/compiler/xla/service/bfloat16_normalization.cc index 6176f5d209..14c54ddd13 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization.cc @@ -152,44 +152,64 @@ Status BFloat16NormalizationVisitor::HandleCrossReplicaSum( std::vector operand_types(crs->operand_count()); std::vector output_types(crs->operand_count()); - bool has_f32 = false; - bool has_bf16 = false; - bool has_bf16_output = false; + int64 f32_count = 0; + int64 bf16_count = 0; + bool has_unsupported_bf16_operand = false; + bool has_unsupported_bf16_output = false; for (int64 i = 0; i < crs->operand_count(); ++i) { operand_types[i] = crs->operand(i)->shape().element_type(); output_types[i] = ShapeUtil::GetSubshape(crs->shape(), {i}).element_type(); - if (operand_types[i] == F32 || output_types[i] == F32) { - has_f32 = true; + if (operand_types[i] == F32) { + f32_count += 1; } else if (operand_types[i] == BF16) { - has_bf16 = true; + bf16_count += 1; + if (!bfloat16_support_->SupportsBF16Operand(*crs, i)) { + has_unsupported_bf16_operand = true; + } } - if (output_types[i] == BF16) { - has_bf16 = true; - has_bf16_output = true; + if (output_types[i] == F32) { + f32_count += 
1; + } else if (output_types[i] == BF16) { + bf16_count += 1; + if (!bfloat16_support_->SupportsBF16Output(*crs)) { + has_unsupported_bf16_output = true; + } } } - for (int64 i = 0; i < crs->operand_count(); ++i) { + if (bf16_count == 0) { + return Status::OK(); + } + + auto should_convert_operand = [&](int64 i) { if (operand_types[i] != BF16) { - continue; + return false; } - if (bfloat16_support_->SupportsBF16Operand(*crs, i) && - (bfloat16_support_->SupportsMixedPrecisions(*crs) || !has_f32)) { - continue; + if (!bfloat16_support_->SupportsBF16Operand(*crs, i)) { + return true; } - TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(crs, i, F32, computation_)); - has_f32 = true; - } + if (bfloat16_support_->SupportsMixedPrecisions(*crs)) { + return false; + } + return has_unsupported_bf16_operand || has_unsupported_bf16_output || + f32_count > 0; + }; - if (!has_bf16_output) { - return Status::OK(); + for (int64 i = 0; i < crs->operand_count(); ++i) { + if (should_convert_operand(i)) { + TF_RETURN_IF_ERROR(InsertConvertBeforeOperand(crs, i, F32, computation_)); + f32_count += 1; + bf16_count -= 1; + } } - if (bfloat16_support_->SupportsBF16Output(*crs) && - (bfloat16_support_->SupportsMixedPrecisions(*crs) || !has_f32)) { + if (!has_unsupported_bf16_output && + (bfloat16_support_->SupportsMixedPrecisions(*crs) || f32_count == 0 || + bf16_count == 0)) { return Status::OK(); } + std::vector materialized_users = crs->users(); std::vector output_elements(crs->operand_count()); auto original_shape = crs->shape(); for (int64 i = 0; i < crs->operand_count(); ++i) { @@ -209,7 +229,6 @@ Status BFloat16NormalizationVisitor::HandleCrossReplicaSum( auto tuple = computation_->AddInstruction( HloInstruction::CreateTuple(output_elements)); - std::vector materialized_users = crs->users(); // Use the crs' shape temporarily, in order to pass checks in // ReplaceUseWith. 
*tuple->mutable_shape() = crs->shape(); diff --git a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc index fc0f6f1948..1afaefd9df 100644 --- a/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc +++ b/tensorflow/compiler/xla/service/bfloat16_normalization_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_module.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" @@ -74,6 +75,10 @@ class BFloat16NormalizationTest : public HloTestBase { BFloat16Normalization normalization(&bfloat16_support_); StatusOr result = normalization.Run(module); EXPECT_IS_OK(result.status()); + + HloVerifier verifier(/*allow_mixed_precision=*/true); + EXPECT_IS_OK(verifier.Run(module).status()); + return result.ValueOrDie(); } }; @@ -170,7 +175,7 @@ TEST_F(BFloat16NormalizationTest, ResolveUnsupportedMixedPrecisionReduce) { Shape f32_input_shape = ShapeUtil::MakeShape(F32, {2, 4}); Shape f32_output_shape = ShapeUtil::MakeShape(F32, {4}); - Shape bf16_scalar_shape = ShapeUtil::MakeShape(BF16, {2, 4}); + Shape bf16_scalar_shape = ShapeUtil::MakeShape(BF16, {}); auto reduce_comp_builder = HloComputation::Builder("reduce_comp"); auto reduce_comp_param0 = reduce_comp_builder.AddInstruction( @@ -260,8 +265,11 @@ TEST_F(BFloat16NormalizationTest, DoNotAddUnsupportedMixedPrecision) { HloInstruction* b = builder.AddInstruction( HloInstruction::CreateParameter(1, bf16_shape, "b")); + DotDimensionNumbers dot_dnums; + dot_dnums.add_lhs_contracting_dimensions(1); + dot_dnums.add_rhs_contracting_dimensions(0); HloInstruction* dot = builder.AddInstruction( - HloInstruction::CreateBinary(bf16_shape, 
HloOpcode::kDot, a, b)); + HloInstruction::CreateDot(bf16_shape, a, b, dot_dnums)); auto module = CreateNewModule(); auto computation = module->AddEntryComputation(builder.Build()); -- GitLab From 5368a1a3af94c6b49dd51d0d85cb3702f484daa7 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 5 Mar 2018 13:36:30 -0800 Subject: [PATCH 415/884] Benchmark regression PiperOrigin-RevId: 187914657 --- tensorflow/python/ops/array_ops.py | 5 +---- tensorflow/python/ops/losses/losses_impl.py | 8 -------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index cc559695ed..bd1e84ec82 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -388,10 +388,7 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32): if context.in_eager_mode() and not isinstance( input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): - size_ = 1 - for dim in ops.convert_to_tensor(input)._shape_tuple(): # pylint: disable=protected-access - size_ *= dim - return size_ + return np.prod(ops.convert_to_tensor(input)._shape_tuple()) # pylint: disable=protected-access with ops.name_scope(name, "Size", [input]) as name: if isinstance(input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index a39417139e..0cae3c1453 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -89,14 +89,6 @@ def _safe_div(numerator, denominator, name="value"): Returns: The element-wise value of the numerator divided by the denominator. 
""" - if isinstance(denominator, float): - if math_ops.equal(denominator, 0.0): - return ops.convert_to_tensor(0.0, dtype=numerator.dtype) - return math_ops.div(numerator, denominator) - if context.in_eager_mode() and denominator._rank() == 0: # pylint: disable=protected-access - if math_ops.equal(denominator, 0.0): - return ops.convert_to_tensor(0.0, dtype=numerator.dtype) - return math_ops.div(numerator, denominator) return array_ops.where( math_ops.greater(denominator, 0), math_ops.div(numerator, array_ops.where( -- GitLab From 7558b085afd4ba8ffb5d9ceab0616cc4ba0649b1 Mon Sep 17 00:00:00 2001 From: chengzhi chen Date: Tue, 6 Mar 2018 05:41:51 +0800 Subject: [PATCH 416/884] Lite: Supporting Raspberry Pi. (#16431) Now we can cross compiling or native compiling libtensorflow-lite.a for rpi. * Remove unnecessary space between $(CC_PREFIX) and gcc. * Adding -O3 -DNDEBUG CFLAGS same as CXXFLAGS. * Remove redundant -lpthread link flag. * Add Makefile for RPi. --- tensorflow/contrib/lite/Makefile | 9 +++-- tensorflow/contrib/lite/build_rpi_lib.sh | 22 +++++++++++ tensorflow/contrib/lite/g3doc/rpi.md | 50 ++++++++++++++++++++++++ tensorflow/contrib/lite/rpi_makefile.inc | 33 ++++++++++++++++ 4 files changed, 110 insertions(+), 4 deletions(-) create mode 100755 tensorflow/contrib/lite/build_rpi_lib.sh create mode 100644 tensorflow/contrib/lite/g3doc/rpi.md create mode 100644 tensorflow/contrib/lite/rpi_makefile.inc diff --git a/tensorflow/contrib/lite/Makefile b/tensorflow/contrib/lite/Makefile index 7f31629272..b4504f246a 100644 --- a/tensorflow/contrib/lite/Makefile +++ b/tensorflow/contrib/lite/Makefile @@ -27,10 +27,10 @@ LIBDIR := $(MAKEFILE_DIR)/gen/lib/ GENDIR := $(MAKEFILE_DIR)/gen/obj/ # Settings for the host compiler. 
-CXX := $(CC_PREFIX) gcc +CXX := $(CC_PREFIX)gcc CXXFLAGS := --std=c++11 -O3 -DNDEBUG -CC := $(CC_PREFIX) gcc -CFLAGS := +CC := $(CC_PREFIX)gcc +CFLAGS := -O3 -DNDEBUG LDOPTS := LDOPTS += -L/usr/local/lib ARFLAGS := -r @@ -57,10 +57,11 @@ LIBS := \ # If we're on Linux, also link in the dl library. ifeq ($(HOST_OS),LINUX) - LIBS += -ldl -lpthread + LIBS += -ldl endif include $(MAKEFILE_DIR)/ios_makefile.inc +include $(MAKEFILE_DIR)/rpi_makefile.inc # This library is the main target for this makefile. It will contain a minimal # runtime that can be linked in to other programs. diff --git a/tensorflow/contrib/lite/build_rpi_lib.sh b/tensorflow/contrib/lite/build_rpi_lib.sh new file mode 100755 index 0000000000..3824b16412 --- /dev/null +++ b/tensorflow/contrib/lite/build_rpi_lib.sh @@ -0,0 +1,22 @@ +#!/bin/bash -x +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR/../../.." 
+ +CC_PREFIX=arm-linux-gnueabihf- make -j 3 -f tensorflow/contrib/lite/Makefile TARGET=RPI TARGET_ARCH=armv7 diff --git a/tensorflow/contrib/lite/g3doc/rpi.md b/tensorflow/contrib/lite/g3doc/rpi.md new file mode 100644 index 0000000000..7a3a231626 --- /dev/null +++ b/tensorflow/contrib/lite/g3doc/rpi.md @@ -0,0 +1,50 @@ +# TensorFlow Lite for Raspberry Pi + +## Cross compiling +### Installing the toolchain +This has been tested on Ubuntu 16.04.3 64bit and Tensorflow devel docker image [tensorflow/tensorflow:nightly-devel](https://hub.docker.com/r/tensorflow/tensorflow/tags/). + +To cross compile TensorFlow Lite, first install the toolchain and libs. +```bash +sudo apt-get update +sudo apt-get install crossbuild-essential-armhf +``` +> If you are using docker, you may not need `sudo`. + +### Building +Clone this TensorFlow repository. Run this script at the root of the repository to download all the dependencies: +> The Tensorflow repository is in `/tensorflow` if you are using `tensorflow/tensorflow:nightly-devel` docker image, just try it. +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note that you only need to do this once. 
+ +You should then be able to compile: +```bash +./tensorflow/contrib/lite/build_rpi_lib.sh +``` + +This should compile a static library in: +`tensorflow/contrib/lite/gen/lib/rpi_armv7/libtensorflow-lite.a`. + +## Native compiling +This has been tested on Raspberry Pi 3b, Raspbian GNU/Linux 9.1 (stretch), gcc version 6.3.0 20170516 (Raspbian 6.3.0-18+rpi1). + +Log in to your RPi and install the toolchain. +```bash +sudo apt-get install build-essential +``` + +First, clone this TensorFlow repository. Run this at the root of the repository: +```bash +./tensorflow/contrib/lite/download_dependencies.sh +``` +Note that you only need to do this once. 
PiperOrigin-RevId: 187915821 --- tensorflow/compiler/xla/service/BUILD | 18 ++ .../compiler/xla/service/hlo_proto_util.cc | 135 +++++++++++++++ .../compiler/xla/service/hlo_proto_util.h | 9 + .../xla/service/hlo_proto_util_test.cc | 161 ++++++++++++++++++ 4 files changed, 323 insertions(+) create mode 100644 tensorflow/compiler/xla/service/hlo_proto_util_test.cc diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 6f52703683..3eecc4657f 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -2387,6 +2387,24 @@ cc_library( ":hlo", ":hlo_proto", "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:util", + ], +) + +tf_cc_test( + name = "hlo_proto_util_test", + srcs = ["hlo_proto_util_test.cc"], + deps = [ + ":hlo", + ":hlo_proto", + ":hlo_proto_util", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:test", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla/tests:hlo_test_base", + "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc index 78e6a101c1..f75c452082 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util.cc +++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc @@ -15,8 +15,112 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_proto_util.h" +#include + +#include "tensorflow/compiler/xla/util.h" + namespace xla { +namespace { + +// Returns the entry computation of the HLO module in the given HloProto. 
+StatusOr GetEntryComputation( + const HloProto& hlo_proto) { + if (!hlo_proto.has_hlo_module()) { + return NotFound("HloProto missing HloModuleProto."); + } + + if (hlo_proto.hlo_module().entry_computation_name().empty()) { + return NotFound("HloProto has empty entry computation name."); + } + + const string& entry_computation_name = + hlo_proto.hlo_module().entry_computation_name(); + const HloComputationProto* entry_computation = nullptr; + for (const HloComputationProto& computation : + hlo_proto.hlo_module().computations()) { + if (computation.name() == entry_computation_name) { + if (entry_computation == nullptr) { + entry_computation = &computation; + } else { + return InvalidArgument( + "HloProto has multiple computations with entry computation named " + "%s.", + entry_computation_name.c_str()); + } + } + } + if (entry_computation == nullptr) { + return InvalidArgument("HloProto has no entry computation named %s.", + entry_computation_name.c_str()); + } + return entry_computation; +} + +// Returns the root instruction of the given computation proto. +StatusOr GetRootInstruction( + const HloComputationProto& computation) { + if (computation.root_name().empty()) { + return InvalidArgument("Missing root instruction name."); + } + + const HloInstructionProto* root = nullptr; + for (const HloInstructionProto& instruction : computation.instructions()) { + if (instruction.name() == computation.root_name()) { + if (root == nullptr) { + root = &instruction; + } else { + return InvalidArgument( + "Computation has multiple instructions named %s.", + computation.root_name().c_str()); + } + } + } + if (root == nullptr) { + return InvalidArgument("Computation has no instruction named %s.", + computation.root_name().c_str()); + } + return root; +} + +// Returns the parameters of the given computation. Parameter numbers are +// checked for validity and contiguousness. 
+StatusOr> GetParameters( + const HloComputationProto& computation) { + std::vector parameters; + for (const HloInstructionProto& instruction : computation.instructions()) { + if (instruction.opcode() == HloOpcodeString(HloOpcode::kParameter)) { + parameters.push_back(&instruction); + } + } + + // Verify the uniqueness and validity of the parameter numbers. + tensorflow::gtl::FlatSet parameter_numbers; + for (const HloInstructionProto* parameter : parameters) { + if (parameter->parameter_number() < 0 || + parameter->parameter_number() >= parameters.size()) { + return InvalidArgument( + "Parameter instruction %s has invalid parameter number %lld.", + parameter->name().c_str(), parameter->parameter_number()); + } + if (parameter_numbers.count(parameter->parameter_number()) != 0) { + return InvalidArgument( + "Multiple parameter instructions have parameter number %lld.", + parameter->parameter_number()); + } + parameter_numbers.insert(parameter->parameter_number()); + } + + std::sort(parameters.begin(), parameters.end(), + [](const HloInstructionProto* a, const HloInstructionProto* b) { + return a->parameter_number() < b->parameter_number(); + }); + + return parameters; +} + +} // namespace + HloProto MakeHloProto(const HloModule& module, const BufferAssignment& assignment) { HloOrderingProto proto_ordering = @@ -35,4 +139,35 @@ HloProto MakeHloProto(const HloModule& module) { return proto; } +StatusOr> EntryComputationParameterShapes( + const HloProto& hlo_proto) { + TF_ASSIGN_OR_RETURN(const HloComputationProto* entry_computation, + GetEntryComputation(hlo_proto)); + TF_ASSIGN_OR_RETURN(std::vector parameters, + GetParameters(*entry_computation)); + std::vector parameter_shapes; + for (const HloInstructionProto* parameter : parameters) { + if (!parameter->has_shape()) { + return InvalidArgument("Parameter instruction %s is missing shape.", + parameter->name().c_str()); + } + parameter_shapes.push_back(¶meter->shape()); + } + return parameter_shapes; +} + +StatusOr 
EntryComputationOutputShape(const HloProto& hlo_proto) { + TF_ASSIGN_OR_RETURN(const HloComputationProto* entry_computation, + GetEntryComputation(hlo_proto)); + + TF_ASSIGN_OR_RETURN(const HloInstructionProto* root, + GetRootInstruction(*entry_computation)); + if (!root->has_shape()) { + return InvalidArgument("Instruction %s is missing shape.", + root->name().c_str()); + } + + return &root->shape(); +} + } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.h b/tensorflow/compiler/xla/service/hlo_proto_util.h index 320288fdb9..3d9c375cd5 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util.h +++ b/tensorflow/compiler/xla/service/hlo_proto_util.h @@ -35,6 +35,15 @@ HloProto MakeHloProto(const HloModule& module, // will not be included in the output. HloProto MakeHloProto(const HloModule& module); +// Returns the shapes of the parameters of the entry computation. Shape pointers +// refer to shapes inside of the given HloProto. +StatusOr> EntryComputationParameterShapes( + const HloProto& hlo_proto); + +// Returns the shape of the output of the entry computation. The shape pointer +// refers to the output shape inside of the given HloProto. +StatusOr EntryComputationOutputShape(const HloProto& hlo_proto); + } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_PROTO_UTIL_H_ diff --git a/tensorflow/compiler/xla/service/hlo_proto_util_test.cc b/tensorflow/compiler/xla/service/hlo_proto_util_test.cc new file mode 100644 index 0000000000..0c0abf10fa --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_proto_util_test.cc @@ -0,0 +1,161 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_proto_util.h" + +#include "tensorflow/compiler/xla/service/hlo.pb.h" +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/test.h" +#include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/core/lib/strings/str_util.h" + +namespace xla { +namespace { + +class HloProtoUtilTest : public ::testing::Test {}; + +TEST_F(HloProtoUtilTest, ParamsAndOutputShape) { + HloProto hlo_proto; + HloModuleProto* module = hlo_proto.mutable_hlo_module(); + module->set_entry_computation_name("entry"); + HloComputationProto* computation = module->add_computations(); + computation->set_name("entry"); + computation->set_root_name("root"); + + HloInstructionProto* param0 = computation->add_instructions(); + param0->set_opcode(HloOpcodeString(HloOpcode::kParameter)); + param0->set_parameter_number(0); + *param0->mutable_shape() = ShapeUtil::MakeShape(F32, {42}); + + HloInstructionProto* param2 = computation->add_instructions(); + param2->set_opcode(HloOpcodeString(HloOpcode::kParameter)); + param2->set_parameter_number(2); + *param2->mutable_shape() = ShapeUtil::MakeShape(S32, {1, 2, 3}); + + HloInstructionProto* param1 = computation->add_instructions(); + param1->set_opcode(HloOpcodeString(HloOpcode::kParameter)); + param1->set_parameter_number(1); + 
*param1->mutable_shape() = ShapeUtil::MakeShape(F64, {}); + + HloInstructionProto* root = computation->add_instructions(); + root->set_opcode(HloOpcodeString(HloOpcode::kAdd)); + root->set_name("root"); + *root->mutable_shape() = ShapeUtil::MakeShape(U8, {2}); + + VLOG(1) << hlo_proto.DebugString(); + + TF_ASSERT_OK_AND_ASSIGN(std::vector parameter_shapes, + EntryComputationParameterShapes(hlo_proto)); + ASSERT_EQ(parameter_shapes.size(), 3); + EXPECT_TRUE( + ShapeUtil::Equal(*parameter_shapes[0], ShapeUtil::MakeShape(F32, {42}))); + EXPECT_TRUE( + ShapeUtil::Equal(*parameter_shapes[1], ShapeUtil::MakeShape(F64, {}))); + EXPECT_TRUE(ShapeUtil::Equal(*parameter_shapes[2], + ShapeUtil::MakeShape(S32, {1, 2, 3}))); + + TF_ASSERT_OK_AND_ASSIGN(const Shape* output_shape, + EntryComputationOutputShape(hlo_proto)); + EXPECT_TRUE(ShapeUtil::Equal(*output_shape, ShapeUtil::MakeShape(U8, {2}))); +} + +TEST_F(HloProtoUtilTest, ParamsAndOutputShapeNoParameters) { + HloProto hlo_proto; + HloModuleProto* module = hlo_proto.mutable_hlo_module(); + module->set_entry_computation_name("entry"); + HloComputationProto* computation = module->add_computations(); + computation->set_name("entry"); + computation->set_root_name("root"); + + HloInstructionProto* root = computation->add_instructions(); + root->set_opcode(HloOpcodeString(HloOpcode::kAdd)); + root->set_name("root"); + *root->mutable_shape() = ShapeUtil::MakeShape(U8, {2}); + + TF_ASSERT_OK_AND_ASSIGN(std::vector parameter_shapes, + EntryComputationParameterShapes(hlo_proto)); + ASSERT_EQ(parameter_shapes.size(), 0); +} + +TEST_F(HloProtoUtilTest, ParamsAndOutputShapeMissingModule) { + HloProto hlo_proto; + + auto status = EntryComputationParameterShapes(hlo_proto).status(); + ASSERT_FALSE(status.ok()); + ASSERT_THAT(status.error_message(), + ::testing::HasSubstr("missing HloModuleProto")); +} + +TEST_F(HloProtoUtilTest, ParamsAndOutputShapeMissingEntryComputation) { + HloProto hlo_proto; + HloModuleProto* module = 
hlo_proto.mutable_hlo_module(); + module->set_entry_computation_name("entry"); + HloComputationProto* computation = module->add_computations(); + computation->set_name("not_entry"); + + auto status = EntryComputationParameterShapes(hlo_proto).status(); + ASSERT_FALSE(status.ok()); + ASSERT_THAT(status.error_message(), + ::testing::HasSubstr("has no entry computation named")); +} + +TEST_F(HloProtoUtilTest, OutputShapeMissingEntryRoot) { + HloProto hlo_proto; + HloModuleProto* module = hlo_proto.mutable_hlo_module(); + module->set_entry_computation_name("entry"); + HloComputationProto* computation = module->add_computations(); + computation->set_name("entry"); + computation->set_root_name("root"); + + auto status = EntryComputationOutputShape(hlo_proto).status(); + ASSERT_FALSE(status.ok()); + ASSERT_THAT(status.error_message(), + ::testing::HasSubstr("has no instruction named")); +} + +TEST_F(HloProtoUtilTest, ParamsShapesMissingParameterNumbers) { + HloProto hlo_proto; + HloModuleProto* module = hlo_proto.mutable_hlo_module(); + module->set_entry_computation_name("entry"); + HloComputationProto* computation = module->add_computations(); + computation->set_name("entry"); + computation->set_root_name("root"); + + HloInstructionProto* param0 = computation->add_instructions(); + param0->set_opcode(HloOpcodeString(HloOpcode::kParameter)); + param0->set_parameter_number(0); + *param0->mutable_shape() = ShapeUtil::MakeShape(F32, {42}); + + HloInstructionProto* param2 = computation->add_instructions(); + param2->set_opcode(HloOpcodeString(HloOpcode::kParameter)); + param2->set_parameter_number(2); + *param2->mutable_shape() = ShapeUtil::MakeShape(S32, {1, 2, 3}); + + HloInstructionProto* root = computation->add_instructions(); + root->set_opcode(HloOpcodeString(HloOpcode::kAdd)); + root->set_name("root"); + *root->mutable_shape() = ShapeUtil::MakeShape(U8, {2}); + + auto status = EntryComputationParameterShapes(hlo_proto).status(); + ASSERT_FALSE(status.ok()); + 
ASSERT_THAT(status.error_message(), + ::testing::HasSubstr("invalid parameter number")); +} + +} // namespace +} // namespace xla -- GitLab From 355fb5e14b325a1d106c4046f478da4bda350205 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Mon, 5 Mar 2018 13:47:30 -0800 Subject: [PATCH 418/884] Fix the issue where gpu_option is not respected for keras estimator. Set keras backend session with the given config before any get_session call creates a new session. Fix #14776. PiperOrigin-RevId: 187916300 --- .../python/keras/_impl/keras/estimator.py | 6 +++++- .../python/keras/_impl/keras/estimator_test.py | 17 +++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 0bf5bd41dc..5697771a79 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -296,10 +296,14 @@ def model_to_estimator(keras_model=None, 'Given keras model has not been compiled yet. Please compile first ' 'before creating the estimator.') - keras_weights = keras_model.get_weights() keras_model_fn = _create_keras_model_fn(keras_model, custom_objects) est = estimator_lib.Estimator( keras_model_fn, model_dir=model_dir, config=config) + # Pass the config into keras backend's default session. 
+ with session.Session(config=est._session_config) as sess: + K.set_session(sess) + + keras_weights = keras_model.get_weights() # TODO(yifeif): move checkpoint initialization to scaffold.init_fn _save_first_checkpoint(keras_model, est, custom_objects, keras_weights) return est diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py index 88dd14b856..a9de5dd076 100644 --- a/tensorflow/python/keras/_impl/keras/estimator_test.py +++ b/tensorflow/python/keras/_impl/keras/estimator_test.py @@ -24,6 +24,7 @@ import tempfile import numpy as np +from tensorflow.core.protobuf import config_pb2 from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.estimator.inputs import numpy_io from tensorflow.python.framework import test_util @@ -377,6 +378,22 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): keras_model=keras_model, model_dir=tempfile.mkdtemp(dir=self._base_dir)) + def test_gpu_config(self): + keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() + keras_model.compile( + loss='categorical_crossentropy', + optimizer='rmsprop', + metrics=['mse', keras.metrics.categorical_accuracy]) + + gpu_options = config_pb2.GPUOptions(per_process_gpu_memory_fraction=0.3) + sess_config = config_pb2.ConfigProto(gpu_options=gpu_options) + self._config._session_config = sess_config + keras.estimator.model_to_estimator( + keras_model=keras_model, config=self._config) + self.assertEqual(keras.backend.get_session() + ._config.gpu_options.per_process_gpu_memory_fraction, + gpu_options.per_process_gpu_memory_fraction) + if __name__ == '__main__': test.main() -- GitLab From 59348d87a5ef07ae3a7d7b2df822c8f94d49ed22 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Mon, 5 Mar 2018 14:08:37 -0800 Subject: [PATCH 419/884] Disable both "no_mac" and "nomac" tags when building on osx. 
PiperOrigin-RevId: 187919812 --- tensorflow/tools/ci_build/osx/cpu/run_contrib.sh | 2 +- tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh | 2 +- tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh b/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh index 509ee38ec4..5c5a36139f 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_contrib.sh @@ -31,7 +31,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | $PYTHON_BIN_PATH configure.py which bazel -bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac \ +bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac,-no_mac \ --test_timeout 300,450,1200,3600 \ --test_size_filters=small,medium --config=opt \ --jobs=${N_JOBS} --build_tests_only --test_output=errors -k -- \ diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh index 0554713670..338066131b 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_py2_cc_core.sh @@ -31,7 +31,7 @@ export CC_OPT_FLAGS='-mavx' export PYTHON_BIN_PATH=$(which python2) yes "" | $PYTHON_BIN_PATH configure.py which bazel -bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac \ +bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac,-no_mac \ --test_timeout 300,450,1200,3600 --config=opt \ --test_size_filters=small,medium \ --jobs=${N_JOBS} --build_tests_only --test_output=errors -k -- \ diff --git a/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh b/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh index 8f839ca110..920a261ae3 100755 --- a/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh +++ b/tensorflow/tools/ci_build/osx/cpu/run_py3_cc_core.sh @@ -30,7 +30,7 @@ export TF_NEED_CUDA=0 export PYTHON_BIN_PATH=$(which 
python3) yes "" | $PYTHON_BIN_PATH configure.py which bazel -bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac \ +bazel test --test_tag_filters=-no_oss,-gpu,-benchmark-test,-nomac,-no_mac \ --test_timeout 300,450,1200,3600 \ --test_size_filters=small,medium \ --jobs=${N_JOBS} --build_tests_only --test_output=errors -k -- \ -- GitLab From c865d66febe353e922100c43f4a08e77af7db69a Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 5 Mar 2018 14:15:18 -0800 Subject: [PATCH 420/884] Remove debug messages and fix logger --- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 11 +---------- tensorflow/contrib/tensorrt/log/trt_logger.h | 5 +++-- 2 files changed, 4 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 2c79d28678..9bc6e14a53 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2224,7 +2224,6 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { } } // topological order is needed to build TRT network - VLOG(2) << "BUILDING 1"; static int static_id = 0; string subgraph_name_scope; if (!order.empty()) { @@ -2239,11 +2238,9 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { string engine_name = tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op", static_id); static_id++; - VLOG(2) << "BUILDING 2"; auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); auto op_res = new tensorflow::tensorrt::TRTCalibrationResource(); - VLOG(1) << "SAMI Creating calibresource " << calib_op_name << " @ " << op_res; TF_CHECK_OK(op_rmgr->Create(calib_op_name, calib_op_name, op_res)); op_res->logger_ = new tensorflow::tensorrt::Logger(); op_res->builder_ = nvinfer1::createInferBuilder(*(op_res->logger_)); @@ -2253,27 +2250,21 @@ tensorflow::Status 
InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { "failed to create TensorRT builder object"); } - VLOG(2) << "BUILDING 3"; - op_res->network_ = op_res->builder_->createNetwork(); if (!op_res->network_) { return tensorflow::errors::Internal( "failed to create TensorRT network object"); } - VLOG(2) << "BUILDING 4"; - // Build the network auto weight_rmgr = trt_rmgr->getManager("WeightStore"); auto ws = new tensorflow::tensorrt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); Converter converter(op_res->network_, ws, s.precision_mode == 1); - - VLOG(2) << "BUILDING 5"; std::vector input_names; std::vector input_dtypes; for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input!!!!!"; + VLOG(2) << "parsing input. Node id= "<< input.first; int node_id = input.first; int output_idx = input.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.h b/tensorflow/contrib/tensorrt/log/trt_logger.h index 3bd7ce87d1..7f3544f8cf 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.h +++ b/tensorflow/contrib/tensorrt/log/trt_logger.h @@ -27,10 +27,11 @@ namespace tensorrt { // Logger for GIE info/warning/errors class Logger : public nvinfer1::ILogger { - Logger(string name="DefaultLogger"):name_(name){}; - private: + public: + Logger(string name = "DefaultLogger") : name_(name){}; void log(nvinfer1::ILogger::Severity severity, const char* msg) override; + private: string name_; }; -- GitLab From e139cbf91ab416822ce01f5515e9dc230e7294e6 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Mon, 5 Mar 2018 14:14:01 -0800 Subject: [PATCH 421/884] Add sequence_indicator_column PiperOrigin-RevId: 187920673 --- .../feature_column/sequence_feature_column.py | 67 ++++++++-- .../sequence_feature_column_test.py | 126 ++++++++++++++++++ 2 files changed, 181 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index e446043bdd..ba17b568b6 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -184,7 +184,7 @@ def _sequence_embedding_column( ```python watches = sequence_categorical_column_with_identity( 'watches', num_buckets=1000) - watches_embedding = embedding_column(watches, dimension=10) + watches_embedding = _sequence_embedding_column(watches, dimension=10) columns = [watches] features = tf.parse_example(..., features=make_parse_example_spec(columns)) @@ -209,7 +209,7 @@ def _sequence_embedding_column( trainable: Whether or not the embedding is trainable. Default is True. Returns: - A `_SequenceEmbeddingColumn`. + A `_SequenceCategoricalToDenseColumn`. Raises: ValueError: If `categorical_column` is not the right type. @@ -219,7 +219,7 @@ def _sequence_embedding_column( 'categorical_column must be of type _SequenceCategoricalColumn. ' 'Given (type {}): {}'.format( type(categorical_column), categorical_column)) - return _SequenceEmbeddingColumn( + return _SequenceCategoricalToDenseColumn( fc.embedding_column( categorical_column, dimension=dimension, @@ -230,6 +230,48 @@ def _sequence_embedding_column( trainable=trainable)) +# TODO(b/73160931): Merge with indicator_column +def _sequence_indicator_column(categorical_column): + """Returns a feature column that represents sequences of multi-hot tensors. 
+ + Use this to convert sequence categorical data into dense representation for + input to sequence NN, such as RNN. + + Example: + + ```python + colors = sequence_categorical_column_with_vocabulary_list( + key='colors', vocabulary_list=('R', 'G', 'B', 'Y')) + colors_indicator = _sequence_indicator_column(colors) + columns = [colors_indicator] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + categorical_column: A `_SequenceCategoricalColumn` created with a + `sequence_categorical_column_with_*` function. + + Returns: + A `_SequenceCategoricalToDenseColumn`. + + Raises: + ValueError: If `categorical_column` is not the right type. + """ + if not isinstance(categorical_column, _SequenceCategoricalColumn): + raise ValueError( + 'categorical_column must be of type _SequenceCategoricalColumn. 
' + 'Given (type {}): {}'.format( + type(categorical_column), categorical_column)) + return _SequenceCategoricalToDenseColumn( + fc.indicator_column(categorical_column)) + + def sequence_numeric_column( key, shape=(1,), @@ -358,33 +400,34 @@ class _SequenceCategoricalColumn( return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) -class _SequenceEmbeddingColumn( +class _SequenceCategoricalToDenseColumn( _SequenceDenseColumn, - collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): - """Represents sequences of embeddings.""" + collections.namedtuple( + '_SequenceCategoricalToDenseColumn', ['dense_column'])): + """Densifies a _SequenceCategoricalColumn using the specified column.""" @property def name(self): - return self.embedding_column.name + return self.dense_column.name @property def _parse_example_spec(self): - return self.embedding_column._parse_example_spec + return self.dense_column._parse_example_spec def _transform_feature(self, inputs): - return self.embedding_column._transform_feature(inputs) + return self.dense_column._transform_feature(inputs) @property def _variable_shape(self): - return self.embedding_column._variable_shape + return self.dense_column._variable_shape def _get_sequence_dense_tensor( self, inputs, weight_collections=None, trainable=None): - dense_tensor = self.embedding_column._get_dense_tensor( + dense_tensor = self.dense_column._get_dense_tensor( inputs=inputs, weight_collections=weight_collections, trainable=trainable) - sequence_length = self.embedding_column.categorical_column._sequence_length( + sequence_length = self.dense_column.categorical_column._sequence_length( inputs) return _SequenceDenseColumn.TensorSequenceLengthPair( dense_tensor=dense_tensor, sequence_length=sequence_length) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 
105213680e..39caa602d9 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -106,6 +106,49 @@ class SequenceInputLayerTest(test.TestCase): self.assertAllEqual( expected_sequence_length, sequence_length.eval(session=sess)) + def test_indicator_column(self): + vocabulary_size_a = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + vocabulary_size_b = 2 + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [1, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 1, 0), + dense_shape=(2, 2)) + + expected_input_layer = [ + # example 0, ids_a [2], ids_b [1] + [[0., 0., 1., 0., 1.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [1, 0] + [[1., 0., 0., 0., 1.], [0., 1., 0., 1., 0.]], + ] + expected_sequence_length = [1, 2] + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size_a) + indicator_column_a = sfc._sequence_indicator_column(categorical_column_a) + categorical_column_b = sfc.sequence_categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size_b) + indicator_column_b = sfc._sequence_indicator_column(categorical_column_b) + input_layer, sequence_length = sfc.sequence_input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + }, + # Test that columns are reordered alphabetically. 
+ feature_columns=[indicator_column_b, indicator_column_a]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + def test_numeric_column(self): sparse_input = sparse_tensor.SparseTensorValue( # example 0, values [[0.], [1]] @@ -344,6 +387,89 @@ class SequenceEmbeddingColumnTest(test.TestCase): expected_sequence_length, sequence_length.eval(session=sess)) +class SequenceIndicatorColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)) + + expected_lookups = [ + # example 0, ids [2] + [[0., 0., 1.], [0., 0., 0.]], + # example 1, ids [0, 1] + [[1., 0., 0.], [0., 1., 0.]], + # example 2, ids [] + [[0., 0., 0.], [0., 0., 0.]], + # example 3, ids [1] + [[0., 1., 0.], [0., 0., 0.]], + ] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + indicator_column = sfc._sequence_indicator_column(categorical_column) + + indicator_tensor, _ = indicator_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_lookups, indicator_tensor.eval(session=sess)) + + def test_sequence_length(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + indicator_column = 
sfc._sequence_indicator_column(categorical_column) + + _, sequence_length = indicator_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + sequence_length = sess.run(sequence_length) + self.assertAllEqual(expected_sequence_length, sequence_length) + self.assertEqual(np.int64, sequence_length.dtype) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some examples do not have ids.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [] + # example 1, ids [2] + # example 2, ids [0, 1] + # example 3, ids [] + # example 4, ids [1] + # example 5, ids [] + indices=((1, 0), (2, 0), (2, 1), (4, 0)), + values=(2, 0, 1, 1), + dense_shape=(6, 2)) + expected_sequence_length = [0, 1, 2, 0, 1, 0] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + indicator_column = sfc._sequence_indicator_column(categorical_column) + + _, sequence_length = indicator_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + class SequenceNumericColumnTest(test.TestCase): def test_get_sequence_dense_tensor(self): -- GitLab From 09d8393677df6e383d3c54783697ee78d6f072a7 Mon Sep 17 00:00:00 2001 From: Jie Date: Mon, 5 Mar 2018 14:22:31 -0800 Subject: [PATCH 422/884] remove cudaSetDevice with tensorflow ScopedActivateExecutorContext --- .../contrib/tensorrt/kernels/trt_engine_op.cc | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 24ebf75264..445b2bdbde 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ 
-18,6 +18,7 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" +#include "tensorflow/stream_executor/cuda/cuda_activation.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT @@ -40,13 +41,20 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("input_nodes", &input_nodes_)); OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_)); - // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same - // gpu where the input/output is also located. + // TODO(samikama) runtime should be taken from a resourcemanager as well. + // Only engine should be in the op and context and runtime should be taken + // from resourcemanager + // TODO(jie): Relying on TF scheme to limit gpu scope for device placement + // cannot have dependency on //tensorflow/core:gpu_runtime + // Copied the function here. int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; - cudaSetDevice(gpu_id); - int device; - cudaGetDevice(&device); - if (gpu_id != device) LOG(FATAL) << "set device failed!"; + auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA"); + if (!result.ok()) { + LOG(FATAL) << "Could not find Platform with name CUDA"; + } + gpu::Platform* gpu_machine_manager = result.ValueOrDie(); + gpu::cuda::ScopedActivateExecutorContext scoped_activation{ + gpu_machine_manager->ExecutorForDevice(gpu_id).ValueOrDie()}; // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken -- GitLab From c79c9512486daa119d3cda9c00bb36acb3933a5b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 14:31:59 -0800 Subject: [PATCH 423/884] Add alternative paths for CUDA installation. This detects negativo17's CUDA packages for Fedora.
PiperOrigin-RevId: 187923472 --- third_party/gpus/cuda_configure.bzl | 177 +++++++++++++++++++--------- 1 file changed, 124 insertions(+), 53 deletions(-) diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index b7c47a19dd..6c9c128db6 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -38,6 +38,64 @@ _DEFAULT_CUDA_TOOLKIT_PATH = "/usr/local/cuda" _DEFAULT_CUDNN_INSTALL_PATH = "/usr/local/cuda" _DEFAULT_CUDA_COMPUTE_CAPABILITIES = ["3.5", "5.2"] +# Lookup paths for CUDA / cuDNN libraries, relative to the install directories. +# +# Paths will be tried out in the order listed below. The first successful path +# will be used. For example, when looking for the cudart libraries, the first +# attempt will be lib64/cudart inside the CUDA toolkit. +CUDA_LIB_PATHS = [ + "lib64/", + "lib64/stubs/", + "lib/x86_64-linux-gnu/", + "lib/x64/", + "lib/", + "", +] + +# Lookup paths for cupti.h, relative to the CUDA toolkit directory. +# +# On most systems, the cupti library is not installed in the same directory as +# the other CUDA libraries but rather in a special extras/CUPTI directory. +CUPTI_HEADER_PATHS = [ + "extras/CUPTI/include/", + "include/cuda/CUPTI/", +] + +# Lookup paths for the cupti library, relative to the CUDA toolkit directory. +# +# On most systems, the cupti library is not installed in the same directory as +# the other CUDA libraries but rather in a special extras/CUPTI directory. +CUPTI_LIB_PATHS = [ + "extras/CUPTI/lib64/", + "lib/x86_64-linux-gnu", + "lib64/", + "extras/CUPTI/libx64/", + "extras/CUPTI/lib/", + "lib/", +] + +# Lookup paths for CUDA headers (cuda.h) relative to the CUDA toolkit directory. +CUDA_INCLUDE_PATHS = [ + "include/", + "include/cuda/" +] + +# Lookup paths for cudnn.h relative to the CUDNN install directory. +CUDNN_INCLUDE_PATHS = [ + "", + "include/", + "include/cuda/", +] + +# Lookup paths for NVVM libdevice relative to the CUDA toolkit directory.
+# +# libdevice implements mathematical functions for GPU kernels, and is provided +# in NVVM bitcode (a subset of LLVM bitcode). +NVVM_LIBDEVICE_PATHS = [ + "nvvm/libdevice/", + "share/cuda/", +] + load(":download_clang.bzl", "download_clang") # TODO(dzc): Once these functions have been factored out of Bazel's @@ -522,31 +580,31 @@ def _find_cuda_lib(lib, repository_ctx, cpu_value, basedir, version="", path: The full path to the library. """ file_name = _lib_name(lib, cpu_value, version, static) - if cpu_value == "Linux": - path = repository_ctx.path("%s/lib64/%s" % (basedir, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - path = repository_ctx.path("%s/lib64/stubs/%s" % (basedir, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - path = repository_ctx.path( - "%s/lib/x86_64-linux-gnu/%s" % (basedir, file_name)) + for relative_path in CUDA_LIB_PATHS: + path = repository_ctx.path("%s/%s%s" % (basedir, relative_path, file_name)) if path.exists: return struct(file_name=file_name, path=str(path.realpath)) + auto_configure_fail("Cannot find cuda library %s" % file_name) - elif cpu_value == "Windows": - path = repository_ctx.path("%s/lib/x64/%s" % (basedir, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - path = repository_ctx.path("%s/lib/%s" % (basedir, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - path = repository_ctx.path("%s/%s" % (basedir, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) +def _find_cupti_header_dir(repository_ctx, cuda_config): + """Returns the path to the directory containing cupti.h - auto_configure_fail("Cannot find cuda library %s" % file_name) + On most systems, the cupti library is not installed in the same directory as + the other CUDA libraries but rather in a special extras/CUPTI directory. 
+ + Args: + repository_ctx: The repository context. + cuda_config: The CUDA config as returned by _get_cuda_config + + Returns: + The path of the directory containing the cupti header. + """ + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for relative_path in CUPTI_HEADER_PATHS: + if repository_ctx.path("%s/%scupti.h" % (cuda_toolkit_path, relative_path)).exists: + return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] + auto_configure_fail("Cannot find cupti.h under %s" % cuda_toolkit_path) def _find_cupti_lib(repository_ctx, cuda_config): @@ -566,35 +624,13 @@ def _find_cupti_lib(repository_ctx, cuda_config): """ file_name = _lib_name("cupti", cuda_config.cpu_value, cuda_config.cuda_version) - if cuda_config.cpu_value == "Linux": - path = repository_ctx.path( - "%s/extras/CUPTI/lib64/%s" % (cuda_config.cuda_toolkit_path, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - - path = repository_ctx.path( - "%s/lib/x86_64-linux-gnu/%s" % (cuda_config.cuda_toolkit_path, - file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - - elif cuda_config.cpu_value == "Windows": + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for relative_path in CUPTI_LIB_PATHS: path = repository_ctx.path( - "%s/extras/CUPTI/libx64/%s" % - (cuda_config.cuda_toolkit_path, file_name)) + "%s/%s%s" % (cuda_toolkit_path, relative_path, file_name)) if path.exists: return struct(file_name=file_name, path=str(path.realpath)) - path = repository_ctx.path( - "%s/extras/CUPTI/lib/%s" % (cuda_config.cuda_toolkit_path, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - - path = repository_ctx.path( - "%s/lib/%s" % (cuda_config.cuda_toolkit_path, file_name)) - if path.exists: - return struct(file_name=file_name, path=str(path.realpath)) - auto_configure_fail("Cannot find cupti library %s" % file_name) def _find_libs(repository_ctx, cuda_config): @@ -635,6 +671,23 @@ def 
_find_libs(repository_ctx, cuda_config): } +def _find_cuda_include_path(repository_ctx, cuda_config): + """Returns the path to the directory containing cuda.h + + Args: + repository_ctx: The repository context. + cuda_config: The CUDA config as returned by _get_cuda_config + + Returns: + The path of the directory containing the CUDA headers. + """ + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for relative_path in CUDA_INCLUDE_PATHS: + if repository_ctx.path("%s/%scuda.h" % (cuda_toolkit_path, relative_path)).exists: + return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] + auto_configure_fail("Cannot find cuda.h under %s" % cuda_toolkit_path) + + def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir): """Returns the path to the directory containing cudnn.h @@ -646,15 +699,31 @@ def _find_cudnn_header_dir(repository_ctx, cudnn_install_basedir): Returns: The path of the directory containing the cudnn header. """ - if repository_ctx.path(cudnn_install_basedir + "/cudnn.h").exists: - return cudnn_install_basedir - if repository_ctx.path(cudnn_install_basedir + "/include/cudnn.h").exists: - return cudnn_install_basedir + "/include" + for relative_path in CUDA_INCLUDE_PATHS: + if repository_ctx.path("%s/%scudnn.h" % (cudnn_install_basedir, relative_path)).exists: + return ("%s/%s" % (cudnn_install_basedir, relative_path))[:-1] if repository_ctx.path("/usr/include/cudnn.h").exists: return "/usr/include" auto_configure_fail("Cannot find cudnn.h under %s" % cudnn_install_basedir) +def _find_nvvm_libdevice_dir(repository_ctx, cuda_config): + """Returns the path to the directory containing libdevice in bitcode format. + + Args: + repository_ctx: The repository context. + cuda_config: The CUDA config as returned by _get_cuda_config + + Returns: + The path of the directory containing the libdevice bitcode.
+ """ + cuda_toolkit_path = cuda_config.cuda_toolkit_path + for relative_path in NVVM_LIBDEVICE_PATHS: + if repository_ctx.path("%s/%slibdevice.10.bc" % (cuda_toolkit_path, relative_path)).exists: + return ("%s/%s" % (cuda_toolkit_path, relative_path))[:-1] + auto_configure_fail("Cannot find libdevice.10.bc under %s" % cuda_toolkit_path) + + def _cudart_static_linkopt(cpu_value): """Returns additional platform-specific linkopts for cudart.""" return "" if cpu_value == "Darwin" else "\"-lrt\"," @@ -925,21 +994,22 @@ def _create_local_cuda_repository(repository_ctx): """Creates the repository containing files set up to build with CUDA.""" cuda_config = _get_cuda_config(repository_ctx) + cuda_include_path = _find_cuda_include_path(repository_ctx, cuda_config) cudnn_header_dir = _find_cudnn_header_dir(repository_ctx, cuda_config.cudnn_install_basedir) + cupti_header_dir = _find_cupti_header_dir(repository_ctx, cuda_config) + nvvm_libdevice_dir = _find_nvvm_libdevice_dir(repository_ctx, cuda_config) # Set up symbolic links for the cuda toolkit by creating genrules to do # symlinking. 
We create one genrule for each directory we want to track under # cuda_toolkit_path cuda_toolkit_path = cuda_config.cuda_toolkit_path - cuda_include_path = cuda_toolkit_path + "/include" genrules = [symlink_genrule_for_dir(repository_ctx, cuda_include_path, "cuda/include", "cuda-include")] genrules.append(symlink_genrule_for_dir(repository_ctx, - cuda_toolkit_path + "/nvvm", "cuda/nvvm", "cuda-nvvm")) + nvvm_libdevice_dir, "cuda/nvvm/libdevice", "cuda-nvvm")) genrules.append(symlink_genrule_for_dir(repository_ctx, - cuda_toolkit_path + "/extras/CUPTI/include", - "cuda/extras/CUPTI/include", "cuda-extras")) + cupti_header_dir, "cuda/extras/CUPTI/include", "cuda-extras")) cuda_libs = _find_libs(repository_ctx, cuda_config) cuda_lib_src = [] @@ -1086,6 +1156,7 @@ cuda_configure = repository_rule( _TF_CUDNN_VERSION, _TF_CUDA_COMPUTE_CAPABILITIES, _TF_CUDA_CONFIG_REPO, + "NVVMIR_LIBRARY_DIR", ], ) -- GitLab From 0e9289489f9dac926b7de5eae47417daad6d626f Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Mon, 5 Mar 2018 14:33:22 -0800 Subject: [PATCH 424/884] [XLA] Make HloEvaluator use wrap-around semantics for DynamicUpdateSlice. 
PiperOrigin-RevId: 187923671 --- .../compiler/xla/service/hlo_evaluator.cc | 21 +++++++++++++------ tensorflow/compiler/xla/tests/BUILD | 3 +++ 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 42de7ada61..534433be7b 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -1970,17 +1970,26 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault { StatusOr> DynamicUpdateSlice( const Literal& operand_literal, const Literal& update_literal, const Literal& start_indices_literal) { - auto start_indices_typed = start_indices_literal.data(); - const std::vector start(start_indices_typed.begin(), - start_indices_typed.end()); - auto result = operand_literal.CloneToUnique(); - std::vector result_index(ShapeUtil::Rank(result->shape()), 0); + auto start_indices_typed = start_indices_literal.data(); + const auto rank = ShapeUtil::Rank(result->shape()); + std::vector start(rank, 0); + for (int64 i = 0; i < rank; ++i) { + // All other implementations currently wrap-around the index, so this + // should do so as well. + start[i] = (start_indices_typed[i] % result->shape().dimensions(i)); + start[i] += (start[i] < 0) * result->shape().dimensions(i); + } + std::vector result_index(rank, 0); auto func = [&](ArraySlice update_index) { std::transform(update_index.begin(), update_index.end(), start.begin(), result_index.begin(), std::plus()); - + // Same as above, wrap-around only to match other implementations' + // semantics. 
+ std::transform(result_index.begin(), result_index.end(), + result->shape().dimensions().begin(), result_index.begin(), + std::modulus()); result->Set(result_index, update_literal.Get(update_index)); return true; diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 7c95b03a67..1b2008accd 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -948,6 +948,9 @@ xla_test( name = "dynamic_ops_test", timeout = "moderate", srcs = ["dynamic_ops_test.cc"], + tags = [ + "enable_for_xla_interpreter", + ], deps = [ "//tensorflow/compiler/xla:array2d", "//tensorflow/compiler/xla:reference_util", -- GitLab From 06c7a190ac122512edf7229041f34391d8993da0 Mon Sep 17 00:00:00 2001 From: Tatiana Shpeisman Date: Mon, 5 Mar 2018 14:40:46 -0800 Subject: [PATCH 425/884] Adds test_util.IsMklEnabled() that returns true if TensorFlow has been built with MKL support. Fixes the failure of tensorflow/python/tools:print_selective_registration_header_test by using 'Mkl' prefix for MatMul op name when MKL is enabled. 
PiperOrigin-RevId: 187925038 --- tensorflow/core/util/port.cc | 7 +++++++ tensorflow/core/util/port.h | 3 +++ tensorflow/python/framework/test_util.py | 5 ++++- tensorflow/python/framework/test_util_test.py | 8 ++++++++ .../print_selective_registration_header_test.py | 15 +++++++++++---- tensorflow/python/util/port.i | 1 + 6 files changed, 34 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/util/port.cc b/tensorflow/core/util/port.cc index d93b971f85..490c584dc5 100644 --- a/tensorflow/core/util/port.cc +++ b/tensorflow/core/util/port.cc @@ -39,4 +39,11 @@ bool CudaSupportsHalfMatMulAndConv() { #endif } +bool IsMklEnabled() { +#ifdef INTEL_MKL + return true; +#else + return false; +#endif +} } // end namespace tensorflow diff --git a/tensorflow/core/util/port.h b/tensorflow/core/util/port.h index ed65341711..981def9d22 100644 --- a/tensorflow/core/util/port.h +++ b/tensorflow/core/util/port.h @@ -25,6 +25,9 @@ bool IsGoogleCudaEnabled(); // half-precision matrix multiplications and convolution operations. 
bool CudaSupportsHalfMatMulAndConv(); +// Returns true if INTEL_MKL is defined +bool IsMklEnabled(); + } // end namespace tensorflow #endif // TENSORFLOW_UTIL_PORT_H_ diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index aabf89a234..78252e4518 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -200,11 +200,14 @@ def _strip_checkpoint_v2_randomized(graph_def): def IsGoogleCudaEnabled(): return pywrap_tensorflow.IsGoogleCudaEnabled() - def CudaSupportsHalfMatMulAndConv(): return pywrap_tensorflow.CudaSupportsHalfMatMulAndConv() +def IsMklEnabled(): + return pywrap_tensorflow.IsMklEnabled() + + def InstallStackTraceHandler(): pywrap_tensorflow.InstallStacktraceHandler() diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index a717eb3951..20d816050f 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -82,6 +82,14 @@ class TestUtilTest(test_util.TensorFlowTestCase): else: print("GoogleCuda is disabled") + def testIsMklEnabled(self): + # This test doesn't assert anything. + # It ensures the py wrapper function is generated correctly. 
+ if test_util.IsMklEnabled(): + print("MKL is enabled") + else: + print("MKL is disabled") + def testAssertProtoEqualsStr(self): graph_str = "node { name: 'w1' op: 'params' }" diff --git a/tensorflow/python/tools/print_selective_registration_header_test.py b/tensorflow/python/tools/print_selective_registration_header_test.py index 36978b0860..4b3d98242c 100644 --- a/tensorflow/python/tools/print_selective_registration_header_test.py +++ b/tensorflow/python/tools/print_selective_registration_header_test.py @@ -24,6 +24,7 @@ import sys from google.protobuf import text_format from tensorflow.core.framework import graph_pb2 +from tensorflow.python.framework import test_util from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.tools import selective_registration_header_lib @@ -93,11 +94,16 @@ class PrintOpFilegroupTest(test.TestCase): ops_and_kernels = selective_registration_header_lib.get_ops_and_kernels( 'rawproto', self.WriteGraphFiles(graphs), default_ops) + matmul_prefix = '' + if test_util.IsMklEnabled(): + matmul_prefix = 'Mkl' + self.assertListEqual( [ ('BiasAdd', 'BiasOp'), # - ('MatMul', 'MatMulOp'), # - ('MatMul', 'MatMulOp'), # + ('MatMul', + matmul_prefix + 'MatMulOp'), # + ('MatMul', matmul_prefix + 'MatMulOp'), # ('NoOp', 'NoOp'), # ('Reshape', 'ReshapeOp'), # ('_Recv', 'RecvOp'), # @@ -112,8 +118,9 @@ class PrintOpFilegroupTest(test.TestCase): self.assertListEqual( [ ('BiasAdd', 'BiasOp'), # - ('MatMul', 'MatMulOp'), # - ('MatMul', 'MatMulOp'), # + ('MatMul', + matmul_prefix + 'MatMulOp'), # + ('MatMul', matmul_prefix + 'MatMulOp'), # ('NoOp', 'NoOp'), # ('Reshape', 'ReshapeOp'), # ('_Recv', 'RecvOp'), # diff --git a/tensorflow/python/util/port.i b/tensorflow/python/util/port.i index cea4d8468a..2f730732be 100644 --- a/tensorflow/python/util/port.i +++ b/tensorflow/python/util/port.i @@ -23,5 +23,6 @@ limitations under the License. 
%unignore tensorflow; %unignore tensorflow::IsGoogleCudaEnabled; %unignore tensorflow::CudaSupportsHalfMatMulAndConv; +%unignore tensorflow::IsMklEnabled; %include "tensorflow/core/util/port.h" %unignoreall -- GitLab From 5279cf29cea96b3ec50df506bb51d8ffabdabac9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 14:45:28 -0800 Subject: [PATCH 426/884] Correct op::Attr usage in C++ gradient implementations. Also enabled TF_MUST_USE_RESULT for the generated Attr API, so we can catch any new errors early. Fixes #17360 PiperOrigin-RevId: 187925761 --- tensorflow/cc/framework/cc_op_gen.cc | 3 +- tensorflow/cc/gradients/nn_grad.cc | 59 ++++++++++------------------ 2 files changed, 23 insertions(+), 39 deletions(-) diff --git a/tensorflow/cc/framework/cc_op_gen.cc b/tensorflow/cc/framework/cc_op_gen.cc index a40ad1ffc3..39893f5ccd 100644 --- a/tensorflow/cc/framework/cc_op_gen.cc +++ b/tensorflow/cc/framework/cc_op_gen.cc @@ -697,7 +697,8 @@ string OpInfo::GetOpAttrStruct() const { attr_comment = MakeComment(attr_comment, " "); strings::StrAppend(&setters, attr_comment); - strings::StrAppend(&setters, " Attrs ", attr_func_def, " x) {\n"); + strings::StrAppend(&setters, " TF_MUST_USE_RESULT Attrs ", attr_func_def, + " x) {\n"); strings::StrAppend(&setters, " Attrs ret = *this;\n"); strings::StrAppend(&setters, " ret.", api_def_attr.rename_to(), "_ = x;\n"); diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 13a3bba5e6..9b732421e5 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -48,8 +48,8 @@ Status SoftmaxGrad(const Scope& scope, const Operation& op, REGISTER_GRADIENT_OP("Softmax", SoftmaxGrad); Status LogSoftmaxGrad(const Scope& scope, const Operation& op, - const std::vector& grad_inputs, - std::vector* grad_outputs) { + const std::vector& grad_inputs, + std::vector* grad_outputs) { auto softmax = Exp(scope, op.output(0)); auto sum = Sum(scope, grad_inputs[0], 
{1}, Sum::KeepDims(true)); auto mul = Mul(scope, sum, softmax); @@ -107,11 +107,10 @@ Status BiasAddGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { string data_format; - BiasAddGrad::Attrs input_attrs; TF_RETURN_IF_ERROR( GetNodeAttr(op.output(0).node()->attrs(), "data_format", &data_format)); - input_attrs.DataFormat(data_format); - auto dx_1 = BiasAddGrad(scope, grad_inputs[0], input_attrs); + auto dx_1 = + BiasAddGrad(scope, grad_inputs[0], BiasAddGrad::DataFormat(data_format)); grad_outputs->push_back(Identity(scope, grad_inputs[0])); grad_outputs->push_back(dx_1); return scope.status(); @@ -130,19 +129,16 @@ Status Conv2DGrad(const Scope& scope, const Operation& op, TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "use_cudnn_on_gpu", &use_cudnn_on_gpu)); - Conv2DBackpropInput::Attrs input_attrs; - input_attrs.DataFormat(data_format); - input_attrs.UseCudnnOnGpu(use_cudnn_on_gpu); - auto dx_1 = Conv2DBackpropInput(scope, Shape(scope, op.input(0)), - op.input(1), grad_inputs[0], - strides, padding, input_attrs); + auto dx_1 = Conv2DBackpropInput(scope, Shape(scope, op.input(0)), op.input(1), + grad_inputs[0], strides, padding, + Conv2DBackpropInput::DataFormat(data_format) + .UseCudnnOnGpu(use_cudnn_on_gpu)); grad_outputs->push_back(dx_1); - Conv2DBackpropFilter::Attrs filter_attrs; - filter_attrs.DataFormat(data_format); - filter_attrs.UseCudnnOnGpu(use_cudnn_on_gpu); - auto dx_2 = Conv2DBackpropFilter(scope, op.input(0), - Shape(scope, op.input(1)), grad_inputs[0], - strides, padding, filter_attrs); + auto dx_2 = + Conv2DBackpropFilter(scope, op.input(0), Shape(scope, op.input(1)), + grad_inputs[0], strides, padding, + Conv2DBackpropFilter::DataFormat(data_format) + .UseCudnnOnGpu(use_cudnn_on_gpu)); grad_outputs->push_back(dx_2); return scope.status(); } @@ -160,13 +156,9 @@ Status 
MaxPoolGradHelper(const Scope& scope, const Operation& op, TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "ksize", &ksize)); TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); - internal::MaxPoolGrad::Attrs grad_attrs; - grad_attrs.DataFormat(data_format); - auto dx = internal::MaxPoolGrad(scope, op.input(0), - op.output(0), - grad_inputs[0], - ksize, strides, - padding, grad_attrs); + auto dx = internal::MaxPoolGrad( + scope, op.input(0), op.output(0), grad_inputs[0], ksize, strides, padding, + internal::MaxPoolGrad::DataFormat(data_format)); grad_outputs->push_back(dx); return scope.status(); } @@ -180,15 +172,9 @@ Status MaxPoolGradV2Helper(const Scope& scope, const Operation& op, auto attrs = op.output(0).node()->attrs(); TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "data_format", &data_format)); TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); - MaxPoolGradV2::Attrs grad_attrs; - grad_attrs.DataFormat(data_format); - auto dx = MaxPoolGradV2(scope, op.input(0), - op.output(0), - grad_inputs[0], - op.input(1), - op.input(2), - padding, - grad_attrs); + auto dx = MaxPoolGradV2(scope, op.input(0), op.output(0), grad_inputs[0], + op.input(1), op.input(2), padding, + MaxPoolGradV2::DataFormat(data_format)); grad_outputs->push_back(dx); grad_outputs->push_back(NoGradient()); grad_outputs->push_back(NoGradient()); @@ -198,11 +184,8 @@ REGISTER_GRADIENT_OP("MaxPoolV2", MaxPoolGradV2Helper); Status LRNGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, - std::vector* grad_outputs){ - internal::LRNGrad::Attrs grad_attrs; - - auto dx = internal::LRNGrad(scope, grad_inputs[0], op.input(0), op.output(0), - grad_attrs); + std::vector* grad_outputs) { + auto dx = internal::LRNGrad(scope, grad_inputs[0], op.input(0), op.output(0)); grad_outputs->push_back(dx); return scope.status(); } -- GitLab From 1f2868a30998f8eee85677017118bcbd64f1765f Mon Sep 17 00:00:00 2001 From: Sami Kama 
Date: Mon, 5 Mar 2018 14:56:20 -0800 Subject: [PATCH 427/884] Change variable naming --- .../contrib/tensorrt/convert/convert_nodes.cc | 84 +++++++++---------- .../contrib/tensorrt/convert/convert_nodes.h | 34 ++++---- 2 files changed, 59 insertions(+), 59 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 9bc6e14a53..422ef67953 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -1396,30 +1396,30 @@ tensorflow::Status ConvertConst(Converter& ctx, } } if (ctx.isFP16()) { - auto dtypeNew = tensorflow::DataType::DT_HALF; - size_t lenData = tensorflow::DataTypeSize(dtypeNew); + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); for (int i = 0; i < scalar_shape.nbDims; i++) - lenData *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(lenData)); + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); auto half_tensor = temp_tensor.flat(); Eigen::DefaultDevice defd; half_tensor.device(defd) = tensor.flat().template cast(); - memcpy(dst, half_tensor.data(), lenData); // store into weight store - weights = TRT_ShapedWeights(dtypeNew, dst, scalar_shape); + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); } else { - size_t lenData = tensorflow::DataTypeSize(dtype); + size_t len_data = tensorflow::DataTypeSize(dtype); for (int i = 0; i < scalar_shape.nbDims; i++) - lenData *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(lenData)); + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); void* dst = 
static_cast(&(ctx.weight_store()->store_.back()[0])); std::vector tensor_data( weights_tensor.float_val().begin(), weights_tensor.float_val() .end()); // make a local copy first to flatten - memcpy(dst, tensor_data.data(), lenData); // store into weight store + memcpy(dst, tensor_data.data(), len_data); // store into weight store weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } } else if (!weights_tensor.int_val().empty()) { @@ -1452,11 +1452,11 @@ tensorflow::Status ConvertConst(Converter& ctx, } } if (ctx.isFP16()) { - auto dtypeNew = tensorflow::DataType::DT_HALF; - size_t lenData = tensorflow::DataTypeSize(dtypeNew); + auto dtype_new = tensorflow::DataType::DT_HALF; + size_t len_data = tensorflow::DataTypeSize(dtype_new); for (int i = 0; i < scalar_shape.nbDims; i++) - lenData *= scalar_shape.d[i]; - ctx.weight_store()->store_.push_back(std::vector(lenData)); + len_data *= scalar_shape.d[i]; + ctx.weight_store()->store_.push_back(std::vector(len_data)); void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); tensorflow::Tensor temp_tensor(tensorflow::DT_HALF, tensor.shape()); TTypes::Flat half_tensor = temp_tensor.flat(); @@ -1488,22 +1488,22 @@ tensorflow::Status ConvertConst(Converter& ctx, " for FP16 conversion"); break; }; - memcpy(dst, half_tensor.data(), lenData); // store into weight store - weights = TRT_ShapedWeights(dtypeNew, dst, scalar_shape); + memcpy(dst, half_tensor.data(), len_data); // store into weight store + weights = TRT_ShapedWeights(dtype_new, dst, scalar_shape); } else { - size_t lenData = tensorflow::DataTypeSize(dtype); + size_t len_data = tensorflow::DataTypeSize(dtype); for (int i = 0; i < scalar_shape.nbDims; i++) - lenData *= scalar_shape.d[i]; - size_t lenTensor = weights_tensor.int_val_size() * sizeof(int32); - lenData = std::max(lenData, lenTensor); - ctx.weight_store()->store_.push_back(std::vector(lenData)); + len_data *= scalar_shape.d[i]; + size_t len_tensor = weights_tensor.int_val_size() * sizeof(int32); 
+ len_data = std::max(len_data, len_tensor); + ctx.weight_store()->store_.push_back(std::vector(len_data)); void* dst = static_cast(&(ctx.weight_store()->store_.back()[0])); std::vector tensor_data( weights_tensor.int_val().begin(), weights_tensor.int_val() .end()); // make a local copy first to flatten // doesn't have to be contigous - memcpy(dst, tensor_data.data(), lenTensor); // store into weight store + memcpy(dst, tensor_data.data(), len_tensor); // store into weight store weights = TRT_ShapedWeights(dtype, dst, scalar_shape); } } else if (!weights_tensor.tensor_content().empty()) { @@ -2028,13 +2028,13 @@ tensorflow::Status ConvertReshape( nvinfer1::IShuffleLayer* layer = ctx.network()->addShuffle(*const_cast(tensor)); - nvinfer1::Dims reshapeDims; + nvinfer1::Dims reshape_dims; VLOG(2) << "new dimension: " << shape_num_dims - 1; - reshapeDims.nbDims = shape_num_dims - 1; - for (int32_t i = 0; i < reshapeDims.nbDims; ++i) { - reshapeDims.d[i] = shape_data[i + 1]; + reshape_dims.nbDims = shape_num_dims - 1; + for (int32_t i = 0; i < reshape_dims.nbDims; ++i) { + reshape_dims.d[i] = shape_data[i + 1]; } - layer->setReshapeDimensions(reshapeDims); + layer->setReshapeDimensions(reshape_dims); VLOG(2) << "new dimension: " << shape_num_dims - 1; nvinfer1::ITensor* output_tensor = layer->getOutput(0); @@ -2096,35 +2096,35 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( const auto node_id = tensorflow::str_util::Split(res_name, "_"); engine_name += node_id.back(); } - std::map nodeMaps; + std::map node_maps; for (auto n : graph.op_nodes()) { - nodeMaps.insert({n->name(), n}); + node_maps.insert({n->name(), n}); } VLOG(1) << "Output Nodes:"; std::vector out_types; std::vector out_edges; for (auto& i : output_nodes) { auto node_port = tensorflow::str_util::Split(i, ":"); - VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); + VLOG(1) << " " << i << " in graph " << node_maps.count(i); auto out_node_name = node_port.at(0); if (node_port.size() > 1) { 
VLOG(1) << "Multi port output" << node_port.at(0) << " " << node_port.at(1) << " size=" << node_port.size(); } - auto nodeIt = nodeMaps.find(out_node_name); - if (nodeIt != nodeMaps.end()) { - tensorflow::Node* outNode = nodeIt->second; + auto node_it = node_maps.find(out_node_name); + if (node_it != node_maps.end()) { + tensorflow::Node* out_node = node_it->second; int port = 0; if (node_port.size() == 2) { port = std::strtoul(node_port.at(1).c_str(), nullptr, 10); - out_types.push_back(outNode->output_type(port)); + out_types.push_back(out_node->output_type(port)); } else { - out_types.push_back(outNode->output_type(0)); + out_types.push_back(out_node->output_type(0)); } - for (auto outEdge : outNode->out_edges()) { - if (outEdge->src_output() == port) { - out_edges.push_back(outEdge); + for (auto out_edge : out_node->out_edges()) { + if (out_edge->src_output() == port) { + out_edges.push_back(out_edge); break; } } @@ -2134,7 +2134,7 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( } VLOG(1) << "Input Nodes:"; for (auto& i : input_names) { - VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); + VLOG(1) << " " << i << " in graph " << node_maps.count(i); } auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); auto resmgr = trt_rm->getManager("TRTCalibOps"); @@ -2199,9 +2199,9 @@ tensorflow::Status ConvertCalibrationNodeToEngineNode( } VLOG(1) << "Segment nodes:"; for (auto& i : segment_nodes) { - VLOG(1) << " " << i << " in graph " << nodeMaps.count(i); - auto it = nodeMaps.find(i); - if (it != nodeMaps.end()) { + VLOG(1) << " " << i << " in graph " << node_maps.count(i); + auto it = node_maps.find(i); + if (it != node_maps.end()) { graph.RemoveNode(it->second); } } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 02aef35ced..1f09aecd1e 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ 
-36,23 +36,23 @@ namespace convert { struct SubGraphParams { SubGraphParams( - tensorflow::Graph& graph, const std::set& subgraph_node_ids, - const std::vector>& input_inds, - const std::vector>& output_inds, - size_t max_batch_size, size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_properties, - std::unordered_map>* output_edge_map, - tensorflow::NodeDef* trt_node, int precision_mode_ = 0) - : graph(graph), - subgraph_node_ids(subgraph_node_ids), - input_inds(input_inds), - output_inds(output_inds), - max_batch_size(max_batch_size), - max_workspace_size_bytes(max_workspace_size_bytes), - graph_properties(graph_properties), - output_edge_map(output_edge_map), - trt_node(trt_node), - precision_mode(precision_mode) {} + tensorflow::Graph& inp_graph, const std::set& subgraph_node_id_numbers, + const std::vector>& input_indices, + const std::vector>& output_indices, + size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& current_graph_properties, + std::unordered_map>* output_edges, + tensorflow::NodeDef* constructed_trt_node, int engine_precision_mode = 0) + : graph(inp_graph), + subgraph_node_ids(subgraph_node_id_numbers), + input_inds(input_indices), + output_inds(output_indices), + max_batch_size(max_supported_batch_size), + max_workspace_size_bytes(max_consumed_workspace_size_bytes), + graph_properties(current_graph_properties), + output_edge_map(output_edges), + trt_node(constructed_trt_node), + precision_mode(engine_precision_mode) {} tensorflow::Graph& graph; const std::set& subgraph_node_ids; -- GitLab From 7f703f9d867edf5312fe100ea71ecafee3ca5402 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 5 Mar 2018 15:10:03 -0800 Subject: [PATCH 428/884] More variable renaming --- .../contrib/tensorrt/convert/convert_graph.cc | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc 
b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 76a5d24214..872c468172 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -134,20 +134,20 @@ std::unordered_map> BuildTensorNameMap( // TODO(sami): convert references to pointers struct ConvertGraphParams { ConvertGraphParams( - tensorflow::Graph& graph, const std::vector& output_names, - const std::set& subgraph_node_ids, size_t max_batch_size, - size_t max_workspace_size_bytes, - const tensorflow::grappler::GraphProperties& graph_properties, - std::unordered_map>* output_edge_map, - int precision_mode) - : graph(graph), - output_names(output_names), - subgraph_node_ids(subgraph_node_ids), - max_batch_size(max_batch_size), - max_workspace_size_bytes(max_workspace_size_bytes), - graph_properties(graph_properties), - output_edge_map(output_edge_map), - precision_mode(precision_mode) {} + tensorflow::Graph& inp_graph, const std::vector& output_node_names, + const std::set& subgraph_node_id_numbers, size_t max_supported_batch_size, + size_t max_consumed_workspace_size_bytes, + const tensorflow::grappler::GraphProperties& current_graph_properties, + std::unordered_map>* output_edges, + int engine_precision_mode) + : graph(inp_graph), + output_names(output_node_names), + subgraph_node_ids(subgraph_node_id_numbers), + max_batch_size(max_supported_batch_size), + max_workspace_size_bytes(max_consumed_workspace_size_bytes), + graph_properties(current_graph_properties), + output_edge_map(output_edges), + precision_mode(engine_precision_mode) {} tensorflow::Graph& graph; const std::vector& output_names; const std::set& subgraph_node_ids; -- GitLab From 1e3906458ce43bacb954b283304c98a8e81325fa Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 5 Mar 2018 15:17:06 -0800 Subject: [PATCH 429/884] Fix bug with multi_gpu_model / model.inputs. 
PiperOrigin-RevId: 187931852 --- .../keras/_impl/keras/engine/network.py | 10 ++- .../keras/_impl/keras/engine/topology_test.py | 4 + .../_impl/keras/utils/multi_gpu_utils.py | 4 +- .../_impl/keras/utils/multi_gpu_utils_test.py | 82 +++++++++++++++++-- 4 files changed, 89 insertions(+), 11 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index e47bba9267..0fc05420fe 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -495,7 +495,10 @@ class Network(base_layer.Layer): # `updates` might contain irrelevant updates, so it needs to be filtered # with respect to inputs the model has been called on. - relevant_inputs = self.inputs or [] + if self.inputs: + relevant_inputs = self.inputs[:] + else: + relevant_inputs = [] for i in range(1, len(self._inbound_nodes)): inputs = self.get_input_at(i) if isinstance(inputs, list): @@ -530,7 +533,10 @@ class Network(base_layer.Layer): if context.in_eager_mode(): return losses - relevant_inputs = self.inputs or [] + if self.inputs: + relevant_inputs = self.inputs[:] + else: + relevant_inputs = [] for i in range(1, len(self._inbound_nodes)): inputs = self.get_input_at(i) if isinstance(inputs, list): diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index 04434323d6..0058e66c29 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -531,7 +531,9 @@ class TopologyConstructionTest(test.TestCase): e = keras.layers.Input(shape=(32,), name='input_e') f = keras.layers.Input(shape=(32,), name='input_f') + self.assertEqual(len(model.inputs), 2) g, h = model([e, f]) + self.assertEqual(len(model.inputs), 2) self.assertEqual(g.name, 'model/dense_2/BiasAdd:0') self.assertListEqual(g.get_shape().as_list(), 
c.get_shape().as_list()) @@ -713,7 +715,9 @@ class TopologyConstructionTest(test.TestCase): j = keras.layers.Input(shape=(32,), name='input_j') k = keras.layers.Input(shape=(32,), name='input_k') + self.assertEqual(len(model.inputs), 2) m, n = model([j, k]) + self.assertEqual(len(model.inputs), 2) tf_model = keras.models.Model([j, k], [m, n]) j_tf = array_ops.placeholder(dtype=dtypes.float32, shape=(None, 32)) diff --git a/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py index ce7402e9d2..231ace2a0b 100644 --- a/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils.py @@ -125,7 +125,7 @@ def multi_gpu_model(model, gpus): if gpus <= 1: raise ValueError('For multi-gpu usage to be effective, ' 'call `multi_gpu_model` with `gpus >= 2`. ' - 'Received: `gpus=%d`' % gpus) + 'Received: `gpus=%s`' % gpus) num_gpus = gpus target_gpu_ids = range(num_gpus) @@ -136,7 +136,7 @@ def multi_gpu_model(model, gpus): ] for device in target_devices: if device not in available_devices: - raise ValueError('To call `multi_gpu_model` with `gpus=%d`, ' + raise ValueError('To call `multi_gpu_model` with `gpus=%s`, ' 'we expect the following devices to be available: %s. ' 'However this machine only has: %s. ' 'Try reducing `gpus`.' 
% (gpus, target_devices, diff --git a/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py index 12354c49ca..0a38d6b522 100644 --- a/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py +++ b/tensorflow/python/keras/_impl/keras/utils/multi_gpu_utils_test.py @@ -19,21 +19,34 @@ from __future__ import print_function import numpy as np - +from tensorflow.python import data from tensorflow.python.keras._impl import keras from tensorflow.python.platform import test +def check_if_compatible_devices(gpus=2): + available_devices = [ + keras.utils.multi_gpu_utils._normalize_device_name(name) + for name in keras.utils.multi_gpu_utils._get_available_devices() + ] + if '/gpu:%d' % (gpus - 1) not in available_devices: + return False + return True + + class TestMultiGPUModel(test.TestCase): - def multi_gpu_test_simple_model(self): + def test_multi_gpu_test_simple_model(self): gpus = 2 num_samples = 1000 input_dim = 10 output_dim = 1 hidden_dim = 10 epochs = 2 - target_gpu_id = [0, 2, 4] + target_gpu_id = [0, 1] + + if not check_if_compatible_devices(gpus=gpus): + return with self.test_session(): model = keras.models.Sequential() @@ -47,12 +60,11 @@ class TestMultiGPUModel(test.TestCase): parallel_model = keras.utils.multi_gpu_model(model, gpus=gpus) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=epochs) - parallel_model = keras.utils.multi_gpu_model(model, gpus=target_gpu_id) parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit(x, y, epochs=epochs) - def multi_gpu_test_multi_io_model(self): + def test_multi_gpu_test_multi_io_model(self): gpus = 2 num_samples = 1000 input_dim_a = 10 @@ -61,7 +73,10 @@ class TestMultiGPUModel(test.TestCase): output_dim_b = 2 hidden_dim = 10 epochs = 2 - target_gpu_id = [0, 2, 4] + target_gpu_id = [0, 1] + + if not check_if_compatible_devices(gpus=gpus): + return with self.test_session(): 
input_a = keras.Input((input_dim_a,)) @@ -86,7 +101,10 @@ class TestMultiGPUModel(test.TestCase): parallel_model.compile(loss='mse', optimizer='rmsprop') parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs) - def multi_gpu_test_invalid_devices(self): + def test_multi_gpu_test_invalid_devices(self): + if not check_if_compatible_devices(gpus=2): + return + with self.test_session(): input_shape = (1000, 10) model = keras.models.Sequential() @@ -115,3 +133,53 @@ class TestMultiGPUModel(test.TestCase): with self.assertRaises(ValueError): parallel_model = keras.utils.multi_gpu_model(model, gpus=[0]) parallel_model.fit(x, y, epochs=2) + + def test_nested_model_with_tensor_input(self): + gpus = 2 + input_dim = 10 + shape = (input_dim,) + num_samples = 16 + num_classes = 10 + + if not check_if_compatible_devices(gpus=gpus): + return + + with self.test_session(): + input_shape = (num_samples,) + shape + x_train = np.random.randint(0, 255, input_shape) + y_train = np.random.randint(0, num_classes, (input_shape[0],)) + keras.backend.set_learning_phase(True) + + y_train = keras.utils.to_categorical(y_train, num_classes) + + x_train = x_train.astype('float32') + y_train = y_train.astype('float32') + + dataset = data.Dataset.from_tensor_slices((x_train, y_train)) + dataset = dataset.repeat() + dataset = dataset.batch(4) + iterator = dataset.make_one_shot_iterator() + + inputs, targets = iterator.get_next() + + input_tensor = keras.layers.Input(tensor=inputs) + + model = keras.models.Sequential() + model.add(keras.layers.Dense(3, + input_shape=(input_dim,))) + model.add(keras.layers.Dense(num_classes)) + + output = model(input_tensor) + outer_model = keras.Model(input_tensor, output) + parallel_model = keras.utils.multi_gpu_model(outer_model, gpus=gpus) + + parallel_model.compile( + loss='categorical_crossentropy', + optimizer=keras.optimizers.RMSprop(lr=0.0001, decay=1e-6), + metrics=['accuracy'], + target_tensors=[targets]) + parallel_model.fit(epochs=1, steps_per_epoch=3) 
+ + +if __name__ == '__main__': + test.main() -- GitLab From fb59cf3a2fcaaa5b038b0ad900e6a91d94b91cf3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 15:17:24 -0800 Subject: [PATCH 430/884] Add objective functions for variational inference with Csiszar f-divergences. PiperOrigin-RevId: 187931921 --- tensorflow/contrib/bayesflow/BUILD | 23 - tensorflow/contrib/bayesflow/__init__.py | 2 - .../kernel_tests/csiszar_divergence_test.py | 1004 --------------- .../python/ops/csiszar_divergence.py | 51 - .../python/ops/csiszar_divergence_impl.py | 1105 ----------------- 5 files changed, 2185 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/csiszar_divergence.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/csiszar_divergence_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 5fdcbffb4d..0a5b7e46f2 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -56,29 +56,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "csiszar_divergence_test", - size = "medium", - srcs = ["python/kernel_tests/csiszar_divergence_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/python/ops/distributions", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:gradients", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:nn_ops", - ], - tags = [ - "manual", # b/64490288 - "notap", - ], -) - cuda_py_test( name = "custom_grad_test", size = "small", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index c411026346..f2b7fb77a8 100644 --- 
a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -21,7 +21,6 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long -from tensorflow.contrib.bayesflow.python.ops import csiszar_divergence from tensorflow.contrib.bayesflow.python.ops import custom_grad from tensorflow.contrib.bayesflow.python.ops import halton_sequence from tensorflow.contrib.bayesflow.python.ops import hmc @@ -36,7 +35,6 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'csiszar_divergence', 'custom_grad', 'entropy', 'halton_sequence', diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py deleted file mode 100644 index 2e94b7206d..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/csiszar_divergence_test.py +++ /dev/null @@ -1,1004 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for Csiszar Divergence Ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import csiszar_divergence_impl -from tensorflow.contrib.distributions.python.ops import mvn_diag as mvn_diag_lib -from tensorflow.contrib.distributions.python.ops import mvn_full_covariance as mvn_full_lib -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import linalg_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops.distributions import kullback_leibler -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.platform import test - - -cd = csiszar_divergence_impl - - -def tridiag(d, diag_value, offdiag_value): - """d x d matrix with given value on diag, and one super/sub diag.""" - diag_mat = linalg_ops.eye(d) * (diag_value - offdiag_value) - three_bands = array_ops.matrix_band_part( - array_ops.fill([d, d], offdiag_value), 1, 1) - return diag_mat + three_bands - - -class AmariAlphaTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - for alpha in [-1., 0., 1., 2.]: - for normalized in [True, False]: - with self.test_session(graph=ops.Graph()): - self.assertAllClose( - cd.amari_alpha(0., alpha=alpha, - self_normalized=normalized).eval(), - 0.) 
- - def test_correct_when_alpha0(self): - with self.test_session(): - self.assertAllClose( - cd.amari_alpha(self._logu, alpha=0.).eval(), - -self._logu) - - self.assertAllClose( - cd.amari_alpha(self._logu, alpha=0., self_normalized=True).eval(), - -self._logu + (self._u - 1.)) - - def test_correct_when_alpha1(self): - with self.test_session(): - self.assertAllClose( - cd.amari_alpha(self._logu, alpha=1.).eval(), - self._u * self._logu) - - self.assertAllClose( - cd.amari_alpha(self._logu, alpha=1., self_normalized=True).eval(), - self._u * self._logu - (self._u - 1.)) - - def test_correct_when_alpha_not_01(self): - for alpha in [-2, -1., -0.5, 0.5, 2.]: - with self.test_session(graph=ops.Graph()): - self.assertAllClose( - cd.amari_alpha(self._logu, - alpha=alpha, - self_normalized=False).eval(), - ((self._u**alpha - 1)) / (alpha * (alpha - 1.))) - - self.assertAllClose( - cd.amari_alpha(self._logu, - alpha=alpha, - self_normalized=True).eval(), - ((self._u**alpha - 1.) - - alpha * (self._u - 1)) / (alpha * (alpha - 1.))) - - -class KLReverseTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - for normalized in [True, False]: - with self.test_session(graph=ops.Graph()): - self.assertAllClose( - cd.kl_reverse(0., self_normalized=normalized).eval(), - 0.) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.kl_reverse(self._logu).eval(), - -self._logu) - - self.assertAllClose( - cd.kl_reverse(self._logu, self_normalized=True).eval(), - -self._logu + (self._u - 1.)) - - -class KLForwardTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - for normalized in [True, False]: - with self.test_session(graph=ops.Graph()): - self.assertAllClose( - cd.kl_forward(0., self_normalized=normalized).eval(), - 0.) 
- - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.kl_forward(self._logu).eval(), - self._u * self._logu) - - self.assertAllClose( - cd.kl_forward(self._logu, self_normalized=True).eval(), - self._u * self._logu - (self._u - 1.)) - - -class JensenShannonTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.jensen_shannon(0.).eval(), np.log(0.25)) - - def test_symmetric(self): - with self.test_session(): - self.assertAllClose( - cd.jensen_shannon(self._logu).eval(), - cd.symmetrized_csiszar_function( - self._logu, cd.jensen_shannon).eval()) - - self.assertAllClose( - cd.jensen_shannon(self._logu, self_normalized=True).eval(), - cd.symmetrized_csiszar_function( - self._logu, - lambda x: cd.jensen_shannon(x, self_normalized=True)).eval()) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.jensen_shannon(self._logu).eval(), - (self._u * self._logu - - (1 + self._u) * np.log1p(self._u))) - - self.assertAllClose( - cd.jensen_shannon(self._logu, self_normalized=True).eval(), - (self._u * self._logu - - (1 + self._u) * np.log((1 + self._u) / 2))) - - -class ArithmeticGeometricMeanTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.arithmetic_geometric(0.).eval(), np.log(4)) - self.assertAllClose( - cd.arithmetic_geometric(0., self_normalized=True).eval(), 0.) - - def test_symmetric(self): - with self.test_session(): - self.assertAllClose( - cd.arithmetic_geometric(self._logu).eval(), - cd.symmetrized_csiszar_function( - self._logu, cd.arithmetic_geometric).eval()) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.arithmetic_geometric(self._logu).eval(), - (1. + self._u) * np.log((1. 
+ self._u) / np.sqrt(self._u))) - - self.assertAllClose( - cd.arithmetic_geometric(self._logu, self_normalized=True).eval(), - (1. + self._u) * np.log(0.5 * (1. + self._u) / np.sqrt(self._u))) - - -class TotalVariationTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.total_variation(0.).eval(), 0.) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.total_variation(self._logu).eval(), - 0.5 * np.abs(self._u - 1)) - - -class PearsonTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.pearson(0.).eval(), 0.) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.pearson(self._logu).eval(), - np.square(self._u - 1)) - - -class SquaredHellingerTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.squared_hellinger(0.).eval(), 0.) - - def test_symmetric(self): - with self.test_session(): - self.assertAllClose( - cd.squared_hellinger(self._logu).eval(), - cd.symmetrized_csiszar_function( - self._logu, cd.squared_hellinger).eval()) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.squared_hellinger(self._logu).eval(), - np.square(np.sqrt(self._u) - 1)) - - -class TriangularTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.triangular(0.).eval(), 0.) 
- - def test_symmetric(self): - with self.test_session(): - self.assertAllClose( - cd.triangular(self._logu).eval(), - cd.symmetrized_csiszar_function( - self._logu, cd.triangular).eval()) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.triangular(self._logu).eval(), - np.square(self._u - 1) / (1 + self._u)) - - -class TPowerTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.t_power(0., t=-0.1).eval(), 0.) - self.assertAllClose(cd.t_power(0., t=0.5).eval(), 0.) - self.assertAllClose(cd.t_power(0., t=1.1).eval(), 0.) - self.assertAllClose( - cd.t_power(0., t=-0.1, self_normalized=True).eval(), 0.) - self.assertAllClose( - cd.t_power(0., t=0.5, self_normalized=True).eval(), 0.) - self.assertAllClose( - cd.t_power(0., t=1.1, self_normalized=True).eval(), 0.) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.t_power(self._logu, t=np.float64(-0.1)).eval(), - self._u ** -0.1 - 1.) - self.assertAllClose( - cd.t_power(self._logu, t=np.float64(0.5)).eval(), - -self._u ** 0.5 + 1.) - self.assertAllClose( - cd.t_power(self._logu, t=np.float64(1.1)).eval(), - self._u ** 1.1 - 1.) - - def test_correct_self_normalized(self): - with self.test_session(): - self.assertAllClose( - cd.t_power(self._logu, t=np.float64(-0.1), - self_normalized=True).eval(), - self._u ** -0.1 - 1. + 0.1 * (self._u - 1.)) - self.assertAllClose( - cd.t_power(self._logu, t=np.float64(0.5), - self_normalized=True).eval(), - -self._u ** 0.5 + 1. + 0.5 * (self._u - 1.)) - self.assertAllClose( - cd.t_power(self._logu, t=np.float64(1.1), - self_normalized=True).eval(), - self._u ** 1.1 - 1. 
- 1.1 * (self._u - 1.)) - - -class Log1pAbsTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.log1p_abs(0.).eval(), 0.) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.log1p_abs(self._logu).eval(), - self._u**(np.sign(self._u - 1)) - 1) - - -class JeffreysTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.jeffreys(0.).eval(), 0.) - - def test_symmetric(self): - with self.test_session(): - self.assertAllClose( - cd.jeffreys(self._logu).eval(), - cd.symmetrized_csiszar_function( - self._logu, cd.jeffreys).eval()) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.jeffreys(self._logu).eval(), - 0.5 * (self._u * self._logu - self._logu)) - - -class ChiSquareTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose(cd.chi_square(0.).eval(), 0.) 
- - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.chi_square(self._logu).eval(), - self._u**2 - 1) - - -class ModifiedGanTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10, 100) - self._u = np.exp(self._logu) - - def test_at_zero(self): - with self.test_session(): - self.assertAllClose( - cd.modified_gan(0.).eval(), np.log(2)) - self.assertAllClose( - cd.modified_gan(0., self_normalized=True).eval(), np.log(2)) - - def test_correct(self): - with self.test_session(): - self.assertAllClose( - cd.modified_gan(self._logu).eval(), - np.log1p(self._u) - self._logu) - - self.assertAllClose( - cd.modified_gan(self._logu, self_normalized=True).eval(), - np.log1p(self._u) - self._logu + 0.5 * (self._u - 1)) - - -class SymmetrizedCsiszarFunctionTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10., 100) - self._u = np.exp(self._logu) - - def test_jensen_shannon(self): - with self.test_session(): - - # The following functions come from the claim made in the - # symmetrized_csiszar_function docstring. - def js1(logu): - return (-logu - - (1. + math_ops.exp(logu)) * ( - nn_ops.softplus(logu))) - - def js2(logu): - return 2. 
* (math_ops.exp(logu) * ( - logu - nn_ops.softplus(logu))) - - self.assertAllClose( - cd.symmetrized_csiszar_function(self._logu, js1).eval(), - cd.jensen_shannon(self._logu).eval()) - - self.assertAllClose( - cd.symmetrized_csiszar_function(self._logu, js2).eval(), - cd.jensen_shannon(self._logu).eval()) - - def test_jeffreys(self): - with self.test_session(): - self.assertAllClose( - cd.symmetrized_csiszar_function(self._logu, cd.kl_reverse).eval(), - cd.jeffreys(self._logu).eval()) - - self.assertAllClose( - cd.symmetrized_csiszar_function(self._logu, cd.kl_forward).eval(), - cd.jeffreys(self._logu).eval()) - - -class DualCsiszarFunctionTest(test.TestCase): - - def setUp(self): - self._logu = np.linspace(-10., 10., 100) - self._u = np.exp(self._logu) - - def test_kl_forward(self): - with self.test_session(): - self.assertAllClose( - cd.dual_csiszar_function(self._logu, cd.kl_forward).eval(), - cd.kl_reverse(self._logu).eval()) - - def test_kl_reverse(self): - with self.test_session(): - self.assertAllClose( - cd.dual_csiszar_function(self._logu, cd.kl_reverse).eval(), - cd.kl_forward(self._logu).eval()) - - -class MonteCarloCsiszarFDivergenceTest(test.TestCase): - - def test_kl_forward(self): - with self.test_session() as sess: - q = normal_lib.Normal( - loc=np.ones(6), - scale=np.array([0.5, 1.0, 1.5, 2.0, 2.5, 3.0])) - - p = normal_lib.Normal(loc=q.loc + 0.1, scale=q.scale - 0.2) - - approx_kl = cd.monte_carlo_csiszar_f_divergence( - f=cd.kl_forward, - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - approx_kl_self_normalized = cd.monte_carlo_csiszar_f_divergence( - f=lambda logu: cd.kl_forward(logu, self_normalized=True), - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - exact_kl = kullback_leibler.kl_divergence(p, q) - - [approx_kl_, approx_kl_self_normalized_, exact_kl_] = sess.run([ - approx_kl, approx_kl_self_normalized, exact_kl]) - - self.assertAllClose(approx_kl_, exact_kl_, - rtol=0.08, atol=0.) 
- - self.assertAllClose(approx_kl_self_normalized_, exact_kl_, - rtol=0.02, atol=0.) - - def test_kl_reverse(self): - with self.test_session() as sess: - - q = normal_lib.Normal( - loc=np.ones(6), - scale=np.array([0.5, 1.0, 1.5, 2.0, 2.5, 3.0])) - - p = normal_lib.Normal(loc=q.loc + 0.1, scale=q.scale - 0.2) - - approx_kl = cd.monte_carlo_csiszar_f_divergence( - f=cd.kl_reverse, - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - approx_kl_self_normalized = cd.monte_carlo_csiszar_f_divergence( - f=lambda logu: cd.kl_reverse(logu, self_normalized=True), - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - exact_kl = kullback_leibler.kl_divergence(q, p) - - [approx_kl_, approx_kl_self_normalized_, exact_kl_] = sess.run([ - approx_kl, approx_kl_self_normalized, exact_kl]) - - self.assertAllClose(approx_kl_, exact_kl_, - rtol=0.07, atol=0.) - - self.assertAllClose(approx_kl_self_normalized_, exact_kl_, - rtol=0.02, atol=0.) - - def test_kl_reverse_multidim(self): - - with self.test_session() as sess: - d = 5 # Dimension - - p = mvn_full_lib.MultivariateNormalFullCovariance( - covariance_matrix=tridiag(d, diag_value=1, offdiag_value=0.5)) - - q = mvn_diag_lib.MultivariateNormalDiag(scale_diag=[0.5]*d) - - approx_kl = cd.monte_carlo_csiszar_f_divergence( - f=cd.kl_reverse, - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - approx_kl_self_normalized = cd.monte_carlo_csiszar_f_divergence( - f=lambda logu: cd.kl_reverse(logu, self_normalized=True), - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - exact_kl = kullback_leibler.kl_divergence(q, p) - - [approx_kl_, approx_kl_self_normalized_, exact_kl_] = sess.run([ - approx_kl, approx_kl_self_normalized, exact_kl]) - - self.assertAllClose(approx_kl_, exact_kl_, - rtol=0.02, atol=0.) - - self.assertAllClose(approx_kl_self_normalized_, exact_kl_, - rtol=0.08, atol=0.) 
- - def test_kl_forward_multidim(self): - - with self.test_session() as sess: - d = 5 # Dimension - - p = mvn_full_lib.MultivariateNormalFullCovariance( - covariance_matrix=tridiag(d, diag_value=1, offdiag_value=0.5)) - - # Variance is very high when approximating Forward KL, so we make - # scale_diag larger than in test_kl_reverse_multidim. This ensures q - # "covers" p and thus Var_q[p/q] is smaller. - q = mvn_diag_lib.MultivariateNormalDiag(scale_diag=[1.]*d) - - approx_kl = cd.monte_carlo_csiszar_f_divergence( - f=cd.kl_forward, - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - approx_kl_self_normalized = cd.monte_carlo_csiszar_f_divergence( - f=lambda logu: cd.kl_forward(logu, self_normalized=True), - p_log_prob=p.log_prob, - q=q, - num_draws=int(1e5), - seed=1) - - exact_kl = kullback_leibler.kl_divergence(p, q) - - [approx_kl_, approx_kl_self_normalized_, exact_kl_] = sess.run([ - approx_kl, approx_kl_self_normalized, exact_kl]) - - self.assertAllClose(approx_kl_, exact_kl_, - rtol=0.06, atol=0.) - - self.assertAllClose(approx_kl_self_normalized_, exact_kl_, - rtol=0.05, atol=0.) - - def test_score_trick(self): - - with self.test_session() as sess: - d = 5 # Dimension - num_draws = int(1e5) - seed = 1 - - p = mvn_full_lib.MultivariateNormalFullCovariance( - covariance_matrix=tridiag(d, diag_value=1, offdiag_value=0.5)) - - # Variance is very high when approximating Forward KL, so we make - # scale_diag larger than in test_kl_reverse_multidim. This ensures q - # "covers" p and thus Var_q[p/q] is smaller. - s = array_ops.constant(1.) 
- q = mvn_diag_lib.MultivariateNormalDiag( - scale_diag=array_ops.tile([s], [d])) - - approx_kl = cd.monte_carlo_csiszar_f_divergence( - f=cd.kl_reverse, - p_log_prob=p.log_prob, - q=q, - num_draws=num_draws, - seed=seed) - - approx_kl_self_normalized = cd.monte_carlo_csiszar_f_divergence( - f=lambda logu: cd.kl_reverse(logu, self_normalized=True), - p_log_prob=p.log_prob, - q=q, - num_draws=num_draws, - seed=seed) - - approx_kl_score_trick = cd.monte_carlo_csiszar_f_divergence( - f=cd.kl_reverse, - p_log_prob=p.log_prob, - q=q, - num_draws=num_draws, - use_reparametrization=False, - seed=seed) - - approx_kl_self_normalized_score_trick = ( - cd.monte_carlo_csiszar_f_divergence( - f=lambda logu: cd.kl_reverse(logu, self_normalized=True), - p_log_prob=p.log_prob, - q=q, - num_draws=num_draws, - use_reparametrization=False, - seed=seed)) - - exact_kl = kullback_leibler.kl_divergence(q, p) - - grad_sum = lambda fs: gradients_impl.gradients(fs, s)[0] - - [ - approx_kl_grad_, - approx_kl_self_normalized_grad_, - approx_kl_score_trick_grad_, - approx_kl_self_normalized_score_trick_grad_, - exact_kl_grad_, - approx_kl_, - approx_kl_self_normalized_, - approx_kl_score_trick_, - approx_kl_self_normalized_score_trick_, - exact_kl_, - ] = sess.run([ - grad_sum(approx_kl), - grad_sum(approx_kl_self_normalized), - grad_sum(approx_kl_score_trick), - grad_sum(approx_kl_self_normalized_score_trick), - grad_sum(exact_kl), - approx_kl, - approx_kl_self_normalized, - approx_kl_score_trick, - approx_kl_self_normalized_score_trick, - exact_kl, - ]) - - # Test average divergence. - self.assertAllClose(approx_kl_, exact_kl_, - rtol=0.02, atol=0.) - - self.assertAllClose(approx_kl_self_normalized_, exact_kl_, - rtol=0.08, atol=0.) - - self.assertAllClose(approx_kl_score_trick_, exact_kl_, - rtol=0.02, atol=0.) - - self.assertAllClose(approx_kl_self_normalized_score_trick_, exact_kl_, - rtol=0.08, atol=0.) - - # Test average gradient-divergence. 
- self.assertAllClose(approx_kl_grad_, exact_kl_grad_, - rtol=0.007, atol=0.) - - self.assertAllClose(approx_kl_self_normalized_grad_, exact_kl_grad_, - rtol=0.011, atol=0.) - - self.assertAllClose(approx_kl_score_trick_grad_, exact_kl_grad_, - rtol=0.018, atol=0.) - - self.assertAllClose( - approx_kl_self_normalized_score_trick_grad_, exact_kl_grad_, - rtol=0.017, atol=0.) - - -class CsiszarVIMCOTest(test.TestCase): - - def _csiszar_vimco_helper(self, logu): - """Numpy implementation of `csiszar_vimco_helper`.""" - - # Since this is a naive/intuitive implementation, we compensate by using the - # highest precision we can. - logu = np.float128(logu) - n = logu.shape[0] - u = np.exp(logu) - loogeoavg_u = [] # Leave-one-out geometric-average of exp(logu). - for j in range(n): - loogeoavg_u.append(np.exp(np.mean( - [logu[i, ...] for i in range(n) if i != j], - axis=0))) - loogeoavg_u = np.array(loogeoavg_u) - - loosum_u = [] # Leave-one-out sum of exp(logu). - for j in range(n): - loosum_u.append(np.sum( - [u[i, ...] for i in range(n) if i != j], - axis=0)) - loosum_u = np.array(loosum_u) - - # Natural log of the average u except each is swapped-out for its - # leave-`i`-th-out Geometric average. - log_sooavg_u = np.log(loosum_u + loogeoavg_u) - np.log(n) - - log_avg_u = np.log(np.mean(u, axis=0)) - return log_avg_u, log_sooavg_u - - def _csiszar_vimco_helper_grad(self, logu, delta): - """Finite difference approximation of `grad(csiszar_vimco_helper, logu)`.""" - - # This code actually estimates the sum of the Jacobiab because that's what - # TF's `gradients` does. 
- np_log_avg_u1, np_log_sooavg_u1 = self._csiszar_vimco_helper( - logu[..., None] + np.diag([delta]*len(logu))) - np_log_avg_u, np_log_sooavg_u = self._csiszar_vimco_helper( - logu[..., None]) - return [ - (np_log_avg_u1 - np_log_avg_u) / delta, - np.sum(np_log_sooavg_u1 - np_log_sooavg_u, axis=0) / delta, - ] - - def test_vimco_helper_1(self): - """Tests that function calculation correctly handles batches.""" - - logu = np.linspace(-100., 100., 100).reshape([10, 2, 5]) - with self.test_session() as sess: - np_log_avg_u, np_log_sooavg_u = self._csiszar_vimco_helper(logu) - [log_avg_u, log_sooavg_u] = sess.run(cd.csiszar_vimco_helper(logu)) - self.assertAllClose(np_log_avg_u, log_avg_u, - rtol=1e-8, atol=0.) - self.assertAllClose(np_log_sooavg_u, log_sooavg_u, - rtol=1e-8, atol=0.) - - def test_vimco_helper_2(self): - """Tests that function calculation correctly handles overflow.""" - - # Using 700 (rather than 1e3) since naive numpy version can't handle higher. - logu = np.float32([0., 700, -1, 1]) - with self.test_session() as sess: - np_log_avg_u, np_log_sooavg_u = self._csiszar_vimco_helper(logu) - [log_avg_u, log_sooavg_u] = sess.run(cd.csiszar_vimco_helper(logu)) - self.assertAllClose(np_log_avg_u, log_avg_u, - rtol=1e-6, atol=0.) - self.assertAllClose(np_log_sooavg_u, log_sooavg_u, - rtol=1e-5, atol=0.) - - def test_vimco_helper_3(self): - """Tests that function calculation correctly handles underlow.""" - - logu = np.float32([0., -1000, -1, 1]) - with self.test_session() as sess: - np_log_avg_u, np_log_sooavg_u = self._csiszar_vimco_helper(logu) - [log_avg_u, log_sooavg_u] = sess.run(cd.csiszar_vimco_helper(logu)) - self.assertAllClose(np_log_avg_u, log_avg_u, - rtol=1e-5, atol=0.) 
- self.assertAllClose(np_log_sooavg_u, log_sooavg_u, - rtol=1e-4, atol=1e-15) - - def test_vimco_helper_gradient_using_finite_difference_1(self): - """Tests that gradient calculation correctly handles batches.""" - - logu_ = np.linspace(-100., 100., 100).reshape([10, 2, 5]) - with self.test_session() as sess: - logu = array_ops.constant(logu_) - - grad = lambda flogu: gradients_impl.gradients(flogu, logu)[0] - log_avg_u, log_sooavg_u = cd.csiszar_vimco_helper(logu) - - [ - grad_log_avg_u, - grad_log_sooavg_u, - ] = sess.run([grad(log_avg_u), grad(log_sooavg_u)]) - - # We skip checking against finite-difference approximation since it - # doesn't support batches. - - # Verify claim in docstring. - self.assertAllClose( - np.ones_like(grad_log_avg_u.sum(axis=0)), - grad_log_avg_u.sum(axis=0)) - self.assertAllClose( - np.ones_like(grad_log_sooavg_u.mean(axis=0)), - grad_log_sooavg_u.mean(axis=0)) - - def test_vimco_helper_gradient_using_finite_difference_2(self): - """Tests that gradient calculation correctly handles overflow.""" - - delta = 1e-3 - logu_ = np.float32([0., 1000, -1, 1]) - with self.test_session() as sess: - logu = array_ops.constant(logu_) - - [ - np_grad_log_avg_u, - np_grad_log_sooavg_u, - ] = self._csiszar_vimco_helper_grad(logu_, delta) - - grad = lambda flogu: gradients_impl.gradients(flogu, logu)[0] - log_avg_u, log_sooavg_u = cd.csiszar_vimco_helper(logu) - - [ - grad_log_avg_u, - grad_log_sooavg_u, - ] = sess.run([grad(log_avg_u), grad(log_sooavg_u)]) - - self.assertAllClose(np_grad_log_avg_u, grad_log_avg_u, - rtol=delta, atol=0.) - self.assertAllClose(np_grad_log_sooavg_u, grad_log_sooavg_u, - rtol=delta, atol=0.) - # Verify claim in docstring. 
- self.assertAllClose( - np.ones_like(grad_log_avg_u.sum(axis=0)), - grad_log_avg_u.sum(axis=0)) - self.assertAllClose( - np.ones_like(grad_log_sooavg_u.mean(axis=0)), - grad_log_sooavg_u.mean(axis=0)) - - def test_vimco_helper_gradient_using_finite_difference_3(self): - """Tests that gradient calculation correctly handles underlow.""" - - delta = 1e-3 - logu_ = np.float32([0., -1000, -1, 1]) - with self.test_session() as sess: - logu = array_ops.constant(logu_) - - [ - np_grad_log_avg_u, - np_grad_log_sooavg_u, - ] = self._csiszar_vimco_helper_grad(logu_, delta) - - grad = lambda flogu: gradients_impl.gradients(flogu, logu)[0] - log_avg_u, log_sooavg_u = cd.csiszar_vimco_helper(logu) - - [ - grad_log_avg_u, - grad_log_sooavg_u, - ] = sess.run([grad(log_avg_u), grad(log_sooavg_u)]) - - self.assertAllClose(np_grad_log_avg_u, grad_log_avg_u, - rtol=delta, atol=0.) - self.assertAllClose(np_grad_log_sooavg_u, grad_log_sooavg_u, - rtol=delta, atol=0.) - # Verify claim in docstring. - self.assertAllClose( - np.ones_like(grad_log_avg_u.sum(axis=0)), - grad_log_avg_u.sum(axis=0)) - self.assertAllClose( - np.ones_like(grad_log_sooavg_u.mean(axis=0)), - grad_log_sooavg_u.mean(axis=0)) - - def test_vimco_and_gradient(self): - - with self.test_session() as sess: - dims = 5 # Dimension - num_draws = int(20) - num_batch_draws = int(3) - seed = 1 - - f = lambda logu: cd.kl_reverse(logu, self_normalized=False) - np_f = lambda logu: -logu - - p = mvn_full_lib.MultivariateNormalFullCovariance( - covariance_matrix=tridiag(dims, diag_value=1, offdiag_value=0.5)) - - # Variance is very high when approximating Forward KL, so we make - # scale_diag larger than in test_kl_reverse_multidim. This ensures q - # "covers" p and thus Var_q[p/q] is smaller. - s = array_ops.constant(1.) 
- q = mvn_diag_lib.MultivariateNormalDiag( - scale_diag=array_ops.tile([s], [dims])) - - vimco = cd.csiszar_vimco( - f=f, - p_log_prob=p.log_prob, - q=q, - num_draws=num_draws, - num_batch_draws=num_batch_draws, - seed=seed) - - x = q.sample(sample_shape=[num_draws, num_batch_draws], - seed=seed) - x = array_ops.stop_gradient(x) - logu = p.log_prob(x) - q.log_prob(x) - f_log_sum_u = f(cd.csiszar_vimco_helper(logu)[0]) - - grad_sum = lambda fs: gradients_impl.gradients(fs, s)[0] - - def jacobian(x): - # Warning: this function is slow and may not even finish if prod(shape) - # is larger than, say, 100. - shape = x.shape.as_list() - assert all(s is not None for s in shape) - x = array_ops.reshape(x, shape=[-1]) - r = [grad_sum(x[i]) for i in range(np.prod(shape))] - return array_ops.reshape(array_ops.stack(r), shape=shape) - - [ - logu_, - jacobian_logqx_, - vimco_, - grad_vimco_, - f_log_sum_u_, - grad_mean_f_log_sum_u_, - ] = sess.run([ - logu, - jacobian(q.log_prob(x)), - vimco, - grad_sum(vimco), - f_log_sum_u, - grad_sum(f_log_sum_u) / num_batch_draws, - ]) - - np_log_avg_u, np_log_sooavg_u = self._csiszar_vimco_helper(logu_) - - # Test VIMCO loss is correct. - self.assertAllClose(np_f(np_log_avg_u).mean(axis=0), vimco_, - rtol=1e-5, atol=0.) - - # Test gradient of VIMCO loss is correct. - # - # To make this computation we'll inject two gradients from TF: - # - grad[mean(f(log(sum(p(x)/q(x)))))] - # - jacobian[log(q(x))]. - # - # We now justify why using these (and only these) TF values for - # ground-truth does not undermine the completeness of this test. - # - # Regarding `grad_mean_f_log_sum_u_`, note that we validate the - # correctness of the zero-th order derivative (for each batch member). - # Since `cd.csiszar_vimco_helper` itself does not manipulate any gradient - # information, we can safely rely on TF. - self.assertAllClose(np_f(np_log_avg_u), f_log_sum_u_, rtol=1e-4, atol=0.) 
- # - # Regarding `jacobian_logqx_`, note that testing the gradient of - # `q.log_prob` is outside the scope of this unit-test thus we may safely - # use TF to find it. - - # The `mean` is across batches and the `sum` is across iid samples. - np_grad_vimco = ( - grad_mean_f_log_sum_u_ - + np.mean( - np.sum( - jacobian_logqx_ * (np_f(np_log_avg_u) - - np_f(np_log_sooavg_u)), - axis=0), - axis=0)) - - self.assertAllClose(np_grad_vimco, grad_vimco_, - rtol=1e-5, atol=0.) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/csiszar_divergence.py b/tensorflow/contrib/bayesflow/python/ops/csiszar_divergence.py deleted file mode 100644 index 9f7a95f138..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/csiszar_divergence.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Csiszar f-Divergence and helpers. - -See ${python/contrib.bayesflow.csiszar_divergence}. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.contrib.bayesflow.python.ops.csiszar_divergence_impl import * -# pylint: enable=wildcard-import -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - 'amari_alpha', - 'arithmetic_geometric', - 'chi_square', - 'csiszar_vimco', - 'dual_csiszar_function', - 'jeffreys', - 'jensen_shannon', - 'kl_forward', - 'kl_reverse', - 'log1p_abs', - 'modified_gan', - 'monte_carlo_csiszar_f_divergence', - 'pearson', - 'squared_hellinger', - 'symmetrized_csiszar_function', - 'total_variation', - 't_power', - 'triangular', -] - -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/csiszar_divergence_impl.py b/tensorflow/contrib/bayesflow/python/ops/csiszar_divergence_impl.py deleted file mode 100644 index 8efd59d651..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/csiszar_divergence_impl.py +++ /dev/null @@ -1,1105 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Csiszar f-Divergence and helpers. 
- -@@amari_alpha -@@arithmetic_geometric -@@chi_square -@@csiszar_vimco -@@dual_csiszar_function -@@jeffreys -@@jensen_shannon -@@kl_forward -@@kl_reverse -@@log1p_abs -@@modified_gan -@@monte_carlo_csiszar_f_divergence -@@pearson -@@squared_hellinger -@@symmetrized_csiszar_function -@@total_variation -@@triangular - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib import framework as contrib_framework -from tensorflow.contrib.bayesflow.python.ops import monte_carlo_impl as monte_carlo -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops.distributions import distribution -from tensorflow.python.ops.distributions import util as distribution_util - - -def amari_alpha(logu, alpha=1., self_normalized=False, name=None): - """The Amari-alpha Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - When `self_normalized = True`, the Amari-alpha Csiszar-function is: - - ```none - f(u) = { -log(u) + (u - 1), alpha = 0 - { u log(u) - (u - 1), alpha = 1 - { [(u**alpha - 1) - alpha (u - 1)] / (alpha (alpha - 1)), otherwise - ``` - - When `self_normalized = False` the `(u - 1)` terms are omitted. - - Warning: when `alpha != 0` and/or `self_normalized = True` this function makes - non-log-space calculations and may therefore be numerically unstable for - `|logu| >> 0`. - - For more information, see: - A. Cichocki and S. Amari. "Families of Alpha-Beta-and GammaDivergences: - Flexible and Robust Measures of Similarities." Entropy, vol. 12, no. 6, pp. - 1532-1568, 2010. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - alpha: `float`-like Python scalar. (See Mathematical Details for meaning.) 
- self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When - `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even - when `p, q` are unnormalized measures. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - amari_alpha_of_u: `float`-like `Tensor` of the Csiszar-function evaluated - at `u = exp(logu)`. - - Raises: - TypeError: if `alpha` is `None` or a `Tensor`. - TypeError: if `self_normalized` is `None` or a `Tensor`. - """ - with ops.name_scope(name, "amari_alpha", [logu]): - if alpha is None or contrib_framework.is_tensor(alpha): - raise TypeError("`alpha` cannot be `None` or `Tensor` type.") - if self_normalized is None or contrib_framework.is_tensor(self_normalized): - raise TypeError("`self_normalized` cannot be `None` or `Tensor` type.") - - logu = ops.convert_to_tensor(logu, name="logu") - - if alpha == 0.: - f = -logu - elif alpha == 1.: - f = math_ops.exp(logu) * logu - else: - f = math_ops.expm1(alpha * logu) / (alpha * (alpha - 1.)) - - if not self_normalized: - return f - - if alpha == 0.: - return f + math_ops.expm1(logu) - elif alpha == 1.: - return f - math_ops.expm1(logu) - else: - return f - math_ops.expm1(logu) / (alpha - 1.) - - -def kl_reverse(logu, self_normalized=False, name=None): - """The reverse Kullback-Leibler Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - When `self_normalized = True`, the KL-reverse Csiszar-function is: - - ```none - f(u) = -log(u) + (u - 1) - ``` - - When `self_normalized = False` the `(u - 1)` term is omitted. - - Observe that as an f-Divergence, this Csiszar-function implies: - - ```none - D_f[p, q] = KL[q, p] - ``` - - The KL is "reverse" because in maximum likelihood we think of minimizing `q` - as in `KL[p, q]`. - - Warning: when self_normalized = True` this function makes non-log-space - calculations and may therefore be numerically unstable for `|logu| >> 0`. 
- - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When - `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even - when `p, q` are unnormalized measures. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - kl_reverse_of_u: `float`-like `Tensor` of the Csiszar-function evaluated at - `u = exp(logu)`. - - Raises: - TypeError: if `self_normalized` is `None` or a `Tensor`. - """ - - with ops.name_scope(name, "kl_reverse", [logu]): - return amari_alpha(logu, alpha=0., self_normalized=self_normalized) - - -def kl_forward(logu, self_normalized=False, name=None): - """The forward Kullback-Leibler Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - When `self_normalized = True`, the KL-forward Csiszar-function is: - - ```none - f(u) = u log(u) - (u - 1) - ``` - - When `self_normalized = False` the `(u - 1)` term is omitted. - - Observe that as an f-Divergence, this Csiszar-function implies: - - ```none - D_f[p, q] = KL[p, q] - ``` - - The KL is "forward" because in maximum likelihood we think of minimizing `q` - as in `KL[p, q]`. - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When - `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even - when `p, q` are unnormalized measures. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - kl_forward_of_u: `float`-like `Tensor` of the Csiszar-function evaluated at - `u = exp(logu)`. - - Raises: - TypeError: if `self_normalized` is `None` or a `Tensor`. 
- """ - - with ops.name_scope(name, "kl_forward", [logu]): - return amari_alpha(logu, alpha=1., self_normalized=self_normalized) - - -def jensen_shannon(logu, self_normalized=False, name=None): - """The Jensen-Shannon Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - When `self_normalized = True`, the Jensen-Shannon Csiszar-function is: - - ```none - f(u) = u log(u) - (1 + u) log(1 + u) + (u + 1) log(2) - ``` - - When `self_normalized = False` the `(u + 1) log(2)` term is omitted. - - Observe that as an f-Divergence, this Csiszar-function implies: - - ```none - D_f[p, q] = KL[p, m] + KL[q, m] - m(x) = 0.5 p(x) + 0.5 q(x) - ``` - - In a sense, this divergence is the "reverse" of the Arithmetic-Geometric - f-Divergence. - - This Csiszar-function induces a symmetric f-Divergence, i.e., - `D_f[p, q] = D_f[q, p]`. - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - For more information, see: - Lin, J. "Divergence measures based on the Shannon entropy." IEEE Trans. - Inf. Th., 37, 145-151, 1991. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When - `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even - when `p, q` are unnormalized measures. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - jensen_shannon_of_u: `float`-like `Tensor` of the Csiszar-function - evaluated at `u = exp(logu)`. - """ - - with ops.name_scope(name, "jensen_shannon", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - npdt = logu.dtype.as_numpy_dtype - y = nn_ops.softplus(logu) - if self_normalized: - y -= np.log(2).astype(npdt) - return math_ops.exp(logu) * logu - (1. 
+ math_ops.exp(logu)) * y - - -def arithmetic_geometric(logu, self_normalized=False, name=None): - """The Arithmetic-Geometric Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - When `self_normalized = True` the Arithmetic-Geometric Csiszar-function is: - - ```none - f(u) = (1 + u) log( (1 + u) / sqrt(u) ) - (1 + u) log(2) - ``` - - When `self_normalized = False` the `(1 + u) log(2)` term is omitted. - - Observe that as an f-Divergence, this Csiszar-function implies: - - ```none - D_f[p, q] = KL[m, p] + KL[m, q] - m(x) = 0.5 p(x) + 0.5 q(x) - ``` - - In a sense, this divergence is the "reverse" of the Jensen-Shannon - f-Divergence. - - This Csiszar-function induces a symmetric f-Divergence, i.e., - `D_f[p, q] = D_f[q, p]`. - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When - `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even - when `p, q` are unnormalized measures. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - arithmetic_geometric_of_u: `float`-like `Tensor` of the - Csiszar-function evaluated at `u = exp(logu)`. - """ - - with ops.name_scope(name, "arithmetic_geometric", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - y = nn_ops.softplus(logu) - 0.5 * logu - if self_normalized: - y -= np.log(2.).astype(logu.dtype.as_numpy_dtype) - return (1. + math_ops.exp(logu)) * y - - -def total_variation(logu, name=None): - """The Total Variation Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. 
- ``` - - The Total-Variation Csiszar-function is: - - ```none - f(u) = 0.5 |u - 1| - ``` - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - total_variation_of_u: `float`-like `Tensor` of the Csiszar-function - evaluated at `u = exp(logu)`. - """ - - with ops.name_scope(name, "total_variation", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return 0.5 * math_ops.abs(math_ops.expm1(logu)) - - -def pearson(logu, name=None): - """The Pearson Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The Pearson Csiszar-function is: - - ```none - f(u) = (u - 1)**2 - ``` - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - pearson_of_u: `float`-like `Tensor` of the Csiszar-function evaluated at - `u = exp(logu)`. - """ - - with ops.name_scope(name, "pearson", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return math_ops.square(math_ops.expm1(logu)) - - -def squared_hellinger(logu, name=None): - """The Squared-Hellinger Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The Squared-Hellinger Csiszar-function is: - - ```none - f(u) = (sqrt(u) - 1)**2 - ``` - - This Csiszar-function induces a symmetric f-Divergence, i.e., - `D_f[p, q] = D_f[q, p]`. - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. 
- name: Python `str` name prefixed to Ops created by this function. - - Returns: - squared_hellinger_of_u: `float`-like `Tensor` of the Csiszar-function - evaluated at `u = exp(logu)`. - """ - - with ops.name_scope(name, "squared_hellinger", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return pearson(0.5 * logu) - - -def triangular(logu, name=None): - """The Triangular Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The Triangular Csiszar-function is: - - ```none - f(u) = (u - 1)**2 / (1 + u) - ``` - - This Csiszar-function induces a symmetric f-Divergence, i.e., - `D_f[p, q] = D_f[q, p]`. - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - triangular_of_u: `float`-like `Tensor` of the Csiszar-function evaluated - at `u = exp(logu)`. - """ - - with ops.name_scope(name, "triangular", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return pearson(logu) / (1. + math_ops.exp(logu)) - - -def t_power(logu, t, self_normalized=False, name=None): - """The T-Power Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - When `self_normalized = True` the T-Power Csiszar-function is: - - ```none - f(u) = s [ u**t - 1 - t(u - 1) ] - s = { -1 0 < t < 1 - { +1 otherwise - ``` - - When `self_normalized = False` the `- t(u - 1)` term is omitted. - - This is similar to the `amari_alpha` Csiszar-function, with the associated - divergence being the same up to factors depending only on `t`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - t: `Tensor` of same `dtype` as `logu` and broadcastable shape. - self_normalized: Python `bool` indicating whether `f'(u=1)=0`. 
- name: Python `str` name prefixed to Ops created by this function. - - Returns: - t_power_of_u: `float`-like `Tensor` of the Csiszar-function evaluated - at `u = exp(logu)`. - """ - with ops.name_scope(name, "t_power", [logu, t]): - logu = ops.convert_to_tensor(logu, name="logu") - t = ops.convert_to_tensor(t, dtype=logu.dtype.base_dtype, name="t") - fu = math_ops.expm1(t * logu) - if self_normalized: - fu -= t * math_ops.expm1(logu) - fu *= array_ops.where(math_ops.logical_and(0. < t, t < 1.), - -array_ops.ones_like(t), - array_ops.ones_like(t)) - return fu - - -def log1p_abs(logu, name=None): - """The log1p-abs Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The Log1p-Abs Csiszar-function is: - - ```none - f(u) = u**(sign(u-1)) - 1 - ``` - - This function is so-named because it was invented from the following recipe. - Choose a convex function g such that g(0)=0 and solve for f: - - ```none - log(1 + f(u)) = g(log(u)). - <=> - f(u) = exp(g(log(u))) - 1 - ``` - - That is, the graph is identically `g` when y-axis is `log1p`-domain and x-axis - is `log`-domain. - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - log1p_abs_of_u: `float`-like `Tensor` of the Csiszar-function evaluated - at `u = exp(logu)`. - """ - - with ops.name_scope(name, "log1p_abs", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return math_ops.expm1(math_ops.abs(logu)) - - -def jeffreys(logu, name=None): - """The Jeffreys Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. 
- ``` - - The Jeffreys Csiszar-function is: - - ```none - f(u) = 0.5 ( u log(u) - log(u) ) - = 0.5 kl_forward + 0.5 kl_reverse - = symmetrized_csiszar_function(kl_reverse) - = symmetrized_csiszar_function(kl_forward) - ``` - - This Csiszar-function induces a symmetric f-Divergence, i.e., - `D_f[p, q] = D_f[q, p]`. - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - jeffreys_of_u: `float`-like `Tensor` of the Csiszar-function evaluated - at `u = exp(logu)`. - """ - - with ops.name_scope(name, "jeffreys", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return 0.5 * math_ops.expm1(logu) * logu - - -def chi_square(logu, name=None): - """The chi-Square Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The Chi-square Csiszar-function is: - - ```none - f(u) = u**2 - 1 - ``` - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - chi_square_of_u: `float`-like `Tensor` of the Csiszar-function evaluated - at `u = exp(logu)`. - """ - - with ops.name_scope(name, "chi_square", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return math_ops.expm1(2. * logu) - - -def modified_gan(logu, self_normalized=False, name=None): - """The Modified-GAN Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. 
- ``` - - When `self_normalized = True` the modified-GAN (Generative/Adversarial - Network) Csiszar-function is: - - ```none - f(u) = log(1 + u) - log(u) + 0.5 (u - 1) - ``` - - When `self_normalized = False` the `0.5 (u - 1)` is omitted. - - The unmodified GAN Csiszar-function is identical to Jensen-Shannon (with - `self_normalized = False`). - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - self_normalized: Python `bool` indicating whether `f'(u=1)=0`. When - `f'(u=1)=0` the implied Csiszar f-Divergence remains non-negative even - when `p, q` are unnormalized measures. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - chi_square_of_u: `float`-like `Tensor` of the Csiszar-function evaluated - at `u = exp(logu)`. - """ - - with ops.name_scope(name, "chi_square", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - y = nn_ops.softplus(logu) - logu - if self_normalized: - y += 0.5 * math_ops.expm1(logu) - return y - - -def dual_csiszar_function(logu, csiszar_function, name=None): - """Calculates the dual Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The Csiszar-dual is defined as: - - ```none - f^*(u) = u f(1 / u) - ``` - - where `f` is some other Csiszar-function. - - For example, the dual of `kl_reverse` is `kl_forward`, i.e., - - ```none - f(u) = -log(u) - f^*(u) = u f(1 / u) = -u log(1 / u) = u log(u) - ``` - - The dual of the dual is the original function: - - ```none - f^**(u) = {u f(1/u)}^*(u) = u (1/u) f(1/(1/u)) = f(u) - ``` - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. 
- csiszar_function: Python `callable` representing a Csiszar-function over - log-domain. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - dual_f_of_u: `float`-like `Tensor` of the result of calculating the dual of - `f` at `u = exp(logu)`. - """ - - with ops.name_scope(name, "dual_csiszar_function", [logu]): - return math_ops.exp(logu) * csiszar_function(-logu) - - -def symmetrized_csiszar_function(logu, csiszar_function, name=None): - """Symmetrizes a Csiszar-function in log-space. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The symmetrized Csiszar-function is defined as: - - ```none - f_g(u) = 0.5 g(u) + 0.5 u g (1 / u) - ``` - - where `g` is some other Csiszar-function. - - We say the function is "symmetrized" because: - - ```none - D_{f_g}[p, q] = D_{f_g}[q, p] - ``` - - for all `p << >> q` (i.e., `support(p) = support(q)`). - - There exists alternatives for symmetrizing a Csiszar-function. For example, - - ```none - f_g(u) = max(f(u), f^*(u)), - ``` - - where `f^*` is the dual Csiszar-function, also implies a symmetric - f-Divergence. - - Example: - - When either of the following functions are symmetrized, we obtain the - Jensen-Shannon Csiszar-function, i.e., - - ```none - g(u) = -log(u) - (1 + u) log((1 + u) / 2) + u - 1 - h(u) = log(4) + 2 u log(u / (1 + u)) - ``` - - implies, - - ```none - f_g(u) = f_h(u) = u log(u) - (1 + u) log((1 + u) / 2) - = jensen_shannon(log(u)). - ``` - - Warning: this function makes non-log-space calculations and may therefore be - numerically unstable for `|logu| >> 0`. - - Args: - logu: `float`-like `Tensor` representing `log(u)` from above. - csiszar_function: Python `callable` representing a Csiszar-function over - log-domain. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - symmetrized_g_of_u: `float`-like `Tensor` of the result of applying the - symmetrization of `g` evaluated at `u = exp(logu)`. 
- """ - - with ops.name_scope(name, "symmetrized_csiszar_function", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - return 0.5 * (csiszar_function(logu) - + dual_csiszar_function(logu, csiszar_function)) - - -def monte_carlo_csiszar_f_divergence( - f, - p_log_prob, - q, - num_draws, - use_reparametrization=None, - seed=None, - name=None): - """Monte-Carlo approximation of the Csiszar f-Divergence. - - A Csiszar-function is a member of, - - ```none - F = { f:R_+ to R : f convex }. - ``` - - The Csiszar f-Divergence for Csiszar-function f is given by: - - ```none - D_f[p(X), q(X)] := E_{q(X)}[ f( p(X) / q(X) ) ] - ~= m**-1 sum_j^m f( p(x_j) / q(x_j) ), - where x_j ~iid q(X) - ``` - - Tricks: Reparameterization and Score-Gradient - - When q is "reparameterized", i.e., a diffeomorphic transformation of a - parameterless distribution (e.g., - `Normal(Y; m, s) <=> Y = sX + m, X ~ Normal(0,1)`), we can swap gradient and - expectation, i.e., - `grad[Avg{ s_i : i=1...n }] = Avg{ grad[s_i] : i=1...n }` where `S_n=Avg{s_i}` - and `s_i = f(x_i), x_i ~iid q(X)`. - - However, if q is not reparameterized, TensorFlow's gradient will be incorrect - since the chain-rule stops at samples of unreparameterized distributions. In - this circumstance using the Score-Gradient trick results in an unbiased - gradient, i.e., - - ```none - grad[ E_q[f(X)] ] - = grad[ int dx q(x) f(x) ] - = int dx grad[ q(x) f(x) ] - = int dx [ q'(x) f(x) + q(x) f'(x) ] - = int dx q(x) [q'(x) / q(x) f(x) + f'(x) ] - = int dx q(x) grad[ f(x) q(x) / stop_grad[q(x)] ] - = E_q[ grad[ f(x) q(x) / stop_grad[q(x)] ] ] - ``` - - Unless `q.reparameterization_type != distribution.FULLY_REPARAMETERIZED` it is - usually preferable to set `use_reparametrization = True`. - - Example Application: - - The Csiszar f-Divergence is a useful framework for variational inference. 
- I.e., observe that, - - ```none - f(p(x)) = f( E_{q(Z | x)}[ p(x, Z) / q(Z | x) ] ) - <= E_{q(Z | x)}[ f( p(x, Z) / q(Z | x) ) ] - := D_f[p(x, Z), q(Z | x)] - ``` - - The inequality follows from the fact that the "perspective" of `f`, i.e., - `(s, t) |-> t f(s / t))`, is convex in `(s, t)` when `s/t in domain(f)` and - `t` is a real. Since the above framework includes the popular Evidence Lower - BOund (ELBO) as a special case, i.e., `f(u) = -log(u)`, we call this framework - "Evidence Divergence Bound Optimization" (EDBO). - - Args: - f: Python `callable` representing a Csiszar-function in log-space, i.e., - takes `p_log_prob(q_samples) - q.log_prob(q_samples)`. - p_log_prob: Python `callable` taking (a batch of) samples from `q` and - returning the natural-log of the probability under distribution `p`. - (In variational inference `p` is the joint distribution.) - q: `tf.Distribution`-like instance; must implement: - `reparameterization_type`, `sample(n, seed)`, and `log_prob(x)`. - (In variational inference `q` is the approximate posterior distribution.) - num_draws: Integer scalar number of draws used to approximate the - f-Divergence expectation. - use_reparametrization: Python `bool`. When `None` (the default), - automatically set to: - `q.reparameterization_type == distribution.FULLY_REPARAMETERIZED`. - When `True` uses the standard Monte-Carlo average. When `False` uses the - score-gradient trick. (See above for details.) When `False`, consider - using `csiszar_vimco`. - seed: Python `int` seed for `q.sample`. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - monte_carlo_csiszar_f_divergence: `float`-like `Tensor` Monte Carlo - approximation of the Csiszar f-Divergence. - - Raises: - ValueError: if `q` is not a reparameterized distribution and - `use_reparametrization = True`. 
A distribution `q` is said to be - "reparameterized" when its samples are generated by transforming the - samples of another distribution which does not depend on the - parameterization of `q`. This property ensures the gradient (with respect - to parameters) is valid. - TypeError: if `p_log_prob` is not a Python `callable`. - """ - with ops.name_scope(name, "monte_carlo_csiszar_f_divergence", [num_draws]): - if use_reparametrization is None: - use_reparametrization = (q.reparameterization_type - == distribution.FULLY_REPARAMETERIZED) - elif (use_reparametrization and - q.reparameterization_type != distribution.FULLY_REPARAMETERIZED): - # TODO(jvdillon): Consider only raising an exception if the gradient is - # requested. - raise ValueError( - "Distribution `q` must be reparameterized, i.e., a diffeomorphic " - "transformation of a parameterless distribution. (Otherwise this " - "function has a biased gradient.)") - if not callable(p_log_prob): - raise TypeError("`p_log_prob` must be a Python `callable` function.") - return monte_carlo.expectation( - f=lambda q_samples: f(p_log_prob(q_samples) - q.log_prob(q_samples)), - samples=q.sample(num_draws, seed=seed), - log_prob=q.log_prob, # Only used if use_reparametrization=False. - use_reparametrization=use_reparametrization) - - -def csiszar_vimco(f, - p_log_prob, - q, - num_draws, - num_batch_draws=1, - seed=None, - name=None): - """Use VIMCO to lower the variance of gradient[csiszar_function(Avg(logu))]. - - This function generalizes "Variational Inference for Monte Carlo Objectives" - (VIMCO), i.e., https://arxiv.org/abs/1602.06725, to Csiszar f-Divergences. - - Note: if `q.reparameterization_type = distribution.FULLY_REPARAMETERIZED`, - consider using `monte_carlo_csiszar_f_divergence`. - - The VIMCO loss is: - - ```none - vimco = f(Avg{logu[i] : i=0,...,m-1}) - where, - logu[i] = log( p(x, h[i]) / q(h[i] | x) ) - h[i] iid~ q(H | x) - ``` - - Interestingly, the VIMCO gradient is not the naive gradient of `vimco`. 
- Rather, it is characterized by: - - ```none - grad[vimco] - variance_reducing_term - where, - variance_reducing_term = Sum{ grad[log q(h[i] | x)] * - (vimco - f(log Avg{h[j;i] : j=0,...,m-1})) - : i=0, ..., m-1 } - h[j;i] = { u[j] j!=i - { GeometricAverage{ u[k] : k!=i} j==i - ``` - - (We omitted `stop_gradient` for brevity. See implementation for more details.) - - The `Avg{h[j;i] : j}` term is a kind of "swap-out average" where the `i`-th - element has been replaced by the leave-`i`-out Geometric-average. - - This implementation prefers numerical precision over efficiency, i.e., - `O(num_draws * num_batch_draws * prod(batch_shape) * prod(event_shape))`. - (The constant may be fairly large, perhaps around 12.) - - Args: - f: Python `callable` representing a Csiszar-function in log-space. - p_log_prob: Python `callable` representing the natural-log of the - probability under distribution `p`. (In variational inference `p` is the - joint distribution.) - q: `tf.Distribution`-like instance; must implement: `sample(n, seed)`, and - `log_prob(x)`. (In variational inference `q` is the approximate posterior - distribution.) - num_draws: Integer scalar number of draws used to approximate the - f-Divergence expectation. - num_batch_draws: Integer scalar number of draws used to approximate the - f-Divergence expectation. - seed: Python `int` seed for `q.sample`. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - vimco: The Csiszar f-Divergence generalized VIMCO objective. - - Raises: - ValueError: if `num_draws < 2`. - """ - with ops.name_scope(name, "csiszar_vimco", [num_draws, num_batch_draws]): - if num_draws < 2: - raise ValueError("Must specify num_draws > 1.") - stop = array_ops.stop_gradient # For readability. 
- x = stop(q.sample(sample_shape=[num_draws, num_batch_draws], - seed=seed)) - logqx = q.log_prob(x) - logu = p_log_prob(x) - logqx - f_log_avg_u, f_log_sooavg_u = [f(r) for r in csiszar_vimco_helper(logu)] - dotprod = math_ops.reduce_sum( - logqx * stop(f_log_avg_u - f_log_sooavg_u), - axis=0) # Sum over iid samples. - # We now rewrite f_log_avg_u so that: - # `grad[f_log_avg_u] := grad[f_log_avg_u + dotprod]`. - # To achieve this, we use a trick that - # `f(x) - stop(f(x)) == zeros_like(f(x))` - # but its gradient is grad[f(x)]. - # Note that IEEE754 specifies that `x - x == 0.` and `x + 0. == x`, hence - # this trick loses no precision. For more discussion regarding the relevant - # portions of the IEEE754 standard, see the StackOverflow question, - # "Is there a floating point value of x, for which x-x == 0 is false?" - # http://stackoverflow.com/q/2686644 - f_log_avg_u += dotprod - stop(dotprod) # Add zeros_like(dot_prod). - return math_ops.reduce_mean(f_log_avg_u, axis=0) # Avg over batches. - - -def csiszar_vimco_helper(logu, name=None): - """Helper to `csiszar_vimco`; computes `log_avg_u`, `log_sooavg_u`. - - `axis = 0` of `logu` is presumed to correspond to iid samples from `q`, i.e., - - ```none - logu[j] = log(u[j]) - u[j] = p(x, h[j]) / q(h[j] | x) - h[j] iid~ q(H | x) - ``` - - Args: - logu: Floating-type `Tensor` representing `log(p(x, h) / q(h | x))`. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - log_avg_u: `logu.dtype` `Tensor` corresponding to the natural-log of the - average of `u`. The sum of the gradient of `log_avg_u` is `1`. - log_sooavg_u: `logu.dtype` `Tensor` characterized by the natural-log of the - average of `u`` except that the average swaps-out `u[i]` for the - leave-`i`-out Geometric-average. The mean of the gradient of - `log_sooavg_u` is `1`. 
Mathematically `log_sooavg_u` is, - ```none - log_sooavg_u[i] = log(Avg{h[j ; i] : j=0, ..., m-1}) - h[j ; i] = { u[j] j!=i - { GeometricAverage{u[k] : k != i} j==i - ``` - - """ - with ops.name_scope(name, "csiszar_vimco_helper", [logu]): - logu = ops.convert_to_tensor(logu, name="logu") - - n = logu.shape.with_rank_at_least(1)[0].value - if n is None: - n = array_ops.shape(logu)[0] - log_n = math_ops.log(math_ops.cast(n, dtype=logu.dtype)) - nm1 = math_ops.cast(n - 1, dtype=logu.dtype) - else: - log_n = np.log(n).astype(logu.dtype.as_numpy_dtype) - nm1 = np.asarray(n - 1, dtype=logu.dtype.as_numpy_dtype) - - # Throughout we reduce across axis=0 since this is presumed to be iid - # samples. - - log_max_u = math_ops.reduce_max(logu, axis=0) - log_sum_u_minus_log_max_u = math_ops.reduce_logsumexp( - logu - log_max_u, axis=0) - - # log_loosum_u[i] = - # = logsumexp(logu[j] : j != i) - # = log( exp(logsumexp(logu)) - exp(logu[i]) ) - # = log( exp(logsumexp(logu - logu[i])) exp(logu[i]) - exp(logu[i])) - # = logu[i] + log(exp(logsumexp(logu - logu[i])) - 1) - # = logu[i] + log(exp(logsumexp(logu) - logu[i]) - 1) - # = logu[i] + softplus_inverse(logsumexp(logu) - logu[i]) - d = log_sum_u_minus_log_max_u + (log_max_u - logu) - # We use `d != 0` rather than `d > 0.` because `d < 0.` should never - # happens; if it does we want to complain loudly (which `softplus_inverse` - # will). - d_ok = math_ops.not_equal(d, 0.) - safe_d = array_ops.where(d_ok, d, array_ops.ones_like(d)) - d_ok_result = logu + distribution_util.softplus_inverse(safe_d) - - inf = np.array(np.inf, dtype=logu.dtype.as_numpy_dtype) - - # When not(d_ok) and is_positive_and_largest then we manually compute the - # log_loosum_u. (We can efficiently do this for any one point but not all, - # hence we still need the above calculation.) This is good because when - # this condition is met, we cannot use the above calculation; its -inf. 
- # We now compute the log-leave-out-max-sum, replicate it to every - # point and make sure to select it only when we need to. - is_positive_and_largest = math_ops.logical_and( - logu > 0., - math_ops.equal(logu, log_max_u[array_ops.newaxis, ...])) - log_lomsum_u = math_ops.reduce_logsumexp( - array_ops.where(is_positive_and_largest, - array_ops.fill(array_ops.shape(logu), -inf), - logu), - axis=0, keep_dims=True) - log_lomsum_u = array_ops.tile( - log_lomsum_u, - multiples=1 + array_ops.pad([n-1], [[0, array_ops.rank(logu)-1]])) - - d_not_ok_result = array_ops.where( - is_positive_and_largest, - log_lomsum_u, - array_ops.fill(array_ops.shape(d), -inf)) - - log_loosum_u = array_ops.where(d_ok, d_ok_result, d_not_ok_result) - - # The swap-one-out-sum ("soosum") is n different sums, each of which - # replaces the i-th item with the i-th-left-out average, i.e., - # soo_sum_u[i] = [exp(logu) - exp(logu[i])] + exp(mean(logu[!=i])) - # = exp(log_loosum_u[i]) + exp(looavg_logu[i]) - looavg_logu = (math_ops.reduce_sum(logu, axis=0) - logu) / nm1 - log_soosum_u = math_ops.reduce_logsumexp( - array_ops.stack([log_loosum_u, looavg_logu]), - axis=0) - - log_avg_u = log_sum_u_minus_log_max_u + log_max_u - log_n - log_sooavg_u = log_soosum_u - log_n - - log_avg_u.set_shape(logu.shape.with_rank_at_least(1)[1:]) - log_sooavg_u.set_shape(logu.shape) - - return log_avg_u, log_sooavg_u -- GitLab From 73454e35fa20278712a59949e84cad1ffc1aaf4e Mon Sep 17 00:00:00 2001 From: Chi Zeng Date: Mon, 5 Mar 2018 15:22:07 -0800 Subject: [PATCH 431/884] Update TensorBoard's tutorial on tensorflow.org with information on setting up as well as how long the tutorial should take. 
PiperOrigin-RevId: 187933027 --- .../summaries_and_tensorboard.md | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md index 05dfdfdc4d..79280d246a 100644 --- a/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md +++ b/tensorflow/docs_src/programmers_guide/summaries_and_tensorboard.md @@ -16,10 +16,17 @@ TensorBoard is fully configured, it looks like this: -This tutorial is intended to get you started with simple TensorBoard usage. -There are other resources available as well! The [TensorBoard's GitHub](https://github.com/tensorflow/tensorboard) -has a lot more information on TensorBoard usage, including tips & tricks, and -debugging information. +This 30-minute tutorial is intended to get you started with simple TensorBoard +usage. It assumes a basic understanding of TensorFlow. + +There are other resources available as well! The [TensorBoard GitHub](https://github.com/tensorflow/tensorboard) +has a lot more information on using individual dashboards within TensorBoard +including tips & tricks and debugging information. + +## Setup + +[Install TensorFlow](https://www.tensorflow.org/install/). Installing TensorFlow +via pip should also automatically install TensorBoard. ## Serializing the data @@ -214,4 +221,5 @@ corner. Each tab represents a set of serialized data that can be visualized. For in depth information on how to use the *graph* tab to visualize your graph, see @{$graph_viz$TensorBoard: Graph Visualization}. -For more usage information on TensorBoard in general, see the [TensorBoard's GitHub](https://github.com/tensorflow/tensorboard). +For more usage information on TensorBoard in general, see the +[TensorBoard GitHub](https://github.com/tensorflow/tensorboard). 
-- GitLab From d70110a8e99f59ba06011f724e02d77dcd39e703 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Mon, 5 Mar 2018 15:34:26 -0800 Subject: [PATCH 432/884] disabling timed out test in asan PiperOrigin-RevId: 187935309 --- tensorflow/contrib/distributions/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 1b4877c57f..0e4ddeffb0 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -485,6 +485,7 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], + tags = ["noasan"], ) cuda_py_test( -- GitLab From 5dc7dbb8c61872f34b4af18588852ed9d78ed5e0 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Mar 2018 15:54:01 -0800 Subject: [PATCH 433/884] removing unused variables --- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 422ef67953..fc66b2ed63 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -875,7 +875,6 @@ tensorflow::Status BinaryTensorOpWeight( // Maybe this part has to be moved into the block of rsqrt later // Check type consistency - auto dtype = TFAttrs(node_def).get("T"); nvinfer1::DataType ttype; TF_CHECK_OK(ConvertDType(weights.type_, &ttype)); @@ -1007,7 +1006,6 @@ tensorflow::Status ConvertConv2DHelper( TFAttrs attrs(node_def); - int c_index = 1; int h_index = 2; int w_index = 3; auto data_format = attrs.get("data_format"); @@ -1016,7 +1014,6 @@ tensorflow::Status ConvertConv2DHelper( {0, 3, 1, 2}); h_index = 1; w_index = 2; - c_index = 3; // TODO(jie): transpose it } @@ -1958,9 +1955,6 @@ tensorflow::Status ConvertMatMul(Converter& ctx, // TODO(jie): transpose! 
TFAttrs attrs(node_def); - // tensor after transpose (NCHW) - auto tensor_dim = tensor->getDimensions(); - TRT_ShapedWeights weights_ck = inputs.at(1).weights(); TRT_ShapedWeights weights = ctx.get_temp_weights_like(weights_ck); ReorderCKtoKC(weights_ck, &weights); -- GitLab From b7e38a5f2a310599e9d4cab2bd95a43dd18018d6 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 15:53:43 -0800 Subject: [PATCH 434/884] Remove unnecessary density functions. distributions.py appropriately calls `log` or `exp` to compute missing cdf/prob functions. PiperOrigin-RevId: 187938200 --- tensorflow/contrib/distributions/python/ops/gumbel.py | 3 --- .../contrib/distributions/python/ops/inverse_gamma.py | 6 ------ tensorflow/contrib/distributions/python/ops/logistic.py | 3 --- .../contrib/distributions/python/ops/onehot_categorical.py | 3 --- .../distributions/python/ops/relaxed_onehot_categorical.py | 3 --- tensorflow/python/ops/distributions/gamma.py | 6 ------ tensorflow/python/ops/distributions/normal.py | 3 --- tensorflow/python/ops/distributions/student_t.py | 3 --- tensorflow/python/ops/distributions/uniform.py | 6 ------ 9 files changed, 36 deletions(-) diff --git a/tensorflow/contrib/distributions/python/ops/gumbel.py b/tensorflow/contrib/distributions/python/ops/gumbel.py index d0efaefb8e..8d05ad6b80 100644 --- a/tensorflow/contrib/distributions/python/ops/gumbel.py +++ b/tensorflow/contrib/distributions/python/ops/gumbel.py @@ -190,9 +190,6 @@ class _Gumbel(distribution.Distribution): def _log_prob(self, x): return self._log_unnormalized_prob(x) - self._log_normalization() - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - def _log_cdf(self, x): return -math_ops.exp(-self._z(x)) diff --git a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py index ee4d86867d..51ac61dcf6 100644 --- a/tensorflow/contrib/distributions/python/ops/inverse_gamma.py +++ 
b/tensorflow/contrib/distributions/python/ops/inverse_gamma.py @@ -192,12 +192,6 @@ class InverseGamma(distribution.Distribution): def _log_prob(self, x): return self._log_unnormalized_prob(x) - self._log_normalization() - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - - def _log_cdf(self, x): - return math_ops.log(self._cdf(x)) - def _cdf(self, x): x = self._maybe_assert_valid_sample(x) # Note that igammac returns the upper regularized incomplete gamma diff --git a/tensorflow/contrib/distributions/python/ops/logistic.py b/tensorflow/contrib/distributions/python/ops/logistic.py index 473677f8d9..68e6bca5a5 100644 --- a/tensorflow/contrib/distributions/python/ops/logistic.py +++ b/tensorflow/contrib/distributions/python/ops/logistic.py @@ -185,9 +185,6 @@ class Logistic(distribution.Distribution): def _log_prob(self, x): return self._log_unnormalized_prob(x) - self._log_normalization() - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - def _log_cdf(self, x): return -nn_ops.softplus(-self._z(x)) diff --git a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py index b76cebf79f..46c2cc8b7a 100644 --- a/tensorflow/contrib/distributions/python/ops/onehot_categorical.py +++ b/tensorflow/contrib/distributions/python/ops/onehot_categorical.py @@ -203,9 +203,6 @@ class OneHotCategorical(distribution.Distribution): ret = array_ops.reshape(ret, logits_shape) return ret - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - def _entropy(self): return -math_ops.reduce_sum( nn_ops.log_softmax(self.logits) * self.probs, axis=-1) diff --git a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py index 2aa771a71e..ff33f327c7 100644 --- a/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py +++ 
b/tensorflow/contrib/distributions/python/ops/relaxed_onehot_categorical.py @@ -285,9 +285,6 @@ class ExpRelaxedOneHotCategorical(distribution.Distribution): ret = array_ops.reshape(log_prob, logits_shape) return ret - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - def _assert_valid_sample(self, x): if not self.validate_args: return x diff --git a/tensorflow/python/ops/distributions/gamma.py b/tensorflow/python/ops/distributions/gamma.py index 8fb218be3a..adb1f4f9a8 100644 --- a/tensorflow/python/ops/distributions/gamma.py +++ b/tensorflow/python/ops/distributions/gamma.py @@ -193,12 +193,6 @@ class Gamma(distribution.Distribution): def _log_prob(self, x): return self._log_unnormalized_prob(x) - self._log_normalization() - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - - def _log_cdf(self, x): - return math_ops.log(self._cdf(x)) - def _cdf(self, x): x = self._maybe_assert_valid_sample(x) # Note that igamma returns the regularized incomplete gamma function, diff --git a/tensorflow/python/ops/distributions/normal.py b/tensorflow/python/ops/distributions/normal.py index e7f120ea2d..32e8a49c81 100644 --- a/tensorflow/python/ops/distributions/normal.py +++ b/tensorflow/python/ops/distributions/normal.py @@ -188,9 +188,6 @@ class Normal(distribution.Distribution): def _log_prob(self, x): return self._log_unnormalized_prob(x) - self._log_normalization() - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - def _log_cdf(self, x): return special_math.log_ndtr(self._z(x)) diff --git a/tensorflow/python/ops/distributions/student_t.py b/tensorflow/python/ops/distributions/student_t.py index 778fefb8c2..9d9e65b4e8 100644 --- a/tensorflow/python/ops/distributions/student_t.py +++ b/tensorflow/python/ops/distributions/student_t.py @@ -248,9 +248,6 @@ class StudentT(distribution.Distribution): math_ops.lgamma(0.5 * self.df) - math_ops.lgamma(0.5 * (self.df + 1.))) - def _prob(self, x): - return math_ops.exp(self._log_prob(x)) - def 
_cdf(self, x): # Take Abs(scale) to make subsequent where work correctly. y = (x - self.loc) / math_ops.abs(self.scale) diff --git a/tensorflow/python/ops/distributions/uniform.py b/tensorflow/python/ops/distributions/uniform.py index e0c554442f..ec623b55eb 100644 --- a/tensorflow/python/ops/distributions/uniform.py +++ b/tensorflow/python/ops/distributions/uniform.py @@ -165,9 +165,6 @@ class Uniform(distribution.Distribution): seed=seed) return self.low + self.range() * samples - def _log_prob(self, x): - return math_ops.log(self._prob(x)) - def _prob(self, x): broadcasted_x = x * array_ops.ones(self.batch_shape_tensor()) return array_ops.where( @@ -179,9 +176,6 @@ class Uniform(distribution.Distribution): array_ops.zeros_like(broadcasted_x), array_ops.ones_like(broadcasted_x) / self.range())) - def _log_cdf(self, x): - return math_ops.log(self.cdf(x)) - def _cdf(self, x): broadcast_shape = array_ops.broadcast_dynamic_shape( array_ops.shape(x), self.batch_shape_tensor()) -- GitLab From 031e938064f5637055d7f8e9bef6b8a2e6ed24a2 Mon Sep 17 00:00:00 2001 From: Sami Kama Date: Mon, 5 Mar 2018 16:06:06 -0800 Subject: [PATCH 435/884] Fix enum ints --- tensorflow/contrib/tensorrt/convert/convert_graph.cc | 2 +- tensorflow/contrib/tensorrt/convert/convert_graph.h | 3 --- tensorflow/contrib/tensorrt/convert/convert_nodes.cc | 6 +++--- tensorflow/contrib/tensorrt/convert/convert_nodes.h | 9 ++++++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 872c468172..1feaabbfed 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -401,7 +401,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( ConvertGraphParams p(graph, output_names, subgraph_node_ids, max_batch_size, max_mem_per_engine, static_graph_properties, &output_edge_map, precision_mode); - if (precision_mode == FP16MODE) { 
+ if (precision_mode == INT8MODE) { TF_RETURN_IF_ERROR(GetCalibNode(&p)); } else { tensorflow::Status status = ConvertSubGraphToTensorRT(&p); diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 90bd3c4a17..4cdc768a42 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -27,9 +27,6 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { -const int FP32MODE = 0; -const int FP16MODE = 1; -const int INT8MODE = 2; // This method converts an already generated calibration graph which was used in // calibration runs to an inference graph tensorflow::Status ConvertCalibGraphToInferGraph( diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index fc66b2ed63..7d81831539 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -2254,7 +2254,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto weight_rmgr = trt_rmgr->getManager("WeightStore"); auto ws = new tensorflow::tensorrt::TRTWeightStore(); TF_CHECK_OK(weight_rmgr->Create(calib_op_name, calib_op_name, ws)); - Converter converter(op_res->network_, ws, s.precision_mode == 1); + Converter converter(op_res->network_, ws, s.precision_mode == FP16MODE); std::vector input_names; std::vector input_dtypes; for (const std::pair& input : s.input_inds) { @@ -2460,7 +2460,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( TF_CHECK_OK(weight_rmgr->Create(engine_name, engine_name, ws)); // Build the network - Converter converter(trt_network.get(), ws, s.precision_mode == 1); + Converter converter(trt_network.get(), ws, s.precision_mode == FP16MODE); std::vector input_names; std::vector input_dtypes; @@ -2607,7 +2607,7 @@ tensorflow::Status 
ConvertSubGraphToTensorRTNodeDef( trt_builder->setMaxWorkspaceSize(s.max_workspace_size_bytes); VLOG(0) << "Max batch size= " << s.max_batch_size << " max workspace size= " << s.max_workspace_size_bytes; - if (s.precision_mode == 1) { + if (s.precision_mode == FP16MODE) { trt_builder->setHalf2Mode(true); VLOG(0) << "Using FP16 precision mode"; } diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 1f09aecd1e..518798c0ad 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -33,16 +33,19 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { - +const int FP32MODE = 0; +const int FP16MODE = 1; +const int INT8MODE = 2; struct SubGraphParams { SubGraphParams( - tensorflow::Graph& inp_graph, const std::set& subgraph_node_id_numbers, + tensorflow::Graph& inp_graph, + const std::set& subgraph_node_id_numbers, const std::vector>& input_indices, const std::vector>& output_indices, size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, const tensorflow::grappler::GraphProperties& current_graph_properties, std::unordered_map>* output_edges, - tensorflow::NodeDef* constructed_trt_node, int engine_precision_mode = 0) + tensorflow::NodeDef* constructed_trt_node, int engine_precision_mode = FP32MODE) : graph(inp_graph), subgraph_node_ids(subgraph_node_id_numbers), input_inds(input_indices), -- GitLab From 79a76178539f08697b5de43b733492fd5f7684d5 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Mon, 5 Mar 2018 16:09:47 -0800 Subject: [PATCH 436/884] Tests for running _Send and _Recv in eager. 
PiperOrigin-RevId: 187940522 --- tensorflow/c/eager/runtime.cc | 13 ++++++- tensorflow/python/eager/core_test.py | 56 ++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+), 1 deletion(-) diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index 4bf24fec2c..9b46cf8245 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -302,7 +302,18 @@ Status KernelAndDevice::Run(std::vector* input_tensors, params.runner = &runner; OpKernelContext context(&params); - device_->Compute(kernel_.get(), &context); + + if (kernel_->def().op() == "_Recv") { + // TODO(apassos) do not special-case _Recv. Currently the GPU device fails + // if trying to run _Recv->Compute(), specifically checking for _Recv. To go + // around this we call _Recv->ComputeAsync, to mimic graph mode behavior. + AsyncOpKernel* async = kernel_->AsAsync(); + Notification done; + device_->ComputeAsync(async, &context, [&done]() { done.Notify(); }); + done.WaitForNotification(); + } else { + device_->Compute(kernel_.get(), &context); + } if (!context.status().ok()) return context.status(); output_tensors->clear(); diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index e418be5fae..f8f1011e4e 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -546,5 +546,61 @@ class TFETest(test_util.TensorFlowTestCase): self.assertIsInstance(t, ops.EagerTensor) +class SendRecvTest(test_util.TensorFlowTestCase): + + cpu_device = '/job:localhost/replica:0/task:0/device:CPU:0' + + def _send(self, tensor, tensor_name, to_device): + return execute( + b'_Send', num_outputs=0, inputs=[tensor], + attrs=('T', tensor.dtype.as_datatype_enum, + 'tensor_name', tensor_name, + 'send_device', tensor.device, + 'send_device_incarnation', 0, + 'recv_device', to_device, + 'client_terminated', True)) + + def _recv(self, dtype, tensor_name, from_device): + device_name = context.context().device_name + if not
device_name: + device_name = self.cpu_device + return execute( + b'_Recv', num_outputs=1, inputs=[], + attrs=('tensor_type', dtype.as_datatype_enum, + 'tensor_name', tensor_name, + 'send_device', from_device, + 'send_device_incarnation', 0, + 'recv_device', device_name, + 'client_terminated', False))[0] + + def testBasic(self): + t0 = constant_op.constant(1.0) + t1 = constant_op.constant(2.0) + self._send(t0, 't0', self.cpu_device) + self._send(t1, 't1', self.cpu_device) + self.assertAllEqual( + self._recv(dtypes.float32, 't0', self.cpu_device), + 1.0) + self.assertAllEqual( + self._recv(dtypes.float32, 't1', self.cpu_device), + 2.0) + + def testLocalCrossDevice(self): + if not context.context().num_gpus(): + self.skipTest('No GPUs found') + gpu_device_name = '/job:localhost/replica:0/task:0/device:GPU:0' + with ops.device('GPU:0'): + t0 = constant_op.constant(1.0) + self._send(t0, 't0', self.cpu_device) + self.assertAllEqual( + self._recv(dtypes.float32, 't0', gpu_device_name), + 1.0) + self._send(constant_op.constant(2.0), 't1', gpu_device_name) + with ops.device('GPU:0'): + self.assertAllEqual( + self._recv(dtypes.float32, 't1', self.cpu_device), + 2.0) + + if __name__ == '__main__': test.main() -- GitLab From a1483aca252dc6924685bdd368b86394e98375e2 Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Mon, 5 Mar 2018 16:12:01 -0800 Subject: [PATCH 437/884] Remove core:lib dependency in favor of Lite-specific logging helper This CL makes an enormous swathe of TF Lite's tests buildable on Android targets (and helps for many iOS tests, as well). The only reason the tests depended on tensorflow/core:lib was because lib was the most common target that includes logging.h, which is necessary for log-related tests. This set of utilities may not be perfect (e.g. it still means that certain TF-related test resources, like FLAGs, are not accessible), but it is an improvement.
PiperOrigin-RevId: 187940806 --- tensorflow/contrib/lite/kernels/BUILD | 2 +- tensorflow/core/BUILD | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 956bd35fe6..7dc725d578 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -33,7 +33,7 @@ cc_library( "//tensorflow/contrib/lite:schema_fbs_version", "//tensorflow/contrib/lite:string_util", "//tensorflow/contrib/lite/testing:util", - "//tensorflow/core:lib", + "//tensorflow/core:tflite_portable_logging", "@com_google_googletest//:gtest", ], ) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 445cf5bc8a..b7f84a4d27 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1664,6 +1664,25 @@ cc_library( ], ) +cc_library( + name = "tflite_portable_logging", + srcs = [], + hdrs = [ + "lib/bfloat16/bfloat16.h", + "platform/default/integral_types.h", + "platform/default/logging.h", + "platform/logging.h", + "platform/macros.h", + "platform/platform.h", + "platform/types.h", + ], + copts = tf_copts(), + linkopts = ["-ldl"], + deps = [ + "//tensorflow/core/platform/default/build_config:logging", + ], +) + cc_library( name = "android_jpeg_internal", srcs = if_android([ -- GitLab From 413a22f8ca594b01d78ea5970d454629a438bab3 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Mon, 5 Mar 2018 16:24:49 -0800 Subject: [PATCH 438/884] disabling msan flaky test PiperOrigin-RevId: 187942643 --- tensorflow/contrib/bayesflow/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 0a5b7e46f2..7302c9119d 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -204,6 +204,7 @@ cuda_py_test( "//tensorflow/python:platform_test", "//tensorflow/python:random_seed", ], + tags = ["nomsan"], ) cuda_py_test( -- GitLab From 
665a4bf664546224c65eeb5a0a52d80e48e2f3e1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 17:07:39 -0800 Subject: [PATCH 439/884] Improve the accuracy of the upper-bound of the sum of the size of an HLO and all its dependencies. The previous implementation computed the size of an HLO as the sum of dependencies weighted by the number of paths to each dependency. In the previous implementation the "size" of some HLO overflowed an int64 for dependence graphs with a large number of distinct paths. The new implementation computes the min of the previous overestimate and the sum of all HLOs before-and-including the current HLO in a topological sort of the graph. Both the current and the previous implementations are linear time. Since the sum of the size of all HLOs will never overflow, the "total size" of each HLO will never overflow. The new upper-bound is the min of the previous upper bound and a new heuristic, so it is always at least as tight a bound as the old implementation. RELNOTES: n/a PiperOrigin-RevId: 187948221 --- tensorflow/compiler/xla/service/hlo_scheduling.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/hlo_scheduling.cc b/tensorflow/compiler/xla/service/hlo_scheduling.cc index f6e33403f5..da448ed71a 100644 --- a/tensorflow/compiler/xla/service/hlo_scheduling.cc +++ b/tensorflow/compiler/xla/service/hlo_scheduling.cc @@ -348,6 +348,7 @@ StatusOr> RunDFSMemoryScheduler( // simply users-1 for each instruction. By subtracting 1, we're saying that // instructions with no users or a single user don't count; instructions with // lots of fan-out will be visited earlier. + int64 cumulative_total_size = 0; tensorflow::gtl::FlatMap extra_users; tensorflow::gtl::FlatMap total_sizes; for (const HloInstruction* hlo : computation.MakeInstructionPostOrder()) { @@ -357,14 +358,17 @@ StatusOr> RunDFSMemoryScheduler( continue; } extra_users[hlo] = hlo->users().empty() ?
0 : hlo->users().size() - 1; - total_sizes[hlo] = SumLogicalBufferSizes( + int64 logical_buffer_size = SumLogicalBufferSizes( points_to_analysis.GetBuffersDefinedByInstruction(hlo), size_function); + total_sizes[hlo] = logical_buffer_size; + cumulative_total_size += logical_buffer_size; tensorflow::gtl::FlatSet unique_operands( hlo->operands().begin(), hlo->operands().end()); for (const HloInstruction* operand : unique_operands) { extra_users[hlo] += extra_users[operand]; total_sizes[hlo] += total_sizes[operand]; } + total_sizes[hlo] = std::min(total_sizes[hlo], cumulative_total_size); } CHECK_EQ(extra_users.size(), computation.instruction_count()); CHECK_EQ(total_sizes.size(), computation.instruction_count()); -- GitLab From d576afdcd38dcfd9d0f6ce6d6cb262d22e2b11dd Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 5 Mar 2018 17:28:12 -0800 Subject: [PATCH 440/884] gradients: Export tf.custom_gradients (Moved from the tf.contrib.eager namespace) PiperOrigin-RevId: 187950503 --- tensorflow/contrib/eager/python/BUILD | 2 +- tensorflow/contrib/eager/python/tfe.py | 2 +- tensorflow/python/BUILD | 4 + tensorflow/python/eager/BUILD | 18 --- tensorflow/python/eager/backprop_test.py | 2 +- tensorflow/python/eager/custom_gradient.py | 90 ------------- tensorflow/python/eager/tape_test.py | 17 +-- tensorflow/python/ops/custom_gradient.py | 134 +++++++++++++++++++ tensorflow/python/ops/gradients.py | 2 + tensorflow/python/ops/gradients_test.py | 55 ++++++++ tensorflow/python/ops/standard_ops.py | 1 + tensorflow/python/training/training.py | 1 + tensorflow/tools/api/golden/tensorflow.pbtxt | 4 + 13 files changed, 205 insertions(+), 127 deletions(-) delete mode 100644 tensorflow/python/eager/custom_gradient.py create mode 100644 tensorflow/python/ops/custom_gradient.py diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 7fde53476d..fcb14bedc4 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ 
b/tensorflow/contrib/eager/python/BUILD @@ -18,6 +18,7 @@ py_library( ":saver", "//tensorflow/python:framework_ops", "//tensorflow/python:framework_test_lib", + "//tensorflow/python:gradients", "//tensorflow/python:numerics", "//tensorflow/python:resource_variable_ops", "//tensorflow/python:script_ops", @@ -27,7 +28,6 @@ py_library( "//tensorflow/python/eager:backprop", "//tensorflow/python/eager:context", "//tensorflow/python/eager:core", - "//tensorflow/python/eager:custom_gradient", "//tensorflow/python/eager:execution_callbacks", "//tensorflow/python/eager:function", ], diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index fce7a60853..5bddd26a0a 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -97,7 +97,6 @@ from tensorflow.python.eager.context import in_eager_mode from tensorflow.python.eager.context import in_graph_mode from tensorflow.python.eager.context import list_devices from tensorflow.python.eager.context import num_gpus -from tensorflow.python.eager.custom_gradient import custom_gradient from tensorflow.python.eager.execution_callbacks import add_execution_callback from tensorflow.python.eager.execution_callbacks import clear_execution_callbacks from tensorflow.python.eager.execution_callbacks import inf_callback @@ -107,6 +106,7 @@ from tensorflow.python.eager.execution_callbacks import seterr from tensorflow.python.framework.ops import enable_eager_execution from tensorflow.python.framework.ops import eager_run as run from tensorflow.python.framework.test_util import run_in_graph_and_eager_modes as run_test_in_graph_and_eager_modes +from tensorflow.python.ops.custom_gradient import custom_gradient from tensorflow.python.ops.resource_variable_ops import ResourceVariable as Variable from tensorflow.python.ops.variable_scope import EagerVariableStore from tensorflow.python.ops import script_ops diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD 
index db17a3fe02..4fdfacbfa8 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1775,6 +1775,7 @@ py_library( py_library( name = "gradients", srcs = [ + "ops/custom_gradient.py", "ops/gradients.py", "ops/gradients_impl.py", ], @@ -1788,6 +1789,7 @@ py_library( ":control_flow_util", ":framework", ":framework_for_generated_wrappers", + ":framework_ops", ":functional_ops", ":image_grad", ":linalg_grad", @@ -1800,6 +1802,8 @@ py_library( ":platform", ":spectral_grad", ":util", + "//tensorflow/python/eager:context", + "//tensorflow/python/eager:tape", "//third_party/py/numpy", "@six_archive//:six", ], diff --git a/tensorflow/python/eager/BUILD b/tensorflow/python/eager/BUILD index ab81d40148..5bedf9c6fd 100644 --- a/tensorflow/python/eager/BUILD +++ b/tensorflow/python/eager/BUILD @@ -42,7 +42,6 @@ py_library( ":backprop", ":context", ":core", - ":custom_gradient", ":execute", ":function", ":graph_callable", @@ -103,7 +102,6 @@ cuda_py_test( additional_deps = [ ":backprop", ":context", - ":custom_gradient", ":test", "//tensorflow/python:embedding_ops", "//tensorflow/python:array_ops", @@ -206,21 +204,6 @@ cc_library( ], ) -py_library( - name = "custom_gradient", - srcs = ["custom_gradient.py"], - srcs_version = "PY2AND3", - visibility = ["//tensorflow:internal"], - deps = [ - ":context", - ":tape", - "//tensorflow/python:array_ops", - "//tensorflow/python:framework_ops", - "//tensorflow/python:resource_variable_ops", - "//tensorflow/python:util", - ], -) - py_library( name = "graph_only_ops", srcs = ["graph_only_ops.py"], @@ -364,7 +347,6 @@ py_test( deps = [ ":backprop", ":context", - ":custom_gradient", ":test", "//tensorflow/python:array_ops", "//tensorflow/python:constant_op", diff --git a/tensorflow/python/eager/backprop_test.py b/tensorflow/python/eager/backprop_test.py index 48fd170764..07a2155d24 100644 --- a/tensorflow/python/eager/backprop_test.py +++ b/tensorflow/python/eager/backprop_test.py @@ -23,7 +23,6 @@ import numpy as np from 
tensorflow.python import pywrap_tensorflow from tensorflow.python.eager import backprop from tensorflow.python.eager import context -from tensorflow.python.eager import custom_gradient from tensorflow.python.eager import tape from tensorflow.python.eager import test from tensorflow.python.framework import constant_op @@ -32,6 +31,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import embedding_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops diff --git a/tensorflow/python/eager/custom_gradient.py b/tensorflow/python/eager/custom_gradient.py deleted file mode 100644 index fb932a9372..0000000000 --- a/tensorflow/python/eager/custom_gradient.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Decorator to overrides the gradient for a function.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.eager import context -from tensorflow.python.eager import tape -from tensorflow.python.framework import ops as tf_ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gen_array_ops -from tensorflow.python.util import nest -from tensorflow.python.util import tf_decorator - - -def custom_gradient(f): - """Decorator to define a function with a custom gradient. - - The input function is expected to return the tuple - (results, gradient_function). - - The output function will return results while possibly recording the - gradient_function and inputs in the tape. - - Args: - f: function to be decorated. - - Returns: - decorated function. - """ - - def decorated(*args, **kwargs): - """Decorated function with custom gradient.""" - if context.in_graph_mode(): - if kwargs: - raise ValueError( - "custom_gradient in graph mode doesn't support keyword arguments.") - name = "CustomGradient-%s" % tf_ops.uid() - args = [tf_ops.convert_to_tensor(x) for x in args] - result, grad_fn = f(*args) - flat_result = nest.flatten(result) - all_tensors = flat_result + args - - @tf_ops.RegisterGradient(name) - def internal_grad_fn(unused_op, *result_grads): # pylint: disable=unused-variable - gradients = nest.flatten(grad_fn(*result_grads[:len(flat_result)])) - # Need to return one value per input to the IdentityN, so pad the - # gradients of the inputs of the custom_gradient function with the - # gradients of the outputs as well. 
- return ([None] * len(flat_result)) + gradients - - with tf_ops.get_default_graph().gradient_override_map( - {"IdentityN": name}): - all_tensors = array_ops.identity_n(all_tensors) - return nest.pack_sequence_as( - structure=result, flat_sequence=all_tensors[:len(flat_result)]) - - input_tensors = [tf_ops.convert_to_tensor(x) for x in args] - - result, grad_fn = f(*args, **kwargs) - flat_result = nest.flatten(result) - # TODO(apassos) consider removing the identity below. - flat_result = [gen_array_ops.identity(x) for x in flat_result] - - def actual_grad_fn(*outputs): - return nest.flatten(grad_fn(*outputs)) - - tape.record_operation( - f.__name__, - flat_result, - input_tensors, - actual_grad_fn) - flat_result = list(flat_result) - return nest.pack_sequence_as(result, flat_result) - - return tf_decorator.make_decorator(f, decorated) diff --git a/tensorflow/python/eager/tape_test.py b/tensorflow/python/eager/tape_test.py index b490bac66d..4326d5efa3 100644 --- a/tensorflow/python/eager/tape_test.py +++ b/tensorflow/python/eager/tape_test.py @@ -21,11 +21,11 @@ from __future__ import print_function from tensorflow.python.eager import backprop from tensorflow.python.eager import context -from tensorflow.python.eager import custom_gradient from tensorflow.python.eager import test from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops +from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops # Importing nn_grad for the registration functions. 
@@ -165,21 +165,6 @@ class TapeTest(test.TestCase): g, = backprop.gradients_function(fn, [0])(t) self.assertAllEqual(g, 1.0) - def testCustomGradientGraphMode(self): - with context.graph_mode(), self.test_session(): - - @custom_gradient.custom_gradient - def f(x): - - def grad(dresult): - return dresult * 10.0 - - return x, grad - - inp = constant_op.constant(1.0) - grad = gradients_impl.gradients(f(inp), inp) - self.assertAllEqual(grad[0].eval(), 10.0) - if __name__ == '__main__': test.main() diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py new file mode 100644 index 0000000000..f199ba8fd4 --- /dev/null +++ b/tensorflow/python/ops/custom_gradient.py @@ -0,0 +1,134 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Decorator to overrides the gradient for a function.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.eager import context +from tensorflow.python.eager import tape +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import gen_array_ops +from tensorflow.python.util import nest +from tensorflow.python.util import tf_decorator +from tensorflow.python.util.tf_export import tf_export + + +@tf_export("custom_gradient") +def custom_gradient(f): + """Decorator to define a function with a custom gradient. + + This decorator allows fine grained control over the gradients of a sequence + for operations. This may be useful for multiple reasons, including providing + a more efficient or numerically stable gradient for a sequence of operations. + + For example, consider the following function that commonly occurs in the + computation of cross entropy and log likelihoods: + + ```python + def log1pexp(x): + return tf.log(1 + tf.exp(x)) + ``` + + Due to numerical instability, the gradient this function evaluated at x=100 is + NaN. For example: + + ```python + x = tf.constant(100.) + y = log1pexp(x) + dy = tf.gradients(y, x) # Will be NaN when evaluated. + ``` + + The gradient expression can be analytically simplified to provide numerical + stability: + + ```python + @tf.custom_gradient + def log1pexp(x): + e = tf.exp(x) + def grad(dy): + return dy * (1 - 1 / (1 + e)) + return tf.log(1 + e), grad + ``` + + With this definition, the gradient at x=100 will be correctly evaluated as + 1.0. + + See also @{tf.RegisterGradient} which registers a gradient function for a + primitive TensorFlow operation. `tf.custom_gradient` on the other hand allows + for fine grained control over the gradient computation of a sequence of + operations. 
+ + Args: + f: function `f(x)` that returns a tuple `(y, grad_fn)` where: + - `x` is a `Tensor` or sequence of `Tensor` inputs to the function. + - `y` is a `Tensor` or sequence of `Tensor` outputs of applying + TensorFlow + operations in `f` to `x`. + - `grad_fn` is a function with the signature `g(grad_ys)` which returns + a list of `Tensor`s - the derivatives of `Tensor`s in `y` with respect + to the `Tensor`s in `x. `grad_ys` is a `Tensor` or sequence of + `Tensor`s the same size as `y` holding the initial value gradients for + each `Tensor` in `y`. + + Returns: + A function `h(x)` which returns the same value as `f(x)[0]` and whose + gradient (as calculated by @{tf.gradients}) is determined by `f(x)[1]`. + """ + + def decorated(*args, **kwargs): + """Decorated function with custom gradient.""" + if context.in_graph_mode(): + if kwargs: + raise ValueError( + "The custom_gradient decorator currently suports keywords " + "arguments only when eager execution is enabled.") + name = "CustomGradient-%s" % ops.uid() + args = [ops.convert_to_tensor(x) for x in args] + result, grad_fn = f(*args) + flat_result = nest.flatten(result) + all_tensors = flat_result + args + + @ops.RegisterGradient(name) + def internal_grad_fn(unused_op, *result_grads): # pylint: disable=unused-variable + gradients = nest.flatten(grad_fn(*result_grads[:len(flat_result)])) + # Need to return one value per input to the IdentityN, so pad the + # gradients of the inputs of the custom_gradient function with the + # gradients of the outputs as well. 
+ return ([None] * len(flat_result)) + gradients + + with ops.get_default_graph().gradient_override_map({"IdentityN": name}): + all_tensors = array_ops.identity_n(all_tensors) + return nest.pack_sequence_as( + structure=result, flat_sequence=all_tensors[:len(flat_result)]) + + input_tensors = [ops.convert_to_tensor(x) for x in args] + + result, grad_fn = f(*args, **kwargs) + flat_result = nest.flatten(result) + # TODO(apassos) consider removing the identity below. + flat_result = [gen_array_ops.identity(x) for x in flat_result] + + def actual_grad_fn(*outputs): + return nest.flatten(grad_fn(*outputs)) + + tape.record_operation(f.__name__, flat_result, input_tensors, + actual_grad_fn) + flat_result = list(flat_result) + return nest.pack_sequence_as(result, flat_result) + + return tf_decorator.make_decorator(f, decorated) diff --git a/tensorflow/python/ops/gradients.py b/tensorflow/python/ops/gradients.py index 921fd50aa9..63d9a23222 100644 --- a/tensorflow/python/ops/gradients.py +++ b/tensorflow/python/ops/gradients.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import +from tensorflow.python.ops.custom_gradient import custom_gradient from tensorflow.python.ops.gradients_impl import AggregationMethod from tensorflow.python.ops.gradients_impl import gradients from tensorflow.python.ops.gradients_impl import hessians @@ -28,6 +29,7 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ # TODO(drpng): find a good place to reference this. "AggregationMethod", + "custom_gradient", "gradients", # tf.gradients.gradients. 
"hessians", # tf.gradients.hessians ] diff --git a/tensorflow/python/ops/gradients_test.py b/tensorflow/python/ops/gradients_test.py index d39b934819..c94f1396b2 100644 --- a/tensorflow/python/ops/gradients_test.py +++ b/tensorflow/python/ops/gradients_test.py @@ -35,6 +35,7 @@ from tensorflow.python.ops import array_grad # pylint: disable=unused-import from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_grad # pylint: disable=unused-import from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import custom_gradient from tensorflow.python.ops import data_flow_grad # pylint: disable=unused-import from tensorflow.python.ops import data_flow_ops # pylint: disable=unused-import from tensorflow.python.ops import functional_ops # pylint: disable=unused-import @@ -661,6 +662,7 @@ class HessianTest(test_util.TensorFlowTestCase): self.assertAllEqual((m, n, m, n), hess_actual.shape) self.assertAllClose(hess_value, hess_actual.reshape((m * n, m * n))) + @test_util.with_c_api class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase): @@ -741,6 +743,59 @@ class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase): "of unknown shape. This may consume a large amount of memory." in str(w[0].message)) + def testCustomGradientTrivial(self): + + @custom_gradient.custom_gradient + def MyIdentity(x): + + def Grad(dy): + return [3 * dy] + + return x, Grad + + with ops.Graph().as_default(): + x = constant(3.) + y = MyIdentity(MyIdentity(x)) + dy = gradients.gradients(y, x)[0] + with session.Session(): + self.assertEqual(9., dy.eval()) + + def testCustomGradient(self): + + @custom_gradient.custom_gradient + def MyMultiply(x1, x2): + result = x1 * x2 + + def Grad(dy): + # Switched the ordering here. + return [dy * x1, dy * x2] + + return result, Grad + + with ops.Graph().as_default(): + x1 = constant(3.) + x2 = constant(5.) 
+ y = MyMultiply(x1, x2) + dy = gradients.gradients(y, [x1, x2]) + with session.Session() as sess: + self.assertAllEqual([3., 5.], sess.run(dy)) + + def testCustomGradientErrors(self): + + @custom_gradient.custom_gradient + def F(x): + + def Grad(_): + raise RuntimeError("x") + + return x, Grad + + with ops.Graph().as_default(): + x = constant(1.0) + y = F(x) + with self.assertRaises(RuntimeError): + gradients.gradients(y, x) + @test_util.with_c_api class OnlyRealGradientsTest(test_util.TensorFlowTestCase): diff --git a/tensorflow/python/ops/standard_ops.py b/tensorflow/python/ops/standard_ops.py index 65b788c31a..60a98aca7f 100644 --- a/tensorflow/python/ops/standard_ops.py +++ b/tensorflow/python/ops/standard_ops.py @@ -218,6 +218,7 @@ _allowed_symbols_gradients = [ # Documented in training.py: # Not importing training.py to avoid complex graph dependencies. "AggregationMethod", + "custom_gradient", "gradients", # tf.gradients = gradients.gradients "hessians", ] diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index 78c8ce9208..e623e27a21 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -28,6 +28,7 @@ See the @{$python/train} guide. 
@@ProximalGradientDescentOptimizer @@ProximalAdagradOptimizer @@RMSPropOptimizer +@@custom_gradient @@gradients @@AggregationMethod @@stop_gradient diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index 8c9e7af89b..a88a87b952 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -892,6 +892,10 @@ tf_module { name: "cumsum" argspec: "args=[\'x\', \'axis\', \'exclusive\', \'reverse\', \'name\'], varargs=None, keywords=None, defaults=[\'0\', \'False\', \'False\', \'None\'], " } + member_method { + name: "custom_gradient" + argspec: "args=[\'f\'], varargs=None, keywords=None, defaults=None" + } member_method { name: "decode_base64" argspec: "args=[\'input\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 80f3080cd87997dd87f2c6ca84bce6525dca92fe Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Mon, 5 Mar 2018 17:37:29 -0800 Subject: [PATCH 441/884] disabling flaky test in msan PiperOrigin-RevId: 187951549 --- tensorflow/contrib/distributions/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 0e4ddeffb0..d81dfc2f62 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -350,6 +350,7 @@ cuda_py_test( "//tensorflow/python:nn_ops", "//tensorflow/python:platform_test", ], + tags = ["nomsan"], ) cuda_py_test( -- GitLab From f72727494b57a2200af25c3dab8e9c061d4b9282 Mon Sep 17 00:00:00 2001 From: Mark Heffernan Date: Mon, 5 Mar 2018 18:07:12 -0800 Subject: [PATCH 442/884] Add method for computing the maximal set of live LogicalBuffers in an allocation. 
PiperOrigin-RevId: 187954755 --- .../compiler/xla/service/buffer_assignment.cc | 125 +++++++++++++++++- .../compiler/xla/service/buffer_assignment.h | 35 ++++- .../xla/service/buffer_assignment_test.cc | 113 +++++++++++++++- 3 files changed, 266 insertions(+), 7 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index d44d3d71d9..0434c0a92b 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -292,6 +292,112 @@ BufferAllocationProto BufferAllocation::ToProto() const { return proto; } +std::pair> +BufferAllocation::ComputePeakMemoryLogicalBuffers() const { + if (HeapTraces().empty()) { + // Just return the largest LogicalBuffer in the allocation. + const LogicalBuffer* largest_buffer = nullptr; + int64 largest_size = 0; + for (const auto& pair : assigned_buffers()) { + const LogicalBuffer* buffer = pair.first; + int64 size = pair.second.size; + if (largest_buffer == nullptr) { + largest_buffer = buffer; + largest_size = size; + continue; + } + // Tie-break with LogicalBuffer::Id so the return value is stable relative + // to changing addresses. + if (size > largest_size || + ((size == largest_size) && (largest_buffer->id() > buffer->id()))) { + largest_buffer = buffer; + largest_size = size; + } + } + CHECK(largest_buffer != nullptr) + << "No logical buffers in allocation: " << ToString(); + return {largest_size, {largest_buffer}}; + } + + // Create a map from LogicalBuffer::Id to LogicalBuffer* for the logical + // buffers in this allocation. 
+ tensorflow::gtl::FlatMap + id_to_buffer; + tensorflow::gtl::FlatMap buffer_sizes; + for (const auto& pair : assigned_buffers()) { + const LogicalBuffer* buffer = pair.first; + const OffsetSize& offset_size = pair.second; + id_to_buffer[buffer->id()] = buffer; + buffer_sizes[buffer] = offset_size.size; + } + + // Returns how much the given event increases the total size of live + // buffers. Can be negative. + auto memory_delta = [this, &id_to_buffer, &buffer_sizes]( + const HeapSimulatorTrace::Event& event) -> int64 { + const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id()); + const int64 buffer_size = buffer_sizes.at(buffer); + if (event.kind() == HeapSimulatorTrace::Event::ALLOC) { + return buffer_size; + } else if (event.kind() == HeapSimulatorTrace::Event::SHARE_WITH) { + // Sharing a buffer does not change the live set size for the purposes of + // the heap simulator. Even though the shared-with buffer may be smaller, + // the entire allocation remains live. + return 0; + } else if (event.kind() == HeapSimulatorTrace::Event::FREE) { + return -1 * buffer_size; + } + LOG(FATAL) << "Unknown event kind: " << event.kind(); + }; + + int64 total_max_live_size = 0; + std::vector live_buffers_vector; + for (const HeapSimulatorTrace& heap_trace : HeapTraces()) { + // First compute the size of the maximal live set. + int64 max_live_size = 0; + int64 live_size = 0; + for (const auto& event : heap_trace.events()) { + live_size += memory_delta(event); + if (max_live_size < live_size) { + max_live_size = live_size; + } + } + + // Next gather the set of logical buffers live at the earliest point of + // maximal live set size. 
+ tensorflow::gtl::FlatSet live_buffers; + live_size = 0; + for (const auto& event : heap_trace.events()) { + const LogicalBuffer* buffer = id_to_buffer.at(event.buffer_id()); + if (event.kind() == HeapSimulatorTrace::Event::ALLOC) { + InsertOrDie(&live_buffers, buffer); + } else if (event.kind() == HeapSimulatorTrace::Event::SHARE_WITH) { + // Nothing to do. + } else if (event.kind() == HeapSimulatorTrace::Event::FREE) { + CHECK(ContainsKey(live_buffers, buffer)); + live_buffers.erase(buffer); + } + + live_size += memory_delta(event); + if (live_size == max_live_size) { + break; + } + } + CHECK_EQ(live_size, max_live_size); + total_max_live_size += max_live_size; + + live_buffers_vector.insert(live_buffers_vector.end(), live_buffers.begin(), + live_buffers.end()); + } + + // Stabily sort the live buffers. + std::sort(live_buffers_vector.begin(), live_buffers_vector.end(), + [](const LogicalBuffer* a, const LogicalBuffer* b) { + return a->id() < b->id(); + }); + return {total_max_live_size, live_buffers_vector}; +} + string BufferAllocation::ToString() const { string output; Appendf(&output, "allocation %lld: %p, size %lld", index_, this, size()); @@ -525,6 +631,7 @@ void BufferAssignment::AddAssignment(BufferAllocation* allocation, // Combines allocations of temporary buffers of the same color into one big // BufferAllocation. void BufferAssignment::CombineTempAllocations() { + VLOG(1) << "CombineTempAllocations()"; FlatMap combined_allocation_map; @@ -546,11 +653,16 @@ void BufferAssignment::CombineTempAllocations() { if (combined_it == combined_allocation_map.end()) { // We have found the first temp allocation of this color. Collect // the other temp allocations of the same color into it. 
+ VLOG(1) << "Combined temp allocation for color " << color + << " is: " << temp_allocation; combined_allocation_map.emplace(color, temp_allocation); continue; } auto* combined_allocation = &combined_it->second; + VLOG(1) << "Combined allocation absorbing temp allocation: " + << temp_allocation; + // Each temp allocation is placed end-to-end, accounting for alignment. // The offset of each buffer in the combined allocation is computed from // the base offset of the allocation. @@ -564,6 +676,10 @@ void BufferAssignment::CombineTempAllocations() { const int64 size = buffer_offset_size.second.size; combined_allocation->AddAssignment(*buffer, base + offset, size); } + if (!temp_allocation.HeapTraces().empty()) { + CHECK_EQ(temp_allocation.HeapTraces().size(), 1); + combined_allocation->AddHeapTrace(temp_allocation.HeapTraces().front()); + } } // Replace all existing temporary allocations with the new combined // allocations. @@ -693,9 +809,9 @@ BufferAssignmentProto BufferAssignment::ToProto() const { for (const BufferAllocation& allocation : Allocations()) { BufferAllocationProto proto_allocation = allocation.ToProto(); proto.add_buffer_allocations()->Swap(&proto_allocation); - } - for (const HeapSimulatorTrace& trace : heap_simulator_traces_) { - *proto.add_heap_simulator_traces() = trace; + for (const HeapSimulatorTrace& heap_trace : allocation.HeapTraces()) { + *proto.add_heap_simulator_traces() = heap_trace; + } } return proto; } @@ -1131,7 +1247,8 @@ void BufferAssigner::AssignBuffersFromHeapSimulator( assignment->AddAssignment(allocation, buffer, chunk.offset, chunk.size); } - assignment->heap_simulator_traces_.push_back(result.debug_trace); + VLOG(1) << "Ran heap simulation for allocation: " << allocation->ToString(); + allocation->AddHeapTrace(result.debug_trace); } // Adds the 'colocated_set' of buffers to 'colocated_buffer_sets', maintaining diff --git a/tensorflow/compiler/xla/service/buffer_assignment.h 
b/tensorflow/compiler/xla/service/buffer_assignment.h index 6b7fd0014d..3086d0e2ca 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.h +++ b/tensorflow/compiler/xla/service/buffer_assignment.h @@ -192,6 +192,37 @@ class BufferAllocation { !is_thread_local(); } + // Add a heap trace which was used to assign slices to logical buffers in this + // allocation. A single BufferAllocation may include multiple heap traces + // in the case of the temporary block where there is a heap trace per + // computation. + void AddHeapTrace(const HeapSimulatorTrace& heap_trace) { + heap_traces_.push_back(heap_trace); + } + + // Return the set of heap traces used to assign slices to logical buffers in + // this allocation. + const std::vector HeapTraces() const { + return heap_traces_; + } + + // Compute and return the LogicalBuffers which are live at the point of peak + // memory usage for the given allocation. The point of peak memory usage is + // the point at which the total size of all live logical buffers is + // maximal. If peak memory is reached at multiple points, the set of logical + // buffers live at the earliest maximal point is returned. The vector is + // stabily asserted by LogicalBuffer::Index. + // + // The return value is a pair of total size of the logical buffers at peak, + // and the buffers themselves. + std::pair> + ComputePeakMemoryLogicalBuffers() const; + + // Get the number of bytes lost to fragmentation. This is equal to the + // difference between the size of the allocation and the size of the maximal + // live set. + int64 fragmentation_bytes() const { return fragmentation_bytes_; } + bool operator==(const BufferAllocation& other) const { return index_ == other.index_; } @@ -257,6 +288,9 @@ class BufferAllocation { // Mapping from the set of buffers assigned to this allocation to their // logical offsets and sizes. 
tensorflow::gtl::FlatMap assigned_buffers_; + + int64 fragmentation_bytes_ = 0; + std::vector heap_traces_; }; // Add stream operators for nicer output of CHECK/RET_CHECK failures. @@ -441,7 +475,6 @@ class BufferAssignment { LogicalBuffer::AlignmentFunction color_alignment_; Stats stats_; - std::vector heap_simulator_traces_; TF_DISALLOW_COPY_AND_ASSIGN(BufferAssignment); }; diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index cd73654b8f..234c725bb9 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -42,9 +42,10 @@ limitations under the License. #include "tensorflow/core/platform/macros.h" namespace xla { - namespace { +using ::testing::UnorderedElementsAre; + // DFS visitor that collects the instructions referenced by a computation // without descending into nested computations, i.e., only from the operands. class InstructionListVisitor : public DfsHloVisitorWithDefault { @@ -101,6 +102,22 @@ class BufferAssignmentTest : public HloTestBase { .ConsumeValueOrDie(); } + std::unique_ptr RunBufferAssignmentWithInstructionSequence( + HloModule* module, + tensorflow::gtl::ArraySlice instruction_sequence, + int64 alignment = 1) { + SequentialHloOrdering::HloModuleSequence module_sequence; + module_sequence[module->entry_computation()] = + std::vector(instruction_sequence.begin(), + instruction_sequence.end()); + return BufferAssigner::Run( + module, + xla::MakeUnique(module, module_sequence), + backend().compiler()->BufferSizeBytesFunction(), + [alignment](LogicalBuffer::Color) { return alignment; }) + .ConsumeValueOrDie(); + } + // Builds an x+1.0 computation to use in a Map. 
std::unique_ptr BuildMapComputationPlus1(const string& name) { auto builder = HloComputation::Builder(name); @@ -1370,7 +1387,7 @@ TEST_F(BufferAssignmentTest, AmbiguousBufferAsOutput) { auto element_slices = assignment->GetAllSlices(select, /*index=*/{0}); EXPECT_EQ(2, element_slices.size()); EXPECT_THAT(element_slices, - ::testing::UnorderedElementsAre( + UnorderedElementsAre( assignment->GetUniqueSlice(tuple_param0, /*index=*/{0}) .ConsumeValueOrDie(), assignment->GetUniqueSlice(tuple_param1, /*index=*/{0}) @@ -1473,6 +1490,98 @@ TEST_F(BufferAssignmentTest, OneTempAllocation) { } } +TEST_F(BufferAssignmentTest, TrivialPeakBuffers) { + // paramscalar ------- (mul) -- (add) -- (sub) + // / / / + // param0[100] -------/ / / + // / / + // param1[100] --------------/--------/ + auto builder = HloComputation::Builder(TestName()); + auto paramscalar = + builder.AddInstruction(HloInstruction::CreateParameter(0, r0f32_, "")); + auto param0 = builder.AddInstruction( + HloInstruction::CreateParameter(1, f32vec100_, "")); + auto param1 = builder.AddInstruction( + HloInstruction::CreateParameter(2, f32vec100_, "")); + auto mul = builder.AddInstruction(HloInstruction::CreateBinary( + f32vec100_, HloOpcode::kMultiply, paramscalar, param0)); + auto add = builder.AddInstruction( + HloInstruction::CreateBinary(f32vec100_, HloOpcode::kAdd, mul, param1)); + builder.AddInstruction(HloInstruction::CreateBinary( + f32vec100_, HloOpcode::kSubtract, add, param1)); + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + auto buffers = RunBufferAssignment(module.get()); + + // Trivially, the set of peak memory logical buffer(s) of an allocation with a + // single logical buffer should be exactly the logical buffer in that + // allocation. 
+ const BufferAllocation& mul_buffer = GetTopLevelAllocation(*buffers, mul); + int64 peak_size; + std::vector peak_buffers; + + std::tie(peak_size, peak_buffers) = + mul_buffer.ComputePeakMemoryLogicalBuffers(); + EXPECT_EQ(peak_size, ShapeUtil::ByteSizeOf(f32vec100_)); + ASSERT_EQ(peak_buffers.size(), 1); + EXPECT_EQ(peak_buffers[0]->instruction(), mul); +} + +TEST_F(BufferAssignmentTest, PeakBuffers) { + // Compute the peak liveness buffers of the following sequence: + // + // %param = ... + // %log = log(%param) + // %rev = reverse(%log) + // %neg = neg(%param) + // %concat = concat(%rev, %neg) + // ROOT %root = slice(concat) + // + // In the temporary block, the set of live buffers at peak memory use should + // be {%rev, %neg, %concat}. This occurs right at the concat itself. + auto builder = HloComputation::Builder(TestName()); + auto param = builder.AddInstruction( + HloInstruction::CreateParameter(0, f32vec100_, "")); + auto log = builder.AddInstruction( + HloInstruction::CreateUnary(f32vec100_, HloOpcode::kLog, param)); + auto rev = builder.AddInstruction( + HloInstruction::CreateReverse(f32vec100_, log, {0})); + auto neg = builder.AddInstruction( + HloInstruction::CreateUnary(f32vec100_, HloOpcode::kNegate, param)); + const Shape concat_shape = ShapeUtil::MakeShape(F32, {200}); + auto concat = builder.AddInstruction( + HloInstruction::CreateConcatenate(concat_shape, {rev, neg}, 0)); + // Make the root tiny so no interior nodes can share its buffer. + auto root = builder.AddInstruction(HloInstruction::CreateSlice( + ShapeUtil::MakeShape(F32, {1}), concat, {0}, {1}, {1})); + + auto module = CreateNewModule(); + module->AddEntryComputation(builder.Build()); + + auto buffers = RunBufferAssignmentWithInstructionSequence( + module.get(), {param, log, rev, neg, concat, root}); + + // The temporary buffer should hold the 4 interior instructions. 
+ const BufferAllocation& buffer = GetTopLevelAllocation(*buffers, concat); + EXPECT_FALSE(buffer.IsInputOrOutput()); + EXPECT_TRUE(buffer.IsPreallocatedTempBuffer()); + ASSERT_EQ(buffer.assigned_buffers().size(), 4); + + int64 peak_size; + std::vector peak_buffers; + std::tie(peak_size, peak_buffers) = buffer.ComputePeakMemoryLogicalBuffers(); + + // The peak live set should be concat and its inputs. + EXPECT_EQ(peak_size, ShapeUtil::ByteSizeOf(ShapeUtil::MakeShape(F32, {400}))); + ASSERT_EQ(peak_buffers.size(), 3); + std::vector peak_instructions; + for (const LogicalBuffer* logical_buffer : peak_buffers) { + peak_instructions.push_back(logical_buffer->instruction()); + } + EXPECT_THAT(peak_instructions, UnorderedElementsAre(rev, neg, concat)); +} + class WhileBufferAssignmentTest : public HloTestBase { protected: std::unique_ptr BuildWhileConditionComputation( -- GitLab From b5f943201afc06525818f45da28f82559fceced2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 18:36:46 -0800 Subject: [PATCH 443/884] Properly recurse when checkpointing scopes. 
PiperOrigin-RevId: 187958420 --- .../py2tf/pyct/static_analysis/activity.py | 26 ++++++-- .../pyct/static_analysis/activity_test.py | 66 ++++++++++++++----- 2 files changed, 70 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py b/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py index 02ea6fdeaf..22925afe7c 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py @@ -71,13 +71,33 @@ class Scope(object): tuple(self.modified)) def copy_from(self, other): + """Recursively copies the contents of this scope from another scope.""" + if (self.parent is None) != (other.parent is None): + raise ValueError('cannot copy scopes of different structures') + if other.parent is not None: + self.parent.copy_from(other.parent) + self.isolated = other.isolated self.modified = copy.copy(other.modified) self.created = copy.copy(other.created) self.used = copy.copy(other.used) self.params = copy.copy(other.params) self.returned = copy.copy(other.returned) + @classmethod + def copy_of(cls, other): + if other.parent is not None: + parent = cls.copy_of(other.parent) + else: + parent = None + new_copy = cls(parent) + new_copy.copy_from(other) + return new_copy + def merge_from(self, other): + if (self.parent is None) != (other.parent is None): + raise ValueError('cannot merge scopes of different structures') + if other.parent is not None: + self.parent.merge_from(other.parent) self.modified |= other.modified self.created |= other.created self.used |= other.used @@ -225,14 +245,12 @@ class ActivityAnalizer(transformer.Base): # modifies the parent state causing the other child blocks to be # processed incorrectly. So we need to checkpoint the parent scope so that # each child sees the same context. 
- before_parent = Scope(None) - before_parent.copy_from(self.scope) + before_parent = Scope.copy_of(self.scope) after_children = [] for child, scope_name in children: self.scope.copy_from(before_parent) parent = self._process_block_node(parent, child, scope_name) - after_child = Scope(None) - after_child.copy_from(self.scope) + after_child = Scope.copy_of(self.scope) after_children.append(after_child) for after_child in after_children: self.scope.merge_from(after_child) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/activity_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/activity_test.py index 69f5f4fc58..b16d15b39d 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/activity_test.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/activity_test.py @@ -45,7 +45,7 @@ class ScopeTest(test.TestCase): scope.mark_read(QN('bar')) self.assertFalse(scope.has(QN('bar'))) - def test_copy(self): + def test_copy_from(self): scope = activity.Scope(None) scope.mark_write(QN('foo')) @@ -65,6 +65,17 @@ class ScopeTest(test.TestCase): self.assertTrue(QN('bar') in scope.created) self.assertFalse(QN('bar') in other.created) + def test_copy_of(self): + scope = activity.Scope(None) + scope.mark_read(QN('foo')) + + self.assertTrue(QN('foo') in activity.Scope.copy_of(scope).used) + + child_scope = activity.Scope(scope) + child_scope.mark_read(QN('bar')) + + self.assertTrue(QN('bar') in activity.Scope.copy_of(child_scope).used) + def test_nesting(self): scope = activity.Scope(None) scope.mark_write(QN('foo')) @@ -133,7 +144,7 @@ class ActivityAnalizerTest(test.TestCase): anno.getanno(node.body[0].body[2].value, NodeAnno.IS_LOCAL)) # b in return b - def assertScopeIs(self, scope, used, modified, created): + def assertScopeIsRmc(self, scope, used, modified, created): self.assertItemsEqual(used, tuple(str(s) for s in scope.used)) self.assertItemsEqual(modified, tuple(str(s) for s in scope.modified)) self.assertItemsEqual(created, tuple(str(s) for s in 
scope.created)) @@ -159,7 +170,7 @@ class ActivityAnalizerTest(test.TestCase): print_args_scope = anno.getanno(print_node, NodeAnno.ARGS_SCOPE) # We basically need to detect which variables are captured by the call # arguments. - self.assertScopeIs(print_args_scope, ('a', 'b'), (), ()) + self.assertScopeIsRmc(print_args_scope, ('a', 'b'), (), ()) def test_call(self): @@ -173,7 +184,7 @@ class ActivityAnalizerTest(test.TestCase): call_node = node.body[0].body[2].value # We basically need to detect which variables are captured by the call # arguments. - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'b'), (), ()) def test_while(self): @@ -187,10 +198,10 @@ class ActivityAnalizerTest(test.TestCase): node = self._parse_and_analyze(test_fn) while_node = node.body[0].body[1] - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(while_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), ('c',)) - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(while_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'), ('b', 'c'), ('a', 'b', 'c')) @@ -205,9 +216,9 @@ class ActivityAnalizerTest(test.TestCase): node = self._parse_and_analyze(test_fn) for_node = node.body[0].body[1] - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(for_node, NodeAnno.BODY_SCOPE), ('b',), ('b', 'c'), ('c',)) - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(for_node, NodeAnno.BODY_SCOPE).parent, ('a', 'b', 'c'), ('b', 'c', '_'), ('a', 'b', 'c', '_')) @@ -226,21 +237,40 @@ class ActivityAnalizerTest(test.TestCase): node = self._parse_and_analyze(test_fn) if_node = node.body[0].body[0] - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('x', 'y', 'z'), ('y', 'z')) # TODO(mdan): Double check: is it ok to not mark a local symbol as not read? 
- self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.BODY_SCOPE).parent, ('x', 'z', 'u'), ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u')) - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('x', 'y'), ('x', 'y', 'u'), ('y', 'u')) - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.ORELSE_SCOPE).parent, ('x', 'z', 'u'), ('x', 'y', 'z', 'u'), ('x', 'y', 'z', 'u')) - def test_functiondef(self): + def test_nested_if_else_creation(self): + + def test_fn(b): + if b > 0: + if b < 5: + a = b + else: + a = b * b + return a + + node = self._parse_and_analyze(test_fn) + inner_if_node = node.body[0].body[0].body[0] + self.assertScopeIsRmc( + anno.getanno(inner_if_node, NodeAnno.BODY_SCOPE), ('b',), ('a',), + ('a',)) + self.assertScopeIsRmc( + anno.getanno(inner_if_node, NodeAnno.ORELSE_SCOPE), ('b',), ('a',), + ('a',)) + + def test_function_def(self): def test_fn(a): @@ -257,11 +287,11 @@ class ActivityAnalizerTest(test.TestCase): node = self._parse_and_analyze(test_fn) fndef_node = node.body[0].body[0] - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(fndef_node, NodeAnno.BODY_SCOPE).parent, ('b', 'i', 'f', 'c', 'a'), ('f', 'b', 'c', 'i'), ('f', 'a', 'b', 'c', 'i')) - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(fndef_node, NodeAnno.BODY_SCOPE), ('x', 'y'), ('y',), ( 'x', 'y', @@ -284,13 +314,13 @@ class ActivityAnalizerTest(test.TestCase): node = self._parse_and_analyze(test_fn) call_node = node.body[0].body[0].value - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(call_node, NodeAnno.ARGS_SCOPE), ('a', 'a.b', 'a.c'), (), ()) if_node = node.body[0].body[1] - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.BODY_SCOPE), ('a',), ('a.b',), ()) - self.assertScopeIs( + self.assertScopeIsRmc( anno.getanno(if_node, NodeAnno.ORELSE_SCOPE), ('a', 'a.c', 'd', 'd.e', 'f'), ('a.c', 'd', 'd.e', 'f'), ('d', 'f')) -- GitLab From 
73999dc944b3516d485081fe060d6916c089e412 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 5 Mar 2018 18:49:53 -0800 Subject: [PATCH 444/884] Fixes a number of usability issues with model_to_estimator, in particular: - make it possible to use a model that was compiled with a TF optimizer (do not require a Keras optimizer) - do not require input to be dict (input_fn supports plain arrays) - do not require `config` to be a RunConfig instance, can now be a dict (better UX) - make it possible to use a subclassed model (caveat: weights are not preserved, yet) - clear error message when model isn't compiled; improve various error messages PiperOrigin-RevId: 187959927 --- .../python/keras/_impl/keras/estimator.py | 291 ++++++++++++++---- .../keras/_impl/keras/estimator_test.py | 146 ++++++++- tensorflow/python/layers/base.py | 5 +- 3 files changed, 374 insertions(+), 68 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/estimator.py b/tensorflow/python/keras/_impl/keras/estimator.py index 5697771a79..081f25e914 100644 --- a/tensorflow/python/keras/_impl/keras/estimator.py +++ b/tensorflow/python/keras/_impl/keras/estimator.py @@ -25,11 +25,15 @@ from tensorflow.python.client import session from tensorflow.python.estimator import estimator as estimator_lib from tensorflow.python.estimator import export as export_lib from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator import run_config as run_config_lib from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib from tensorflow.python.keras._impl.keras import backend as K from tensorflow.python.keras._impl.keras import models +from tensorflow.python.keras._impl.keras import optimizers +from tensorflow.python.keras._impl.keras.engine.base_layer import Layer +from tensorflow.python.keras._impl.keras.engine.network import Network from 
tensorflow.python.keras._impl.keras.utils.generic_utils import CustomObjectScope from tensorflow.python.ops import math_ops from tensorflow.python.ops import metrics as metrics_module @@ -50,36 +54,174 @@ def _cast_tensor_to_floatx(x): return math_ops.cast(x, K.floatx()) -def _create_ordered_io(keras_model, estimator_io_dict, is_input=True): +def _create_ordered_io(keras_model, estimator_io, is_input=True): """Create a list of tensors from IO dictionary based on Keras IO order. Args: - keras_model: an instance of compiled keras model. - estimator_io_dict: features or labels dictionary from model_fn. + keras_model: An instance of compiled keras model. + estimator_io: The features or labels (dict or plain array) from model_fn. is_input: True if dictionary is for inputs. Returns: - a list of tensors based on Keras IO order. + A list of tensors based on Keras IO order. Raises: ValueError: if dictionary keys cannot be found in Keras model input_names or output_names. """ - if is_input: - keras_io_names = keras_model.input_names + if isinstance(estimator_io, (list, tuple)): + # Case currently not supported by most built-in input_fn, + # but it's good to have for sanity + return [_cast_tensor_to_floatx(x) for x in estimator_io] + elif isinstance(estimator_io, dict): + if is_input: + if keras_model._is_graph_network: + keras_io_names = keras_model.input_names + else: + keras_io_names = [ + 'input_%d' % i for i in range(1, len(estimator_io) + 1)] + else: + if keras_model._is_graph_network: + keras_io_names = keras_model.output_names + else: + keras_io_names = [ + 'output_%d' % i for i in range(1, len(estimator_io) + 1)] + + for key in estimator_io: + if key not in keras_io_names: + raise ValueError( + 'Cannot find %s with name "%s" in Keras Model. 
' + 'It needs to match one ' + 'of the following: %s' % ('input' if is_input else 'output', key, + ', '.join(keras_io_names))) + tensors = [_cast_tensor_to_floatx(estimator_io[io_name]) + for io_name in keras_io_names] + return tensors else: - keras_io_names = keras_model.output_names + # Plain array. + return _cast_tensor_to_floatx(estimator_io) - for key in estimator_io_dict: - if key not in keras_io_names: - raise ValueError( - 'Cannot find %s with name "%s" in Keras Model. It needs to match ' - 'one of the following: %s' % ('input' if is_input else 'output', key, - ', '.join(keras_io_names))) - tensors = [] - for io_name in keras_io_names: - tensors.append(_cast_tensor_to_floatx(estimator_io_dict[io_name])) - return tensors + +def _in_place_subclassed_model_reset(model): + """Substitute for model cloning that works for subclassed models. + + Subclassed models cannot be cloned because their topology is not serializable. + To "instantiate" an identical model in a new TF graph, we reuse the original + model object, but we clear its state. + + After calling this function on a model intance, you can use the model instance + as if it were a model clone (in particular you can use it in a new graph). + + This method clears the state of the input model. It is thus destructive. + However the original state can be restored fully by calling + `_in_place_subclassed_model_state_restoration`. + + Args: + model: Instance of a Keras model created via subclassing. + + Raises: + ValueError: In case the model uses a subclassed model as inner layer. 
+ """ + assert not model._is_graph_network # Only makes sense for subclassed networks + # Retrieve all layers tracked by the model as well as their attribute names + attributes_cache = {} + for name in dir(model): + try: + value = getattr(model, name) + except (AttributeError, ValueError, TypeError): + continue + if isinstance(value, Layer): + attributes_cache[name] = value + assert value in model._layers + elif isinstance(value, (list, tuple)) and name not in ('layers', '_layers'): + # Handle case: list/tuple of layers (also tracked by the Network API). + if value and all(isinstance(val, Layer) for val in value): + raise ValueError('We do not support the use of list-of-layers ' + 'attributes in subclassed models used with ' + '`model_to_estimator` at this time. Found list ' + 'model: %s' % name) + + # Replace layers on the model with fresh layers + layers_to_names = {value: key for key, value in attributes_cache.items()} + original_layers = model._layers[:] + model._layers = [] + for layer in original_layers: # We preserve layer order. + config = layer.get_config() + # This will not work for nested subclassed models used as layers. + # This would be theoretically possible to support, but would add complexity. + # Only do it if users complain. + if isinstance(layer, Network) and not layer._is_graph_network: + raise ValueError('We do not support the use of nested subclassed models ' + 'in `model_to_estimator` at this time. 
Found nested ' + 'model: %s' % layer) + fresh_layer = layer.__class__.from_config(config) + name = layers_to_names[layer] + setattr(model, name, fresh_layer) + + # Cache original model build attributes (in addition to layers) + if (not hasattr(model, '_original_attributes_cache') or + model._original_attributes_cache is None): + if model.built: + attributes_to_cache = [ + 'inputs', + 'outputs', + '_feed_outputs', + '_feed_output_names', + '_feed_output_shapes', + '_feed_loss_fns', + 'loss_weights_list', + 'targets', + '_feed_targets', + 'sample_weight_modes', + 'weighted_metrics', + 'metrics_names', + 'metrics_tensors', + 'metrics_updates', + 'stateful_metric_names', + 'total_loss', + 'sample_weights', + '_feed_sample_weights', + 'train_function', + 'test_function', + 'predict_function', + '_collected_trainable_weights', + '_feed_inputs', + '_feed_input_names', + '_feed_input_shapes', + 'optimizer', + ] + for name in attributes_to_cache: + attributes_cache[name] = getattr(model, name) + model._original_attributes_cache = attributes_cache + + # Reset built state + model.built = False + model.inputs = None + model.outputs = None + + +def _in_place_subclassed_model_state_restoration(model): + """Restores the original state of a model after it was "reset". + + This undoes this action of `_in_place_subclassed_model_reset`. + + Args: + model: Instance of a Keras model created via subclassing, on which + `_in_place_subclassed_model_reset` was previously called. + """ + assert not model._is_graph_network + # Restore layers and build attributes + if (hasattr(model, '_original_attributes_cache') and + model._original_attributes_cache is not None): + model._layers = [] + for name, value in model._original_attributes_cache.items(): + setattr(model, name, value) + model._original_attributes_cache = None + else: + # Restore to the state of a never-called model. 
+ model.built = False + model.inputs = None + model.outputs = None def _clone_and_build_model(mode, @@ -93,8 +235,8 @@ def _clone_and_build_model(mode, mode: training mode. keras_model: an instance of compiled keras model. custom_objects: Dictionary for custom objects. - features: - labels: + features: Dict of tensors. + labels: Dict of tensors, or single tensor instance. Returns: The newly built model. @@ -102,33 +244,49 @@ def _clone_and_build_model(mode, # Set to True during training, False for inference. K.set_learning_phase(mode == model_fn_lib.ModeKeys.TRAIN) - # Clone keras model. - input_tensors = None if features is None else _create_ordered_io( - keras_model, features) - if custom_objects: - with CustomObjectScope(custom_objects): + # Get list of inputs. + if features is None: + input_tensors = None + else: + input_tensors = _create_ordered_io(keras_model, + estimator_io=features, + is_input=True) + # Get list of outputs. + if labels is None: + target_tensors = None + elif isinstance(labels, dict): + target_tensors = _create_ordered_io(keras_model, + estimator_io=labels, + is_input=False) + else: + target_tensors = [ + _cast_tensor_to_floatx( + sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels)) + ] + + if keras_model._is_graph_network: + if custom_objects: + with CustomObjectScope(custom_objects): + model = models.clone_model(keras_model, input_tensors=input_tensors) + else: model = models.clone_model(keras_model, input_tensors=input_tensors) else: - model = models.clone_model(keras_model, input_tensors=input_tensors) + model = keras_model + _in_place_subclassed_model_reset(model) + if input_tensors is not None: + model._set_inputs(input_tensors) # Compile/Build model - if mode is model_fn_lib.ModeKeys.PREDICT and not model.built: - model.build() + if mode is model_fn_lib.ModeKeys.PREDICT: + if isinstance(model, models.Sequential): + model.build() else: - optimizer_config = keras_model.optimizer.get_config() - optimizer = 
keras_model.optimizer.__class__.from_config(optimizer_config) - optimizer.iterations = training_util.get_or_create_global_step() - - # Get list of outputs. - if labels is None: - target_tensors = None - elif isinstance(labels, dict): - target_tensors = _create_ordered_io(keras_model, labels, is_input=False) + if isinstance(keras_model.optimizer, optimizers.TFOptimizer): + optimizer = keras_model.optimizer else: - target_tensors = [ - _cast_tensor_to_floatx( - sparse_tensor_lib.convert_to_tensor_or_sparse_tensor(labels)) - ] + optimizer_config = keras_model.optimizer.get_config() + optimizer = keras_model.optimizer.__class__.from_config(optimizer_config) + optimizer.iterations = training_util.get_or_create_global_step() model.compile( optimizer, @@ -168,10 +326,14 @@ def _create_keras_model_fn(keras_model, custom_objects=None): # Set loss and metric only during train and evaluate. if mode is not model_fn_lib.ModeKeys.PREDICT: - model._make_train_function() # pylint: disable=protected-access + if mode is model_fn_lib.ModeKeys.TRAIN: + model._make_train_function() # pylint: disable=protected-access + else: + model._make_test_function() # pylint: disable=protected-access loss = model.total_loss if model.metrics: + # TODO(fchollet): support stateful metrics eval_metric_ops = {} # When each metric maps to an output if isinstance(model.metrics, dict): @@ -195,6 +357,10 @@ def _create_keras_model_fn(keras_model, custom_objects=None): if mode is model_fn_lib.ModeKeys.TRAIN: train_op = model.train_function.updates_op + if not model._is_graph_network: + # Reset model state to original state, + # to avoid `model_fn` being destructive for the initial model argument. 
+ _in_place_subclassed_model_state_restoration(keras_model) return model_fn_lib.EstimatorSpec( mode=mode, predictions=predictions, @@ -274,10 +440,11 @@ def model_to_estimator(keras_model=None, """ if (not keras_model) and (not keras_model_path): raise ValueError( - 'Either keras_model or keras_model_path needs to be provided.') + 'Either `keras_model` or `keras_model_path` needs to be provided.') if keras_model and keras_model_path: raise ValueError( - 'Please specity either keras_model or keras_model_path but not both.') + 'Please specity either `keras_model` or `keras_model_path`, ' + 'but not both.') if not keras_model: if keras_model_path.startswith( @@ -288,22 +455,42 @@ def model_to_estimator(keras_model=None, logging.info('Loading models from %s', keras_model_path) keras_model = models.load_model(keras_model_path) else: - logging.info('Using the Keras model from memory.') + logging.info('Using the Keras model provided.') keras_model = keras_model - if not hasattr(keras_model, 'optimizer'): + if not hasattr(keras_model, 'optimizer') or not keras_model.optimizer: raise ValueError( - 'Given keras model has not been compiled yet. Please compile first ' - 'before creating the estimator.') + 'The given keras model has not been compiled yet. Please compile first ' + 'before calling `model_to_estimator`.') + + if isinstance(config, dict): + config = run_config_lib.RunConfig(**config) keras_model_fn = _create_keras_model_fn(keras_model, custom_objects) - est = estimator_lib.Estimator( + estimator = estimator_lib.Estimator( keras_model_fn, model_dir=model_dir, config=config) + # Pass the config into keras backend's default session. 
- with session.Session(config=est._session_config) as sess: + with session.Session(config=estimator._session_config) as sess: K.set_session(sess) keras_weights = keras_model.get_weights() - # TODO(yifeif): move checkpoint initialization to scaffold.init_fn - _save_first_checkpoint(keras_model, est, custom_objects, keras_weights) - return est + if keras_model._is_graph_network: + # TODO(yifeif): move checkpoint initialization to scaffold.init_fn + _save_first_checkpoint(keras_model, + estimator, + custom_objects, + keras_weights) + elif keras_model.built: + logging.warning('You are creating an Estimator from a Keras model ' + 'manually subclassed from `Model`, that was ' + 'already called on some inputs (and thus already had ' + 'weights). We are currently unable to preserve ' + 'the model\'s state (its weights) ' + 'as part of the estimator ' + 'in this case. Be warned that the estimator ' + 'has been created using ' + 'a freshly initialized version of your model.\n' + 'Note that this doesn\'t affect the state of the ' + 'model instance you passed as `keras_model` argument.') + return estimator diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py index a9de5dd076..e076dc25b1 100644 --- a/tensorflow/python/keras/_impl/keras/estimator_test.py +++ b/tensorflow/python/keras/_impl/keras/estimator_test.py @@ -34,6 +34,7 @@ from tensorflow.python.keras._impl.keras.applications import mobilenet from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import rmsprop try: @@ -64,12 +65,42 @@ def simple_functional_model(): return model -def get_resource_for_simple_model(is_sequential=True, is_evaluate=False): - model = simple_sequential_model( - ) if is_sequential else simple_functional_model() - if is_sequential: +def simple_subclassed_model(): + + class SimpleModel(keras.Model): + + 
def __init__(self): + super(SimpleModel, self).__init__() + self.dense1 = keras.layers.Dense(16, activation='relu') + self.dp = keras.layers.Dropout(0.1) + self.dense2 = keras.layers.Dense(_NUM_CLASS, activation='softmax') + + def call(self, inputs): + x = self.dense1(inputs) + x = self.dp(x) + return self.dense2(x) + + return SimpleModel() + + +def get_resource_for_simple_model(model_type='sequential', + is_evaluate=False,): + if model_type == 'sequential': + model = simple_sequential_model() model.build() - input_name = model.input_names[0] + elif model_type == 'subclass': + model = simple_subclassed_model() + else: + assert model_type == 'functional' + model = simple_functional_model() + + if model_type == 'subclass': + input_name = 'input_1' + output_name = 'output_1' + else: + input_name = model.input_names[0] + output_name = model.output_names[0] + np.random.seed(_RANDOM_SEED) (x_train, y_train), (x_test, y_test) = testing_utils.get_test_data( train_samples=_TRAIN_SIZE, @@ -80,17 +111,19 @@ def get_resource_for_simple_model(is_sequential=True, is_evaluate=False): y_test = keras.utils.to_categorical(y_test) train_input_fn = numpy_io.numpy_input_fn( - x={input_name: x_train}, - y=y_train, + x=randomize_io_type(x_train, input_name), + y=randomize_io_type(y_train, output_name), shuffle=False, num_epochs=None, batch_size=16) evaluate_input_fn = numpy_io.numpy_input_fn( - x={input_name: x_test}, y=y_test, num_epochs=1, shuffle=False) + x=randomize_io_type(x_test, input_name), + y=randomize_io_type(y_test, output_name), + num_epochs=1, shuffle=False) predict_input_fn = numpy_io.numpy_input_fn( - x={input_name: x_test}, num_epochs=1, shuffle=False) + x=randomize_io_type(x_test, input_name), num_epochs=1, shuffle=False) inference_input_fn = evaluate_input_fn if is_evaluate else predict_input_fn @@ -98,6 +131,14 @@ def get_resource_for_simple_model(is_sequential=True, is_evaluate=False): y_test), train_input_fn, inference_input_fn +def randomize_io_type(array, name): + 
switch = np.random.random() + if switch > 0.5: + return array + else: + return {name: array} + + def multi_inputs_multi_outputs_model(): # test multi-input layer a = keras.layers.Input(shape=(16,), name='input_a') @@ -134,10 +175,10 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): gfile.DeleteRecursively(self._base_dir) def test_train(self): - for is_sequential in [True, False]: + for model_type in ['sequential', 'functional']: keras_model, (_, _), ( _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model( - is_sequential=is_sequential, is_evaluate=True) + model_type=model_type, is_evaluate=True) keras_model.compile( loss='categorical_crossentropy', optimizer='rmsprop', @@ -155,10 +196,87 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): writer_cache.FileWriterCache.clear() gfile.DeleteRecursively(self._config.model_dir) + def test_train_with_tf_optimizer(self): + for model_type in ['sequential', 'functional']: + keras_model, (_, _), ( + _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model( + model_type=model_type, is_evaluate=True) + keras_model.compile( + loss='categorical_crossentropy', + optimizer=rmsprop.RMSPropOptimizer(1e-3), + metrics=['mse', keras.metrics.categorical_accuracy]) + + with self.test_session(): + est_keras = keras.estimator.model_to_estimator( + keras_model=keras_model, + # Also use dict config argument to get test coverage for that line. 
+ config={ + 'tf_random_seed': _RANDOM_SEED, + 'model_dir': self._base_dir, + }) + before_eval_results = est_keras.evaluate( + input_fn=eval_input_fn, steps=1) + est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) + after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1) + self.assertLess(after_eval_results['loss'], before_eval_results['loss']) + + writer_cache.FileWriterCache.clear() + gfile.DeleteRecursively(self._config.model_dir) + + def test_train_with_subclassed_model(self): + keras_model, (_, _), ( + _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model( + model_type='subclass', is_evaluate=True) + keras_model.compile( + loss='categorical_crossentropy', + optimizer=rmsprop.RMSPropOptimizer(1e-3), + metrics=['mse', keras.metrics.categorical_accuracy]) + + with self.test_session(): + est_keras = keras.estimator.model_to_estimator( + keras_model=keras_model, config=self._config) + est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) + before_eval_results = est_keras.evaluate( + input_fn=eval_input_fn, steps=1) + est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) + after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1) + self.assertLess(after_eval_results['loss'], before_eval_results['loss']) + + def test_train_with_subclassed_model_with_existing_state(self): + keras_model, (_, _), ( + _, _), train_input_fn, eval_input_fn = get_resource_for_simple_model( + model_type='subclass', is_evaluate=True) + keras_model.compile( + loss='categorical_crossentropy', + optimizer=rmsprop.RMSPropOptimizer(1e-3), + metrics=['mse', keras.metrics.categorical_accuracy]) + + with self.test_session(): + # Create state + keras_model.train_on_batch(np.random.random((10,) + _INPUT_SIZE), + np.random.random((10, _NUM_CLASS))) + original_preds = keras_model.predict(np.ones((10,) + _INPUT_SIZE)) + + est_keras = keras.estimator.model_to_estimator( + keras_model=keras_model, config=self._config) + 
est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) + before_eval_results = est_keras.evaluate( + input_fn=eval_input_fn, steps=1) + est_keras.train(input_fn=train_input_fn, steps=_TRAIN_SIZE / 16) + after_eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1) + self.assertLess(after_eval_results['loss'], before_eval_results['loss']) + + # Check that original model state was not altered + preds = keras_model.predict(np.ones((10,) + _INPUT_SIZE)) + self.assertAllClose(original_preds, preds, atol=1e-5) + # Check that the original model compilation did not break + keras_model.train_on_batch(np.random.random((10,) + _INPUT_SIZE), + np.random.random((10, _NUM_CLASS))) + def test_evaluate(self): keras_model, (x_train, y_train), ( x_test, y_test), _, eval_input_fn = get_resource_for_simple_model( - is_sequential=False, is_evaluate=True) + model_type='functional', is_evaluate=True) with self.test_session(): metrics = [ @@ -200,7 +318,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): # Check that predict on a pretrained model yield the same result. keras_model, (x_train, y_train), ( x_test, _), _, pred_input_fn = get_resource_for_simple_model( - is_sequential=True, is_evaluate=False) + model_type='sequential', is_evaluate=False) with self.test_session(): keras_model.compile( @@ -262,7 +380,7 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): keras_model, (x_train, y_train), ( x_test, _), _, pred_input_fn = get_resource_for_simple_model( - is_sequential=False, is_evaluate=False) + model_type='functional', is_evaluate=False) with self.test_session(): keras_model.compile( diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 2ec9971b88..c6d16a3bc0 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -127,7 +127,7 @@ class Layer(checkpointable.CheckpointableBase): # return tensors. When using graph execution, _losses is a list of ops. 
self._losses = [] self._reuse = kwargs.get('_reuse') - self._graph = ops.get_default_graph() + self._graph = None # Will be set at build time. self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name call_fn_args = estimator_util.fn_args(self.call) self._compute_previous_mask = ('mask' in call_fn_args or @@ -630,7 +630,8 @@ class Layer(checkpointable.CheckpointableBase): # the same graph as where it was created. if in_graph_mode: try: - ops._get_graph_from_inputs(input_list, graph=self.graph) # pylint: disable=protected-access + # Set layer's "graph" at build time + self._graph = ops._get_graph_from_inputs(input_list, graph=self._graph) # pylint: disable=protected-access except ValueError as e: raise ValueError('Input graph and Layer graph are not the same: %s' % e) if in_graph_mode or in_deferred_mode: -- GitLab From 20c3a2ef6f5e1f2fc0ca3eef1838c6f294964815 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 5 Mar 2018 18:54:33 -0800 Subject: [PATCH 445/884] [tf.data] Fix uninitialized local variable in ParallelMapDataset. PiperOrigin-RevId: 187960354 --- tensorflow/core/kernels/data/parallel_map_dataset_op.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc index 33053b1bd9..7e373f2568 100644 --- a/tensorflow/core/kernels/data/parallel_map_dataset_op.cc +++ b/tensorflow/core/kernels/data/parallel_map_dataset_op.cc @@ -318,7 +318,7 @@ class ParallelMapDatasetOp : public UnaryDatasetOpKernel { // Get the next input element. 
std::vector input_element; - bool end_of_input; + bool end_of_input = false; result->status = input_impl_->GetNext(ctx, &input_element, &end_of_input); if (end_of_input) { -- GitLab From 5574d6300c5e05dceb92d6d765313a99dd2c417d Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 5 Mar 2018 19:15:13 -0800 Subject: [PATCH 446/884] [TPU Cluster Resolver]: Integrate with GKE This change integrates the TPUClusterResolver with GKE's support for Cloud TPUs PiperOrigin-RevId: 187961802 --- .../python/training/tpu_cluster_resolver.py | 18 ++++++++- .../training/tpu_cluster_resolver_test.py | 39 ++++++++++++------- 2 files changed, 43 insertions(+), 14 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index aeccf4c06b..83d26a17a8 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os from six.moves.urllib.request import Request from six.moves.urllib.request import urlopen @@ -34,6 +35,9 @@ except ImportError: _GOOGLE_API_CLIENT_INSTALLED = False +_GKE_ENV_VARIABLE = 'KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' + + class TPUClusterResolver(ClusterResolver): """Cluster Resolver for Google Cloud TPUs. 
@@ -57,8 +61,15 @@ class TPUClusterResolver(ClusterResolver): return False return True + def _inGke(self): + """When running in GKE, the environment variable will be set.""" + return _GKE_ENV_VARIABLE in os.environ + + def _gkeMaster(self): + return os.environ[_GKE_ENV_VARIABLE].split(',')[0] + def __init__(self, - tpu, + tpu=None, zone=None, project=None, job_name='worker', @@ -107,6 +118,11 @@ class TPUClusterResolver(ClusterResolver): raise NotImplementedError( 'Using multiple TPUs in a single session is not yet implemented') tpu = tpu[0] + + # When using GKE with Cloud TPUs, the env variable will be set. + if tpu is None and self._inGke(): + tpu = self._gkeMaster() + self._tpu = compat.as_bytes(tpu) # self._tpu is always bytes self._job_name = job_name self._credentials = credentials diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index 6b4a155152..b7d56fc122 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + from tensorflow.contrib.cluster_resolver.python.training.tpu_cluster_resolver import TPUClusterResolver from tensorflow.python.platform import test from tensorflow.python.training import server_lib @@ -73,18 +75,17 @@ class TPUClusterResolverTest(test.TestCase): expected_proto: Expected protobuf """ self.assertProtoEquals(expected_proto, cluster_spec.as_cluster_def()) - self.assertProtoEquals( - expected_proto, server_lib.ClusterSpec(cluster_spec).as_cluster_def()) self.assertProtoEquals( expected_proto, - server_lib.ClusterSpec(cluster_spec.as_cluster_def()).as_cluster_def()) - self.assertProtoEquals( - expected_proto, - 
server_lib.ClusterSpec(cluster_spec.as_dict()).as_cluster_def()) + server_lib.ClusterSpec(cluster_spec).as_cluster_def()) + self.assertProtoEquals(expected_proto, + server_lib.ClusterSpec( + cluster_spec.as_cluster_def()).as_cluster_def()) + self.assertProtoEquals(expected_proto, + server_lib.ClusterSpec( + cluster_spec.as_dict()).as_cluster_def()) - def mock_service_client( - self, - tpu_map=None): + def mock_service_client(self, tpu_map=None): if tpu_map is None: tpu_map = {} @@ -100,8 +101,7 @@ class TPUClusterResolverTest(test.TestCase): return mock_client - @mock.patch.object(TPUClusterResolver, - '_requestComputeMetadata', + @mock.patch.object(TPUClusterResolver, '_requestComputeMetadata', mock_request_compute_metadata) def testRetrieveProjectAndZoneFromMetadata(self): tpu_map = { @@ -350,11 +350,24 @@ class TPUClusterResolverTest(test.TestCase): def testNoCallComputeMetadata(self): tpu_cluster_resolver = TPUClusterResolver(tpu='/bns/foo/bar') - self.assertEqual(compat.as_bytes('/bns/foo/bar'), - tpu_cluster_resolver.master()) + self.assertEqual( + compat.as_bytes('/bns/foo/bar'), tpu_cluster_resolver.master()) self.assertEqual( server_lib.ClusterSpec({}), tpu_cluster_resolver.cluster_spec()) + def testGkeEnvironment(self): + os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] = 'grpc://10.120.27.5:8470' + self.assertTrue('KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS' in os.environ) + tpu_cluster_resolver = TPUClusterResolver() + self.assertTrue(tpu_cluster_resolver._inGke()) + self.assertEqual( + compat.as_bytes('grpc://10.120.27.5:8470'), + tpu_cluster_resolver._gkeMaster()) + self.assertEqual( + compat.as_bytes('grpc://10.120.27.5:8470'), + tpu_cluster_resolver.get_master()) + del os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] + if __name__ == '__main__': test.main() -- GitLab From 5db5079e50199a776428f5f44339723c21508770 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Mon, 5 Mar 2018 19:15:15 -0800 Subject: [PATCH 447/884] Lower logging levels of acceptable conditions 
When using remote function invocation, these two conditions are okay, and are not cause for alarm. This change reduces them to VLOG's so they do not pollute the logs unnecessarily. PiperOrigin-RevId: 187961803 --- tensorflow/core/common_runtime/device_mgr.cc | 4 ++-- .../core/common_runtime/process_function_library_runtime.cc | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/common_runtime/device_mgr.cc b/tensorflow/core/common_runtime/device_mgr.cc index 1f0cc5e83b..a77601ba79 100644 --- a/tensorflow/core/common_runtime/device_mgr.cc +++ b/tensorflow/core/common_runtime/device_mgr.cc @@ -94,8 +94,8 @@ Status DeviceMgr::LookupDevice(StringPiece name, Device** device) const { for (auto&& itr : device_map_) { device_names.push_back(itr.first); } - LOG(WARNING) << "Unknown device: " << name - << " all devices: " << str_util::Join(device_names, ", "); + VLOG(1) << "Unknown device: " << name + << " all devices: " << str_util::Join(device_names, ", "); return errors::InvalidArgument(name, " unknown device."); } *device = iter->second; diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index e205e34aa0..929f5c67bc 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -145,7 +145,7 @@ FunctionLibraryRuntime* ProcessFunctionLibraryRuntime::GetFLR( Device* device = nullptr; if (device_name != kDefaultFLRDevice) { if (!device_mgr_->LookupDevice(device_name, &device).ok()) { - LOG(ERROR) << "Could not find device: " << device_name; + VLOG(1) << "Could not find device: " << device_name; return nullptr; } } -- GitLab From 834093de427445b4ed49729146e69b05786f4d1d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 5 Mar 2018 22:51:17 -0800 Subject: [PATCH 448/884] Add BatchNorm bijector. 
PiperOrigin-RevId: 187975255 --- tensorflow/contrib/distributions/BUILD | 16 ++ .../bijectors/batch_normalization_test.py | 236 ++++++++++++++++ .../python/ops/bijectors/__init__.py | 2 + .../ops/bijectors/batch_normalization.py | 259 ++++++++++++++++++ 4 files changed, 513 insertions(+) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index d81dfc2f62..84f74ce79c 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -831,6 +831,22 @@ cuda_py_test( ], ) +cuda_py_test( + name = "batch_normalization_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/batch_normalization_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "chain_test", size = "small", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py new file mode 100644 index 0000000000..a215a4a2b1 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/batch_normalization_test.py @@ -0,0 +1,236 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for BatchNorm Bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib import distributions +from tensorflow.contrib.distributions.python.ops import test_util +from tensorflow.contrib.distributions.python.ops.bijectors.batch_normalization import BatchNormalization +from tensorflow.contrib.distributions.python.ops.bijectors.invert import Invert +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import ops +from tensorflow.python.layers import normalization +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import variables +from tensorflow.python.ops.distributions import normal as normal_lib +from tensorflow.python.ops.distributions import transformed_distribution as transformed_distribution_lib +from tensorflow.python.platform import test +from tensorflow.python.training import adam + + +class BatchNormTest(test_util.VectorDistributionTestHelpers, + test.TestCase): + + def _reduction_axes(self, input_shape, event_dims): + if isinstance(event_dims, int): + event_dims = [event_dims] + ndims = len(input_shape) + # Convert event_dims to non-negative indexing. 
+ event_dims = list(event_dims) + for idx, x in enumerate(event_dims): + if x < 0: + event_dims[idx] = ndims + x + return tuple(i for i in range(ndims) if i not in event_dims) + + def testForwardInverse(self): + """Tests forward and backward passes with different event shapes. + + input_shape: Tuple of shapes for input tensor. + event_dims: Tuple of dimension indices that will be normalized. + training: Boolean of whether bijector runs in training or inference mode. + """ + params = [ + ((5*2, 4), [-1], False), + ((5, 2, 4), [-1], False), + ((5, 2, 4), [1, 2], False), + ((5, 2, 4), [0, 1], False), + ((5*2, 4), [-1], True), + ((5, 2, 4), [-1], True), + ((5, 2, 4), [1, 2], True), + ((5, 2, 4), [0, 1], True) + ] + for input_shape, event_dims, training in params: + x_ = np.arange(5 * 4 * 2).astype(np.float32).reshape(input_shape) + with self.test_session() as sess: + x = constant_op.constant(x_) + # When training, memorize the exact mean of the last + # minibatch that it normalized (instead of moving average assignment). + layer = normalization.BatchNormalization( + axis=event_dims, momentum=0., epsilon=0.) + batch_norm = BatchNormalization( + batchnorm_layer=layer, training=training) + # Minibatch statistics are saved only after norm_x has been computed. + norm_x = batch_norm.inverse(x) + with ops.control_dependencies(batch_norm.batchnorm.updates): + moving_mean = array_ops.identity(batch_norm.batchnorm.moving_mean) + moving_var = array_ops.identity(batch_norm.batchnorm.moving_variance) + denorm_x = batch_norm.forward(array_ops.identity(norm_x)) + fldj = batch_norm.forward_log_det_jacobian(x) + # Use identity to invalidate cache. + ildj = batch_norm.inverse_log_det_jacobian( + array_ops.identity(denorm_x)) + variables.global_variables_initializer().run() + # Update variables. 
+ norm_x_ = sess.run(norm_x) + [ + norm_x_, + moving_mean_, + moving_var_, + denorm_x_, + ildj_, + fldj_, + ] = sess.run([ + norm_x, + moving_mean, + moving_var, + denorm_x, + ildj, + fldj, + ]) + self.assertEqual("batch_normalization", batch_norm.name) + + reduction_axes = self._reduction_axes(input_shape, event_dims) + keepdims = len(event_dims) > 1 + + expected_batch_mean = np.mean( + x_, axis=reduction_axes, keepdims=keepdims) + expected_batch_var = np.var(x_, axis=reduction_axes, keepdims=keepdims) + + if training: + # When training=True, values become normalized across batch dim and + # original values are recovered after de-normalizing. + zeros = np.zeros_like(norm_x_) + self.assertAllClose(np.mean(zeros, axis=reduction_axes), + np.mean(norm_x_, axis=reduction_axes)) + + self.assertAllClose(expected_batch_mean, moving_mean_) + self.assertAllClose(expected_batch_var, moving_var_) + self.assertAllClose(x_, denorm_x_, atol=1e-5) + # Since moving statistics are set to batch statistics after + # normalization, ildj and -fldj should match. + self.assertAllClose(ildj_, -fldj_) + # ildj is computed with minibatch statistics. + expected_ildj = np.sum(np.log(1.) - .5 * np.log( + expected_batch_var + batch_norm.batchnorm.epsilon)) + self.assertAllClose(expected_ildj, ildj_) + else: + # When training=False, moving_mean, moving_var remain at their + # initialized values (0., 1.), resulting in no scale/shift (a small + # shift occurs if epsilon > 0.) + self.assertAllClose(x_, norm_x_) + self.assertAllClose(x_, denorm_x_, atol=1e-5) + # ildj is computed with saved statistics. + expected_ildj = np.sum( + np.log(1.) - .5 * np.log(1. + batch_norm.batchnorm.epsilon)) + self.assertAllClose(expected_ildj, ildj_) + + def testMaximumLikelihoodTraining(self): + # Test Maximum Likelihood training with default bijector. 
+ with self.test_session() as sess: + base_dist = distributions.MultivariateNormalDiag(loc=[0., 0.]) + batch_norm = BatchNormalization(training=True) + dist = transformed_distribution_lib.TransformedDistribution( + distribution=base_dist, + bijector=batch_norm) + target_dist = distributions.MultivariateNormalDiag(loc=[1., 2.]) + target_samples = target_dist.sample(100) + dist_samples = dist.sample(3000) + loss = -math_ops.reduce_mean(dist.log_prob(target_samples)) + with ops.control_dependencies(batch_norm.batchnorm.updates): + train_op = adam.AdamOptimizer(1e-2).minimize(loss) + moving_mean = array_ops.identity(batch_norm.batchnorm.moving_mean) + moving_var = array_ops.identity(batch_norm.batchnorm.moving_variance) + variables.global_variables_initializer().run() + for _ in range(3000): + sess.run(train_op) + [ + dist_samples_, + moving_mean_, + moving_var_ + ] = sess.run([ + dist_samples, + moving_mean, + moving_var + ]) + self.assertAllClose([1., 2.], np.mean(dist_samples_, axis=0), atol=5e-2) + self.assertAllClose([1., 2.], moving_mean_, atol=5e-2) + self.assertAllClose([1., 1.], moving_var_, atol=5e-2) + + def testLogProb(self): + with self.test_session() as sess: + layer = normalization.BatchNormalization(epsilon=0.) + batch_norm = BatchNormalization(batchnorm_layer=layer, training=False) + base_dist = distributions.MultivariateNormalDiag(loc=[0., 0.]) + dist = transformed_distribution_lib.TransformedDistribution( + distribution=base_dist, + bijector=batch_norm, + validate_args=True) + samples = dist.sample(int(1e5)) + # No volume distortion since training=False, bijector is initialized + # to the identity transformation. 
+ base_log_prob = base_dist.log_prob(samples) + dist_log_prob = dist.log_prob(samples) + variables.global_variables_initializer().run() + base_log_prob_, dist_log_prob_ = sess.run([base_log_prob, dist_log_prob]) + self.assertAllClose(base_log_prob_, dist_log_prob_) + + def testMutuallyConsistent(self): + # BatchNorm bijector is only mutually consistent when training=False. + dims = 4 + with self.test_session() as sess: + layer = normalization.BatchNormalization(epsilon=0.) + batch_norm = BatchNormalization(batchnorm_layer=layer, training=False) + dist = transformed_distribution_lib.TransformedDistribution( + distribution=normal_lib.Normal(loc=0., scale=1.), + bijector=batch_norm, + event_shape=[dims], + validate_args=True) + self.run_test_sample_consistent_log_prob( + sess_run_fn=sess.run, + dist=dist, + num_samples=int(1e5), + radius=2., + center=0., + rtol=0.02) + + def testInvertMutuallyConsistent(self): + # BatchNorm bijector is only mutually consistent when training=False. + dims = 4 + with self.test_session() as sess: + layer = normalization.BatchNormalization(epsilon=0.) 
+ batch_norm = Invert( + BatchNormalization(batchnorm_layer=layer, training=False)) + dist = transformed_distribution_lib.TransformedDistribution( + distribution=normal_lib.Normal(loc=0., scale=1.), + bijector=batch_norm, + event_shape=[dims], + validate_args=True) + self.run_test_sample_consistent_log_prob( + sess_run_fn=sess.run, + dist=dist, + num_samples=int(1e5), + radius=2., + center=0., + rtol=0.02) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index 9437f56b1e..46ec49754a 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -18,6 +18,7 @@ @@Affine @@AffineLinearOperator @@Bijector +@@BatchNormalization @@Chain @@CholeskyOuterProduct @@ConditionalBijector @@ -53,6 +54,7 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops.bijectors.absolute_value import * from tensorflow.contrib.distributions.python.ops.bijectors.affine import * from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import * +from tensorflow.contrib.distributions.python.ops.bijectors.batch_normalization import * from tensorflow.contrib.distributions.python.ops.bijectors.chain import * from tensorflow.contrib.distributions.python.ops.bijectors.cholesky_outer_product import * from tensorflow.contrib.distributions.python.ops.bijectors.conditional_bijector import * diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py new file mode 100644 index 0000000000..e47a3e01f5 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py @@ -0,0 +1,259 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Batch Norm bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import numpy as np + +from tensorflow.python.framework import ops +from tensorflow.python.layers import normalization +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops.distributions import bijector + + +__all__ = [ + "BatchNormalization", +] + + +def _undo_batch_normalization(x, + mean, + variance, + offset, + scale, + variance_epsilon, + name=None): + r"""Inverse of tf.nn.batch_normalization. + + Args: + x: Input `Tensor` of arbitrary dimensionality. + mean: A mean `Tensor`. + variance: A variance `Tensor`. + offset: An offset `Tensor`, often denoted `beta` in equations, or + None. If present, will be added to the normalized tensor. + scale: A scale `Tensor`, often denoted `gamma` in equations, or + `None`. If present, the scale is applied to the normalized tensor. + variance_epsilon: A small `float` added to the minibatch `variance` to + prevent dividing by zero. + name: A name for this operation (optional). + + Returns: + batch_unnormalized: The de-normalized, de-scaled, de-offset `Tensor`. 
+ """ + with ops.name_scope( + name, "undo_batchnorm", [x, mean, variance, scale, offset]): + # inv = math_ops.rsqrt(variance + variance_epsilon) + # if scale is not None: + # inv *= scale + # return x * inv + ( + # offset - mean * inv if offset is not None else -mean * inv) + rescale = math_ops.sqrt(variance + variance_epsilon) + if scale is not None: + rescale /= scale + batch_unnormalized = x * rescale + ( + mean - offset * rescale if offset is not None else mean) + return batch_unnormalized + + +class BatchNormalization(bijector.Bijector): + """Compute `Y = g(X) s.t. X = g^-1(Y) = (Y - mean(Y)) / std(Y)`. + + Applies Batch Normalization [1] to samples from a data distribution. This can + be used to stabilize training of normalizing flows [2, 3]. + + When training Deep Neural Networks (DNNs), it is common practice to + normalize or whiten features by shifting them to have zero mean and + scaling them to have unit variance. + + The `inverse()` method of the BatchNorm bijector, which is used in the + log-likelihood computation of data samples, implements the normalization + procedure (shift-and-scale) using the mean and standard deviation of the + current minibatch. + + Conversely, the `forward()` method of the bijector de-normalizes samples (e.g. + `X*std(Y) + mean(Y)` with the running-average mean and standard deviation + computed at training-time. De-normalization is useful for sampling. + + + ```python + + dist = tfd.TransformedDistribution( + distribution=tfd.Normal()), + bijector=tfb.BatchNorm()) + + y = tfd.MultivariateNormalDiag(loc=1., scale=2.).sample(100) # ~ N(1, 2) + x = dist.bijector.inverse(y) # ~ N(0, 1) + y = dist.sample() # ~ N(1, 2) + ``` + + During training time, `BatchNorm.inverse` and `BatchNorm.forward` are not + guaranteed to be inverses of each other because `inverse(y)` uses statistics + of the current minibatch, while `forward(x)` uses running-average statistics + accumulated from training. 
In other words, + `BatchNorm.inverse(BatchNorm.forward(...))` and + `BatchNorm.forward(BatchNorm.inverse(...))` will be identical when + `training=False` but may be different when `training=True`. + + [1]: "Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift." + Sergey Ioffe, Christian Szegedy. Arxiv. 2015. + https://arxiv.org/abs/1502.03167 + + [2]: "Density Estimation using Real NVP." + Laurent Dinh, Jascha Sohl-Dickstein, Samy Bengio. ICLR. 2017. + https://arxiv.org/abs/1605.08803 + + [3]: "Masked Autoregressive Flow for Density Estimation." + George Papamakarios, Theo Pavlakou, Iain Murray. Arxiv. 2017. + https://arxiv.org/abs/1705.07057 + + """ + + def __init__(self, + batchnorm_layer=None, + training=True, + validate_args=False, + name="batch_normalization"): + """Instantiates the `BatchNorm` bijector. + + Args: + batchnorm_layer: `tf.layers.BatchNormalization` layer object. If `None`, + defaults to + `tf.layers.BatchNormalization(gamma_constraint=nn_ops.relu(x) + 1e-6)`. + This ensures positivity of the scale variable. + + training: If True, updates running-average statistics during call to + `inverse()`. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. + Raises: + ValueError: If bn_layer is not an instance of + `tf.layers.BatchNormalization`, or if it is specified with `renorm=True` + or a virtual batch size. + """ + # Scale must be positive. + g_constraint = lambda x: nn.relu(x) + 1e-6 + self.batchnorm = batchnorm_layer or normalization.BatchNormalization( + gamma_constraint=g_constraint) + self._validate_bn_layer(self.batchnorm) + self._training = training + super(BatchNormalization, self).__init__( + validate_args=validate_args, name=name) + + def _validate_bn_layer(self, layer): + """Check for valid BatchNormalization layer. + + Args: + layer: Instance of `tf.layers.BatchNormalization`. 
+ Raises: + ValueError: If batchnorm_layer argument is not an instance of + `tf.layers.BatchNormalization`, or if `batchnorm_layer.renorm=True` or + if `batchnorm_layer.virtual_batch_size` is specified. + """ + if not isinstance(layer, normalization.BatchNormalization): + raise ValueError( + "batchnorm_layer must be an instance of BatchNormalization layer.") + if layer.renorm: + raise ValueError("BatchNorm Bijector does not support renormalization.") + if layer.virtual_batch_size: + raise ValueError( + "BatchNorm Bijector does not support virtual batch sizes.") + + def _get_broadcast_fn(self, x): + # Compute shape to broadcast scale/shift parameters to. + if not x.shape.is_fully_defined(): + raise ValueError("Input must have shape known at graph construction.") + input_shape = np.int32(x.shape.as_list()) + + ndims = len(input_shape) + # event_dims = self._compute_event_dims(x) + reduction_axes = [i for i in range(ndims) if i not in self.batchnorm.axis] + # Broadcasting only necessary for single-axis batch norm where the axis is + # not the last dimension + broadcast_shape = [1] * ndims + # import pdb; pdb.set_trace() + broadcast_shape[self.batchnorm.axis[0]] = ( + input_shape[self.batchnorm.axis[0]]) + def _broadcast(v): + if (v is not None and + len(v.get_shape()) != ndims and + reduction_axes != list(range(ndims - 1))): + return array_ops.reshape(v, broadcast_shape) + return v + return _broadcast + + def _normalize(self, y): + return self.batchnorm.apply(y, training=self._training) + + def _de_normalize(self, x): + # Uses the saved statistics. 
+ if not self.batchnorm.built: + input_shape = x.get_shape() + self.batchnorm.build(input_shape) + broadcast_fn = self._get_broadcast_fn(x) + mean = broadcast_fn(self.batchnorm.moving_mean) + variance = broadcast_fn(self.batchnorm.moving_variance) + beta = broadcast_fn(self.batchnorm.beta) if self.batchnorm.center else None + gamma = broadcast_fn(self.batchnorm.gamma) if self.batchnorm.scale else None + return _undo_batch_normalization( + x, mean, variance, beta, gamma, self.batchnorm.epsilon) + + def _forward(self, x): + return self._de_normalize(x) + + def _inverse(self, y): + return self._normalize(y) + + def _forward_log_det_jacobian(self, x): + # Uses saved statistics to compute volume distortion. + return -self._inverse_log_det_jacobian(x, use_saved_statistics=True) + + def _inverse_log_det_jacobian(self, y, use_saved_statistics=False): + if not y.shape.is_fully_defined(): + raise ValueError("Input must have shape known at graph construction.") + input_shape = np.int32(y.shape.as_list()) + + if not self.batchnorm.built: + # Create variables. + self.batchnorm.build(input_shape) + + event_dims = self.batchnorm.axis + reduction_axes = [i for i in range(len(input_shape)) if i not in event_dims] + + if use_saved_statistics or not self._training: + log_variance = math_ops.log( + self.batchnorm.moving_variance + self.batchnorm.epsilon) + else: + # At training-time, ildj is computed from the mean and log-variance across + # the current minibatch. + _, v = nn.moments(y, axes=reduction_axes, keep_dims=True) + log_variance = math_ops.log(v + self.batchnorm.epsilon) + + # `gamma` and `log Var(y)` reductions over event_dims. + # Log(total change in area from gamma term). + log_total_gamma = math_ops.reduce_sum(math_ops.log(self.batchnorm.gamma)) + + # Log(total change in area from log-variance term). + log_total_variance = math_ops.reduce_sum(log_variance) + # The ildj is scalar, as it does not depend on the values of x and are + # constant across minibatch elements. 
+ return log_total_gamma - 0.5 * log_total_variance -- GitLab From c6a12c77a50778e28de3590f4618bc2b62f3ecab Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Tue, 6 Mar 2018 08:47:32 +0100 Subject: [PATCH 449/884] Windows: Enable tensorflow/contrib in Bazel build (#16659) --- configure.py | 2 +- tensorflow/contrib/BUILD | 6 +- tensorflow/contrib/__init__.py | 5 +- .../boosted_trees/lib/utils/batch_features.h | 6 +- tensorflow/contrib/distributions/BUILD | 2 + tensorflow/contrib/eager/python/BUILD | 1 + .../python/examples/linear_regression/BUILD | 1 + tensorflow/contrib/gan/BUILD | 1 + .../contrib/kfac/python/kernel_tests/BUILD | 1 + tensorflow/contrib/labeled_tensor/BUILD | 1 + tensorflow/contrib/layers/BUILD | 2 + tensorflow/contrib/learn/BUILD | 5 + tensorflow/contrib/lookup/BUILD | 1 + tensorflow/contrib/py2tf/converters/BUILD | 2 + tensorflow/contrib/py2tf/utils/BUILD | 1 + .../contrib/remote_fused_graph/pylib/BUILD | 1 - tensorflow/contrib/saved_model/BUILD | 1 + tensorflow/contrib/session_bundle/BUILD | 1 + .../contrib/slim/python/slim/data/BUILD | 1 + tensorflow/contrib/tensor_forest/BUILD | 1 - tensorflow/contrib/tensorboard/BUILD | 1 + tensorflow/contrib/timeseries/examples/BUILD | 5 +- .../timeseries/python/timeseries/BUILD | 5 +- .../timeseries/state_space_models/BUILD | 1 + tensorflow/contrib/tpu/BUILD | 1 + tensorflow/contrib/util/loader.py | 7 +- tensorflow/core/framework/dataset.h | 4 +- tensorflow/core/lib/core/stringpiece.cc | 2 - tensorflow/core/lib/core/stringpiece.h | 2 +- tensorflow/core/platform/tracing.h | 2 +- tensorflow/python/BUILD | 94 +++++++--- tensorflow/python/debug/BUILD | 1 + tensorflow/python/keras/BUILD | 5 +- tensorflow/python/kernel_tests/BUILD | 4 - tensorflow/tensorflow.bzl | 20 ++- .../windows/cpu/pip/build_tf_windows.sh | 3 +- tensorflow/tools/def_file_filter/BUILD | 0 tensorflow/tools/def_file_filter/BUILD.tpl | 15 ++ .../def_file_filter/def_file_filter.py.tpl | 168 ++++++++++++++++++ .../def_file_filter_configure.bzl | 56 
++++++ tensorflow/tools/pip_package/BUILD | 128 ++++++------- tensorflow/workspace.bzl | 8 +- 42 files changed, 450 insertions(+), 124 deletions(-) create mode 100644 tensorflow/tools/def_file_filter/BUILD create mode 100644 tensorflow/tools/def_file_filter/BUILD.tpl create mode 100644 tensorflow/tools/def_file_filter/def_file_filter.py.tpl create mode 100644 tensorflow/tools/def_file_filter/def_file_filter_configure.bzl diff --git a/configure.py b/configure.py index 97f46757ee..8e3f055991 100644 --- a/configure.py +++ b/configure.py @@ -1377,7 +1377,7 @@ def main(): # environment variables. environ_cp = dict(os.environ) - check_bazel_version('0.5.4') + check_bazel_version('0.10.0') reset_tf_configure_bazelrc(args.workspace) cleanup_makefile() diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index bab37e8906..07d7fa64cc 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -8,6 +8,7 @@ package(default_visibility = ["//tensorflow:__subpackages__"]) load("//third_party/mpi:mpi.bzl", "if_mpi") load("@local_config_cuda//cuda:build_defs.bzl", "if_cuda") load("@local_config_tensorrt//:build_defs.bzl", "if_tensorrt") +load("//tensorflow:tensorflow.bzl", "if_not_windows") py_library( name = "contrib_py", @@ -39,7 +40,6 @@ py_library( "//tensorflow/contrib/estimator:estimator_py", "//tensorflow/contrib/factorization:factorization_py", "//tensorflow/contrib/feature_column:feature_column_py", - "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", "//tensorflow/contrib/framework:framework_py", "//tensorflow/contrib/fused_conv:fused_conv_py", "//tensorflow/contrib/gan", @@ -63,7 +63,6 @@ py_library( "//tensorflow/contrib/linalg:linalg_py", "//tensorflow/contrib/linear_optimizer:sdca_estimator_py", "//tensorflow/contrib/linear_optimizer:sdca_ops_py", - "//tensorflow/contrib/lite/python:lite", "//tensorflow/contrib/lookup:lookup_py", "//tensorflow/contrib/losses:losses_py", "//tensorflow/contrib/losses:metric_learning_py", @@ -110,6 +109,9 @@ py_library( 
"//tensorflow/python:util", ] + if_mpi(["//tensorflow/contrib/mpi_collectives:mpi_collectives_py"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", + ]) + if_not_windows([ + "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code + "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code ]), ) diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index 4f6f539027..bcf0d7b48b 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os + # Add projects here, they will show up under tf.contrib. from tensorflow.contrib import batching from tensorflow.contrib import bayesflow @@ -83,7 +85,8 @@ from tensorflow.contrib import tpu from tensorflow.contrib import training from tensorflow.contrib import util from tensorflow.contrib.eager.python import tfe as eager -from tensorflow.contrib.lite.python import lite +if os.name != 'nt': + from tensorflow.contrib.lite.python import lite from tensorflow.contrib.receptive_field import receptive_field_api as receptive_field from tensorflow.contrib.remote_fused_graph import pylib as remote_fused_graph from tensorflow.contrib.specs import python as specs diff --git a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h index da5e744851..7815fa049a 100644 --- a/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h +++ b/tensorflow/contrib/boosted_trees/lib/utils/batch_features.h @@ -48,9 +48,9 @@ class BatchFeatures { Status GetFeatureColumnSizes(int64* const num_dense_float_features, int64* const num_sparse_float_features, int64* const num_sparse_int_features) const { - QCHECK_NE(num_dense_float_features, nullptr); - QCHECK_NE(num_sparse_float_features, nullptr); - QCHECK_NE(num_sparse_int_features, nullptr); + 
QCHECK_NE(num_dense_float_features, (int64*) nullptr); + QCHECK_NE(num_sparse_float_features, (int64*) nullptr); + QCHECK_NE(num_sparse_int_features, (int64*) nullptr); *num_dense_float_features = dense_float_feature_columns_.size(); *num_sparse_float_features = sparse_float_feature_columns_.size(); *num_sparse_int_features = sparse_int_feature_columns_.size(); diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 1b4877c57f..b79ad63559 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -453,6 +453,7 @@ cuda_py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( @@ -1102,6 +1103,7 @@ cuda_py_test( "//tensorflow/python:math_ops", "//tensorflow/python:platform_test", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 7fde53476d..3ca12e2522 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -266,6 +266,7 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) filegroup( diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD index f86331af6f..2f6cfdf31e 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/BUILD +++ b/tensorflow/contrib/eager/python/examples/linear_regression/BUILD @@ -22,6 +22,7 @@ cuda_py_test( ":linear_regression", "//tensorflow:tensorflow_py", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) cuda_py_test( diff --git a/tensorflow/contrib/gan/BUILD b/tensorflow/contrib/gan/BUILD index 0eb0e3cbe2..ff6f3b7441 100644 --- a/tensorflow/contrib/gan/BUILD +++ 
b/tensorflow/contrib/gan/BUILD @@ -354,6 +354,7 @@ py_test( name = "classifier_metrics_test", srcs = ["python/eval/python/classifier_metrics_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":classifier_metrics", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index f4ed978174..b0b1314d45 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -113,6 +113,7 @@ py_test( name = "utils_test", srcs = ["utils_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ "//tensorflow/contrib/kfac/python/ops:utils", "//tensorflow/contrib/tpu", diff --git a/tensorflow/contrib/labeled_tensor/BUILD b/tensorflow/contrib/labeled_tensor/BUILD index 894e6f6946..544065dac6 100644 --- a/tensorflow/contrib/labeled_tensor/BUILD +++ b/tensorflow/contrib/labeled_tensor/BUILD @@ -70,6 +70,7 @@ py_test( "python/ops/core_test.py", ], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":_typecheck", ":core", diff --git a/tensorflow/contrib/layers/BUILD b/tensorflow/contrib/layers/BUILD index 852d06e1e3..cc7bbabf21 100644 --- a/tensorflow/contrib/layers/BUILD +++ b/tensorflow/contrib/layers/BUILD @@ -188,6 +188,7 @@ py_test( size = "small", srcs = ["python/layers/normalization_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/contrib/framework:framework_py", @@ -353,6 +354,7 @@ py_test( size = "small", srcs = ["python/ops/sparse_ops_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":layers_py", "//tensorflow/python:array_ops", diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 
abf6e393bb..ccb7d81b49 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -115,6 +115,7 @@ py_test( size = "small", srcs = ["python/learn/learn_io/data_feeder_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/python:client_testlib", @@ -170,6 +171,7 @@ tf_py_test( "//tensorflow/python:variables", "//tensorflow/python/estimator", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( @@ -188,6 +190,7 @@ py_test( size = "small", srcs = ["python/learn/graph_actions_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/framework:framework_py", @@ -584,6 +587,7 @@ py_test( size = "small", srcs = ["python/learn/learn_io/io_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/learn/python/learn/datasets", @@ -813,6 +817,7 @@ py_test( size = "small", srcs = ["python/learn/utils/saved_model_export_utils_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":learn", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/lookup/BUILD b/tensorflow/contrib/lookup/BUILD index 8ca03f4193..0a6edc33c5 100644 --- a/tensorflow/contrib/lookup/BUILD +++ b/tensorflow/contrib/lookup/BUILD @@ -46,6 +46,7 @@ tf_py_test( "//tensorflow/python:variables", ], grpc_enabled = True, + tags = ["no_windows"], # TODO: needs investigation on Windows ) filegroup( diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 78f46bc05f..848822f9b1 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -78,6 +78,7 @@ py_test( name = "builtin_functions_test", srcs = ["builtin_functions_test.py"], srcs_version = "PY2AND3", + 
tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/python:client_testlib", @@ -88,6 +89,7 @@ py_test( name = "call_trees_test", srcs = ["call_trees_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":test_lib", "//tensorflow/contrib/py2tf/impl", diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 63261d5043..8a7cfeaa2b 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -71,6 +71,7 @@ py_test( name = "py_func_test", srcs = ["py_func_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":utils", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/remote_fused_graph/pylib/BUILD b/tensorflow/contrib/remote_fused_graph/pylib/BUILD index 27f0a7f58f..54c66271cd 100644 --- a/tensorflow/contrib/remote_fused_graph/pylib/BUILD +++ b/tensorflow/contrib/remote_fused_graph/pylib/BUILD @@ -38,7 +38,6 @@ py_test( size = "small", srcs = ["python/ops/remote_fused_graph_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":remote_fused_graph_ops_py", "//tensorflow/core:protos_all_py", diff --git a/tensorflow/contrib/saved_model/BUILD b/tensorflow/contrib/saved_model/BUILD index 245fe07f2b..b10757df47 100644 --- a/tensorflow/contrib/saved_model/BUILD +++ b/tensorflow/contrib/saved_model/BUILD @@ -53,6 +53,7 @@ py_test( size = "small", srcs = ["python/saved_model/reader_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":saved_model_py", diff --git a/tensorflow/contrib/session_bundle/BUILD b/tensorflow/contrib/session_bundle/BUILD index 67011c8fef..3ad88a8a22 100644 --- a/tensorflow/contrib/session_bundle/BUILD +++ b/tensorflow/contrib/session_bundle/BUILD @@ -165,6 +165,7 @@ py_test( name = 
"gc_test", srcs = ["gc_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows visibility = ["//visibility:private"], deps = [ ":gc", diff --git a/tensorflow/contrib/slim/python/slim/data/BUILD b/tensorflow/contrib/slim/python/slim/data/BUILD index 5daabbd62e..7aa1684839 100644 --- a/tensorflow/contrib/slim/python/slim/data/BUILD +++ b/tensorflow/contrib/slim/python/slim/data/BUILD @@ -61,6 +61,7 @@ py_test( name = "dataset_data_provider_test", srcs = ["dataset_data_provider_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":dataset", ":dataset_data_provider", diff --git a/tensorflow/contrib/tensor_forest/BUILD b/tensorflow/contrib/tensor_forest/BUILD index 1e4cc3f095..07b6b1f142 100644 --- a/tensorflow/contrib/tensor_forest/BUILD +++ b/tensorflow/contrib/tensor_forest/BUILD @@ -553,7 +553,6 @@ py_test( srcs = ["client/random_forest_test.py"], srcs_version = "PY2AND3", tags = [ - "no_windows", "nomac", # b/63258195 "notsan", ], diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index 2e0a46ffe4..1e7dd79ae7 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -9,6 +9,7 @@ exports_files(["LICENSE"]) # For platform specific build config load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") +load("//tensorflow:tensorflow.bzl", "py_test") tf_proto_library( name = "protos_all", diff --git a/tensorflow/contrib/timeseries/examples/BUILD b/tensorflow/contrib/timeseries/examples/BUILD index bb86ecb220..70bf67c779 100644 --- a/tensorflow/contrib/timeseries/examples/BUILD +++ b/tensorflow/contrib/timeseries/examples/BUILD @@ -25,7 +25,10 @@ py_test( srcs = ["predict_test.py"], data = ["data/period_trend.csv"], srcs_version = "PY2AND3", - tags = ["notsan"], # b/67513579 + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", # b/67513579 + ], deps 
= [ ":predict", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index ed3ed4c0e1..64f5cd8357 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -156,9 +156,7 @@ py_test( "head_test.py", ], srcs_version = "PY2AND3", - tags = [ - "no_pip_gpu", # b/63391119 - ], + tags = ["no_pip_gpu"], # b/63391119 deps = [ ":feature_keys", ":head", @@ -427,6 +425,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 + "no_windows", # TODO: needs investigation on Windows ], deps = [ ":feature_keys", diff --git a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD index c86d06e923..07df7bc9a5 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/state_space_models/BUILD @@ -40,6 +40,7 @@ py_test( timeout = "long", # Moderate but for asan srcs = ["state_space_model_test.py"], srcs_version = "PY2AND3", + tags = ["no_windows"], # TODO: needs investigation on Windows deps = [ ":state_space_model", "//tensorflow/contrib/layers:layers_py", diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 095b4821f1..706b3ad0fa 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -220,6 +220,7 @@ tf_py_test( "//tensorflow/python:framework", "//tensorflow/python:layers", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) tf_py_test( diff --git a/tensorflow/contrib/util/loader.py b/tensorflow/contrib/util/loader.py index f4283cd9ed..dca01d26f4 100644 --- a/tensorflow/contrib/util/loader.py +++ b/tensorflow/contrib/util/loader.py @@ -42,9 +42,10 @@ def load_op_library(path): plugin. 
""" if os.name == 'nt': - # To avoid makeing every user_ops aware of windows, re-write - # the file extension from .so to .dll. - path = re.sub(r'\.so$', '.dll', path) + # To avoid making every user_ops aware of windows, re-write + # the file extension from .so to .dll if .so file doesn't exist. + if not os.path.exists(path): + path = re.sub(r'\.so$', '.dll', path) # Currently we have only some user_ops as dlls on windows - don't try # to load them if the dll is not found. diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 6ab23d92a4..27d68dd45f 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -466,11 +466,11 @@ class GraphDatasetBase : public DatasetBase { } // Key for storing the Dataset graph in the serialized format. - static const char kDatasetGraphKey[]; + TF_EXPORT static const char kDatasetGraphKey[]; // Key for storing the output node of the Dataset graph in the serialized // format. - static const char kDatasetGraphOutputNodeKey[]; + TF_EXPORT static const char kDatasetGraphOutputNodeKey[]; private: Status Serialize(OpKernelContext* ctx, string* serialized_graph_def, diff --git a/tensorflow/core/lib/core/stringpiece.cc b/tensorflow/core/lib/core/stringpiece.cc index 29b727fc44..c42d911a35 100644 --- a/tensorflow/core/lib/core/stringpiece.cc +++ b/tensorflow/core/lib/core/stringpiece.cc @@ -60,6 +60,4 @@ StringPiece StringPiece::substr(size_t pos, size_t n) const { return StringPiece(data_ + pos, n); } -const StringPiece::size_type StringPiece::npos = size_type(-1); - } // namespace tensorflow diff --git a/tensorflow/core/lib/core/stringpiece.h b/tensorflow/core/lib/core/stringpiece.h index caa9642774..b945540f98 100644 --- a/tensorflow/core/lib/core/stringpiece.h +++ b/tensorflow/core/lib/core/stringpiece.h @@ -67,7 +67,7 @@ class StringPiece { iterator begin() const { return data_; } iterator end() const { return data_ + size_; } - static const size_t npos; + static 
const size_t npos = size_type(-1); // Return the ith byte in the referenced data. // REQUIRES: n < size() diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h index 8f7bff1bb0..eebbeaeba6 100644 --- a/tensorflow/core/platform/tracing.h +++ b/tensorflow/core/platform/tracing.h @@ -103,7 +103,7 @@ class Tracing { friend class ScopedAnnotation; friend class TraceMe; - static std::atomic tracing_engine_; + TF_EXPORT static std::atomic tracing_engine_; static Tracing::Engine* engine() { return tracing_engine_.load(std::memory_order_acquire); } diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index db17a3fe02..9102182e97 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -28,6 +28,7 @@ load("//tensorflow:tensorflow.bzl", "py_tests") load("//tensorflow:tensorflow.bzl", "tf_py_build_info_genrule") load("//tensorflow:tensorflow.bzl", "tf_py_wrap_cc") load("//tensorflow:tensorflow.bzl", "tf_cc_shared_object") +load("//tensorflow:tensorflow.bzl", "tf_custom_op_library_additional_deps_impl") load("//tensorflow:tensorflow.bzl", "cuda_py_test") load("//tensorflow:tensorflow.bzl", "cuda_py_tests") load("//tensorflow/core:platform/default/build_config.bzl", "pyx_library") @@ -86,6 +87,7 @@ py_library( ":ops", ":platform", ":pywrap_tensorflow", + ":saver_test_utils", ":script_ops", ":session_ops", ":sets", @@ -94,31 +96,29 @@ py_library( ":standard_ops", ":state_ops", ":string_ops", + ":subscribe", ":summary", ":tensor_array_ops", - ":training", - ":saver_test_utils", - ":subscribe", ":test_ops", # TODO: Break testing code out into separate rule. 
- ":tf_item", ":tf_cluster", + ":tf_item", ":tf_optimizer", + ":training", ":util", ":weights_broadcast_ops", - "//third_party/py/numpy", + "//tensorflow/contrib:contrib_py", "//tensorflow/core:protos_all_py", "//tensorflow/python/data", "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/feature_column:feature_column_py", "//tensorflow/python/keras", - "//tensorflow/python/ops/losses", "//tensorflow/python/ops/distributions", "//tensorflow/python/ops/linalg", + "//tensorflow/python/ops/losses", "//tensorflow/python/profiler", "//tensorflow/python/saved_model", - ] + if_not_windows([ - "//tensorflow/contrib:contrib_py", - ]), + "//third_party/py/numpy", + ], ) tf_py_build_info_genrule() @@ -946,7 +946,6 @@ py_test( srcs = ["framework/contrib_test.py"], main = "framework/contrib_test.py", srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ "//tensorflow:tensorflow_py", "//tensorflow/python:client_testlib", @@ -1311,7 +1310,6 @@ py_test( srcs = ["framework/dtypes_test.py"], main = "framework/dtypes_test.py", srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":framework_for_generated_wrappers", ":framework_test_lib", @@ -1653,7 +1651,6 @@ py_test( size = "small", srcs = ["ops/clip_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":client_testlib", ":clip_ops", @@ -2713,7 +2710,6 @@ cuda_py_test( ], data = ["//tensorflow/core:image_testdata"], shard_count = 5, - tags = ["no_windows"], ) cuda_py_test( @@ -3251,6 +3247,11 @@ tf_py_wrap_cc( "util/transform_graph.i", "util/util.i", ], + # Use a DEF file to export symbols on Windows + win_def_file = select({ + "//tensorflow:windows": ":pywrap_tensorflow_filtered_def_file", + "//conditions:default": None, + }), deps = [ ":bfloat16_lib", ":cost_analyzer_lib", @@ -3294,6 +3295,65 @@ tf_py_wrap_cc( tf_additional_gdr_deps()), ) +# ** Targets for Windows build (start) ** +# We need the following targets to expose symbols from _pywrap_tensorflow.dll + +# Build a cc_binary 
from tf_custom_op_library_additional_deps_impl, +# it contains all object code from its dependencies. +cc_binary( + name = "tf_custom_op_library_additional_deps.so", + linkshared = 1, + linkstatic = 1, + deps = tf_custom_op_library_additional_deps_impl(), +) + +# Get a DEF file generated by parsing all object files +# of tf_custom_op_library_additional_deps.so +filegroup( + name = "pywrap_tensorflow_def_file", + srcs = [":tf_custom_op_library_additional_deps.so"], + output_group = "def_file", +) + +# Filter the DEF file to reduce the number of symbols to 64K or less. +# Note that we also write the name of the pyd file into DEF file so that +# the dynamic libraries of custom ops can find it at runtime. +genrule( + name = "pywrap_tensorflow_filtered_def_file", + srcs = [":pywrap_tensorflow_def_file"], + outs = ["pywrap_tensorflow_filtered_def_file.def"], + cmd = select({ + "//tensorflow:windows": """ + $(location @local_config_def_file_filter//:def_file_filter) \\ + --input $(location :pywrap_tensorflow_def_file) \\ + --output $@ \\ + --target _pywrap_tensorflow_internal.pyd + """, + "//conditions:default": "touch $@", # Just a placeholder for Unix platforms + }), + tools = ["@local_config_def_file_filter//:def_file_filter"], +) + +# Get the import library of _pywrap_tensorflow_internal.dll +filegroup( + name = "pywrap_tensorflow_import_lib_file", + srcs = [":_pywrap_tensorflow_internal.so"], + output_group = "interface_library", +) + +# Create a cc_import rule for the import library of _pywrap_tensorflow_internal.dll +# so that custom ops' dynamic libraries can link against it. 
+cc_import( + name = "pywrap_tensorflow_import_lib", + interface_library = select({ + "//tensorflow:windows": ":pywrap_tensorflow_import_lib_file", + "//conditions:default": "not_exsiting_on_unix.lib", # Just a placeholder for Unix platforms + }), + system_provided = 1, +) + +# ** Targets for Windows build (end) ** + py_library( name = "lib", srcs = [ @@ -3666,7 +3726,6 @@ py_test( size = "small", srcs = ["lib/core/bfloat16_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":client_testlib", ":lib", @@ -3948,7 +4007,6 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_cuda_on_cpu_tap", - "no_windows", ], deps = [ ":client", @@ -3971,7 +4029,6 @@ py_test( size = "small", srcs = ["training/checkpoint_ops_test.py"], srcs_version = "PY2AND3", - tags = ["no_windows"], deps = [ ":checkpoint_ops_gen", ":client", @@ -3993,10 +4050,7 @@ py_test( size = "medium", srcs = ["training/monitored_session_test.py"], srcs_version = "PY2AND3", - tags = [ - "no_windows", - "notsan", # b/67945581 - ], + tags = ["notsan"], # b/67945581 deps = [ ":array_ops", ":client_testlib", diff --git a/tensorflow/python/debug/BUILD b/tensorflow/python/debug/BUILD index 253588fc3b..b3abbf21e9 100644 --- a/tensorflow/python/debug/BUILD +++ b/tensorflow/python/debug/BUILD @@ -913,6 +913,7 @@ cuda_py_test( "//tensorflow/python:util", "//tensorflow/python:variables", ], + tags = ["no_windows"], # TODO: needs investigation on Windows ) py_test( diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index bd1aac5eae..3af9b1be49 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -628,7 +628,10 @@ py_test( size = "small", srcs = ["_impl/keras/utils/io_utils_test.py"], srcs_version = "PY2AND3", - tags = ["notsan"], + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", + ], deps = [ ":keras", "//tensorflow/python:client_testlib", diff --git a/tensorflow/python/kernel_tests/BUILD 
b/tensorflow/python/kernel_tests/BUILD index 23b79a24c0..c37ad5c0ec 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -295,7 +295,6 @@ tf_py_test( "//tensorflow/python:nn_grad", ], data = ["//tensorflow/core:image_testdata"], - tags = ["no_windows"], ) tf_py_test( @@ -1138,7 +1137,6 @@ tf_py_test( "//tensorflow/python:variables", ], data = ["//tensorflow/core:lmdb_testdata"], - tags = ["no_windows"], ) cuda_py_test( @@ -2328,7 +2326,6 @@ cuda_py_test( "//tensorflow/python:variables", ], shard_count = 4, - tags = ["no_windows"], ) cuda_py_test( @@ -2459,7 +2456,6 @@ cuda_py_test( "//tensorflow/python/eager:context", ], shard_count = 10, - tags = ["no_windows"], ) cuda_py_test( diff --git a/tensorflow/tensorflow.bzl b/tensorflow/tensorflow.bzl index 818d67f7b5..51ef3235b7 100644 --- a/tensorflow/tensorflow.bzl +++ b/tensorflow/tensorflow.bzl @@ -1185,6 +1185,22 @@ def tf_custom_op_library_additional_deps(): "@nsync//:nsync_headers", clean_dep("//third_party/eigen3"), clean_dep("//tensorflow/core:framework_headers_lib"), + ] + if_windows(["//tensorflow/python:pywrap_tensorflow_import_lib"]) + +# A list of targets that contains the implementation of +# tf_custom_op_library_additional_deps. It's used to generate a DEF file for +# exporting symbols from _pywrap_tensorflow.dll on Windows.
+def tf_custom_op_library_additional_deps_impl(): + return [ + # for @protobuf_archive//:protobuf_headers + "@protobuf_archive//:protobuf", + # for @nsync//:nsync_headers + "@nsync//:nsync_cpp", + # for //third_party/eigen3 + clean_dep("//third_party/eigen3"), + # for //tensorflow/core:framework_headers_lib + clean_dep("//tensorflow/core:framework"), + clean_dep("//tensorflow/core:reader_base"), ] # Traverse the dependency graph along the "deps" attribute of the @@ -1271,6 +1287,7 @@ def tf_custom_op_library(name, srcs=[], gpu_srcs=[], deps=[], linkopts=[]): deps=deps + if_cuda(cuda_deps), data=[name + "_check_deps"], copts=tf_copts(is_external=True), + features = ["windows_export_all_symbols"], linkopts=linkopts + select({ "//conditions:default": [ "-lm", @@ -1417,7 +1434,8 @@ def tf_py_wrap_cc(name, ]) + tf_extension_copts()), linkopts=tf_extension_linkopts() + extra_linkopts, linkstatic=1, - deps=deps + extra_deps) + deps=deps + extra_deps, + **kwargs) native.genrule( name="gen_" + cc_library_pyd_name, srcs=[":" + cc_library_name], diff --git a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh index 8b8ba31a0d..40189a6d1b 100644 --- a/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh +++ b/tensorflow/tools/ci_build/windows/cpu/pip/build_tf_windows.sh @@ -65,4 +65,5 @@ bazel test -c opt $BUILD_OPTS -k --test_output=errors \ --define=no_tensorflow_py_deps=true --test_lang_filters=py \ --test_tag_filters=-no_pip,-no_windows,-no_oss \ --build_tag_filters=-no_pip,-no_windows,-no_oss --build_tests_only \ - //${PY_TEST_DIR}/tensorflow/python/... + //${PY_TEST_DIR}/tensorflow/python/... \ + //${PY_TEST_DIR}/tensorflow/contrib/... 
diff --git a/tensorflow/tools/def_file_filter/BUILD b/tensorflow/tools/def_file_filter/BUILD new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tensorflow/tools/def_file_filter/BUILD.tpl b/tensorflow/tools/def_file_filter/BUILD.tpl new file mode 100644 index 0000000000..3cb72f4979 --- /dev/null +++ b/tensorflow/tools/def_file_filter/BUILD.tpl @@ -0,0 +1,15 @@ +# Description: +# Tools for filtering DEF file for TensorFlow on Windows +# +# On Windows, we use a DEF file generated by Bazel to export +# symbols from the tensorflow dynamic library(_pywrap_tensorflow.dll). +# The maximum number of symbols that can be exported per DLL is 64K, +# so we have to filter some useless symbols through this python script. + +package(default_visibility = ["//visibility:public"]) + +py_binary( + name = "def_file_filter", + srcs = ["def_file_filter.py"], + srcs_version = "PY2AND3", +) diff --git a/tensorflow/tools/def_file_filter/def_file_filter.py.tpl b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl new file mode 100644 index 0000000000..8bdc03eb0f --- /dev/null +++ b/tensorflow/tools/def_file_filter/def_file_filter.py.tpl @@ -0,0 +1,168 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +"""def_file_filter.py - tool to filter a windows def file. 
+ +The def file can be used to export symbols from the tensorflow dll to enable +tf.load_library(). + +Because the linker allows only 64K symbols to be exported per dll +we filter the symbols down to the essentials. The regular expressions +we use for this are specific to tensorflow. + +TODO: this works fine but there is an issue with exporting +'const char * const' and importing it from a user_ops. The problem is +on the importing end and using __declspec(dllimport) works around it. +""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import argparse +import io +import os +import re +import subprocess +import sys +import tempfile + +# External tools we use that come with visual studio sdk +UNDNAME = "%{undname_bin_path}" + +# Exclude if matched +EXCLUDE_RE = re.compile(r"RTTI|deleting destructor|::internal::") + +# Include if matched before exclude +INCLUDEPRE_RE = re.compile(r"google::protobuf::internal::ExplicitlyConstructed|" + r"google::protobuf::internal::ArenaImpl::AllocateAligned|" # for contrib/data/_prefetching_ops + r"google::protobuf::internal::ArenaImpl::AddCleanup|" # for contrib/data/_prefetching_ops + r"google::protobuf::Arena::OnArenaAllocation|" # for contrib/data/_prefetching_ops + r"tensorflow::internal::LogMessage|" + r"tensorflow::internal::LogString|" + r"tensorflow::internal::CheckOpMessageBuilder|" + r"tensorflow::internal::MakeCheckOpValueString|" + r"tensorflow::internal::PickUnusedPortOrDie|" + r"tensorflow::internal::ValidateDevice|" + r"tensorflow::ops::internal::Enter|" + r"tensorflow::strings::internal::AppendPieces|" + r"tensorflow::strings::internal::CatPieces|" + r"tensorflow::io::internal::JoinPathImpl") + +# Include if matched after exclude +INCLUDE_RE = re.compile(r"^(TF_\w*)$|" + r"^(TFE_\w*)$|" + r"nsync::|" + r"tensorflow::|" + r"functor::|" + r"perftools::gputools") + +# We want to identify data members explicitly in the DEF file, so that no one +# can 
implicitly link against the DLL if they use one of the variables exported +# from the DLL and the header they use does not decorate the symbol with +# __declspec(dllimport). It is easier to detect what a data symbol does +# NOT look like, so doing it with the below regex. +DATA_EXCLUDE_RE = re.compile(r"[)(]|" + r"vftable|" + r"vbtable|" + r"vcall|" + r"RTTI|" + r"protobuf::internal::ExplicitlyConstructed") + +def get_args(): + """Parse command line.""" + filename_list = lambda x: x.split(";") + parser = argparse.ArgumentParser() + parser.add_argument("--input", type=filename_list, + help="paths to input def file", + required=True) + parser.add_argument("--output", help="output deffile", required=True) + parser.add_argument("--target", help="name of the target", required=True) + args = parser.parse_args() + return args + + +def main(): + """main.""" + args = get_args() + + # Read the symbol name (first column) of every line of the input DEF files. + # The symbols are collected in candidates and also written to + # a temp file. + candidates = [] + tmpfile = tempfile.NamedTemporaryFile(mode="w", delete=False) + for def_file_path in args.input: + def_file = open(def_file_path, 'r') + for line in def_file: + cols = line.split() + sym = cols[0] + tmpfile.file.write(sym + "\n") + candidates.append(sym) + tmpfile.file.close() + + # Run the symbols through undname to get their undecorated name + # so we can filter on something readable. + with open(args.output, "w") as def_fp: + # track dupes + taken = set() + + # Header for the def file. + def_fp.write("LIBRARY " + args.target + "\n") + def_fp.write("EXPORTS\n") + def_fp.write("\t ??1OpDef@tensorflow@@UEAA@XZ\n") + + # Each symbol returned by undname matches the same position in candidates. + # We compare on undname but use the decorated name from candidates.
+ dupes = 0 + proc = subprocess.Popen([UNDNAME, tmpfile.name], stdout=subprocess.PIPE) + for idx, line in enumerate(io.TextIOWrapper(proc.stdout, encoding="utf-8")): + decorated = candidates[idx] + if decorated in taken: + # Symbol is already in output, done. + dupes += 1 + continue + + if not INCLUDEPRE_RE.search(line): + if EXCLUDE_RE.search(line): + continue + if not INCLUDE_RE.search(line): + continue + + if "deleting destructor" in line: + # Some of the symbols covered by INCLUDEPRE_RE export deleting + # destructor symbols, which is a bad idea. + # So we filter out such symbols here. + continue + + if DATA_EXCLUDE_RE.search(line): + def_fp.write("\t" + decorated + "\n") + else: + def_fp.write("\t" + decorated + " DATA\n") + taken.add(decorated) + def_fp.close() + + exit_code = proc.wait() + if exit_code != 0: + print("{} failed, exit={}".format(UNDNAME, exit_code)) + return exit_code + + os.unlink(tmpfile.name) + + print("symbols={}, taken={}, dupes={}" + .format(len(candidates), len(taken), dupes)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl new file mode 100644 index 0000000000..47539b2423 --- /dev/null +++ b/tensorflow/tools/def_file_filter/def_file_filter_configure.bzl @@ -0,0 +1,56 @@ +"""Repository rule for def file filter autoconfiguration. + +This repository reuses Bazel's VC detect mechanism to find undname.exe, +which is a tool used in def_file_filter.py. + +def_file_filter.py is for filtering the DEF file for TensorFlow on Windows. +On Windows, we use a DEF file generated by Bazel to export symbols from the +tensorflow dynamic library(_pywrap_tensorflow.dll). The maximum number of +symbols that can be exported per DLL is 64K, so we have to filter some useless +symbols through this python script.
+ +`def_file_filter_config` depends on the following environment variables: + * `BAZEL_VC` + * `BAZEL_VS` + * `VS90COMNTOOLS` + * `VS100COMNTOOLS` + * `VS110COMNTOOLS` + * `VS120COMNTOOLS` + * `VS140COMNTOOLS` +""" + +load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_vc_path") +load("@bazel_tools//tools/cpp:windows_cc_configure.bzl", "find_msvc_tool") +load("@bazel_tools//tools/cpp:lib_cc_configure.bzl", "auto_configure_fail") + +def _def_file_filter_configure_impl(repository_ctx): + if repository_ctx.os.name.lower().find("windows") == -1: + repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") + repository_ctx.file("def_file_filter.py", "") + return + vc_path = find_vc_path(repository_ctx) + if vc_path == "visual-studio-not-found": + auto_configure_fail("Visual C++ build tools not found on your machine") + undname_bin_path = find_msvc_tool(repository_ctx, vc_path, "undname.exe").replace("\\", "\\\\") + + repository_ctx.template( + "def_file_filter.py", + Label("//tensorflow/tools/def_file_filter:def_file_filter.py.tpl"), + { + "%{undname_bin_path}": undname_bin_path, + }) + repository_ctx.symlink(Label("//tensorflow/tools/def_file_filter:BUILD.tpl"), "BUILD") + + +def_file_filter_configure = repository_rule( + implementation = _def_file_filter_configure_impl, + environ = [ + "BAZEL_VC", + "BAZEL_VS", + "VS90COMNTOOLS", + "VS100COMNTOOLS", + "VS110COMNTOOLS", + "VS120COMNTOOLS", + "VS140COMNTOOLS" + ], +) diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index fb6eaa4faa..ed5801b8bd 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -48,36 +48,65 @@ py_binary( deps = ["//tensorflow:tensorflow_py"], ) +COMMON_PIP_DEPS = [ + ":licenses", + "MANIFEST.in", + "README", + "setup.py", + ":included_headers", + "//tensorflow:tensorflow_py", + "//tensorflow/contrib/boosted_trees:boosted_trees_pip", + 
"//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", + "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", + "//tensorflow/contrib/data/python/ops:contrib_op_loader", + "//tensorflow/contrib/eager/python/examples:examples_pip", + "//tensorflow/contrib/eager/python:checkpointable_utils", + "//tensorflow/contrib/eager/python:evaluator", + "//tensorflow/contrib/gan:gan", + "//tensorflow/contrib/graph_editor:graph_editor_pip", + "//tensorflow/contrib/keras:keras", + "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", + "//tensorflow/contrib/nn:nn_py", + "//tensorflow/contrib/predictor:predictor_pip", + "//tensorflow/contrib/py2tf:py2tf", + "//tensorflow/contrib/py2tf/converters:converters", + "//tensorflow/contrib/py2tf/converters:test_lib", + "//tensorflow/contrib/py2tf/impl:impl", + "//tensorflow/contrib/py2tf/pyct:pyct", + "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", + "//tensorflow/contrib/receptive_field:receptive_field_pip", + "//tensorflow/contrib/session_bundle:session_bundle_pip", + "//tensorflow/contrib/signal:signal_py", + "//tensorflow/contrib/signal:test_util", + "//tensorflow/contrib/slim:slim", + "//tensorflow/contrib/slim/python/slim/data:data_pip", + "//tensorflow/contrib/slim/python/slim/nets:nets_pip", + "//tensorflow/contrib/specs:specs", + "//tensorflow/contrib/summary:summary_test_util", + "//tensorflow/contrib/tensor_forest:init_py", + "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", + "//tensorflow/contrib/timeseries:timeseries_pip", + "//tensorflow/contrib/tpu", + "//tensorflow/examples/tutorials/mnist:package", + "//tensorflow/python:distributed_framework_test_lib", + "//tensorflow/python:meta_graph_testdata", + "//tensorflow/python:spectral_ops_test_util", + "//tensorflow/python:util_example_parser_configuration", + "//tensorflow/python/debug:debug_pip", + "//tensorflow/python/eager:eager_pip", + "//tensorflow/python/saved_model:saved_model", + 
"//tensorflow/python/tools:tools_pip", + "//tensorflow/python:test_ops", + "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", +] + # On Windows, python binary is a zip file of runfiles tree. # Add everything to its data dependency for generating a runfiles tree # for building the pip package on Windows. py_binary( name = "simple_console_for_windows", srcs = ["simple_console_for_windows.py"], - data = [ - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/saved_model", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/contrib/summary:summary_test_util", - # These targets don't build on Windows yet. Exclude them for now. 
- # "//tensorflow/contrib/slim", - # "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - # "//tensorflow/contrib/specs", - # "//tensorflow/contrib/tensor_forest:init_py", - # "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - # "//tensorflow/examples/tutorials/mnist:package", - ], + data = COMMON_PIP_DEPS, srcs_version = "PY2AND3", deps = ["//tensorflow:tensorflow_py"], ) @@ -137,60 +166,11 @@ sh_binary( data = select({ "//tensorflow:windows": [":simple_console_for_windows"], "//tensorflow:windows_msvc": [":simple_console_for_windows"], - "//conditions:default": [ - ":licenses", - "MANIFEST.in", - "README", - "setup.py", - ":included_headers", + "//conditions:default": COMMON_PIP_DEPS + [ ":simple_console", - "//tensorflow:tensorflow_py", - "//tensorflow/contrib/boosted_trees:boosted_trees_pip", - "//tensorflow/contrib/cluster_resolver:cluster_resolver_pip", - "//tensorflow/contrib/data/python/kernel_tests:dataset_serialization_test", - "//tensorflow/contrib/data/python/ops:contrib_op_loader", - "//tensorflow/contrib/eager/python/examples:examples_pip", - "//tensorflow/contrib/eager/python:checkpointable_utils", - "//tensorflow/contrib/eager/python:evaluator", - "//tensorflow/contrib/gan:gan", - "//tensorflow/contrib/graph_editor:graph_editor_pip", - "//tensorflow/contrib/keras:keras", - "//tensorflow/contrib/labeled_tensor:labeled_tensor_pip", "//tensorflow/contrib/lite/toco:toco", "//tensorflow/contrib/lite/toco/python:toco_wrapper", "//tensorflow/contrib/lite/toco/python:toco_from_protos", - "//tensorflow/contrib/nn:nn_py", - "//tensorflow/contrib/predictor:predictor_pip", - "//tensorflow/contrib/py2tf:py2tf", - "//tensorflow/contrib/py2tf/converters:converters", - "//tensorflow/contrib/py2tf/converters:test_lib", - "//tensorflow/contrib/py2tf/impl:impl", - "//tensorflow/contrib/py2tf/pyct:pyct", - "//tensorflow/contrib/py2tf/pyct/static_analysis:static_analysis", - "//tensorflow/contrib/receptive_field:receptive_field_pip", - 
"//tensorflow/contrib/session_bundle:session_bundle_pip", - "//tensorflow/contrib/signal:signal_py", - "//tensorflow/contrib/signal:test_util", - "//tensorflow/contrib/slim:slim", - "//tensorflow/contrib/slim/python/slim/data:data_pip", - "//tensorflow/contrib/slim/python/slim/nets:nets_pip", - "//tensorflow/contrib/specs:specs", - "//tensorflow/contrib/summary:summary_test_util", - "//tensorflow/contrib/tensor_forest:init_py", - "//tensorflow/contrib/tensor_forest/hybrid:hybrid_pip", - "//tensorflow/contrib/timeseries:timeseries_pip", - "//tensorflow/contrib/tpu", - "//tensorflow/examples/tutorials/mnist:package", - "//tensorflow/python:distributed_framework_test_lib", - "//tensorflow/python:meta_graph_testdata", - "//tensorflow/python:spectral_ops_test_util", - "//tensorflow/python:util_example_parser_configuration", - "//tensorflow/python/debug:debug_pip", - "//tensorflow/python/eager:eager_pip", - "//tensorflow/python/saved_model:saved_model", - "//tensorflow/python/tools:tools_pip", - "//tensorflow/python:test_ops", - "//tensorflow/tools/dist_test/server:grpc_tensorflow_server", ], }) + if_mkl(["//third_party/mkl:intel_binary_blob"]) + if_tensorrt([ "//tensorflow/contrib/tensorrt:init_py", diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 1af246f9dc..0b8dfae00e 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -12,6 +12,8 @@ load("//third_party/toolchains/cpus/arm:arm_compiler_configure.bzl", "arm_compil load("//third_party:repo.bzl", "tf_http_archive") load("@io_bazel_rules_closure//closure/private:java_import_external.bzl", "java_import_external") load("@io_bazel_rules_closure//closure:defs.bzl", "filegroup_external") +load("//tensorflow/tools/def_file_filter:def_file_filter_configure.bzl", + "def_file_filter_configure") def _extract_version_number(bazel_version): """Extracts the semantic version number from a version string @@ -67,7 +69,7 @@ def tf_workspace(path_prefix="", tf_repo_name=""): # We must check the 
bazel version before trying to parse any other BUILD # files, in case the parsing of those build files depends on the bazel # version we require here. - check_bazel_version_at_least("0.5.4") + check_bazel_version_at_least("0.10.0") clang6_configure(name="local_config_clang6") cuda_configure(name="local_config_cuda") tensorrt_configure(name="local_config_tensorrt") @@ -75,6 +77,10 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sycl_configure(name="local_config_sycl") python_configure(name="local_config_python") + # For windows bazel build + # TODO: Remove def file filter when TensorFlow can export symbols properly on Windows. + def_file_filter_configure(name = "local_config_def_file_filter") + # Point //external/local_config_arm_compiler to //external/arm_compiler arm_compiler_configure( name="local_config_arm_compiler", -- GitLab From bec6e47cf93ce3fad041580de4d922f30190b1c7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Tue, 6 Mar 2018 03:31:45 -0800 Subject: [PATCH 450/884] [XLA:GPU] Mark bitcasts as eligible for fusion. Currently this never happens because we only turn reshapes into bitcasts after layout assignment. This changes when layout assignment runs before fusion. Once layouts are available the pipeline turns reshapes into bitcasts, which would be left unfused without this change. 
PiperOrigin-RevId: 187999864 --- .../xla/service/elemental_ir_emitter.cc | 1 + tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../xla/service/gpu/instruction_fusion.cc | 1 + .../service/gpu/instruction_fusion_test.cc | 45 +++++++++++++++++++ .../xla/tests/llvm_irgen_test_base.cc | 5 ++- 5 files changed, 51 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index c732974995..31c0f2233c 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1722,6 +1722,7 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( SetToFirstInsertPoint(if_data.after_block, ir_builder_); return ir_builder_->CreateLoad(ret_value_addr); }; + case HloOpcode::kBitcast: case HloOpcode::kReshape: CHECK_EQ(ShapeUtil::ElementsIn(hlo->shape()), ShapeUtil::ElementsIn(hlo->operand(0)->shape())); diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index cecbc25192..a1ea5884a4 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -397,6 +397,7 @@ tf_cc_test( "//tensorflow/compiler/xla/service:hlo_matchers", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", ], ) diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index b5962f069b..870d241856 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -26,6 +26,7 @@ namespace { bool IsFusile(const HloInstruction& hlo) { return (hlo.IsElementwise() && hlo.operand_count() > 0) || + hlo.opcode() == HloOpcode::kBitcast || hlo.opcode() == HloOpcode::kBroadcast || hlo.opcode() == 
HloOpcode::kConcatenate || hlo.opcode() == HloOpcode::kDynamicSlice || diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 2d6dad27a5..373e5a5587 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/hlo_matchers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" namespace op = xla::testing::opcode_matchers; @@ -163,5 +164,49 @@ TEST_F(InstructionFusionTest, GetTupleElementFused) { EXPECT_EQ(HloOpcode::kGetTupleElement, fused_root->operand(1)->opcode()); } +TEST_F(InstructionFusionTest, BitcastIntoAdd) { + auto module = tools::Parse(R"( + HloModule test_module + + ENTRY BroadcastIntoAdd { + p0 = f32[4,1,1]{2,1,0} parameter(0) + p1 = f32[4,1]{1,0} parameter(1) + bitcast = f32[4,1]{1,0} bitcast(p0) + ROOT add = f32[4,1] add(bitcast, p1) + })") + .ValueOrDie(); + + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Fusion()); + EXPECT_THAT(root->fused_expression_root(), + op::Add(op::Bitcast(op::Parameter()), op::Parameter())); +} + +TEST_F(InstructionFusionTest, AddIntoBitcast) { + auto module = tools::Parse(R"( + HloModule test_module + + ENTRY BroadcastIntoAdd { + p0 = f32[4,1,1]{2,1,0} parameter(0) + p1 = f32[4,1]{1,0} parameter(1) + add = f32[4,1] add(p0, p1) + ROOT bitcast = f32[4,1,1] bitcast(add) + })") + .ValueOrDie(); + + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Fusion()); + EXPECT_THAT(root->fused_expression_root(), + 
op::Bitcast(op::Add(op::Parameter(), op::Parameter()))); +} + } // namespace gpu } // namespace xla diff --git a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc index 99514baf23..3023df47cd 100644 --- a/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc +++ b/tensorflow/compiler/xla/tests/llvm_irgen_test_base.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/compiler/xla/service/llvm_ir/llvm_util.h" #include "tensorflow/compiler/xla/tests/filecheck.h" +#include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" namespace xla { @@ -49,11 +50,11 @@ void LLVMIRGenTestBase::CompileAndVerifyIr( std::unique_ptr hlo_module, const string& pattern, bool match_optimized_ir) { SetIrHook(match_optimized_ir); - ASSERT_TRUE(CompileToExecutable(std::move(hlo_module)).ok()); + TF_ASSERT_OK(CompileToExecutable(std::move(hlo_module)).status()); ResetIrHook(); StatusOr filecheck_result = RunFileCheck(ir_, pattern); - ASSERT_TRUE(filecheck_result.ok()); + TF_ASSERT_OK(filecheck_result.status()); EXPECT_TRUE(filecheck_result.ValueOrDie()); } -- GitLab From f261257ab26802cf3cab7303a76db2fb729e1d01 Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Tue, 6 Mar 2018 08:21:10 -0800 Subject: [PATCH 451/884] Implements MaxPoolGradGrad in tf2xla using bitwise trickery. Further detail covered by a comment inside pooling_ops.cc. Retains 32 bits of gradient precision, but can confuse the backprop source for input cells that are equally maximal at 16 bits. We could in principle be accurate up to 31 bits of input, if we were willing to find gradients one bit at a time, or 24 bits of input 8 gradient bits at a time, etc. 
PiperOrigin-RevId: 188025278 --- tensorflow/compiler/tests/pooling_ops_test.py | 133 +++++++++++--- .../tf2xla/g3doc/cpu_supported_ops.md | 14 ++ .../tf2xla/g3doc/gpu_supported_ops.md | 14 ++ .../compiler/tf2xla/kernels/pooling_ops.cc | 167 ++++++++++++++++++ 4 files changed, 305 insertions(+), 23 deletions(-) diff --git a/tensorflow/compiler/tests/pooling_ops_test.py b/tensorflow/compiler/tests/pooling_ops_test.py index e0e85295fe..fe270af3d6 100644 --- a/tensorflow/compiler/tests/pooling_ops_test.py +++ b/tensorflow/compiler/tests/pooling_ops_test.py @@ -292,8 +292,15 @@ class PoolGradTest(XLATestCase): CPU_DEVICE = "/job:localhost/replica:0/task:0/cpu:0" - def _VerifyOneTest(self, pool_func, pool_grad_func, input_sizes, ksize, - strides, padding, data_format): + def _VerifyOneTest(self, + pool_func, + pool_grad_func, + input_sizes, + ksize, + strides, + padding, + data_format, + pool_grad_grad_func=None): """Verifies the output values of the pooling gradient function. Args: @@ -304,9 +311,19 @@ class PoolGradTest(XLATestCase): strides: The stride dimensions padding: Padding type. data_format: The data format we use to run the pooling operation. + pool_grad_grad_func: Second-order gradient function, if available. """ total_size = np.prod(input_sizes) - x = np.arange(1, total_size + 1, dtype=np.float32).reshape(input_sizes) + # TODO(b/73062247): MaxPoolGradGrad can confuse gradients when x is equally + # maximal at 16 bits. Switch to np.random.randn when resolved. + x = np.arange(1, total_size + 1, dtype=np.float32) + x *= (np.random.randint(2, size=total_size) * 2 - 1) # Flip signs randomly + # Verify some specifically interesting values... + x[np.random.choice(total_size)] = np.inf + x[np.random.choice(total_size)] = -np.inf + # TODO(b/74222344): Fix nan handling for max pool grad. 
+ # x[np.random.choice(total_size)] = np.nan + x = x.reshape(input_sizes) with self.test_session() as sess: # Use the forward pool function to compute some corresponding outputs # (needed for the CPU device, and we need the shape in both cases). @@ -323,6 +340,8 @@ class PoolGradTest(XLATestCase): output_gradient_vals = np.arange( 1, output_vals.size + 1, dtype=np.float32) output_gradient_vals = output_gradient_vals.reshape(output_vals.shape) + output_grad_grad_vals = np.arange(1, x.size + 1, dtype=np.float32) + output_grad_grad_vals = output_grad_grad_vals.reshape(x.shape) # Use the Tensorflow CPU pooling gradient to compute the expected input # gradients. @@ -342,18 +361,36 @@ class PoolGradTest(XLATestCase): {inputs: x, output_gradients: output_gradient_vals}) + output_grad_gradients = array_ops.placeholder( + dtypes.float32, shape=expected_input_gradient_vals.shape) + if pool_grad_grad_func is not None: + expected_grad_gradients = pool_grad_grad_func( + inputs, + outputs, + output_grad_gradients, + ksize=ksize, + strides=strides, + padding=padding, + data_format="NHWC") + expected_grad_gradients_vals = sess.run(expected_grad_gradients, { + inputs: x, + output_grad_gradients: output_grad_grad_vals + }) + # Run the gradient op on the XLA device with self.test_scope(): outputs = array_ops.placeholder(dtypes.float32, shape=output_vals.shape) xla_inputs = inputs xla_outputs = outputs xla_output_gradients = output_gradients + xla_output_grad_gradients = output_grad_gradients xla_ksize = ksize xla_strides = strides if data_format == "NCHW": xla_inputs = NHWCToNCHW(inputs) xla_outputs = NHWCToNCHW(outputs) xla_output_gradients = NHWCToNCHW(output_gradients) + xla_output_grad_gradients = NHWCToNCHW(output_grad_gradients) xla_ksize = NHWCToNCHW(ksize) xla_strides = NHWCToNCHW(strides) actual_input_gradients = pool_grad_func( @@ -366,22 +403,54 @@ class PoolGradTest(XLATestCase): data_format=data_format) if data_format == "NCHW": actual_input_gradients = 
NCHWToNHWC(actual_input_gradients) - actual = sess.run(actual_input_gradients, { + if pool_grad_grad_func is not None: + actual_grad_gradients = pool_grad_grad_func( + xla_inputs, + xla_outputs, + xla_output_grad_gradients, + ksize=xla_ksize, + strides=xla_strides, + padding=padding, + data_format=data_format) + if data_format == "NCHW": + actual_grad_gradients = NCHWToNHWC(actual_grad_gradients) + actual_input_gradients_vals = sess.run(actual_input_gradients, { inputs: x, outputs: output_vals, output_gradients: output_gradient_vals }) - # Compare the Tensorflow and XLA results. self.assertAllClose( - expected_input_gradient_vals.flatten(), - actual.flatten(), + expected_input_gradient_vals, + actual_input_gradients_vals, rtol=1e-4, atol=1e-6) - self.assertShapeEqual(actual, inputs) - - def _VerifyValues(self, pool_func, pool_grad_func, input_sizes, ksize, - strides, padding): + self.assertShapeEqual(actual_input_gradients_vals, inputs) + + if pool_grad_grad_func is not None: + actual_grad_gradients_vals = sess.run( + actual_grad_gradients, { + inputs: x, + outputs: output_vals, + output_grad_gradients: output_grad_grad_vals + }) + + # Compare the Tensorflow and XLA results. + self.assertAllClose( + expected_grad_gradients_vals, + actual_grad_gradients_vals, + rtol=1e-4, + atol=1e-6) + self.assertShapeEqual(actual_grad_gradients_vals, outputs) + + def _VerifyValues(self, + pool_func, + pool_grad_func, + input_sizes, + ksize, + strides, + padding, + pool_grad_grad_func=None): """Verifies the output values of the pooling function. Args: @@ -391,12 +460,20 @@ class PoolGradTest(XLATestCase): ksize: The kernel size dimensions strides: The stride dimensions padding: Padding type. + pool_grad_grad_func: Second-order gradient function, if available. 
""" for data_format in GetTestConfigs(): - self._VerifyOneTest(pool_func, pool_grad_func, input_sizes, ksize, - strides, padding, data_format) - - def _TestPooling(self, forward_op, backward_op): + self._VerifyOneTest( + pool_func, + pool_grad_func, + input_sizes, + ksize, + strides, + padding, + data_format, + pool_grad_grad_func=pool_grad_grad_func) + + def _TestPooling(self, forward_op, backward_op, pool_grad_grad_func=None): # VALID padding self._VerifyValues( forward_op, @@ -404,7 +481,8 @@ class PoolGradTest(XLATestCase): input_sizes=[1, 3, 3, 3], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], - padding="VALID") + padding="VALID", + pool_grad_grad_func=pool_grad_grad_func) # SAME padding self._VerifyValues( @@ -413,7 +491,8 @@ class PoolGradTest(XLATestCase): input_sizes=[1, 2, 3, 3], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], - padding="SAME") + padding="SAME", + pool_grad_grad_func=pool_grad_grad_func) # SAME padding, non square window self._VerifyValues( @@ -422,7 +501,8 @@ class PoolGradTest(XLATestCase): input_sizes=[1, 2, 2, 1], ksize=[1, 1, 2, 1], strides=[1, 1, 1, 1], - padding="SAME") + padding="SAME", + pool_grad_grad_func=pool_grad_grad_func) # VALID padding, uneven stride self._VerifyValues( @@ -431,14 +511,16 @@ class PoolGradTest(XLATestCase): input_sizes=[1, 4, 4, 1], ksize=[1, 2, 2, 1], strides=[1, 1, 2, 1], - padding="VALID") + padding="VALID", + pool_grad_grad_func=pool_grad_grad_func) self._VerifyValues( forward_op, backward_op, input_sizes=[1, 4, 4, 1], ksize=[1, 2, 2, 1], strides=[1, 2, 1, 1], - padding="VALID") + padding="VALID", + pool_grad_grad_func=pool_grad_grad_func) # SAME padding, size 4 input self._VerifyValues( @@ -447,7 +529,8 @@ class PoolGradTest(XLATestCase): input_sizes=[1, 4, 4, 4], ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], - padding="SAME") + padding="SAME", + pool_grad_grad_func=pool_grad_grad_func) # SAME padding, size 8 input self._VerifyValues( @@ -456,10 +539,14 @@ class PoolGradTest(XLATestCase): input_sizes=[1, 8, 8, 8], 
ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], - padding="SAME") + padding="SAME", + pool_grad_grad_func=pool_grad_grad_func) def testMaxPool(self): - self._TestPooling(nn_ops.max_pool, gen_nn_ops.max_pool_grad) + self._TestPooling( + nn_ops.max_pool, + gen_nn_ops.max_pool_grad, + pool_grad_grad_func=gen_nn_ops.max_pool_grad_grad) def testAvgPool(self): # Wrapper around AvgPoolGrad that ignores extra arguments needed by diff --git a/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md b/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md index 91351421bc..20179b6799 100644 --- a/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md +++ b/tensorflow/compiler/tf2xla/g3doc/cpu_supported_ops.md @@ -3,6 +3,7 @@ Operator | Type Constraint ------------------------------------- | --------------- `Abs` | `T={double,float,int32,int64}` +`Acos` | `T={complex64,double,float,int32,int64}` `Acosh` | `T={complex64,double,float}` `Add` | `T={complex64,double,float,int32,int64}` `AddN` | `T={complex64,double,float,int32,int64,uint32,uint64}` @@ -15,10 +16,12 @@ Operator | Type Constraint `ApproximateEqual` | `T={complex64,double,float,int32,int64,uint32,uint64}` `ArgMax` | `Tidx={int32,int64}`
`output_type={int32,int64}`
`T={float}` `ArgMin` | `Tidx={int32,int64}`
`output_type={int32,int64}`
`T={complex64,double,float,int32,int64,uint32,uint64}` +`Asin` | `T={complex64,double,float,int32,int64}` `Asinh` | `T={complex64,double,float}` `AssignAddVariableOp` | `dtype={complex64,double,float,int32,int64,uint32,uint64}` `AssignSubVariableOp` | `dtype={complex64,double,float,int32,int64,uint32,uint64}` `AssignVariableOp` | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}` +`Atan` | `T={complex64,double,float,int32,int64}` `Atan2` | `T={double,float}` `Atanh` | `T={complex64,double,float}` `AvgPool` | `T={double,float}` @@ -75,6 +78,10 @@ Operator | Type Constraint `FFT` | `FFT2D` | `FFT3D` | +`FakeQuantWithMinMaxArgs` | +`FakeQuantWithMinMaxArgsGradient` | +`FakeQuantWithMinMaxVars` | +`FakeQuantWithMinMaxVarsGradient` | `Fill` | `index_type={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Floor` | `T={double,float}` `FloorDiv` | `T={complex64,double,float,int32,int64}` @@ -84,6 +91,7 @@ Operator | Type Constraint `FusedBatchNormGradV2` | `U={float}`
`T={float}` `FusedBatchNormV2` | `U={float}`
`T={float}` `Gather` | `Tindices={int32,int64}`
`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}` +`GatherNd` | `Tindices={int32,int64}`
`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}` `GatherV2` | `Taxis={int32,int64}`
`Tindices={int32,int64}`
`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}` `Greater` | `T={double,float,int32,int64,uint32,uint64}` `GreaterEqual` | `T={double,float,int32,int64,uint32,uint64}` @@ -117,14 +125,18 @@ Operator | Type Constraint `LogicalNot` | `LogicalOr` | `MatMul` | `T={complex64,double,float}` +`MatrixBandPart` | `Tindex={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `MatrixDiag` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `MatrixDiagPart` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` +`MatrixSetDiag` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `MatrixTriangularSolve` | `T={complex64,double,float}` `Max` | `Tidx={int32,int64}`
`T={complex64,double,float,int32,int64,uint32,uint64}` `MaxPool` | `T={double,float,int32,int64}` `MaxPool3D` | `T={float}` `MaxPool3DGrad` | `TInput={float}`
`T={float}` `MaxPoolGrad` | `T={double,float,int32,int64,uint32,uint64}` +`MaxPoolGradGrad` | `T={float}` +`MaxPoolGradGradV2` | `T={float}` `MaxPoolGradV2` | `T={double,float,int32,int64,uint32,uint64}` `MaxPoolV2` | `T={double,float,int32,int64}` `Maximum` | `T={double,float,int32,int64}` @@ -186,6 +198,7 @@ Operator | Type Constraint `Round` | `T={complex64,double,float,int32,int64}` `Rsqrt` | `T={complex64,double,float}` `RsqrtGrad` | `T={complex64,double,float}` +`ScatterNd` | `Tindices={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Select` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Selu` | `T={double,float}` `SeluGrad` | `T={double,float}` @@ -198,6 +211,7 @@ Operator | Type Constraint `Sinh` | `T={complex64,double,float}` `Size` | `out_type={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Slice` | `Index={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` +`Snapshot` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Softmax` | `T={double,float}` `SoftmaxCrossEntropyWithLogits` | `T={double,float}` `Softplus` | `T={double,float,int32,int64,uint32,uint64}` diff --git a/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md b/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md index b9bdb829d7..55f0538dba 100644 --- a/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md +++ b/tensorflow/compiler/tf2xla/g3doc/gpu_supported_ops.md @@ -3,6 +3,7 @@ Operator | Type Constraint ------------------------------------- | --------------- `Abs` | `T={double,float,int32,int64}` +`Acos` | `T={complex64,double,float,int32,int64}` `Acosh` | `T={complex64,double,float}` `Add` | `T={complex64,double,float,int32,int64}` `AddN` | `T={complex64,double,float,int32,int64,uint32,uint64}` @@ -15,10 +16,12 @@ Operator | Type Constraint `ApproximateEqual` | `T={complex64,double,float,int32,int64,uint32,uint64}` `ArgMax` | `Tidx={int32,int64}`
`output_type={int32,int64}`
`T={complex64,double,float,int32,int64,uint32,uint64}` `ArgMin` | `Tidx={int32,int64}`
`output_type={int32,int64}`
`T={complex64,double,float,int32,int64,uint32,uint64}` +`Asin` | `T={complex64,double,float,int32,int64}` `Asinh` | `T={complex64,double,float}` `AssignAddVariableOp` | `dtype={complex64,double,float,int32,int64,uint32,uint64}` `AssignSubVariableOp` | `dtype={complex64,double,float,int32,int64,uint32,uint64}` `AssignVariableOp` | `dtype={bool,complex64,double,float,int32,int64,uint32,uint64}` +`Atan` | `T={complex64,double,float,int32,int64}` `Atan2` | `T={double,float}` `Atanh` | `T={complex64,double,float}` `AvgPool` | `T={double,float}` @@ -75,6 +78,10 @@ Operator | Type Constraint `FFT` | `FFT2D` | `FFT3D` | +`FakeQuantWithMinMaxArgs` | +`FakeQuantWithMinMaxArgsGradient` | +`FakeQuantWithMinMaxVars` | +`FakeQuantWithMinMaxVarsGradient` | `Fill` | `index_type={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Floor` | `T={double,float}` `FloorDiv` | `T={complex64,double,float,int32,int64}` @@ -84,6 +91,7 @@ Operator | Type Constraint `FusedBatchNormGradV2` | `U={float}`
`T={float}` `FusedBatchNormV2` | `U={float}`
`T={float}` `Gather` | `Tindices={int32,int64}`
`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}` +`GatherNd` | `Tindices={int32,int64}`
`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}` `GatherV2` | `Taxis={int32,int64}`
`Tindices={int32,int64}`
`Tparams={bool,complex64,double,float,int32,int64,uint32,uint64}` `Greater` | `T={double,float,int32,int64,uint32,uint64}` `GreaterEqual` | `T={double,float,int32,int64,uint32,uint64}` @@ -117,14 +125,18 @@ Operator | Type Constraint `LogicalNot` | `LogicalOr` | `MatMul` | `T={complex64,double,float}` +`MatrixBandPart` | `Tindex={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `MatrixDiag` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `MatrixDiagPart` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` +`MatrixSetDiag` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `MatrixTriangularSolve` | `T={complex64,double,float}` `Max` | `Tidx={int32,int64}`
`T={complex64,double,float,int32,int64,uint32,uint64}` `MaxPool` | `T={double,float,int32,int64}` `MaxPool3D` | `T={float}` `MaxPool3DGrad` | `TInput={float}`
`T={float}` `MaxPoolGrad` | `T={double,float,int32,int64,uint32,uint64}` +`MaxPoolGradGrad` | `T={float}` +`MaxPoolGradGradV2` | `T={float}` `MaxPoolGradV2` | `T={double,float,int32,int64,uint32,uint64}` `MaxPoolV2` | `T={double,float,int32,int64}` `Maximum` | `T={double,float,int32,int64}` @@ -183,6 +195,7 @@ Operator | Type Constraint `Round` | `T={complex64,double,float,int32,int64}` `Rsqrt` | `T={complex64,double,float}` `RsqrtGrad` | `T={complex64,double,float}` +`ScatterNd` | `Tindices={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Select` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Selu` | `T={double,float}` `SeluGrad` | `T={double,float}` @@ -195,6 +208,7 @@ Operator | Type Constraint `Sinh` | `T={complex64,double,float}` `Size` | `out_type={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Slice` | `Index={int32,int64}`
`T={bool,complex64,double,float,int32,int64,uint32,uint64}` +`Snapshot` | `T={bool,complex64,double,float,int32,int64,uint32,uint64}` `Softmax` | `T={double,float}` `SoftmaxCrossEntropyWithLogits` | `T={double,float}` `Softplus` | `T={double,float,int32,int64,uint32,uint64}` diff --git a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc index d4fb5dd4e0..086a9491aa 100644 --- a/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc +++ b/tensorflow/compiler/tf2xla/kernels/pooling_ops.cc @@ -525,5 +525,172 @@ class AvgPool3DGradOp : public AvgPoolGradOp { REGISTER_XLA_OP(Name("AvgPool3DGrad").CompileTimeConstInput("orig_input_shape"), AvgPool3DGradOp); +class MaxPoolGradGradOp : public XlaOpKernel { + public: + MaxPoolGradGradOp(OpKernelConstruction* ctx, int num_spatial_dims) + : XlaOpKernel(ctx), num_spatial_dims_(num_spatial_dims) { + if (ctx->num_inputs() == 3) { + OP_REQUIRES_OK(ctx, ctx->GetAttr("ksize", &ksize_)); + OP_REQUIRES_OK(ctx, ctx->GetAttr("strides", &stride_)); + } + OP_REQUIRES_OK(ctx, ctx->GetAttr("padding", &padding_)); + } + + int num_dims() const { return num_spatial_dims_ + 2; } + + void Compile(XlaOpKernelContext* ctx) override { + if (ctx->num_inputs() != 3) { + OP_REQUIRES( + ctx, ctx->num_inputs() == 5, + errors::InvalidArgument("Must supply ksize and stride arguments.")); + const TensorShape ksize_shape = ctx->InputShape(3); + // Validate input sizes. + OP_REQUIRES(ctx, TensorShapeUtils::IsVector(ksize_shape), + errors::InvalidArgument("ksize must be a vector, not shape ", + ksize_shape.DebugString())); + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(3, &ksize_)); + + const TensorShape stride_shape = ctx->InputShape(4); + // Validate input sizes. 
+ OP_REQUIRES(ctx, TensorShapeUtils::IsVector(stride_shape), + errors::InvalidArgument("stride must be a vector, not shape ", + stride_shape.DebugString())); + OP_REQUIRES_OK(ctx, ctx->ConstantInputAsIntVector(4, &stride_)); + } + + OP_REQUIRES(ctx, ksize_.size() == num_dims(), + errors::InvalidArgument("Sliding window ksize field must " + "specify ", + num_dims(), " dimensions")); + OP_REQUIRES(ctx, stride_.size() == num_dims(), + errors::InvalidArgument("Sliding window strides field must " + "specify ", + num_dims(), " dimensions")); + + const TensorShape tensor_in_shape = ctx->InputShape(0); + const TensorShape tensor_out_shape = ctx->InputShape(1); + const TensorShape out_backprop_shape = ctx->InputShape(2); + + // For maxpooling, tensor_in should have num_dims() dimensions. + OP_REQUIRES(ctx, tensor_in_shape.dims() == num_dims(), + errors::InvalidArgument("tensor_in must be ", num_dims(), + "-dimensional")); + OP_REQUIRES(ctx, tensor_out_shape.dims() == num_dims(), + errors::InvalidArgument("tensor_out must be ", num_dims(), + "-dimensional")); + // For maxpooling, out_backprop should have num_dims() dimensions. + OP_REQUIRES(ctx, out_backprop_shape.dims() == num_dims(), + errors::InvalidArgument("out_backprop must be ", num_dims(), + "-dimensional")); + + // What we want to compute: + // Given y = MaxPool(x), and xs_grad = MaxPoolGrad(x, y, ys_grad) + // MaxPoolGradGrad computes {ys_grad}_grad given x, y, and {xs_grad}_grad. + // + // In the regular TF op, this amounts to selecting for each window the + // incoming backprop value from xs_grad_grad that corresponds to the maximal + // value in the corresponding window of x. + // + // TODO(b/73062247): What we really want is a ReduceWindow with different + // arrays for index selection vs return value selection--a select-to-gather. 
+ // + // Here, we implement a bitwise hack: we use the hi 16 bits of input for + // separate max pooling alongside each of the hi and lo 16 bits of + // out_backprop packed into 16 lo bits, which we then glue back together at + // the end to get a full 32 bits of gradient. + // + // This could select the wrong backprop value for two x values that are + // equally maximal up to the first 16 bits, in which case we are taking the + // latter. + // + // Note that in principle we could use 32 separate maxpools to recover each + // of 32 bits of the gradient while preserving 31 bits of input for the max + // pooling criteria; here, we just truncate to the first 16 bits of input. + + auto input = ctx->Input(0); + auto out_backprop = ctx->Input(2); + + auto b = ctx->builder(); + + auto sixteen = b->ConstantR0(16); + // in (f32) -> round to bf16 -> f32 for correct bitwidth -> 16-high-bit u32 + auto in_hi = b->BitcastConvertType( + b->ConvertElementType(b->ConvertElementType(input, xla::BF16), + xla::F32), + xla::U32); + auto bp_int = b->BitcastConvertType(out_backprop, xla::U32); + auto bp_hi = b->ShiftRightLogical(bp_int, sixteen); + auto bp_lo = b->ShiftRightLogical(b->ShiftLeft(bp_int, sixteen), sixteen); + auto in_hi_bp_hi = b->Add(in_hi, bp_hi); // Want an unsigned add. + auto in_hi_bp_lo = b->Add(in_hi, bp_lo); // Want an unsigned add. + + auto init_value = XlaHelpers::MinValue(b, DT_FLOAT); + // We will reduce by taking the maximal value up to 16 bits (ignoring the lo + // 16 bits of packed-in hi/lo backprop value). + auto rb = b->CreateSubBuilder("GreaterOrEqOf_ByFirst16Bits"); + { + // F32 parameters to satisfy lowering type restriction for reduce opcode. 
+ const xla::Shape scalar = xla::ShapeUtil::MakeShape(xla::F32, {}); + auto lhs = rb->Parameter(0, scalar, "lhs"); + auto rhs = rb->Parameter(1, scalar, "rhs"); + auto sixteen = rb->ConstantR0(16); + auto lhs_criteria = rb->ShiftLeft( + rb->ShiftRightLogical(rb->BitcastConvertType(lhs, xla::S32), sixteen), + sixteen); + auto rhs_criteria = rb->ShiftLeft( + rb->ShiftRightLogical(rb->BitcastConvertType(rhs, xla::S32), sixteen), + sixteen); + // Must use a F32 comparison, because S32 would not work for negatives. + rb->Select(rb->Ge(rb->BitcastConvertType(lhs_criteria, xla::F32), + rb->BitcastConvertType(rhs_criteria, xla::F32)), + lhs, rhs); + } + auto reduce = rb->BuildAndNoteError(); + xla::Padding xla_padding = + (padding_ == VALID) ? xla::Padding::kValid : xla::Padding::kSame; + auto pooled_hi = + b->ReduceWindow(b->BitcastConvertType(in_hi_bp_hi, xla::F32), + init_value, reduce, ksize_, stride_, xla_padding); + auto pooled_lo = + b->ReduceWindow(b->BitcastConvertType(in_hi_bp_lo, xla::F32), + init_value, reduce, ksize_, stride_, xla_padding); + auto grads_hi = + b->ShiftLeft(b->BitcastConvertType(pooled_hi, xla::U32), sixteen); + auto grads_lo = b->ShiftRightLogical( + b->ShiftLeft(b->BitcastConvertType(pooled_lo, xla::U32), sixteen), + sixteen); + auto grads = b->Add(grads_hi, grads_lo); // Want an unsigned add. 
+ + xla::PrimitiveType element_type; + OP_REQUIRES_OK(ctx, DataTypeToPrimitiveType(input_type(2), &element_type)); + ctx->SetOutput(0, b->BitcastConvertType(grads, element_type)); + } + + protected: + const int num_spatial_dims_; + std::vector ksize_; + std::vector stride_; + Padding padding_; + TensorFormat data_format_ = FORMAT_NHWC; +}; + +class MaxPool2DGradGradOp : public MaxPoolGradGradOp { + public: + explicit MaxPool2DGradGradOp(OpKernelConstruction* ctx) + : MaxPoolGradGradOp(ctx, /*num_spatial_dims=*/2) { + string data_format; + OP_REQUIRES_OK(ctx, ctx->GetAttr("data_format", &data_format)); + OP_REQUIRES(ctx, FormatFromString(data_format, &data_format_), + errors::InvalidArgument("Invalid data format")); + } +}; +REGISTER_XLA_OP(Name("MaxPoolGradGrad").TypeConstraint("T", DT_FLOAT), + MaxPool2DGradGradOp); +REGISTER_XLA_OP(Name("MaxPoolGradGradV2") + .TypeConstraint("T", DT_FLOAT) + .CompileTimeConstInput("ksize") + .CompileTimeConstInput("strides"), + MaxPool2DGradGradOp); + } // anonymous namespace } // namespace tensorflow -- GitLab From a2ea23e91915fabd0e856f284d0af75a496a432a Mon Sep 17 00:00:00 2001 From: Brian Patton Date: Tue, 6 Mar 2018 08:23:04 -0800 Subject: [PATCH 452/884] StreamExecutor support for float64 convolutions and backprop. 
PiperOrigin-RevId: 188025477 --- tensorflow/stream_executor/cuda/cuda_dnn.cc | 132 ++++++++++++++------ tensorflow/stream_executor/cuda/cuda_dnn.h | 29 ++++- tensorflow/stream_executor/dnn.h | 28 ++++- tensorflow/stream_executor/stream.cc | 97 ++++++++++++++ tensorflow/stream_executor/stream.h | 35 ++++++ 5 files changed, 284 insertions(+), 37 deletions(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 61cf4ba7ea..0b3b060fe7 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -2281,7 +2281,6 @@ struct ConvDoFP32ComputationFP16Input { // A group of helper functions to return the internal compute type for // convolutions in cudnn. -// TODO(yangzihao): Add support for float64. template cudnnDataType_t GetConvComputeType() { return CUDNN_DATA_FLOAT; @@ -2296,6 +2295,11 @@ cudnnDataType_t GetConvComputeType() { } } +template <> +cudnnDataType_t GetConvComputeType() { + return CUDNN_DATA_DOUBLE; +} + } // namespace template @@ -2324,9 +2328,15 @@ bool CudnnSupport::DoConvolveImpl( LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } // Alpha is the scaling factor for input. - float alpha = 1.0; + float falpha = 1.0; + double dalpha = 1.0; + void* alpha = cudnn_type == CUDNN_DATA_DOUBLE ? static_cast(&dalpha) + : static_cast(&falpha); // Beta is the scaling factor for output. - float beta = 0.0; + float fbeta = 0.0; + double dbeta = 0.0; + void* beta = cudnn_type == CUDNN_DATA_DOUBLE ? 
static_cast(&dbeta) + : static_cast(&fbeta); const bool is_profiling = output_profile_result != nullptr; cudnnConvolutionFwdAlgo_t algo; @@ -2464,11 +2474,11 @@ bool CudnnSupport::DoConvolveImpl( } status = wrap::cudnnConvolutionForward( parent_, ToHandle(dnn_handle_), - /*alpha=*/&alpha, /*srcDesc=*/input_nd.handle(), + /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(), /*srcData=*/input_data.opaque(), /*filterDesc=*/filter.handle(), /*filterData=*/filter_data.opaque(), /*convDesc=*/conv.handle(), /*algo=*/algo, /*workSpace=*/scratch.opaque(), - /*workSpaceSizeInBytes=*/scratch.size(), /*beta=*/&beta, + /*workSpaceSizeInBytes=*/scratch.size(), /*beta=*/beta, /*destDesc=*/output_nd.handle(), /*destData=*/output_data->opaque()); if (is_profiling) { @@ -2943,10 +2953,14 @@ bool CudnnSupport::DoConvolve( const FilterDescriptor& filter_descriptor, const DeviceMemory& filter_data, const ConvolutionDescriptor& convolution_descriptor, - const BatchDescriptor& output_descriptor, - DeviceMemory* output_data) { - LOG(ERROR) << "double-based DNN not yet implemented"; - return false; + const BatchDescriptor& output_descriptor, DeviceMemory* output_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) { + return DoConvolveImpl( + stream, batch_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output_data, scratch_allocator, + algorithm_config, output_profile_result); } bool CudnnSupport::DoConvolve( @@ -3151,10 +3165,17 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } + cudnnDataType_t cudnn_type = GetCudnnDataType(); // Alpha is the scaling factor for input. - float alpha = 1.0; + float falpha = 1.0; + double dalpha = 1.0; + void* alpha = cudnn_type == CUDNN_DATA_DOUBLE ? static_cast(&dalpha) + : static_cast(&falpha); // Beta is the scaling factor for output. 
- float beta = 0.0; + float fbeta = 0.0; + double dbeta = 0.0; + void* beta = cudnn_type == CUDNN_DATA_DOUBLE ? static_cast(&dbeta) + : static_cast(&fbeta); // TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass. BatchDescriptor output_descriptor; @@ -3163,7 +3184,6 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( backward_output_data = MaybeTransformLayout( stream, &output_descriptor, backward_output_data, &transform_scratch); - cudnnDataType_t cudnn_type = GetCudnnDataType(); ScopedTensorDescriptor out_back_nd{parent_, output_descriptor, cudnn_type}; ScopedTensorDescriptor in_back_nd{parent_, input_descriptor, cudnn_type}; ScopedFilterDescriptor filter{parent_, filter_descriptor, input_descriptor, @@ -3310,7 +3330,7 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( status = wrap::cudnnConvolutionBackwardData_v3( #endif parent_, ToHandle(dnn_handle_), - /*alpha=*/&alpha, + /*alpha=*/alpha, /*filterDesc=*/filter.handle(), /*filterData=*/filter_data.opaque(), /*diffDesc=*/out_back_nd.handle(), @@ -3319,7 +3339,7 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( /*algo=*/algo, /*workSpace=*/scratch.opaque(), /*workSpaceSizeInBytes=*/scratch.size(), - /*beta=*/&beta, + /*beta=*/beta, /*gradDesc=*/in_back_nd.handle(), /*gradData=*/backward_input_data->opaque()); if (is_profiling) { @@ -3344,10 +3364,28 @@ bool CudnnSupport::DoConvolveBackwardDataImpl( return true; } +bool CudnnSupport::DoConvolveBackwardData( + Stream* stream, const FilterDescriptor& filter_descriptor, + const DeviceMemory& filter_data, + const BatchDescriptor& output_descriptor, + DeviceMemory backward_output_data, + const ConvolutionDescriptor& convolution_descriptor, + const BatchDescriptor& input_descriptor, + DeviceMemory* backward_input_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) { + return DoConvolveBackwardDataImpl(stream, filter_descriptor, filter_data, + output_descriptor, 
backward_output_data, + convolution_descriptor, input_descriptor, + backward_input_data, scratch_allocator, + algorithm_config, output_profile_result); +} + bool CudnnSupport::DoConvolveBackwardData( Stream* stream, const FilterDescriptor& filter_descriptor, const DeviceMemory& filter_data, - const BatchDescriptor& output_descriptor_in, + const BatchDescriptor& output_descriptor, DeviceMemory backward_output_data, const ConvolutionDescriptor& convolution_descriptor, const BatchDescriptor& input_descriptor, @@ -3356,7 +3394,7 @@ bool CudnnSupport::DoConvolveBackwardData( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { return DoConvolveBackwardDataImpl(stream, filter_descriptor, filter_data, - output_descriptor_in, backward_output_data, + output_descriptor, backward_output_data, convolution_descriptor, input_descriptor, backward_input_data, scratch_allocator, algorithm_config, output_profile_result); @@ -3365,7 +3403,7 @@ bool CudnnSupport::DoConvolveBackwardData( bool CudnnSupport::DoConvolveBackwardData( Stream* stream, const FilterDescriptor& filter_descriptor, const DeviceMemory& filter_data, - const BatchDescriptor& output_descriptor_in, + const BatchDescriptor& output_descriptor, DeviceMemory backward_output_data, const ConvolutionDescriptor& convolution_descriptor, const BatchDescriptor& input_descriptor, @@ -3374,7 +3412,7 @@ bool CudnnSupport::DoConvolveBackwardData( const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { return DoConvolveBackwardDataImpl(stream, filter_descriptor, filter_data, - output_descriptor_in, backward_output_data, + output_descriptor, backward_output_data, convolution_descriptor, input_descriptor, backward_input_data, scratch_allocator, algorithm_config, output_profile_result); @@ -3398,10 +3436,17 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( LOG(FATAL) << "failed to set stream for cudnn handle: " << ToString(status); } + cudnnDataType_t cudnn_type 
= GetCudnnDataType(); // Alpha is the scaling factor for input. - float alpha = 1.0; + float falpha = 1.0; + double dalpha = 1.0; + void* alpha = cudnn_type == CUDNN_DATA_DOUBLE ? static_cast(&dalpha) + : static_cast(&falpha); // Beta is the scaling factor for output. - float beta = 0.0; + float fbeta = 0.0; + double dbeta = 0.0; + void* beta = cudnn_type == CUDNN_DATA_DOUBLE ? static_cast(&dbeta) + : static_cast(&fbeta); // TBD(keveman): remove once cuDNN supports kBatchYXDepth for backward pass. BatchDescriptor output_descriptor; @@ -3410,7 +3455,6 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( backward_output_data = MaybeTransformLayout( stream, &output_descriptor, backward_output_data, &transform_scratch); - cudnnDataType_t cudnn_type = GetCudnnDataType(); ScopedTensorDescriptor out_back_nd{parent_, output_descriptor, cudnn_type}; ScopedTensorDescriptor input_nd{parent_, input_descriptor, cudnn_type}; ScopedFilterDescriptor filter{parent_, filter_descriptor, input_descriptor, @@ -3557,7 +3601,7 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( #else status = wrap::cudnnConvolutionBackwardFilter_v3( #endif - parent_, ToHandle(dnn_handle_), /*alpha=*/&alpha, + parent_, ToHandle(dnn_handle_), /*alpha=*/alpha, /*srcDesc=*/input_nd.handle(), /*srcData=*/input_data.opaque(), /*diffDesc=*/out_back_nd.handle(), @@ -3566,7 +3610,7 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( /*algo=*/algo, /*workSpace=*/scratch.opaque(), /*workSpaceSizeInBytes=*/scratch.size(), - /*beta=*/&beta, + /*beta=*/beta, /*gradDesc=*/filter.handle(), /*gradData=*/backward_filter_data->opaque()); @@ -3592,10 +3636,28 @@ bool CudnnSupport::DoConvolveBackwardFilterImpl( return true; } +bool CudnnSupport::DoConvolveBackwardFilter( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const 
dnn::FilterDescriptor& filter_descriptor, + DeviceMemory* backward_filter_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) { + return DoConvolveBackwardFilterImpl(stream, input_descriptor, input_data, + output_descriptor, backward_output_data, + convolution_descriptor, filter_descriptor, + backward_filter_data, scratch_allocator, + algorithm_config, output_profile_result); +} + bool CudnnSupport::DoConvolveBackwardFilter( Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory& input_data, - const dnn::BatchDescriptor& output_descriptor_in, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory backward_output_data, const dnn::ConvolutionDescriptor& convolution_descriptor, const dnn::FilterDescriptor& filter_descriptor, @@ -3603,17 +3665,17 @@ bool CudnnSupport::DoConvolveBackwardFilter( ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - return DoConvolveBackwardFilterImpl( - stream, input_descriptor, input_data, output_descriptor_in, - backward_output_data, convolution_descriptor, filter_descriptor, - backward_filter_data, scratch_allocator, algorithm_config, - output_profile_result); + return DoConvolveBackwardFilterImpl(stream, input_descriptor, input_data, + output_descriptor, backward_output_data, + convolution_descriptor, filter_descriptor, + backward_filter_data, scratch_allocator, + algorithm_config, output_profile_result); } bool CudnnSupport::DoConvolveBackwardFilter( Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory& input_data, - const dnn::BatchDescriptor& output_descriptor_in, + const dnn::BatchDescriptor& output_descriptor, DeviceMemory backward_output_data, const dnn::ConvolutionDescriptor& convolution_descriptor, const dnn::FilterDescriptor& filter_descriptor, @@ -3621,11 +3683,11 @@ bool 
CudnnSupport::DoConvolveBackwardFilter( ScratchAllocator* scratch_allocator, const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) { - return DoConvolveBackwardFilterImpl( - stream, input_descriptor, input_data, output_descriptor_in, - backward_output_data, convolution_descriptor, filter_descriptor, - backward_filter_data, scratch_allocator, algorithm_config, - output_profile_result); + return DoConvolveBackwardFilterImpl(stream, input_descriptor, input_data, + output_descriptor, backward_output_data, + convolution_descriptor, filter_descriptor, + backward_filter_data, scratch_allocator, + algorithm_config, output_profile_result); } template diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.h b/tensorflow/stream_executor/cuda/cuda_dnn.h index 40aa974dd9..48d56f71e3 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.h +++ b/tensorflow/stream_executor/cuda/cuda_dnn.h @@ -259,7 +259,10 @@ class CudnnSupport : public dnn::DnnSupport { const DeviceMemory& filter_data, const dnn::ConvolutionDescriptor& convolution_descriptor, const dnn::BatchDescriptor& output_descriptor, - DeviceMemory* output_data) override; + DeviceMemory* output_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) override; bool DoConvolve(Stream* stream, const dnn::BatchDescriptor& batch_descriptor, const DeviceMemory& input_data, @@ -371,6 +374,18 @@ class CudnnSupport : public dnn::DnnSupport { return false; } + bool DoConvolveBackwardData( + Stream* stream, const dnn::FilterDescriptor& filter_descriptor, + const DeviceMemory& filter_data, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::BatchDescriptor& input_descriptor, + DeviceMemory* backward_input_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* 
output_profile_result) override; + bool DoConvolveBackwardData( Stream* stream, const dnn::FilterDescriptor& filter_descriptor, const DeviceMemory& filter_data, @@ -395,6 +410,18 @@ class CudnnSupport : public dnn::DnnSupport { const dnn::AlgorithmConfig& algorithm_config, dnn::ProfileResult* output_profile_result) override; + bool DoConvolveBackwardFilter( + Stream* stream, const dnn::BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const dnn::BatchDescriptor& output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor& convolution_descriptor, + const dnn::FilterDescriptor& filter_descriptor, + DeviceMemory* backward_filter_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) override; + bool DoConvolveBackwardFilter( Stream* stream, const dnn::BatchDescriptor& input_descriptor, const DeviceMemory& input_data, diff --git a/tensorflow/stream_executor/dnn.h b/tensorflow/stream_executor/dnn.h index aa88fe770f..b41536e638 100644 --- a/tensorflow/stream_executor/dnn.h +++ b/tensorflow/stream_executor/dnn.h @@ -1172,7 +1172,9 @@ class DnnSupport { const DeviceMemory& filter_data, const dnn::ConvolutionDescriptor& convolution_descriptor, const dnn::BatchDescriptor& output_descriptor, - DeviceMemory* output_data) = 0; + DeviceMemory* output_data, ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + dnn::ProfileResult* output_profile_result) = 0; // Enqueues a half-precision convolution operation onto the stream. // See DoConvolve above for argument details. 
@@ -1273,6 +1275,18 @@ class DnnSupport { bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms); + virtual bool DoConvolveBackwardData( + Stream* stream, const FilterDescriptor& filter_descriptor, + const DeviceMemory& filter_data, + const BatchDescriptor& output_descriptor, + DeviceMemory backward_output_data, + const ConvolutionDescriptor& convolution_descriptor, + const BatchDescriptor& input_descriptor, + DeviceMemory* backward_input_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + ProfileResult* output_profile_result) = 0; + virtual bool DoConvolveBackwardData( Stream* stream, const FilterDescriptor& filter_descriptor, const DeviceMemory& filter_data, @@ -1322,6 +1336,18 @@ class DnnSupport { bool with_winograd_nonfused, int cc_major, int cc_minor, std::vector* out_algorithms); + virtual bool DoConvolveBackwardFilter( + Stream* stream, const BatchDescriptor& input_descriptor, + const DeviceMemory& input_data, + const BatchDescriptor& output_descriptor, + DeviceMemory backward_output_data, + const ConvolutionDescriptor& convolution_descriptor, + const FilterDescriptor& filter_descriptor, + DeviceMemory* backward_filter_data, + ScratchAllocator* scratch_allocator, + const dnn::AlgorithmConfig& algorithm_config, + ProfileResult* output_profile_result) = 0; + virtual bool DoConvolveBackwardFilter( Stream* stream, const BatchDescriptor& input_descriptor, const DeviceMemory& input_data, diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index ba5001e273..4d852e6e5a 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -681,6 +681,37 @@ Stream &Stream::ThenFusedConvolveWithAlgorithm( return *this; } +Stream &Stream::ThenConvolveWithAlgorithm( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + 
const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, DeviceMemory *output, + ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result) { + VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), + PARAM(filter_descriptor), PARAM(filter_data), + PARAM(convolution_descriptor), PARAM(output_descriptor), + PARAM(output), PARAM(algorithm_config)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoConvolve( + this, input_descriptor, input_data, filter_descriptor, filter_data, + convolution_descriptor, output_descriptor, output, scratch_allocator, + algorithm_config, output_profile_result); + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + Stream &Stream::ThenConvolveWithAlgorithm( const dnn::BatchDescriptor &input_descriptor, const DeviceMemory &input_data, @@ -890,6 +921,39 @@ Stream &Stream::ThenConvolveBackwardDataWithScratch( return *this; } +Stream &Stream::ThenConvolveBackwardDataWithAlgorithm( + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &input_descriptor, + DeviceMemory *backward_input_data, + ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result) { + VLOG_CALL(PARAM(filter_descriptor), PARAM(filter_data), + PARAM(output_descriptor), PARAM(backward_output_data), + PARAM(convolution_descriptor), PARAM(input_descriptor), + PARAM(backward_input_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoConvolveBackwardData( + this, filter_descriptor, filter_data, output_descriptor, + backward_output_data, 
convolution_descriptor, input_descriptor, + backward_input_data, scratch_allocator, algorithm_config, + output_profile_result); + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + Stream &Stream::ThenConvolveBackwardDataWithAlgorithm( const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory &filter_data, @@ -1026,6 +1090,39 @@ Stream &Stream::ThenConvolveBackwardFilterWithScratch( return *this; } +Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::FilterDescriptor &filter_descriptor, + DeviceMemory *backward_filter_data, + ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result) { + VLOG_CALL(PARAM(input_descriptor), PARAM(input_data), + PARAM(output_descriptor), PARAM(backward_output_data), + PARAM(convolution_descriptor), PARAM(filter_descriptor), + PARAM(backward_filter_data)); + + if (ok()) { + if (dnn::DnnSupport *dnn = parent_->AsDnn()) { + auto status = dnn->DoConvolveBackwardFilter( + this, input_descriptor, input_data, output_descriptor, + backward_output_data, convolution_descriptor, filter_descriptor, + backward_filter_data, scratch_allocator, algorithm_config, + output_profile_result); + if (!status && !output_profile_result) { + SetError(); + } + } else { + SetErrorAndLogNoDnnSupport(); + } + } + return *this; +} + Stream &Stream::ThenConvolveBackwardFilterWithAlgorithm( const dnn::BatchDescriptor &input_descriptor, const DeviceMemory &input_data, diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index a2fb2ea237..8cd0a0d3ba 100644 --- a/tensorflow/stream_executor/stream.h +++ 
b/tensorflow/stream_executor/stream.h @@ -358,6 +358,17 @@ class Stream { const dnn::BatchDescriptor &output_descriptor, DeviceMemory *output, ScratchAllocator *scratch_allocator); + Stream &ThenConvolveWithAlgorithm( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory *output, ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result); + Stream &ThenConvolveWithAlgorithm( const dnn::BatchDescriptor &input_descriptor, const DeviceMemory &input_data, @@ -476,6 +487,18 @@ class Stream { DeviceMemory *backward_input_data, ScratchAllocator *scratch_allocator); + Stream &ThenConvolveBackwardDataWithAlgorithm( + const dnn::FilterDescriptor &filter_descriptor, + const DeviceMemory &filter_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::BatchDescriptor &input_descriptor, + DeviceMemory *backward_input_data, + ScratchAllocator *scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result); + Stream &ThenConvolveBackwardDataWithAlgorithm( const dnn::FilterDescriptor &filter_descriptor, const DeviceMemory &filter_data, @@ -529,6 +552,18 @@ class Stream { DeviceMemory *backward_filter_data, ScratchAllocator *scratch_allocator); + Stream &ThenConvolveBackwardFilterWithAlgorithm( + const dnn::BatchDescriptor &input_descriptor, + const DeviceMemory &input_data, + const dnn::BatchDescriptor &output_descriptor, + DeviceMemory backward_output_data, + const dnn::ConvolutionDescriptor &convolution_descriptor, + const dnn::FilterDescriptor &filter_descriptor, + DeviceMemory *backward_filter_data, + ScratchAllocator 
*scratch_allocator, + const dnn::AlgorithmConfig &algorithm_config, + dnn::ProfileResult *output_profile_result); + Stream &ThenConvolveBackwardFilterWithAlgorithm( const dnn::BatchDescriptor &input_descriptor, const DeviceMemory &input_data, -- GitLab From faac588327a130fd79b7efdb751c63e98fa3f1e4 Mon Sep 17 00:00:00 2001 From: mdfaijul Date: Tue, 6 Mar 2018 09:27:48 -0800 Subject: [PATCH 453/884] Optmized Relu by in-place computations -- uses OpKernelContext::forward_input_or_allocate_output() --- tensorflow/core/kernels/mkl_relu_op.cc | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/tensorflow/core/kernels/mkl_relu_op.cc b/tensorflow/core/kernels/mkl_relu_op.cc index 267f4f8d12..0a0f69522f 100644 --- a/tensorflow/core/kernels/mkl_relu_op.cc +++ b/tensorflow/core/kernels/mkl_relu_op.cc @@ -392,7 +392,7 @@ class MklReluOpBase : public OpKernel { Tensor* dst_tensor = nullptr; if (src_tensor.dims() == 0) { - Compute_Scalar(context); + Compute_Scalar(context); // scalar case doesn't use in-place operation return; } @@ -437,11 +437,15 @@ class MklReluOpBase : public OpKernel { dnn_shape_dst.SetMklTensor(false); tf_shape_dst = src_tensor.shape(); } - AllocateOutputSetMklShape(context, dst_index, &dst_tensor, tf_shape_dst, - dnn_shape_dst); + + // Allocate output and MklDnnShape tensors separately for possible + // in-place operation + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {src_index}, dst_index, tf_shape_dst, &dst_tensor)); + AllocateOutputSetMklShape(context, dst_index, dnn_shape_dst); // Destination memory descriptor is same as source memory descriptor. 
- auto dst_md = src_md; + auto &dst_md = src_md; dst.SetUsrMem(dst_md, dst_tensor); // execute net @@ -492,7 +496,7 @@ class MklReluGradOpBase : public OpKernel { int src_dims_size = src_tensor.dims(); if (src_dims_size == 0) { - Compute_Scalar(context); + Compute_Scalar(context); // scalar case doesn't use in-place operation return; } @@ -603,8 +607,13 @@ class MklReluGradOpBase : public OpKernel { // so it is ok to get TensorFlow shape. tf_shape_diff_src = src_tensor.shape(); } - AllocateOutputSetMklShape(context, diff_src_index, &diff_src_tensor, - tf_shape_diff_src, dnn_shape_diff_src); + + // Allocate diff_src and MklDnnShape tensors separately for possible + // in-place operation + OP_REQUIRES_OK(context, context->forward_input_or_allocate_output( + {diff_dst_index}, diff_src_index, tf_shape_diff_src, + &diff_src_tensor)); + AllocateOutputSetMklShape(context, diff_src_index, dnn_shape_diff_src); // diff_src memory descriptor is same as memory descriptor for both // inputs. -- GitLab From 1f441c191f9a6d8f27b32b1c19c55f76aaf9e387 Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Tue, 6 Mar 2018 18:48:01 +0100 Subject: [PATCH 454/884] Windows: Use cc_import to import python lib properly (#17474) Previously, we put python.lib in data attribute of a cc_library and manually added the link option. That caused the build to be non-hermetic. This change fixed the problem. 
--- third_party/py/BUILD.tpl | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/third_party/py/BUILD.tpl b/third_party/py/BUILD.tpl index de06ad5f27..1dd8ab433a 100644 --- a/third_party/py/BUILD.tpl +++ b/third_party/py/BUILD.tpl @@ -2,20 +2,26 @@ licenses(["restricted"]) package(default_visibility = ["//visibility:public"]) +# To build Python C/C++ extension on Windows, we need to link to python import library pythonXY.lib +# See https://docs.python.org/3/extending/windows.html +cc_import( + name = "python_lib", + interface_library = select({ + ":windows": ":python_import_lib", + # A placeholder for Unix platforms which makes --no_build happy. + "//conditions:default": "not-existing.lib", + }), + system_provided = 1, +) + cc_library( name = "python_headers", hdrs = [":python_include"], - data = select({ - ":windows": [":python_import_lib"], + deps = select({ + ":windows": [":python_lib"], "//conditions:default": [], }), includes = ["python_include"], - linkopts = select({ - # TODO(pcloudy): Ideally, this should just go into deps after resolving - # https://github.com/bazelbuild/bazel/issues/3237, - ":windows": ["$(locations :python_import_lib)"], - "//conditions:default": [], - }), ) cc_library( -- GitLab From 5aee07fd0462d00c52efb5d3c86bfb955a9d976e Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Tue, 6 Mar 2018 09:49:28 -0800 Subject: [PATCH 455/884] Updating the cuda compute info and avx info for Windows. 
(#17450) --- tensorflow/docs_src/install/install_linux.md | 3 ++- tensorflow/docs_src/install/install_windows.md | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index 5382c9db31..be74a0d951 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -41,7 +41,8 @@ must be installed on your system: [NVIDIA's documentation](https://developer.nvidia.com/cudnn). Ensure that you create the `CUDA_HOME` environment variable as described in the NVIDIA documentation. - * GPU card with CUDA Compute Capability 3.0 or higher. See + * GPU card with CUDA Compute Capability 3.0 or higher for building + from source and 3.5 or higher for our binaries. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. * The libcupti-dev library, which is the NVIDIA CUDA Profile Tools Interface. diff --git a/tensorflow/docs_src/install/install_windows.md b/tensorflow/docs_src/install/install_windows.md index 87e1a715aa..a837c7dac4 100644 --- a/tensorflow/docs_src/install/install_windows.md +++ b/tensorflow/docs_src/install/install_windows.md @@ -17,7 +17,7 @@ You must choose one of the following types of TensorFlow to install: NVIDIA® GPU, you must install this version. Note that this version of TensorFlow is typically much easier to install (typically, in 5 or 10 minutes), so even if you have an NVIDIA GPU, we recommend - installing this version first. + installing this version first. Prebuilt binaries will use AVX instructions. * **TensorFlow with GPU support**. TensorFlow programs typically run significantly faster on a GPU than on a CPU. Therefore, if your system has a NVIDIA® GPU meeting the prerequisites shown below @@ -41,7 +41,8 @@ installed on your system: Note that cuDNN is typically installed in a different location from the other CUDA DLLs. 
Ensure that you add the directory where you installed the cuDNN DLL to your `%PATH%` environment variable. - * GPU card with CUDA Compute Capability 3.0 or higher. See + * GPU card with CUDA Compute Capability 3.0 or higher for building + from source and 3.5 or higher for our binaries. See [NVIDIA documentation](https://developer.nvidia.com/cuda-gpus) for a list of supported GPU cards. -- GitLab From edbd683f42f999b8665a51c9312cdf9d05b335bb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 09:54:36 -0800 Subject: [PATCH 456/884] Implementation of tf.cast in TfLite PiperOrigin-RevId: 188036286 --- tensorflow/contrib/lite/builtin_ops.h | 1 + tensorflow/contrib/lite/kernels/BUILD | 13 ++ tensorflow/contrib/lite/kernels/cast.cc | 99 ++++++++++++++ tensorflow/contrib/lite/kernels/cast_test.cc | 66 ++++++++++ tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/model.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 5 + .../contrib/lite/schema/schema_generated.h | 124 +++++++++++++++++- tensorflow/contrib/lite/toco/tflite/types.cc | 2 + 10 files changed, 308 insertions(+), 6 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/cast.cc create mode 100644 tensorflow/contrib/lite/kernels/cast_test.cc diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 88cdf1d463..7e08500980 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -77,6 +77,7 @@ typedef enum { kTfLiteBuiltinLogSoftmax = 50, kTfLiteBuiltinDelegate = 51, kTfLiteBuiltinBidirectionalSequenceLstm = 52, + kTfLiteBuiltinCast = 53, } TfLiteBuiltinOperator; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 7dc725d578..6bbc0bf9a7 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -106,6 +106,7 @@ cc_library( 
"batch_to_space_nd.cc", "bidirectional_sequence_lstm.cc", "bidirectional_sequence_rnn.cc", + "cast.cc", "concatenation.cc", "conv.cc", "depthwise_conv.cc", @@ -234,6 +235,18 @@ tf_cc_test( ], ) +tf_cc_test( + name = "cast_test", + size = "small", + srcs = ["cast_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "concatenation_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/cast.cc b/tensorflow/contrib/lite/kernels/cast.cc new file mode 100644 index 0000000000..19942de7bc --- /dev/null +++ b/tensorflow/contrib/lite/kernels/cast.cc @@ -0,0 +1,99 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" +#include "tensorflow/contrib/lite/string_util.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace cast { +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + return context->ResizeTensor(context, output, + TfLiteIntArrayCopy(input->dims)); +} + +template +void copyCast(const FromT* in, ToT* out, int num_elements) { + std::transform(in, in + num_elements, out, + [](FromT a) { return static_cast(a); }); +} + +template +TfLiteStatus copyToTensor(const FromT* in, TfLiteTensor* out, + int num_elements) { + switch (out->type) { + case kTfLiteInt64: + copyCast(in, out->data.i64, num_elements); + break; + case kTfLiteInt32: + copyCast(in, out->data.i32, num_elements); + break; + case kTfLiteUInt8: + copyCast(in, out->data.uint8, num_elements); + break; + case kTfLiteFloat32: + copyCast(in, out->data.f, num_elements); + break; + default: + // Unsupported type. 
+ return kTfLiteError; + } + return kTfLiteOk; +} + +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + const int num_elements = NumElements(input); + TF_LITE_ENSURE_EQ(context, num_elements, NumElements(output)); + switch (input->type) { + case kTfLiteInt64: + return copyToTensor(input->data.i64, output, num_elements); + case kTfLiteInt32: + return copyToTensor(input->data.i32, output, num_elements); + case kTfLiteUInt8: + return copyToTensor(input->data.uint8, output, num_elements); + case kTfLiteFloat32: + return copyToTensor(input->data.f, output, num_elements); + default: + // Unsupported type. + return kTfLiteError; + } + return kTfLiteOk; +} +} // namespace cast + +TfLiteRegistration* Register_CAST() { + static TfLiteRegistration r = {nullptr, nullptr, cast::Prepare, cast::Eval}; + return &r; +} + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/cast_test.cc b/tensorflow/contrib/lite/kernels/cast_test.cc new file mode 100644 index 0000000000..4e56482a37 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/cast_test.cc @@ -0,0 +1,66 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class CastOpModel : public SingleOpModel { + public: + CastOpModel(const TensorData& input, const TensorData& output) { + input_ = AddInput(input); + output_ = AddOutput(output); + SetBuiltinOp(BuiltinOperator_CAST, BuiltinOptions_CastOptions, + CreateCastOptions(builder_).Union()); + BuildInterpreter({GetShape(input_)}); + } + + int input() const { return input_; } + int output() const { return output_; } + + protected: + int input_; + int output_; +}; + +TEST(CastOpModel, CastIntToFloat) { + CastOpModel m({TensorType_INT64, {2, 3}}, {TensorType_FLOAT32, {2, 3}}); + m.PopulateTensor(m.input(), {100, 200, 300, 400, 500, 600}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100.f, 200.f, 300.f, 400.f, 500.f, 600.f})); +} + +TEST(CastOpModel, CastFloatToInt) { + CastOpModel m({TensorType_FLOAT32, {3, 2}}, {TensorType_INT32, {3, 2}}); + m.PopulateTensor(m.input(), {100.f, 20.f, 3.f, 0.4f, 0.999f, 1.1f}); + m.Invoke(); + EXPECT_THAT(m.ExtractVector(m.output()), + ElementsAreArray({100, 20, 3, 0, 0, 1})); +} + +} // namespace +} // namespace tflite +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index aea6f8d9d3..06b7ce4a97 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -65,6 +65,7 @@ TfLiteRegistration* Register_STRIDED_SLICE(); TfLiteRegistration* Register_EXP(); TfLiteRegistration* Register_TOPK_V2(); TfLiteRegistration* 
Register_LOG_SOFTMAX(); +TfLiteRegistration* Register_CAST(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -119,6 +120,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_EXP, Register_EXP()); AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2()); AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); + AddBuiltin(BuiltinOperator_CAST, Register_CAST()); } TfLiteRegistration* BuiltinOpResolver::FindOp( diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 725f2838c5..141d04afd7 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -287,6 +287,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_EXP: case BuiltinOperator_TOPK_V2: case BuiltinOperator_LOG_SOFTMAX: + case BuiltinOperator_CAST: break; case BuiltinOperator_LSH_PROJECTION: { TfLiteLSHProjectionParams* params = diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index e631ffd845..80036d8033 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -347,6 +347,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_EXP: case tflite::BuiltinOperator_LOG_SOFTMAX: case tflite::BuiltinOperator_DELEGATE: + case tflite::BuiltinOperator_CAST: FATAL("Op code %d is currently not delegated to NNAPI", builtin); nn_op_type = -1; // set to invalid break; diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 98ac0469d1..5f617a7e12 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -129,6 +129,7 @@ enum BuiltinOperator : byte { // WARNING: Experimental interface, subject to change DELEGATE = 51, BIDIRECTIONAL_SEQUENCE_LSTM = 52, + CAST = 53, } // Options for the builtin operators. 
@@ -169,6 +170,7 @@ union BuiltinOptions { TopKV2Options, SplitOptions, LogSoftmaxOptions, + CastOptions, } enum Padding : byte { SAME, VALID } @@ -374,6 +376,9 @@ table StridedSliceOptions { table LogSoftmaxOptions { } +table CastOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index 99e1accaa7..fcacc9816a 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -139,6 +139,9 @@ struct StridedSliceOptionsT; struct LogSoftmaxOptions; struct LogSoftmaxOptionsT; +struct CastOptions; +struct CastOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -246,11 +249,12 @@ enum BuiltinOperator { BuiltinOperator_LOG_SOFTMAX = 50, BuiltinOperator_DELEGATE = 51, BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM = 52, + BuiltinOperator_CAST = 53, BuiltinOperator_MIN = BuiltinOperator_ADD, - BuiltinOperator_MAX = BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM + BuiltinOperator_MAX = BuiltinOperator_CAST }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[50] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[51] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, @@ -301,7 +305,8 @@ inline BuiltinOperator (&EnumValuesBuiltinOperator())[50] { BuiltinOperator_SPLIT, BuiltinOperator_LOG_SOFTMAX, BuiltinOperator_DELEGATE, - BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM + BuiltinOperator_BIDIRECTIONAL_SEQUENCE_LSTM, + BuiltinOperator_CAST }; return values; } @@ -361,6 +366,7 @@ inline const char **EnumNamesBuiltinOperator() { "LOG_SOFTMAX", "DELEGATE", "BIDIRECTIONAL_SEQUENCE_LSTM", + "CAST", nullptr }; return names; @@ -409,11 +415,12 @@ enum BuiltinOptions { BuiltinOptions_TopKV2Options = 34, BuiltinOptions_SplitOptions = 35, 
BuiltinOptions_LogSoftmaxOptions = 36, + BuiltinOptions_CastOptions = 37, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_LogSoftmaxOptions + BuiltinOptions_MAX = BuiltinOptions_CastOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[37] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[38] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -451,7 +458,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[37] { BuiltinOptions_ExpOptions, BuiltinOptions_TopKV2Options, BuiltinOptions_SplitOptions, - BuiltinOptions_LogSoftmaxOptions + BuiltinOptions_LogSoftmaxOptions, + BuiltinOptions_CastOptions }; return values; } @@ -495,6 +503,7 @@ inline const char **EnumNamesBuiltinOptions() { "TopKV2Options", "SplitOptions", "LogSoftmaxOptions", + "CastOptions", nullptr }; return names; @@ -653,6 +662,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_LogSoftmaxOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -972,6 +985,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_LogSoftmaxOptions ? reinterpret_cast(value) : nullptr; } + CastOptionsT *AsCastOptions() { + return type == BuiltinOptions_CastOptions ? + reinterpret_cast(value) : nullptr; + } + const CastOptionsT *AsCastOptions() const { + return type == BuiltinOptions_CastOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3635,6 +3656,46 @@ inline flatbuffers::Offset CreateLogSoftmaxOptions( flatbuffers::Offset CreateLogSoftmaxOptions(flatbuffers::FlatBufferBuilder &_fbb, const LogSoftmaxOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct CastOptionsT : public flatbuffers::NativeTable { + typedef CastOptions TableType; + CastOptionsT() { + } +}; + +struct CastOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef CastOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + CastOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct CastOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit CastOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + CastOptionsBuilder &operator=(const CastOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateCastOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + CastOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode TableType; BuiltinOperator builtin_code; @@ -3860,6 +3921,9 @@ struct Operator 
FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const LogSoftmaxOptions *builtin_options_as_LogSoftmaxOptions() const { return builtin_options_type() == BuiltinOptions_LogSoftmaxOptions ? static_cast(builtin_options()) : nullptr; } + const CastOptions *builtin_options_as_CastOptions() const { + return builtin_options_type() == BuiltinOptions_CastOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -4030,6 +4094,10 @@ template<> inline const LogSoftmaxOptions *Operator::builtin_options_as inline const CastOptions *Operator::builtin_options_as() const { + return builtin_options_as_CastOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -5512,6 +5580,29 @@ inline flatbuffers::Offset CreateLogSoftmaxOptions(flatbuffer _fbb); } +inline CastOptionsT *CastOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new CastOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void CastOptions::UnPackTo(CastOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset CastOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateCastOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const CastOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateCastOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new 
OperatorCodeT(); UnPackTo(_o, _resolver); @@ -5836,6 +5927,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -5998,6 +6093,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -6148,6 +6247,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateLogSoftmaxOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(value); + return CreateCastOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -6298,6 +6401,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new LogSoftmaxOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_CastOptions: { + value = new CastOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -6485,6 +6592,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_CastOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; diff --git a/tensorflow/contrib/lite/toco/tflite/types.cc b/tensorflow/contrib/lite/toco/tflite/types.cc index b4c2851502..0afd2f3df5 100644 --- a/tensorflow/contrib/lite/toco/tflite/types.cc +++ b/tensorflow/contrib/lite/toco/tflite/types.cc @@ -90,6 +90,8 @@ flatbuffers::Offset> DataBuffer::Serialize( return CopyBuffer(array, builder); case ArrayDataType::kInt32: return CopyBuffer(array, builder); + case ArrayDataType::kInt64: + return 
CopyBuffer(array, builder); case ArrayDataType::kString: return CopyBuffer(array, builder); case ArrayDataType::kUint8: -- GitLab From 2cbfdbcaf6a062a5121f8b436125f2b161c1bf36 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Tue, 6 Mar 2018 10:00:43 -0800 Subject: [PATCH 457/884] Include spectral_ops_test_util in python deps. PiperOrigin-RevId: 188037439 --- tensorflow/python/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 4fdfacbfa8..8e07c3e7a1 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -91,6 +91,7 @@ py_library( ":sets", ":sparse_ops", ":spectral_ops", + ":spectral_ops_test_util", ":standard_ops", ":state_ops", ":string_ops", -- GitLab From a725a4c06fa60d6517792e1bd294c29fe34ab882 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 6 Mar 2018 10:20:33 -0800 Subject: [PATCH 458/884] Internal change. PiperOrigin-RevId: 188040866 --- tensorflow/python/keras/BUILD | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/keras/BUILD b/tensorflow/python/keras/BUILD index bd1aac5eae..8ace3e0968 100755 --- a/tensorflow/python/keras/BUILD +++ b/tensorflow/python/keras/BUILD @@ -9,6 +9,11 @@ package(default_visibility = ["//visibility:public"]) load("//tensorflow:tensorflow.bzl", "py_test") +config_setting( + name = "empty_condition", + values = {"define": "UNUSED=unused"}, +) + py_library( name = "keras", srcs = [ @@ -126,7 +131,11 @@ py_library( ], srcs_version = "PY2AND3", visibility = ["//visibility:public"], - deps = [ + deps = select({ + ":empty_condition": [], + "//conditions:default": [], + }) + [ + "@six_archive//:six", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", @@ -165,7 +174,6 @@ py_library( "//tensorflow/python/estimator", "//tensorflow/python/estimator:model_fn", "//tensorflow/python/saved_model", - "@six_archive//:six", ], ) -- GitLab From 
432650b580611e8a0da7bd8bbd69235bcaa1bd4c Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Tue, 6 Mar 2018 10:24:45 -0800 Subject: [PATCH 459/884] Add HloModuleGroupMetadata and HloModuleGroupUtil PiperOrigin-RevId: 188041608 --- tensorflow/compiler/xla/service/BUILD | 32 ++ .../xla/service/hlo_module_group_metadata.cc | 349 ++++++++++++++++++ .../xla/service/hlo_module_group_metadata.h | 230 ++++++++++++ .../xla/service/hlo_module_group_util.cc | 316 ++++++++++++++++ .../xla/service/hlo_module_group_util.h | 117 ++++++ 5 files changed, 1044 insertions(+) create mode 100644 tensorflow/compiler/xla/service/hlo_module_group_metadata.cc create mode 100644 tensorflow/compiler/xla/service/hlo_module_group_metadata.h create mode 100644 tensorflow/compiler/xla/service/hlo_module_group_util.cc create mode 100644 tensorflow/compiler/xla/service/hlo_module_group_util.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 3eecc4657f..611b1831ae 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1065,6 +1065,38 @@ tf_cc_test( ], ) +cc_library( + name = "hlo_module_group_metadata", + srcs = ["hlo_module_group_metadata.cc"], + hdrs = ["hlo_module_group_metadata.h"], + deps = [ + ":hlo", + "//tensorflow/compiler/xla:shape_util", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + ], +) + +cc_library( + name = "hlo_module_group_util", + srcs = ["hlo_module_group_util.cc"], + hdrs = ["hlo_module_group_util.h"], + deps = [ + ":hlo", + ":hlo_module_group_metadata", + ":hlo_reachability", + "//tensorflow/compiler/xla:status", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla:types", + "//tensorflow/compiler/xla:util", + "//tensorflow/core:lib", + ], +) + cc_library( name = "hlo_scheduling", srcs 
= ["hlo_scheduling.cc"], diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc new file mode 100644 index 0000000000..eed0112f62 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.cc @@ -0,0 +1,349 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_module_group_metadata.h" + +#include +#include + +#include "tensorflow/compiler/xla/ptr_util.h" +#include "tensorflow/compiler/xla/shape_util.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +string HloModuleGroupMetadata::TrackedInstruction::ToString() const { + string repr = + (instruction_ != nullptr) ? 
instruction_->ToShortString() : "NULL"; + switch (kind_) { + case ComputationKind::kInvalid: + repr += ":INVALID"; + break; + case ComputationKind::kWhileCondition: + repr += ":WHILE_CONDITION"; + break; + case ComputationKind::kWhileBody: + repr += ":WHILE_BODY"; + break; + case ComputationKind::kConditionalTrue: + repr += ":CONDITIONAL_TRUE"; + break; + case ComputationKind::kConditionalFalse: + repr += ":CONDITIONAL_FALSE"; + break; + } + return repr; +} + +/* static */ StatusOr> +HloModuleGroupMetadata::Build(const std::vector& modules) { + auto metadata = absl::make_unique(modules); + TF_RETURN_IF_ERROR(metadata->Build()); + return std::move(metadata); +} + +Status HloModuleGroupMetadata::Build() { + TF_RETURN_IF_ERROR(RecordInstructions()); + TF_RETURN_IF_ERROR(VerifyChannelInstructions()); + + // Record all companion while instructions. + const auto visitor = [this](HloInstruction* hlo) -> Status { + // We only need to process if the instruction is within the computation + // of a companion instruction, like in the condition or body computation + // of a While. + const TrackedInstruction* tracked = GetTrackedInstruction(hlo->parent()); + if (tracked == nullptr) { + return Status::OK(); + } + // Add the parent computation of this channel instruction and its peer + // computation (both must be while computations) as companions. + if (IsChannelInstruction(hlo)) { + HloComputation* peer_computation = PeerComputation(hlo); + const TrackedInstruction* peer_tracked = + GetTrackedInstruction(peer_computation); + TF_RET_CHECK(peer_tracked != nullptr) + << "Peer instruction is not a possible companion"; + TF_RET_CHECK(*tracked == *peer_tracked) + << "Peer instruction does not match the computation kind"; + TF_RETURN_IF_ERROR( + AddCompanion(tracked->instruction(), peer_tracked->instruction())); + } + + // Add the parents of companion instructions (they must be all of the same + // kind of instructions, opcode wise) as companions. 
+ if (IsCompanionInstruction(hlo)) { + for (HloInstruction* companion : Companions(hlo)) { + const TrackedInstruction* companion_tracked = + GetTrackedInstruction(companion->parent()); + TF_RET_CHECK(companion_tracked != nullptr); + TF_RET_CHECK(*tracked == *companion_tracked); + TF_RETURN_IF_ERROR(AddCompanion(tracked->instruction(), + companion_tracked->instruction())); + } + } + return Status::OK(); + }; + + // Visit the computations in postorder so that the companion information grows + // from inner computations to outer ones. + for (HloModule* module : modules_) { + for (HloComputation* computation : module->MakeComputationPostOrder()) { + TF_RETURN_IF_ERROR(computation->Accept(visitor)); + } + } + return Status::OK(); +} + +bool HloModuleGroupMetadata::IsChannelInstruction( + const HloInstruction* instruction) const { + switch (instruction->opcode()) { + case HloOpcode::kSend: + case HloOpcode::kRecv: + case HloOpcode::kSendDone: + case HloOpcode::kRecvDone: + return true; + default: + return false; + } +} + +bool HloModuleGroupMetadata::IsCompanionInstruction(HloInstruction* hlo) const { + return companion_set_index_.count(hlo) > 0; +} + +bool HloModuleGroupMetadata::InstructionCommunicates( + HloInstruction* hlo) const { + return IsChannelInstruction(hlo) || IsCompanionInstruction(hlo); +} + +const HloModuleGroupMetadata::Channel& HloModuleGroupMetadata::GetChannel( + int64 channel_id) const { + CHECK(channel_id_map_.find(channel_id) != channel_id_map_.end()); + return channels_[channel_id_map_.at(channel_id)]; +} + +HloComputation* HloModuleGroupMetadata::PeerComputation( + const HloInstruction* instruction) const { + CHECK(IsChannelInstruction(instruction)); + const Channel& channel = GetChannel(instruction->channel_id()); + switch (instruction->opcode()) { + case HloOpcode::kSend: + case HloOpcode::kSendDone: + return channel.recv->parent(); + case HloOpcode::kRecv: + case HloOpcode::kRecvDone: + return channel.send->parent(); + default: + LOG(FATAL) << 
"opcode not supported"; + } +} + +std::vector +HloModuleGroupMetadata::GetCompanionsPath(const HloInstruction* hlo) const { + std::vector path; + const HloComputation* parent = hlo->parent(); + const TrackedInstruction* companion; + while ((companion = GetTrackedInstruction(parent)) != nullptr) { + parent = companion->instruction()->parent(); + path.push_back(*companion); + } + return path; +} + +bool HloModuleGroupMetadata::CheckCompanionPathsCompatibility( + const std::vector& path0, + const std::vector& path1) const { + if (path0.size() != path1.size()) { + VLOG(5) << "Companion path size do not match: " << path0.size() + << " != " << path1.size(); + return false; + } + for (int64 i = 0; i < path0.size(); ++i) { + if (path0[i] != path1[i]) { + VLOG(5) << "Companion instructions at path index " << i + << " do not have the same opcode: " << path0[i].ToString() + << " vs " << path1[i].ToString(); + return false; + } + } + return true; +} + +int64 HloModuleGroupMetadata::GetModuleId(const HloModule* module) const { + for (int64 i = 0; i < modules_.size(); ++i) { + if (modules_[i] == module) { + return i; + } + } + LOG(FATAL) << "unknown module"; +} + +Status HloModuleGroupMetadata::RecordInstructions() { + const auto visitor = [this](HloInstruction* hlo) -> Status { + if (hlo->opcode() == HloOpcode::kWhile) { + tracked_instructions_[hlo->while_condition()] = + TrackedInstruction(hlo, ComputationKind::kWhileCondition); + tracked_instructions_[hlo->while_body()] = + TrackedInstruction(hlo, ComputationKind::kWhileBody); + } else if (hlo->opcode() == HloOpcode::kConditional) { + tracked_instructions_[hlo->true_computation()] = + TrackedInstruction(hlo, ComputationKind::kConditionalTrue); + tracked_instructions_[hlo->false_computation()] = + TrackedInstruction(hlo, ComputationKind::kConditionalFalse); + } + if (!IsChannelInstruction(hlo)) { + return Status::OK(); + } + + // Add a new channel if needed. 
+ if (channel_id_map_.find(hlo->channel_id()) == channel_id_map_.end()) { + channels_.emplace_back(); + channels_.back().id = hlo->channel_id(); + channel_id_map_[hlo->channel_id()] = channels_.size() - 1; + } + Channel& channel = channels_[channel_id_map_[hlo->channel_id()]]; + + if (hlo->opcode() == HloOpcode::kSend) { + TF_RET_CHECK(channel.send == nullptr) + << "channel id " << hlo->channel_id() + << " is used by multiple send instructions"; + channel.send = hlo; + } + if (hlo->opcode() == HloOpcode::kRecv) { + TF_RET_CHECK(channel.recv == nullptr) + << "channel id " << hlo->channel_id() + << " is used by multiple recv instructions"; + channel.recv = hlo; + } + if (hlo->opcode() == HloOpcode::kSendDone) { + TF_RET_CHECK(channel.send_done == nullptr) + << "channel id " << hlo->channel_id() + << " is used by multiple send-done instructions"; + channel.send_done = hlo; + } + if (hlo->opcode() == HloOpcode::kRecvDone) { + TF_RET_CHECK(channel.recv_done == nullptr) + << "channel id " << hlo->channel_id() + << " is used by multiple recv-done instructions"; + channel.recv_done = hlo; + } + return Status::OK(); + }; + + for (HloModule* module : modules_) { + for (auto* computation : module->computations()) { + TF_RETURN_IF_ERROR(computation->Accept(visitor)); + } + } + return Status::OK(); +} + +Status HloModuleGroupMetadata::AddCompanion(HloInstruction* instruction1, + HloInstruction* instruction2) { + TF_RET_CHECK(instruction1->opcode() == HloOpcode::kWhile || + instruction1->opcode() == HloOpcode::kConditional); + VLOG(2) << "adding as companions:" << instruction1->ToString() << " and " + << instruction2->ToString(); + + if (!ContainsKey(companion_set_index_, instruction1) && + !ContainsKey(companion_set_index_, instruction2)) { + companion_sets_.push_back( + absl::make_unique>()); + auto companion_set = companion_sets_.back().get(); + companion_set->insert(instruction1); + companion_set->insert(instruction2); + companion_set_index_[instruction1] = 
companion_sets_.size() - 1; + companion_set_index_[instruction2] = companion_sets_.size() - 1; + } else if (!ContainsKey(companion_set_index_, instruction1)) { + companion_sets_[companion_set_index_[instruction2]]->insert(instruction1); + companion_set_index_[instruction1] = companion_set_index_[instruction2]; + } else if (!ContainsKey(companion_set_index_, instruction2)) { + companion_sets_[companion_set_index_[instruction1]]->insert(instruction2); + companion_set_index_[instruction2] = companion_set_index_[instruction1]; + } else if (companion_set_index_[instruction1] != + companion_set_index_[instruction2]) { + companion_sets_[companion_set_index_[instruction1]]->insert( + Companions(instruction2).begin(), Companions(instruction2).end()); + int64 index_to_remove = companion_set_index_[instruction2]; + for (HloInstruction* hlo : Companions(instruction2)) { + companion_set_index_[hlo] = companion_set_index_[instruction1]; + } + companion_sets_.erase(companion_sets_.begin() + index_to_remove); + } + return Status::OK(); +} + +Status HloModuleGroupMetadata::VerifyChannelInstructions() { + for (const Channel& channel : channels_) { + if (channel.send == nullptr) { + return FailedPrecondition("missing send for id : %lld", channel.id); + } + if (channel.recv == nullptr) { + return FailedPrecondition("missing recv for id : %lld", channel.id); + } + if (channel.send_done == nullptr) { + return FailedPrecondition("missing send-done for id : %lld", channel.id); + } + if (channel.recv_done == nullptr) { + return FailedPrecondition("missing recv-done for id : %lld", channel.id); + } + } + + // Check if the shapes match for each channel. 
+ for (const Channel& channel : channels_) { + const Shape& send_shape = channel.send->operand(0)->shape(); + const Shape& recv_shape = channel.recv_done->shape(); + if (!ShapeUtil::Compatible(send_shape, recv_shape)) { + return FailedPrecondition("send/recv shapes do not match"); + } + } + + // Check if channel instructions are used only in allowed computations. + const auto allowed = [this](HloInstruction* hlo) { + HloComputation* computation = hlo->parent(); + const HloModule* module = computation->parent(); + if (module->entry_computation() == computation || + tracked_instructions_.count(computation) > 0) { + return true; + } + return false; + }; + for (const Channel& channel : channels_) { + if (!allowed(channel.send) || !allowed(channel.send_done) || + !allowed(channel.recv) || !allowed(channel.recv_done)) { + return FailedPrecondition("channel is used in disallowed computation"); + } + } + // Check if the nest levels match for each channel. + for (const Channel& channel : channels_) { + std::vector path = GetCompanionsPath(channel.send); + if (!CheckCompanionPathsCompatibility( + path, GetCompanionsPath(channel.send_done)) || + !CheckCompanionPathsCompatibility(path, + GetCompanionsPath(channel.recv)) || + !CheckCompanionPathsCompatibility( + path, GetCompanionsPath(channel.recv_done))) { + return FailedPrecondition( + "Nest companion paths do not match for channel %lld", channel.id); + } + } + return Status::OK(); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module_group_metadata.h b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h new file mode 100644 index 0000000000..15cdbdaade --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_module_group_metadata.h @@ -0,0 +1,230 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_METADATA_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_METADATA_H_ + +#include +#include +#include +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module.h" +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/flatmap.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +// Class for bookkeeping the information on the given modules, in particular on +// the interaction between computations. +// +// Companion instructions are one of the information collected as we build the +// metadata. For example, for each While instruction, companion instructions +// refer to a set of While instructions in other computations that communicate +// with each other. +// In the example below with 3 modules, {While_0, While_2, While_5}, {While_1, +// While_4}, {While_3, While_6} are companion sets. +// +// +// While_0() { While_2() { While_5() { +// While_1() { Send(0) } While_3() { Send(1) } While_6() { Recv(1) } +// } While_4() { Recv(0) } +// } +// +// Companion instructions are used to detect cycles in the graph and also for +// global scheduling. 
+class HloModuleGroupMetadata { + public: + // The kind of companion computation a given instruction can be within. + enum class ComputationKind { + kInvalid, + kWhileCondition, + kWhileBody, + kConditionalTrue, + kConditionalFalse, + }; + + // Tracks the instruction mapped to a given computation, and the computation + // kind. + // For example, a body computation of a while instruction, will generate a + // TrackedInstruction with instruction being the while instruction, and + // kind being ComputationKind::kWhileBody. + class TrackedInstruction { + public: + TrackedInstruction() = default; + TrackedInstruction(HloInstruction* instruction, ComputationKind kind) + : instruction_(instruction), kind_(kind) {} + + bool operator==(const TrackedInstruction& rhs) const { + return instruction_->opcode() == rhs.instruction_->opcode() && + kind_ == rhs.kind_; + } + bool operator!=(const TrackedInstruction& rhs) const { + return !operator==(rhs); + } + + HloInstruction* instruction() const { return instruction_; } + + string ToString() const; + + private: + HloInstruction* instruction_ = nullptr; + ComputationKind kind_ = ComputationKind::kInvalid; + }; + + // Represents a channel and the 4 instructions that form the channel. + struct Channel { + int64 id = -1; + HloInstruction* send = nullptr; + HloInstruction* recv = nullptr; + HloInstruction* send_done = nullptr; + HloInstruction* recv_done = nullptr; + }; + + explicit HloModuleGroupMetadata(const std::vector& modules) + : modules_(modules) {} + + ~HloModuleGroupMetadata() = default; + + // Build and return the metadata for the given modules. + static StatusOr> Build( + const std::vector& modules); + + // Returns true if the instruction is one of the 4 channel instructions (Send, + // Recv, SendDone, RecvDone). + bool IsChannelInstruction(const HloInstruction* instruction) const; + + // Returns true if the instruction is a companion instruction. See the class + // comment above on companion instructions. 
+ bool IsCompanionInstruction(HloInstruction* hlo) const; + + // Returns true if the instruction is either a channel instruction or a + // companion instruction. + bool InstructionCommunicates(HloInstruction* hlo) const; + + // Returns the Channel instance for the given channel id. + const Channel& GetChannel(int64 channel_id) const; + + // Returns the computation that contains the peer channel instructions for + // the given instruction. + // + // Precondition: IsChannelInstruction(instruction) is true. + HloComputation* PeerComputation(const HloInstruction* instruction) const; + + // Returns the path of the nested companion instructions, in terms of HLO + // instructions. The path goes from inner to outer companions. + // The returned path does not include the input hlo instruction, in case it + // is a companion instruction. + std::vector GetCompanionsPath( + const HloInstruction* hlo) const; + + // Checks whether two companion paths (as returned by the GetCompanionsPath() + // API) are compatible. The two paths are compatible if the sequence of + // opcodes, and the companion kinds, of the two paths match. + bool CheckCompanionPathsCompatibility( + const std::vector& path0, + const std::vector& path1) const; + + // Returns the unique integer for each module. The returned id is the index of + // the module in the module vector. + int64 GetModuleId(const HloModule* module) const; + + // Returns the companion instructions for the given instruction. + // + // Precondition: IsCompanionInstruction(instruction) is true. + const std::unordered_set& Companions( + HloInstruction* instruction) const { + CHECK_EQ(companion_set_index_.count(instruction), 1); + return companion_set(companion_set_index_.at(instruction)); + } + + // Returns the companion set at the given index.
+ const std::unordered_set& companion_set(int64 index) const { + CHECK_LT(index, companion_sets_.size()); + return *companion_sets_[index]; + } + + // Returns the companion set index of the given instruction. + int64 companion_set_index(HloInstruction* instruction) const { + return companion_set_index_.at(instruction); + } + + // Returns the list of all companion sets in the HLO module group. + const std::vector>>& + companion_sets() const { + return companion_sets_; + } + + private: + Status Build(); + + // Records all channel instructions and While/Conditional instructions. + Status RecordInstructions(); + + // Verifies the given HloModules are well-formed and follow the specification, + // in particular with respect to using channel instructions. + // + // * Each channel has all 4 instructions (Send, Recv, SendDone, RecvDone). + // * The shape of channel instructions match. + // * The nest level of channel instructions match. + // * Channel instructions are used in allowed computations; i.e., in the + // entry computation of the module, While condition/body, or Conditional branches. + // + // TODO(b/62064342): Currently, HloModuleGroupScheduler checks if there is a + // cycle in the graph, but it would be good to verify here. + Status VerifyChannelInstructions(); + + // Adds metadata that the given two instructions are companions. + Status AddCompanion(HloInstruction* instruction1, + HloInstruction* instruction2); + + // Retrieves a pointer to the stored TrackedInstruction associated with a + // tracked computation, or nullptr in case such computation is not tracked. + const TrackedInstruction* GetTrackedInstruction( + const HloComputation* computation) const { + auto it = tracked_instructions_.find(computation); + return it != tracked_instructions_.end() ? &it->second : nullptr; + } + + // List of all companion instruction sets in the module. + std::vector>> + companion_sets_; + + // Map from each companion instruction to the index into companion_sets_.
+ tensorflow::gtl::FlatMap companion_set_index_; + + // Map from computation to the instruction using it (a kWhile, kConditional). + tensorflow::gtl::FlatMap + tracked_instructions_; + + // All channels in the module. + std::vector channels_; + + // Map from channel ids to the index in channels_. + tensorflow::gtl::FlatMap channel_id_map_; + + // The modules that this metadata was built from. + const std::vector& modules_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_METADATA_H_ diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.cc b/tensorflow/compiler/xla/service/hlo_module_group_util.cc new file mode 100644 index 0000000000..289c96b0a7 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.cc @@ -0,0 +1,316 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_module_group_util.h" + +#include +#include +#include +#include +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_opcode.h" +#include "tensorflow/compiler/xla/service/hlo_reachability.h" +#include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/core/errors.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/types.h" + +namespace xla { + +std::vector HloModuleGroupUtil::GlobalPredecessors( + HloInstruction* instruction) { + std::vector predecessors; + + // Adds to the unique predecessors list and also add companion instructions + // if the given predecessor has those. + auto add_unique_predecessor = [&](HloInstruction* predecessor) { + if (std::find(predecessors.begin(), predecessors.end(), predecessor) != + predecessors.end()) { + return; + } + if (!metadata_.IsCompanionInstruction(predecessor)) { + predecessors.push_back(predecessor); + return; + } + for (HloInstruction* companion : metadata_.Companions(predecessor)) { + predecessors.push_back(companion); + } + }; + + // If the given instruction is a companion instruction, we need to find the + // predecessors of all of its companion instructions. 
+ std::vector instruction_group; + if (metadata_.IsCompanionInstruction(instruction)) { + for (HloInstruction* companion : metadata_.Companions(instruction)) { + instruction_group.push_back(companion); + } + } else { + instruction_group.push_back(instruction); + } + + for (HloInstruction* hlo : instruction_group) { + for (HloInstruction* operand : hlo->operands()) { + add_unique_predecessor(operand); + } + for (HloInstruction* control_predecessor : hlo->control_predecessors()) { + add_unique_predecessor(control_predecessor); + } + } + if (instruction->opcode() == HloOpcode::kRecvDone) { + // Send is a remote predecessor of RecvDone. + HloInstruction* send = metadata_.GetChannel(instruction->channel_id()).send; + add_unique_predecessor(send); + } + if (instruction->opcode() == HloOpcode::kSend) { + // Recv is a remote predecessor of Send. + HloInstruction* recv_done = + metadata_.GetChannel(instruction->channel_id()).recv_done; + CHECK(recv_done->opcode() == HloOpcode::kRecvDone); + CHECK_EQ(recv_done->operand_count(), 1); + HloInstruction* recv = recv_done->mutable_operand(0); + add_unique_predecessor(recv); + } + return predecessors; +} + +std::vector HloModuleGroupUtil::GlobalSuccessors( + HloInstruction* instruction) { + std::vector successors; + + // Adds to the unique successors list and also add companion instructions + // if the given successor has those. + auto add_unique_successor = [&](HloInstruction* successor) { + if (std::find(successors.begin(), successors.end(), successor) != + successors.end()) { + return; + } + if (!metadata_.IsCompanionInstruction(successor)) { + successors.push_back(successor); + return; + } + for (HloInstruction* companion : metadata_.Companions(successor)) { + successors.push_back(companion); + } + }; + + // If the given instruction is a companion instruction, we need to find the + // successors of all of its companion instructions. 
+ std::vector instruction_group; + if (metadata_.IsCompanionInstruction(instruction)) { + for (HloInstruction* companion : metadata_.Companions(instruction)) { + instruction_group.push_back(companion); + } + } else { + instruction_group.push_back(instruction); + } + + for (HloInstruction* hlo : instruction_group) { + for (HloInstruction* user : hlo->users()) { + add_unique_successor(user); + } + for (HloInstruction* control_successor : hlo->control_successors()) { + add_unique_successor(control_successor); + } + } + if (instruction->opcode() == HloOpcode::kRecv) { + // Send is a remote successor of Recv. + const HloInstruction* recv_done = instruction->users().front(); + CHECK(recv_done->opcode() == HloOpcode::kRecvDone); + HloInstruction* send = metadata_.GetChannel(instruction->channel_id()).send; + add_unique_successor(send); + } + if (instruction->opcode() == HloOpcode::kSend) { + // RecvDone is a remote successor of Send. + HloInstruction* recv_done = + metadata_.GetChannel(instruction->channel_id()).recv_done; + add_unique_successor(recv_done); + } + return successors; +} + +std::vector HloModuleGroupUtil::RootInstructions( + tensorflow::gtl::ArraySlice computations) { + std::vector roots; + for (HloComputation* computation : computations) { + for (HloInstruction* instruction : computation->instructions()) { + if (GlobalSuccessors(instruction).empty()) { + roots.push_back(instruction); + } + } + } + return roots; +} + +Status HloModuleGroupUtil::VisitTopologicalOrder( + VisitStates* visit_state, const VisitFunction& visit_function, + HloInstruction* root) { + // Stack of HLO instructions visited in DFS order. + std::stack stack; + stack.push(root); + + while (!stack.empty()) { + HloInstruction* hlo = stack.top(); + + // Find the instruction group of the currently visited instruction. 
The + // instruction group represents all companion instructions of the + // current instruction, and are considered to be a single entity for the + // purpose of the traversal (i.e., they must always be in the same visit + // state). + std::vector instruction_group; + if (metadata_.IsCompanionInstruction(hlo)) { + for (HloInstruction* companion : metadata_.Companions(hlo)) { + instruction_group.push_back(companion); + } + } else { + instruction_group.push_back(hlo); + } + + if ((*visit_state)[hlo] == VisitState::kVisited) { + // All instructions in the group must be in the same state. + for (HloInstruction* instruction : instruction_group) { + TF_RET_CHECK((*visit_state)[instruction] == VisitState::kVisited); + } + stack.pop(); + continue; + } + + if ((*visit_state)[hlo] == VisitState::kVisiting) { + TF_RETURN_IF_ERROR(visit_function(hlo, instruction_group)); + + // Set the visit state of all instructions in the group to kVisited. + for (HloInstruction* instruction : instruction_group) { + TF_RET_CHECK((*visit_state)[instruction] == VisitState::kVisiting); + (*visit_state)[instruction] = VisitState::kVisited; + } + stack.pop(); + continue; + } + + // Set the visit state of all instructions in the group to kVisiting. + for (HloInstruction* instruction : instruction_group) { + TF_RET_CHECK((*visit_state)[instruction] == VisitState::kNotVisited) + << instruction->ToString(); + (*visit_state)[instruction] = VisitState::kVisiting; + } + + // For each instruction in the group, visit its predecessors (operands, + // control predecessors and remote predecessors). + for (HloInstruction* instruction : instruction_group) { + for (HloInstruction* predecessor : GlobalPredecessors(instruction)) { + // Visiting a node that is already being visited implies that there is + // a cycle. Generate an error with the list of instructions in the + // cycle. 
+ if ((*visit_state)[predecessor] == VisitState::kVisiting) { + string cyclic_instructions; + for (const auto& state : *visit_state) { + if (state.second == VisitState::kVisiting) { + tensorflow::strings::StrAppend(&cyclic_instructions, + state.first->ToString(), "\n"); + } + } + // TODO(b/64305524): Improve the error message to print out the + // instructions in a deterministic order that forms the cycle. + return FailedPrecondition( + "Cross-computation cycle detected via communicating nodes. The " + "cycle contains the node %s. The cycle is found among the " + "following nodes. Note that the order of the nodes is arbitrary " + "and that the list may include nodes that are not part of the " + "cycle.\n%s", + predecessor->ToString().c_str(), cyclic_instructions.c_str()); + } + stack.push(predecessor); + } + } + } + + return Status::OK(); +} + +Status HloModuleGroupUtil::VerifyComputations( + tensorflow::gtl::ArraySlice computations) { + auto visit_function = + [&](HloInstruction* instruction, + const std::vector& instruction_group) { + return Status::OK(); + }; + int64 instructions_count = 0; + VisitStates visit_states; + for (HloComputation* computation : computations) { + // Visit all instructions, and not just from the root instruction of the + // computation. This allows us to detect dead cycles (i.e., cycles that + // are not reachable from the root) or to enforce an order for the + // communication instructions that are not reachable from any roots. + for (HloInstruction* instruction : computation->instructions()) { + TF_RETURN_IF_ERROR( + VisitTopologicalOrder(&visit_states, visit_function, instruction)); + } + instructions_count += computation->instruction_count(); + } + + // Check if all instructions are visited and are in the visited state. 
+ TF_RET_CHECK(visit_states.size() == instructions_count); + for (auto& state : visit_states) { + TF_RET_CHECK(state.second == VisitState::kVisited); + } + + return Status::OK(); +} + +StatusOr> +HloModuleGroupUtil::ComputeReachability( + tensorflow::gtl::ArraySlice computations) { + std::list post_order; + auto visit_function = + [&](HloInstruction* instruction, + const std::vector& instruction_group) { + post_order.insert(post_order.end(), instruction_group.begin(), + instruction_group.end()); + return Status::OK(); + }; + HloModuleGroupUtil::VisitStates visit_states; + for (HloInstruction* root : RootInstructions(computations)) { + TF_RETURN_IF_ERROR( + VisitTopologicalOrder(&visit_states, visit_function, root)); + } + auto reachability = absl::make_unique(post_order); + for (HloInstruction* hlo : post_order) { + reachability->SetReachabilityToUnion(GlobalPredecessors(hlo), hlo); + } + return std::move(reachability); +} + +void HloModuleGroupUtil::UpdateReachabilityThroughInstruction( + HloInstruction* instruction, HloReachabilityMap* reachability_map) { + std::queue worklist; + worklist.push(instruction); + + while (!worklist.empty()) { + HloInstruction* item = worklist.front(); + worklist.pop(); + if (reachability_map->SetReachabilityToUnion(GlobalPredecessors(item), + item)) { + for (HloInstruction* successor : GlobalSuccessors(item)) { + worklist.push(successor); + } + } + } +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_module_group_util.h b/tensorflow/compiler/xla/service/hlo_module_group_util.h new file mode 100644 index 0000000000..c25ca1aff5 --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_module_group_util.h @@ -0,0 +1,117 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_UTIL_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_UTIL_H_ + +#include +#include +#include + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/service/hlo_module_group_metadata.h" +#include "tensorflow/compiler/xla/service/hlo_reachability.h" +#include "tensorflow/compiler/xla/status.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/lib/gtl/flatmap.h" + +namespace xla { + +// Collection of utilities for handling HloModuleGroups. +class HloModuleGroupUtil { + public: + explicit HloModuleGroupUtil(const HloModuleGroupMetadata& metadata) + : metadata_(metadata) {} + + // Returns all unique predecessors of the instruction. This includes: + // * predecessors in the same computation: operands and control predecessors + // * Recv is a predecessor of Send + // * Send is a predecessor of RecvDone + // * predecessors of companions (if the instruction is a companion while) + // * predecessors' companions (for any predecessor that is a companion while) + std::vector GlobalPredecessors(HloInstruction* instruction); + + // Returns all unique successors of the instruction. 
This includes: + // * successors in the same computation: users and control successors + // * Send is a successor of Recv + // * RecvDone is a successor of Send + // * successors of companions (if the instruction is a companion while) + // * successors' companions (for any successor that is a companion while) + std::vector GlobalSuccessors(HloInstruction* instruction); + + // Returns the root instructions of the computations. + std::vector RootInstructions( + tensorflow::gtl::ArraySlice computations); + + // Visit state of each instruction during DFS traversal. + enum VisitState { + kNotVisited = 0, + kVisiting, + kVisited, + }; + + // Function called on each instruction group during the DFS traversal. See the + // comment for VisitTopologicalOrder(). + using VisitFunction = std::function& instruction_group)>; + + // Given the hlo instruction as the root, recursively visits all its + // predecessor instructions in DFS order to visit nodes in topological order. + // + // Note that the DFS traversal does not only visit nodes in the same + // computation (parent of the root instruction), but also visits nodes in + // different computations connected via communication instructions. During the + // traversal, companion While instructions (see the class comment in + // HloModuleGroupMetadata) are treated as a single instruction (called + // instruction group, which contains only a single instruction if the visiting + // node is not a companion while) -- visiting one of the instructions in the + // group effectively visits all other instructions in the group, and then all + // predecessor instructions of the group are visited. + // + // * visit_state: map from each instruction to its visit state. + // * visit_function: function called on each instruction group. + // * root: the root instruction of the traversal.
+ using VisitStates = tensorflow::gtl::FlatMap; + Status VisitTopologicalOrder(VisitStates* visit_state, + const VisitFunction& visit_function, + HloInstruction* root); + + // Verifies that the computations are well-formed (e.g., no cycles). + Status VerifyComputations( + tensorflow::gtl::ArraySlice computations); + + // Below Reachability utils resemble those in HloComputation, except that + // they can handle instructions across multiple computations. + // + // Creates the reachability map for the instructions in the computations. + StatusOr> ComputeReachability( + tensorflow::gtl::ArraySlice computations); + + // Updates the reachability of the given instruction, taking the global + // predecessors and successors into account. + void UpdateReachabilityThroughInstruction( + HloInstruction* instruction, HloReachabilityMap* reachability_map); + + private: + const HloModuleGroupMetadata& metadata_; +}; + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_MODULE_GROUP_UTIL_H_ -- GitLab From 155743816c0d94ca44186147a9ad1c26f93985a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 10:29:35 -0800 Subject: [PATCH 460/884] Checks that sequence_length is equal among sequence feature columns.
PiperOrigin-RevId: 188042426 --- .../feature_column/sequence_feature_column.py | 17 +++++++++-- .../sequence_feature_column_test.py | 30 +++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index ba17b568b6..b25d7e513b 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -127,8 +127,9 @@ def sequence_input_layer( shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) sequence_lengths.append(sequence_length) fc._verify_static_batch_size_equality(output_tensors, ordered_columns) - # TODO(b/73160931): Verify sequence_length equality. - return array_ops.concat(output_tensors, -1), sequence_lengths[0] + fc._verify_static_batch_size_equality(sequence_lengths, ordered_columns) + sequence_length = _assert_all_equal_and_return(sequence_lengths) + return array_ops.concat(output_tensors, -1), sequence_length # TODO(b/73160931): Add remaining categorical columns. 
@@ -312,6 +313,18 @@ def sequence_numeric_column( dtype=dtype) +def _assert_all_equal_and_return(tensors, name=None): + """Asserts that all tensors are equal and returns the first one.""" + with ops.name_scope(name, 'assert_all_equal', values=tensors): + if len(tensors) == 1: + return tensors[0] + assert_equal_ops = [] + for t in tensors[1:]: + assert_equal_ops.append(check_ops.assert_equal(tensors[0], t)) + with ops.control_dependencies(assert_equal_ops): + return array_ops.identity(tensors[0]) + + class _SequenceDenseColumn(fc._FeatureColumn): """Represents dense sequence data.""" diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 39caa602d9..5c1e76fc62 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -198,6 +198,36 @@ class SequenceInputLayerTest(test.TestCase): self.assertAllEqual( expected_sequence_length, sequence_length.eval(session=sess)) + def test_sequence_length_not_equal(self): + """Tests that an error is raised when sequence lengths are not equal.""" + # Input a with sequence_length = [2, 1] + sparse_input_a = sparse_tensor.SparseTensorValue( + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + # Input b with sequence_length = [1, 1] + sparse_input_b = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0)), + values=(1., 10.), + dense_shape=(2, 2)) + numeric_column_a = sfc.sequence_numeric_column('aaa') + numeric_column_b = sfc.sequence_numeric_column('bbb') + + _, sequence_length = sfc.sequence_input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + }, + feature_columns=[numeric_column_a, numeric_column_b]) + + with monitored_session.MonitoredSession() as sess: + with self.assertRaisesRegexp( + 
errors.InvalidArgumentError, + r'\[Condition x == y did not hold element-wise:\] ' + r'\[x \(sequence_input_layer/aaa/sequence_length:0\) = \] \[2 1\] ' + r'\[y \(sequence_input_layer/bbb/sequence_length:0\) = \] \[1 1\]'): + sess.run(sequence_length) + def _assert_sparse_tensor_value(test_case, expected, actual): test_case.assertEqual(np.int64, np.array(actual.indices).dtype) -- GitLab From 4b692b11f0988bbe0368722eba9dddde1c12af42 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Tue, 6 Mar 2018 10:31:07 -0800 Subject: [PATCH 461/884] Fixed the bug that predict input_fn requires the labels. PiperOrigin-RevId: 188042708 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 1b2eda1caa..a7991eb1f4 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -2308,6 +2308,11 @@ class _InputsWithStoppingSignals(_Inputs): """ def _map_fn(*args): + """The map fn to insert signals.""" + if len(args) == 1: + # Unpack the single Tensor/dict argument as features. This is required + # when the input_fn returns no labels. + args = args[0] features, labels = _Inputs._parse_inputs(args) new_input_dict = {} new_input_dict['features'] = features -- GitLab From 00bbe6aaa84089ade597b3807f692923f8865a16 Mon Sep 17 00:00:00 2001 From: Anjali Sridhar Date: Tue, 6 Mar 2018 10:35:56 -0800 Subject: [PATCH 462/884] Add mask keyword to ensure that we don't pass masks in place of training. 
PiperOrigin-RevId: 188043473 --- tensorflow/python/keras/_impl/keras/engine/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 0fc05420fe..93d97d6474 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -396,7 +396,7 @@ class Network(base_layer.Layer): if cache_key in self._output_mask_cache: return self._output_mask_cache[cache_key] else: - _, output_masks = self._run_internal_graph(inputs, masks) + _, output_masks = self._run_internal_graph(inputs, mask=masks) return output_masks @property -- GitLab From 5bc7653102ea091fe2e74eace888a9a5d6fc8127 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 10:52:26 -0800 Subject: [PATCH 463/884] Remove accidental pdb import PiperOrigin-RevId: 188046246 --- .../distributions/python/ops/bijectors/batch_normalization.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py index e47a3e01f5..be72ff3081 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/batch_normalization.py @@ -190,7 +190,6 @@ class BatchNormalization(bijector.Bijector): # Broadcasting only necessary for single-axis batch norm where the axis is # not the last dimension broadcast_shape = [1] * ndims - # import pdb; pdb.set_trace() broadcast_shape[self.batchnorm.axis[0]] = ( input_shape[self.batchnorm.axis[0]]) def _broadcast(v): -- GitLab From cbc4134543784cf9b794aefaef6599dbadaa200e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 11:00:46 -0800 Subject: [PATCH 464/884] Add a helper function to copy annotations between nodes. 
PiperOrigin-RevId: 188047677 --- tensorflow/contrib/py2tf/pyct/anno.py | 5 +++++ tensorflow/contrib/py2tf/pyct/anno_test.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/anno.py b/tensorflow/contrib/py2tf/pyct/anno.py index 7a0528b6d0..cc4a7edf02 100644 --- a/tensorflow/contrib/py2tf/pyct/anno.py +++ b/tensorflow/contrib/py2tf/pyct/anno.py @@ -70,3 +70,8 @@ def delanno(node, key, field_name='___pyct_anno'): if not annotations: delattr(node, field_name) node._fields = tuple(f for f in node._fields if f != field_name) + + +def copyanno(from_node, to_node, key, field_name='___pyct_anno'): + if hasanno(from_node, key, field_name): + setanno(to_node, key, getanno(from_node, key, field_name), field_name) diff --git a/tensorflow/contrib/py2tf/pyct/anno_test.py b/tensorflow/contrib/py2tf/pyct/anno_test.py index ff40bfe1f5..6c29918fdf 100644 --- a/tensorflow/contrib/py2tf/pyct/anno_test.py +++ b/tensorflow/contrib/py2tf/pyct/anno_test.py @@ -24,6 +24,9 @@ from tensorflow.contrib.py2tf.pyct import anno from tensorflow.python.platform import test +# TODO(mdan): Consider strong types instead of primitives. 
+ + class AnnoTest(test.TestCase): def test_basic(self): @@ -42,6 +45,17 @@ class AnnoTest(test.TestCase): with self.assertRaises(AttributeError): anno.getanno(node, 'foo') + def test_copyanno(self): + node_1 = ast.Name() + anno.setanno(node_1, 'foo', 3) + + node_2 = ast.Name() + anno.copyanno(node_1, node_2, 'foo') + anno.copyanno(node_1, node_2, 'bar') + + self.assertTrue(anno.hasanno(node_2, 'foo')) + self.assertFalse(anno.hasanno(node_2, 'bar')) + if __name__ == '__main__': test.main() -- GitLab From 131f13afafd59278d4441f61f5f6e231b48f077c Mon Sep 17 00:00:00 2001 From: Christopher Suter Date: Tue, 6 Mar 2018 11:21:20 -0800 Subject: [PATCH 465/884] Fix broken test (invalid string comparison in py3) PiperOrigin-RevId: 188051422 --- .../python/training/tpu_cluster_resolver_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py index b7d56fc122..48c3f6bb4f 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver_test.py @@ -362,10 +362,10 @@ class TPUClusterResolverTest(test.TestCase): self.assertTrue(tpu_cluster_resolver._inGke()) self.assertEqual( compat.as_bytes('grpc://10.120.27.5:8470'), - tpu_cluster_resolver._gkeMaster()) + compat.as_bytes(tpu_cluster_resolver._gkeMaster())) self.assertEqual( compat.as_bytes('grpc://10.120.27.5:8470'), - tpu_cluster_resolver.get_master()) + compat.as_bytes(tpu_cluster_resolver.get_master())) del os.environ['KUBE_GOOGLE_CLOUD_TPU_ENDPOINTS'] -- GitLab From c6feeafaabb09bdcda3e34009506c5dae596c5d9 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 6 Mar 2018 11:23:41 -0800 Subject: [PATCH 466/884] Sequence versions of remaining categorical columns PiperOrigin-RevId: 188051821 --- .../feature_column/sequence_feature_column.py | 138 +++++++++++++++- .../sequence_feature_column_test.py | 148 +++++++++++++++++- 2 files changed, 282 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py index b25d7e513b..f57557c1cc 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column.py @@ -132,7 +132,6 @@ def sequence_input_layer( return array_ops.concat(output_tensors, -1), sequence_length -# TODO(b/73160931): Add remaining categorical columns. def sequence_categorical_column_with_identity( key, num_buckets, default_value=None): """Returns a feature column that represents sequences of integers. @@ -143,7 +142,7 @@ def sequence_categorical_column_with_identity( watches = sequence_categorical_column_with_identity( 'watches', num_buckets=1000) watches_embedding = embedding_column(watches, dimension=10) - columns = [watches] + columns = [watches_embedding] features = tf.parse_example(..., features=make_parse_example_spec(columns)) input_layer, sequence_length = sequence_input_layer(features, columns) @@ -171,6 +170,141 @@ def sequence_categorical_column_with_identity( default_value=default_value)) +def sequence_categorical_column_with_hash_bucket( + key, hash_bucket_size, dtype=dtypes.string): + """A sequence of categorical terms where ids are set by hashing. 
+ + Example: + + ```python + tokens = sequence_categorical_column_with_hash_bucket( + 'tokens', hash_bucket_size=1000) + tokens_embedding = embedding_column(tokens, dimension=10) + columns = [tokens_embedding] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + key: A unique string identifying the input feature. + hash_bucket_size: An int > 1. The number of buckets. + dtype: The type of features. Only string and integer types are supported. + + Returns: + A `_SequenceCategoricalColumn`. + """ + return _SequenceCategoricalColumn( + fc.categorical_column_with_hash_bucket( + key=key, + hash_bucket_size=hash_bucket_size, + dtype=dtype)) + + +def sequence_categorical_column_with_vocabulary_file( + key, vocabulary_file, vocabulary_size=None, num_oov_buckets=0, + default_value=None, dtype=dtypes.string): + """A sequence of categorical terms where ids use a vocabulary file. + + Example: + + ```python + states = sequence_categorical_column_with_vocabulary_file( + key='states', vocabulary_file='/us/states.txt', vocabulary_size=50, + num_oov_buckets=5) + states_embedding = embedding_column(states, dimension=10) + columns = [states_embedding] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + key: A unique string identifying the input feature. + vocabulary_file: The vocabulary file name. + vocabulary_size: Number of the elements in the vocabulary. 
This must be no + greater than length of `vocabulary_file`, if less than length, later + values are ignored. If None, it is set to the length of `vocabulary_file`. + num_oov_buckets: Non-negative integer, the number of out-of-vocabulary + buckets. All out-of-vocabulary inputs will be assigned IDs in the range + `[vocabulary_size, vocabulary_size+num_oov_buckets)` based on a hash of + the input value. A positive `num_oov_buckets` can not be specified with + `default_value`. + default_value: The integer ID value to return for out-of-vocabulary feature + values, defaults to `-1`. This can not be specified with a positive + `num_oov_buckets`. + dtype: The type of features. Only string and integer types are supported. + + Returns: + A `_SequenceCategoricalColumn`. + """ + return _SequenceCategoricalColumn( + fc.categorical_column_with_vocabulary_file( + key=key, + vocabulary_file=vocabulary_file, + vocabulary_size=vocabulary_size, + num_oov_buckets=num_oov_buckets, + default_value=default_value, + dtype=dtype)) + + +def sequence_categorical_column_with_vocabulary_list( + key, vocabulary_list, dtype=None, default_value=-1, num_oov_buckets=0): + """A sequence of categorical terms where ids use an in-memory list. + + Example: + + ```python + colors = sequence_categorical_column_with_vocabulary_list( + key='colors', vocabulary_list=('R', 'G', 'B', 'Y'), + num_oov_buckets=2) + colors_embedding = embedding_column(colors, dimension=3) + columns = [colors_embedding] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Args: + key: A unique string identifying the input feature. + vocabulary_list: An ordered iterable defining the vocabulary. 
Each feature + is mapped to the index of its value (if present) in `vocabulary_list`. + Must be castable to `dtype`. + dtype: The type of features. Only string and integer types are supported. + If `None`, it will be inferred from `vocabulary_list`. + default_value: The integer ID value to return for out-of-vocabulary feature + values, defaults to `-1`. This can not be specified with a positive + `num_oov_buckets`. + num_oov_buckets: Non-negative integer, the number of out-of-vocabulary + buckets. All out-of-vocabulary inputs will be assigned IDs in the range + `[len(vocabulary_list), len(vocabulary_list)+num_oov_buckets)` based on a + hash of the input value. A positive `num_oov_buckets` can not be specified + with `default_value`. + + Returns: + A `_SequenceCategoricalColumn`. + """ + return _SequenceCategoricalColumn( + fc.categorical_column_with_vocabulary_list( + key=key, + vocabulary_list=vocabulary_list, + dtype=dtype, + default_value=default_value, + num_oov_buckets=num_oov_buckets)) + + # TODO(b/73160931): Merge with embedding_column def _sequence_embedding_column( categorical_column, dimension, initializer=None, ckpt_to_load_from=None, diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py index 5c1e76fc62..c077f03291 100644 --- a/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py +++ b/tensorflow/contrib/feature_column/python/feature_column/sequence_feature_column_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import os import numpy as np from tensorflow.contrib.feature_column.python.feature_column import sequence_feature_column as sfc @@ -230,13 +231,17 @@ class SequenceInputLayerTest(test.TestCase): def _assert_sparse_tensor_value(test_case, expected, actual): - test_case.assertEqual(np.int64, 
np.array(actual.indices).dtype) - test_case.assertAllEqual(expected.indices, actual.indices) + _assert_sparse_tensor_indices_shape(test_case, expected, actual) test_case.assertEqual( np.array(expected.values).dtype, np.array(actual.values).dtype) test_case.assertAllEqual(expected.values, actual.values) + +def _assert_sparse_tensor_indices_shape(test_case, expected, actual): + test_case.assertEqual(np.int64, np.array(actual.indices).dtype) + test_case.assertAllEqual(expected.indices, actual.indices) + test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype) test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) @@ -314,6 +319,145 @@ class SequenceCategoricalColumnWithIdentityTest(test.TestCase): expected_sequence_length, sequence_length.eval(session=sess)) +class SequenceCategoricalColumnWithHashBucketTest(test.TestCase): + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_hash_bucket( + 'aaa', hash_bucket_size=10) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('omar', 'stringer', 'marlo'), + dense_shape=(2, 2)) + + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + # Ignored to avoid hash dependence in test. 
+ values=np.array((0, 0, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_indices_shape( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_hash_bucket( + 'aaa', hash_bucket_size=10) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('omar', 'stringer', 'marlo'), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceCategoricalColumnWithVocabularyFileTest(test.TestCase): + + def _write_vocab(self, vocab_strings, file_name): + vocab_file = os.path.join(self.get_temp_dir(), file_name) + with open(vocab_file, 'w') as f: + f.write('\n'.join(vocab_strings)) + return vocab_file + + def setUp(self): + super(SequenceCategoricalColumnWithVocabularyFileTest, self).setUp() + + vocab_strings = ['omar', 'stringer', 'marlo'] + self._wire_vocabulary_file_name = self._write_vocab(vocab_strings, + 'wire_vocabulary.txt') + self._wire_vocabulary_size = 3 + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = 
column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_value( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_vocabulary_file( + key='aaa', + vocabulary_file=self._wire_vocabulary_file_name, + vocabulary_size=self._wire_vocabulary_size) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceCategoricalColumnWithVocabularyListTest(test.TestCase): + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=('omar', 'stringer', 'marlo')) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 'skywalker', 'omar'), + dense_shape=(2, 2)) + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((2, -1, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_value( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_vocabulary_list( + key='aaa', + vocabulary_list=('omar', 'stringer', 'marlo')) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=('marlo', 
'skywalker', 'omar'), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + class SequenceEmbeddingColumnTest(test.TestCase): def test_get_sequence_dense_tensor(self): -- GitLab From 429ce2a60b9faa3db204aed05ab4a9a3a1a6c725 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 6 Mar 2018 11:26:18 -0800 Subject: [PATCH 467/884] lib_ might get destroyed when there are 2 different graphs using the same FunctionBufferingResource. As a result, making a clone of lib_. Also, fixing the LookupOrCreate call in the handle op to run only once for initialization. PiperOrigin-RevId: 188052319 --- tensorflow/contrib/data/kernels/BUILD | 1 + .../data/kernels/prefetching_kernels.cc | 57 +++++++++++++------ .../data/python/ops/prefetching_ops.py | 4 +- tensorflow/core/BUILD | 7 +++ 4 files changed, 51 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/data/kernels/BUILD b/tensorflow/contrib/data/kernels/BUILD index 9bd6a42da2..c87da7dfaa 100644 --- a/tensorflow/contrib/data/kernels/BUILD +++ b/tensorflow/contrib/data/kernels/BUILD @@ -10,6 +10,7 @@ cc_library( name = "prefetching_kernels", srcs = ["prefetching_kernels.cc"], deps = [ + "//tensorflow/core:core_cpu_headers_lib", "//tensorflow/core:framework_headers_lib", "//third_party/eigen3", "@protobuf_archive//:protobuf_headers", diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc index d3df14bdd0..c0155e8d91 100644 --- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc +++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc @@ -14,6 +14,7 @@ limitations under the License. 
==============================================================================*/ #include +#include "tensorflow/core/common_runtime/process_function_library_runtime.h" #include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/resource_op_kernel.h" @@ -241,7 +242,7 @@ class FunctionBufferingResource : public ResourceBase { class FunctionBufferResourceHandleOp : public OpKernel { public: explicit FunctionBufferResourceHandleOp(OpKernelConstruction* ctx) - : OpKernel(ctx) { + : OpKernel(ctx), flib_def_(nullptr), pflr_(nullptr) { OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("buffer_size", &buffer_size_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("container", &container_)); @@ -249,6 +250,17 @@ class FunctionBufferResourceHandleOp : public OpKernel { OP_REQUIRES_OK(ctx, ctx->GetAttr("thread_pool_size", &thread_pool_size_)); } + ~FunctionBufferResourceHandleOp() override { + if (cinfo_.resource_is_private_to_kernel()) { + if (!cinfo_.resource_manager() + ->Delete(cinfo_.container(), + cinfo_.name()) + .ok()) { + // Do nothing; the resource can have been deleted by session resets. + } + } + } + void Compute(OpKernelContext* ctx) override { const Tensor* string_arg; OP_REQUIRES_OK(ctx, ctx->input("string_arg", &string_arg)); @@ -267,28 +279,39 @@ class FunctionBufferResourceHandleOp : public OpKernel { const string& source_device = ctx->device()->name(); - ContainerInfo cinfo; - OP_REQUIRES_OK(ctx, cinfo.Init(ctx->resource_manager(), def())); - // Create the resource. 
- FunctionBufferingResource* buffer; - OP_REQUIRES_OK( - ctx, ctx->resource_manager()->LookupOrCreate( - cinfo.container(), cinfo.name(), &buffer, - [lib, &source_device, &target_device, func_args, - this](FunctionBufferingResource** ptr) { - *ptr = new FunctionBufferingResource( - lib, func_, buffer_size_, source_device, target_device, - func_args, thread_pool_size_); - return Status::OK(); - })); - OP_REQUIRES_OK(ctx, buffer->Instantiate()); + mutex_lock l(mu_); + if (!initialized_) { + OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def())); + FunctionLibraryRuntime* clone_lib; + OP_REQUIRES_OK(ctx, lib->Clone(&flib_def_, &pflr_, &clone_lib)); + // Create the resource. + FunctionBufferingResource* buffer; + OP_REQUIRES_OK( + ctx, + ctx->resource_manager()->LookupOrCreate( + cinfo_.container(), cinfo_.name(), &buffer, + [clone_lib, &source_device, &target_device, func_args, + this](FunctionBufferingResource** ptr) { + *ptr = new FunctionBufferingResource( + clone_lib, func_, buffer_size_, source_device, + target_device, func_args, thread_pool_size_); + return Status::OK(); + })); + OP_REQUIRES_OK(ctx, buffer->Instantiate()); + initialized_ = true; + } OP_REQUIRES_OK(ctx, MakeResourceHandleToOutput( - ctx, 0, cinfo.container(), cinfo.name(), + ctx, 0, cinfo_.container(), cinfo_.name(), MakeTypeIndex())); } private: + mutex mu_; + ContainerInfo cinfo_ GUARDED_BY(mu_); + bool initialized_ GUARDED_BY(mu_) = false; + std::unique_ptr flib_def_; + std::unique_ptr pflr_; NameAttrList func_; int64 buffer_size_; string container_; diff --git a/tensorflow/contrib/data/python/ops/prefetching_ops.py b/tensorflow/contrib/data/python/ops/prefetching_ops.py index 96a9e9ed66..7059b358f3 100644 --- a/tensorflow/contrib/data/python/ops/prefetching_ops.py +++ b/tensorflow/contrib/data/python/ops/prefetching_ops.py @@ -25,12 +25,14 @@ from tensorflow.contrib.data.python.ops import gen_dataset_ops # method and provides a get_next() that calls the prefetch op. 
def function_buffering_resource(string_arg, target_device, - shared_name, f, buffer_size, thread_pool_size=1, container="", + shared_name=None, name=None): + if shared_name is None: + shared_name = "" return gen_dataset_ops.function_buffering_resource( string_arg=string_arg, target_device=target_device, diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index b7f84a4d27..619899ae95 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1874,6 +1874,13 @@ cc_header_only_library( ], ) +cc_header_only_library( + name = "core_cpu_headers_lib", + deps = [ + ":core_cpu_lib", + ], +) + tf_cuda_library( name = "framework_internal_impl", srcs = FRAMEWORK_INTERNAL_PRIVATE_HEADERS + [ -- GitLab From c8236883db3b53563b24d527aade12e60d5ed246 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Tue, 6 Mar 2018 11:55:08 -0800 Subject: [PATCH 468/884] Migrate MCMC diagnostics and Halton Sequence sampler into tensorflow_probability. PiperOrigin-RevId: 188057302 --- tensorflow/contrib/bayesflow/BUILD | 20 - tensorflow/contrib/bayesflow/__init__.py | 2 - .../kernel_tests/mcmc_diagnostics_test.py | 445 ------------------ .../bayesflow/python/ops/mcmc_diagnostics.py | 32 -- .../python/ops/mcmc_diagnostics_impl.py | 400 ---------------- 5 files changed, 899 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 7302c9119d..2a32ea6952 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -124,26 +124,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "mcmc_diagnostics_test", - size = "small", - srcs = ["python/kernel_tests/mcmc_diagnostics_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - 
"//tensorflow/python:spectral_ops_test_util", - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/python/ops/distributions", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_seed", - ], -) - cuda_py_test( name = "monte_carlo_test", size = "small", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index f2b7fb77a8..156a2ef8cf 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -25,7 +25,6 @@ from tensorflow.contrib.bayesflow.python.ops import custom_grad from tensorflow.contrib.bayesflow.python.ops import halton_sequence from tensorflow.contrib.bayesflow.python.ops import hmc from tensorflow.contrib.bayesflow.python.ops import layers -from tensorflow.contrib.bayesflow.python.ops import mcmc_diagnostics from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo from tensorflow.contrib.bayesflow.python.ops import optimizers @@ -41,7 +40,6 @@ _allowed_symbols = [ 'hmc', 'layers', 'metropolis_hastings', - 'mcmc_diagnostics', 'monte_carlo', 'optimizers', 'special_math', diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py deleted file mode 100644 index 52e36e135d..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/mcmc_diagnostics_test.py +++ /dev/null @@ -1,445 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for MCMC diagnostic utilities.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import mcmc_diagnostics_impl as mcmc_diagnostics -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import spectral_ops_test_util -from tensorflow.python.platform import test - -rng = np.random.RandomState(42) - - -class _EffectiveSampleSizeTest(object): - - @property - def use_static_shape(self): - raise NotImplementedError( - "Subclass failed to implement `use_static_shape`.") - - def _check_versus_expected_effective_sample_size(self, - x_, - expected_ess, - sess, - atol=1e-2, - rtol=1e-2, - filter_threshold=None, - filter_beyond_lag=None): - x = array_ops.placeholder_with_default( - input=x_, shape=x_.shape if self.use_static_shape else None) - ess = mcmc_diagnostics.effective_sample_size( - x, - filter_threshold=filter_threshold, - filter_beyond_lag=filter_beyond_lag) - if self.use_static_shape: - self.assertAllEqual(x.shape[1:], ess.shape) - - ess_ = sess.run(ess) - - self.assertAllClose( - np.ones_like(ess_) * expected_ess, ess_, atol=atol, rtol=rtol) - - def testIidRank1NormalHasFullEssMaxLags10(self): - # With a length 5000 iid normal sequence, and filter_beyond_lag = 10, we - # should have a good estimate of ESS, and it should be close to the full - # sequence length of 5000. 
- # The choice of filter_beyond_lag = 10 is a short cutoff, reasonable only - # since we know the correlation length should be zero right away. - with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - self._check_versus_expected_effective_sample_size( - x_=rng.randn(5000).astype(np.float32), - expected_ess=5000, - sess=sess, - filter_beyond_lag=10, - filter_threshold=None, - rtol=0.3) - - def testIidRank2NormalHasFullEssMaxLags10(self): - # See similar test for Rank1Normal for reasoning. - with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - self._check_versus_expected_effective_sample_size( - x_=rng.randn(5000, 2).astype(np.float32), - expected_ess=5000, - sess=sess, - filter_beyond_lag=10, - filter_threshold=None, - rtol=0.3) - - def testIidRank1NormalHasFullEssMaxLagThresholdZero(self): - # With a length 5000 iid normal sequence, and filter_threshold = 0, - # we should have a super-duper estimate of ESS, and it should be very close - # to the full sequence length of 5000. - # The choice of filter_beyond_lag = 0 means we cutoff as soon as the - # auto-corris below zero. This should happen very quickly, due to the fact - # that the theoretical auto-corr is [1, 0, 0,...] - with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - self._check_versus_expected_effective_sample_size( - x_=rng.randn(5000).astype(np.float32), - expected_ess=5000, - sess=sess, - filter_beyond_lag=None, - filter_threshold=0., - rtol=0.1) - - def testIidRank2NormalHasFullEssMaxLagThresholdZero(self): - # See similar test for Rank1Normal for reasoning. 
- with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - self._check_versus_expected_effective_sample_size( - x_=rng.randn(5000, 2).astype(np.float32), - expected_ess=5000, - sess=sess, - filter_beyond_lag=None, - filter_threshold=0., - rtol=0.1) - - def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLags50(self): - # Create x_, such that - # x_[i] = iid_x_[0], i = 0,...,9 - # x_[i] = iid_x_[1], i = 10,..., 19, - # and so on. - iid_x_ = rng.randn(5000, 1).astype(np.float32) - x_ = (iid_x_ * np.ones((5000, 10)).astype(np.float32)).reshape((50000,)) - with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - self._check_versus_expected_effective_sample_size( - x_=x_, - expected_ess=50000 // 10, - sess=sess, - filter_beyond_lag=50, - filter_threshold=None, - rtol=0.2) - - def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLagsThresholdZero( - self): - # Create x_, such that - # x_[i] = iid_x_[0], i = 0,...,9 - # x_[i] = iid_x_[1], i = 10,..., 19, - # and so on. - iid_x_ = rng.randn(5000, 1).astype(np.float32) - x_ = (iid_x_ * np.ones((5000, 10)).astype(np.float32)).reshape((50000,)) - with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - self._check_versus_expected_effective_sample_size( - x_=x_, - expected_ess=50000 // 10, - sess=sess, - filter_beyond_lag=None, - filter_threshold=0., - rtol=0.1) - - def testListArgs(self): - # x_ has correlation length 10 ==> ESS = N / 10 - # y_ has correlation length 1 ==> ESS = N - iid_x_ = rng.randn(5000, 1).astype(np.float32) - x_ = (iid_x_ * np.ones((5000, 10)).astype(np.float32)).reshape((50000,)) - y_ = rng.randn(50000).astype(np.float32) - states = [x_, x_, y_, y_] - filter_threshold = [0., None, 0., None] - filter_beyond_lag = [None, 5, None, 5] - - # See other tests for reasoning on tolerance. 
- with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - ess = mcmc_diagnostics.effective_sample_size( - states, - filter_threshold=filter_threshold, - filter_beyond_lag=filter_beyond_lag) - ess_ = sess.run(ess) - self.assertAllEqual(4, len(ess_)) - - self.assertAllClose(50000 // 10, ess_[0], rtol=0.3) - self.assertAllClose(50000 // 10, ess_[1], rtol=0.3) - self.assertAllClose(50000, ess_[2], rtol=0.1) - self.assertAllClose(50000, ess_[3], rtol=0.1) - - def testMaxLagsThresholdLessThanNeg1SameAsNone(self): - # Setting both means we filter out items R_k from the auto-correlation - # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold. - - # x_ has correlation length 10. - iid_x_ = rng.randn(500, 1).astype(np.float32) - x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,)) - with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - x = array_ops.placeholder_with_default( - input=x_, shape=x_.shape if self.use_static_shape else None) - - ess_none_none = mcmc_diagnostics.effective_sample_size( - x, filter_threshold=None, filter_beyond_lag=None) - ess_none_200 = mcmc_diagnostics.effective_sample_size( - x, filter_threshold=None, filter_beyond_lag=200) - ess_neg2_200 = mcmc_diagnostics.effective_sample_size( - x, filter_threshold=-2., filter_beyond_lag=200) - ess_neg2_none = mcmc_diagnostics.effective_sample_size( - x, filter_threshold=-2., filter_beyond_lag=None) - ess_none_none_, ess_none_200_, ess_neg2_200_, ess_neg2_none_ = sess.run( - [ess_none_none, ess_none_200, ess_neg2_200, ess_neg2_none]) - - # filter_threshold=-2 <==> filter_threshold=None. - self.assertAllClose(ess_none_none_, ess_neg2_none_) - self.assertAllClose(ess_none_200_, ess_neg2_200_) - - def testMaxLagsArgsAddInAnOrManner(self): - # Setting both means we filter out items R_k from the auto-correlation - # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold. 
- - # x_ has correlation length 10. - iid_x_ = rng.randn(500, 1).astype(np.float32) - x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,)) - with self.test_session() as sess: - with spectral_ops_test_util.fft_kernel_label_map(): - x = array_ops.placeholder_with_default( - input=x_, shape=x_.shape if self.use_static_shape else None) - - ess_1_9 = mcmc_diagnostics.effective_sample_size( - x, filter_threshold=1., filter_beyond_lag=9) - ess_1_none = mcmc_diagnostics.effective_sample_size( - x, filter_threshold=1., filter_beyond_lag=None) - ess_none_9 = mcmc_diagnostics.effective_sample_size( - x, filter_threshold=1., filter_beyond_lag=9) - ess_1_9_, ess_1_none_, ess_none_9_ = sess.run( - [ess_1_9, ess_1_none, ess_none_9]) - - # Since R_k = 1 for k < 10, and R_k < 1 for k >= 10, - # filter_threshold = 1 <==> filter_beyond_lag = 9. - self.assertAllClose(ess_1_9_, ess_1_none_) - self.assertAllClose(ess_1_9_, ess_none_9_) - - -class EffectiveSampleSizeStaticTest(test.TestCase, _EffectiveSampleSizeTest): - - @property - def use_static_shape(self): - return True - - -class EffectiveSampleSizeDynamicTest(test.TestCase, _EffectiveSampleSizeTest): - - @property - def use_static_shape(self): - return False - - -class _PotentialScaleReductionTest(object): - - @property - def use_static_shape(self): - raise NotImplementedError( - "Subclass failed to impliment `use_static_shape`.") - - def testListOfStatesWhereFirstPassesSecondFails(self): - """Simple test showing API with two states. Read first!.""" - n_samples = 1000 - - # state_0 is two scalar chains taken from iid Normal(0, 1). Will pass. - state_0 = rng.randn(n_samples, 2) - - # state_1 is three 4-variate chains taken from Normal(0, 1) that have been - # shifted. Since every chain is shifted, they are not the same, and the - # test should fail. 
- offset = np.array([1., -1., 2.]).reshape(3, 1) - state_1 = rng.randn(n_samples, 3, 4) + offset - - rhat = mcmc_diagnostics.potential_scale_reduction( - chains_states=[state_0, state_1], independent_chain_ndims=1) - - self.assertIsInstance(rhat, list) - with self.test_session() as sess: - rhat_0_, rhat_1_ = sess.run(rhat) - - # r_hat_0 should be close to 1, meaning test is passed. - self.assertAllEqual((), rhat_0_.shape) - self.assertAllClose(1., rhat_0_, rtol=0.02) - - # r_hat_1 should be greater than 1.2, meaning test has failed. - self.assertAllEqual((4,), rhat_1_.shape) - self.assertAllEqual(np.ones_like(rhat_1_).astype(bool), rhat_1_ > 1.2) - - def check_results(self, state_, independent_chain_shape, should_pass): - sample_ndims = 1 - independent_chain_ndims = len(independent_chain_shape) - with self.test_session(): - state = array_ops.placeholder_with_default( - input=state_, shape=state_.shape if self.use_static_shape else None) - - rhat = mcmc_diagnostics.potential_scale_reduction( - state, independent_chain_ndims=independent_chain_ndims) - - if self.use_static_shape: - self.assertAllEqual( - state_.shape[sample_ndims + independent_chain_ndims:], rhat.shape) - - rhat_ = rhat.eval() - if should_pass: - self.assertAllClose(np.ones_like(rhat_), rhat_, atol=0, rtol=0.02) - else: - self.assertAllEqual(np.ones_like(rhat_).astype(bool), rhat_ > 1.2) - - def iid_normal_chains_should_pass_wrapper(self, - sample_shape, - independent_chain_shape, - other_shape, - dtype=np.float32): - """Check results with iid normal chains.""" - - state_shape = sample_shape + independent_chain_shape + other_shape - state_ = rng.randn(*state_shape).astype(dtype) - - # The "other" dimensions do not have to be identical, just independent, so - # force them to not be identical. 
- if other_shape: - state_ *= rng.rand(*other_shape).astype(dtype) - - self.check_results(state_, independent_chain_shape, should_pass=True) - - def testPassingIIDNdimsAreIndependentOneOtherZero(self): - self.iid_normal_chains_should_pass_wrapper( - sample_shape=[10000], independent_chain_shape=[4], other_shape=[]) - - def testPassingIIDNdimsAreIndependentOneOtherOne(self): - self.iid_normal_chains_should_pass_wrapper( - sample_shape=[10000], independent_chain_shape=[3], other_shape=[7]) - - def testPassingIIDNdimsAreIndependentOneOtherTwo(self): - self.iid_normal_chains_should_pass_wrapper( - sample_shape=[10000], independent_chain_shape=[2], other_shape=[5, 7]) - - def testPassingIIDNdimsAreIndependentTwoOtherTwo64Bit(self): - self.iid_normal_chains_should_pass_wrapper( - sample_shape=[10000], - independent_chain_shape=[2, 3], - other_shape=[5, 7], - dtype=np.float64) - - def offset_normal_chains_should_fail_wrapper( - self, sample_shape, independent_chain_shape, other_shape): - """Check results with normal chains that are offset from each other.""" - - state_shape = sample_shape + independent_chain_shape + other_shape - state_ = rng.randn(*state_shape) - - # Add a significant offset to the different (formerly iid) chains. 
- offset = np.linspace( - 0, 2, num=np.prod(independent_chain_shape)).reshape([1] * len( - sample_shape) + independent_chain_shape + [1] * len(other_shape)) - state_ += offset - - self.check_results(state_, independent_chain_shape, should_pass=False) - - def testFailingOffsetNdimsAreSampleOneIndependentOneOtherOne(self): - self.offset_normal_chains_should_fail_wrapper( - sample_shape=[10000], independent_chain_shape=[2], other_shape=[5]) - - -class PotentialScaleReductionStaticTest(test.TestCase, - _PotentialScaleReductionTest): - - @property - def use_static_shape(self): - return True - - def testIndependentNdimsLessThanOneRaises(self): - with self.assertRaisesRegexp(ValueError, "independent_chain_ndims"): - mcmc_diagnostics.potential_scale_reduction( - rng.rand(2, 3, 4), independent_chain_ndims=0) - - -class PotentialScaleReductionDynamicTest(test.TestCase, - _PotentialScaleReductionTest): - - @property - def use_static_shape(self): - return False - - -class _ReduceVarianceTest(object): - - @property - def use_static_shape(self): - raise NotImplementedError( - "Subclass failed to impliment `use_static_shape`.") - - def check_versus_numpy(self, x_, axis, biased, keepdims): - with self.test_session(): - x_ = np.asarray(x_) - x = array_ops.placeholder_with_default( - input=x_, shape=x_.shape if self.use_static_shape else None) - var = mcmc_diagnostics._reduce_variance( - x, axis=axis, biased=biased, keepdims=keepdims) - np_var = np.var(x_, axis=axis, ddof=0 if biased else 1, keepdims=keepdims) - - if self.use_static_shape: - self.assertAllEqual(np_var.shape, var.shape) - - var_ = var.eval() - # We will mask below, which changes shape, so check shape explicitly here. 
- self.assertAllEqual(np_var.shape, var_.shape) - - # We get NaN when we divide by zero due to the size being the same as ddof - nan_mask = np.isnan(np_var) - if nan_mask.any(): - self.assertTrue(np.isnan(var_[nan_mask]).all()) - self.assertAllClose(np_var[~nan_mask], var_[~nan_mask], atol=0, rtol=0.02) - - def testScalarBiasedTrue(self): - self.check_versus_numpy(x_=-1.234, axis=None, biased=True, keepdims=False) - - def testScalarBiasedFalse(self): - # This should result in NaN. - self.check_versus_numpy(x_=-1.234, axis=None, biased=False, keepdims=False) - - def testShape2x3x4AxisNoneBiasedFalseKeepdimsFalse(self): - self.check_versus_numpy( - x_=rng.randn(2, 3, 4), axis=None, biased=True, keepdims=False) - - def testShape2x3x4Axis1BiasedFalseKeepdimsTrue(self): - self.check_versus_numpy( - x_=rng.randn(2, 3, 4), axis=1, biased=True, keepdims=True) - - def testShape2x3x4x5Axis13BiasedFalseKeepdimsTrue(self): - self.check_versus_numpy( - x_=rng.randn(2, 3, 4, 5), axis=1, biased=True, keepdims=True) - - def testShape2x3x4x5Axis13BiasedFalseKeepdimsFalse(self): - self.check_versus_numpy( - x_=rng.randn(2, 3, 4, 5), axis=1, biased=False, keepdims=False) - - -class ReduceVarianceTestStaticShape(test.TestCase, _ReduceVarianceTest): - - @property - def use_static_shape(self): - return True - - -class ReduceVarianceTestDynamicShape(test.TestCase, _ReduceVarianceTest): - - @property - def use_static_shape(self): - return False - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics.py b/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics.py deleted file mode 100644 index f3a645eafc..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities for Markov Chain Monte Carlo (MCMC) sampling.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.contrib.bayesflow.python.ops.mcmc_diagnostics_impl import * -# pylint: enable=wildcard-import -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - "effective_sample_size", - "potential_scale_reduction", -] - -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py b/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py deleted file mode 100644 index 0424b6952b..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/mcmc_diagnostics_impl.py +++ /dev/null @@ -1,400 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Utilities for Markov Chain Monte Carlo (MCMC) sampling. - -@@effective_sample_size -@@potential_scale_reduction -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.distributions.python.ops import sample_stats -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_util -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops - -__all__ = [ - "effective_sample_size", - "potential_scale_reduction", -] - - -def effective_sample_size(states, - filter_threshold=0., - filter_beyond_lag=None, - name=None): - """Estimate a lower bound on effective sample size for each independent chain. - - Roughly speaking, "effective sample size" (ESS) is the size of an iid sample - with the same variance as `state`. - - More precisely, given a stationary sequence of possibly correlated random - variables `X_1, X_2,...,X_N`, each identically distributed ESS is the number - such that - - ```Variance{ N**-1 * Sum{X_i} } = ESS**-1 * Variance{ X_1 }.``` - - If the sequence is uncorrelated, `ESS = N`. In general, one should expect - `ESS <= N`, with more highly correlated sequences having smaller `ESS`. - - #### Example of using ESS to estimate standard error. - - ``` - tfd = tf.contrib.distributions - tfb = tf.contrib.bayesflow - - target = tfd.MultivariateNormalDiag(scale_diag=[1., 2.]) - - # Get 1000 states from one chain. 
- states = tfb.hmc.sample_chain( - num_results=1000, - target_log_prob_fn=target.log_prob, - current_state=tf.constant([0., 0.]), - step_size=0.05, - num_leapfrog_steps=20, - num_burnin_steps=200) - states.shape - ==> (1000, 2) - - ess = effective_sample_size(states) - ==> Shape (2,) Tensor - - mean, variance = tf.nn.moments(states, axis=0) - standard_error = tf.sqrt(variance / ess) - ``` - - Some math shows that, with `R_k` the auto-correlation sequence, - `R_k := Covariance{X_1, X_{1+k}} / Variance{X_1}`, we have - - ```ESS(N) = N / [ 1 + 2 * ( (N - 1) / N * R_1 + ... + 1 / N * R_{N-1} ) ]``` - - This function estimates the above by first estimating the auto-correlation. - Since `R_k` must be estimated using only `N - k` samples, it becomes - progressively noisier for larger `k`. For this reason, the summation over - `R_k` should be truncated at some number `filter_beyond_lag < N`. Since many - MCMC methods generate chains where `R_k > 0`, a reasonable critera is to - truncate at the first index where the estimated auto-correlation becomes - negative. - - The arguments `filter_beyond_lag`, `filter_threshold` are filters intended to - remove noisy tail terms from `R_k`. They combine in an "OR" manner meaning - terms are removed if they were to be filtered under the `filter_beyond_lag` OR - `filter_threshold` criteria. - - Args: - states: `Tensor` or list of `Tensor` objects. Dimension zero should index - identically distributed states. - filter_threshold: `Tensor` or list of `Tensor` objects. - Must broadcast with `state`. The auto-correlation sequence is truncated - after the first appearance of a term less than `filter_threshold`. - Setting to `None` means we use no threshold filter. Since `|R_k| <= 1`, - setting to any number less than `-1` has the same effect. - filter_beyond_lag: `Tensor` or list of `Tensor` objects. Must be - `int`-like and scalar valued. The auto-correlation sequence is truncated - to this length. 
Setting to `None` means we do not filter based on number - of lags. - name: `String` name to prepend to created ops. - - Returns: - ess: `Tensor` or list of `Tensor` objects. The effective sample size of - each component of `states`. Shape will be `states.shape[1:]`. - - Raises: - ValueError: If `states` and `filter_threshold` or `states` and - `filter_beyond_lag` are both lists with different lengths. - """ - states_was_list = _is_list_like(states) - - # Convert all args to lists. - if not states_was_list: - states = [states] - - filter_beyond_lag = _broadcast_maybelist_arg(states, filter_beyond_lag, - "filter_beyond_lag") - filter_threshold = _broadcast_maybelist_arg(states, filter_threshold, - "filter_threshold") - - # Process items, one at a time. - with ops.name_scope(name, "effective_sample_size"): - ess_list = [ - _effective_sample_size_single_state(s, ml, mlt) - for (s, ml, mlt) in zip(states, filter_beyond_lag, filter_threshold) - ] - - if states_was_list: - return ess_list - return ess_list[0] - - -def _effective_sample_size_single_state(states, filter_beyond_lag, - filter_threshold): - """ESS computation for one single Tensor argument.""" - - with ops.name_scope( - "effective_sample_size_single_state", - values=[states, filter_beyond_lag, filter_threshold]): - - states = ops.convert_to_tensor(states, name="states") - dt = states.dtype - - # filter_beyond_lag == None ==> auto_corr is the full sequence. - auto_corr = sample_stats.auto_correlation( - states, axis=0, max_lags=filter_beyond_lag) - if filter_threshold is not None: - filter_threshold = ops.convert_to_tensor( - filter_threshold, dtype=dt, name="filter_threshold") - # Get a binary mask to zero out values of auto_corr below the threshold. - # mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i, - # mask[i, ...] = 0, otherwise. - # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...] 
- # Building step by step, - # Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2. - # Step 1: mask = [False, False, True, False] - mask = auto_corr < filter_threshold - # Step 2: mask = [0, 0, 1, 1] - mask = math_ops.cast(mask, dtype=dt) - # Step 3: mask = [0, 0, 1, 2] - mask = math_ops.cumsum(mask, axis=0) - # Step 4: mask = [1, 1, 0, 0] - mask = math_ops.maximum(1. - mask, 0.) - auto_corr *= mask - - # With R[k] := auto_corr[k, ...], - # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]} - # = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1) - # approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]} - # where M is the filter_beyond_lag truncation point chosen above. - - # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total - # ndims the same as auto_corr - n = _axis_size(states, axis=0) - k = math_ops.range(0., _axis_size(auto_corr, axis=0)) - nk_factor = (n - k) / n - if auto_corr.shape.ndims is not None: - new_shape = [-1] + [1] * (auto_corr.shape.ndims - 1) - else: - new_shape = array_ops.concat( - ([-1], - array_ops.ones([array_ops.rank(auto_corr) - 1], dtype=dtypes.int32)), - axis=0) - nk_factor = array_ops.reshape(nk_factor, new_shape) - - return n / (-1 + 2 * math_ops.reduce_sum(nk_factor * auto_corr, axis=0)) - - -def potential_scale_reduction(chains_states, - independent_chain_ndims=1, - name=None): - """Gelman and Rubin's potential scale reduction factor for chain convergence. - - Given `N > 1` states from each of `C > 1` independent chains, the potential - scale reduction factor, commonly referred to as R-hat, measures convergence of - the chains (to the same target) by testing for equality of means. - Specifically, R-hat measures the degree to which variance (of the means) - between chains exceeds what one would expect if the chains were identically - distributed. See [1], [2]. 
- - Some guidelines: - - * The initial state of the chains should be drawn from a distribution - overdispersed with respect to the target. - * If all chains converge to the target, then as `N --> infinity`, R-hat --> 1. - Before that, R-hat > 1 (except in pathological cases, e.g. if the chain - paths were identical). - * The above holds for any number of chains `C > 1`. Increasing `C` does - improves effectiveness of the diagnostic. - * Sometimes, R-hat < 1.2 is used to indicate approximate convergence, but of - course this is problem depedendent. See [2]. - * R-hat only measures non-convergence of the mean. If higher moments, or other - statistics are desired, a different diagnostic should be used. See [2]. - - #### Examples - - Diagnosing convergence by monitoring 10 chains that each attempt to - sample from a 2-variate normal. - - ```python - tfd = tf.contrib.distributions - tfb = tf.contrib.bayesflow - - target = tfd.MultivariateNormalDiag(scale_diag=[1., 2.]) - - # Get 10 (2x) overdispersed initial states. - initial_state = target.sample(10) * 2. - ==> (10, 2) - - # Get 1000 samples from the 10 independent chains. - chains_states, _ = tfb.hmc.sample_chain( - num_results=1000, - target_log_prob_fn=target.log_prob, - current_state=initial_state, - step_size=0.05, - num_leapfrog_steps=20, - num_burnin_steps=200) - chains_states.shape - ==> (1000, 10, 2) - - rhat = tfb.mcmc_diagnostics.potential_scale_reduction( - chains_states, independent_chain_ndims=1) - - # The second dimension needed a longer burn-in. - rhat.eval() - ==> [1.05, 1.3] - ``` - - To see why R-hat is reasonable, let `X` be a random variable drawn uniformly - from the combined states (combined over all chains). 
Then, in the limit - `N, C --> infinity`, with `E`, `Var` denoting expectation and variance, - - ```R-hat = ( E[Var[X | chain]] + Var[E[X | chain]] ) / E[Var[X | chain]].``` - - Using the law of total variance, the numerator is the variance of the combined - states, and the denominator is the total variance minus the variance of the - the individual chain means. If the chains are all drawing from the same - distribution, they will have the same mean, and thus the ratio should be one. - - [1] "Inference from Iterative Simulation Using Multiple Sequences" - Andrew Gelman and Donald B. Rubin - Statist. Sci. Volume 7, Number 4 (1992), 457-472. - [2] "General Methods for Monitoring Convergence of Iterative Simulations" - Stephen P. Brooks and Andrew Gelman - Journal of Computational and Graphical Statistics, 1998. Vol 7, No. 4. - - Args: - chains_states: `Tensor` or Python `list` of `Tensor`s representing the - state(s) of a Markov Chain at each result step. The `ith` state is - assumed to have shape `[Ni, Ci1, Ci2,...,CiD] + A`. - Dimension `0` indexes the `Ni > 1` result steps of the Markov Chain. - Dimensions `1` through `D` index the `Ci1 x ... x CiD` independent - chains to be tested for convergence to the same target. - The remaining dimensions, `A`, can have any shape (even empty). - independent_chain_ndims: Integer type `Tensor` with value `>= 1` giving the - number of giving the number of dimensions, from `dim = 1` to `dim = D`, - holding independent chain results to be tested for convergence. - name: `String` name to prepend to created ops. Default: - `potential_scale_reduction`. - - Returns: - `Tensor` or Python `list` of `Tensor`s representing the R-hat statistic for - the state(s). Same `dtype` as `state`, and shape equal to - `state.shape[1 + independent_chain_ndims:]`. - - Raises: - ValueError: If `independent_chain_ndims < 1`. 
- """ - chains_states_was_list = _is_list_like(chains_states) - if not chains_states_was_list: - chains_states = [chains_states] - - # tensor_util.constant_value returns None iff a constant value (as a numpy - # array) is not efficiently computable. Therefore, we try constant_value then - # check for None. - icn_const_ = tensor_util.constant_value( - ops.convert_to_tensor(independent_chain_ndims)) - if icn_const_ is not None: - independent_chain_ndims = icn_const_ - if icn_const_ < 1: - raise ValueError( - "Argument `independent_chain_ndims` must be `>= 1`, found: {}".format( - independent_chain_ndims)) - - with ops.name_scope(name, "potential_scale_reduction"): - rhat_list = [ - _potential_scale_reduction_single_state(s, independent_chain_ndims) - for s in chains_states - ] - - if chains_states_was_list: - return rhat_list - return rhat_list[0] - - -def _potential_scale_reduction_single_state(state, independent_chain_ndims): - """potential_scale_reduction for one single state `Tensor`.""" - with ops.name_scope( - "potential_scale_reduction_single_state", - values=[state, independent_chain_ndims]): - # We assume exactly one leading dimension indexes e.g. correlated samples - # from each Markov chain. - state = ops.convert_to_tensor(state, name="state") - sample_ndims = 1 - - sample_axis = math_ops.range(0, sample_ndims) - chain_axis = math_ops.range(sample_ndims, - sample_ndims + independent_chain_ndims) - sample_and_chain_axis = math_ops.range( - 0, sample_ndims + independent_chain_ndims) - - n = _axis_size(state, sample_axis) - m = _axis_size(state, chain_axis) - - # In the language of [2], - # B / n is the between chain variance, the variance of the chain means. - # W is the within sequence variance, the mean of the chain variances. 
- b_div_n = _reduce_variance( - math_ops.reduce_mean(state, sample_axis, keepdims=True), - sample_and_chain_axis, - biased=False) - w = math_ops.reduce_mean( - _reduce_variance(state, sample_axis, keepdims=True, biased=True), - sample_and_chain_axis) - - # sigma^2_+ is an estimate of the true variance, which would be unbiased if - # each chain was drawn from the target. c.f. "law of total variance." - sigma_2_plus = w + b_div_n - - return ((m + 1.) / m) * sigma_2_plus / w - (n - 1.) / (m * n) - - -# TODO(b/72873233) Move some variant of this to sample_stats. -def _reduce_variance(x, axis=None, biased=True, keepdims=False): - with ops.name_scope("reduce_variance"): - x = ops.convert_to_tensor(x, name="x") - mean = math_ops.reduce_mean(x, axis=axis, keepdims=True) - biased_var = math_ops.reduce_mean( - math_ops.squared_difference(x, mean), axis=axis, keepdims=keepdims) - if biased: - return biased_var - n = _axis_size(x, axis) - return (n / (n - 1.)) * biased_var - - -def _axis_size(x, axis=None): - """Get number of elements of `x` in `axis`, as type `x.dtype`.""" - if axis is None: - return math_ops.cast(array_ops.size(x), x.dtype) - return math_ops.cast( - math_ops.reduce_prod(array_ops.gather(array_ops.shape(x), axis)), x.dtype) - - -def _is_list_like(x): - """Helper which returns `True` if input is `list`-like.""" - return isinstance(x, (tuple, list)) - - -def _broadcast_maybelist_arg(states, secondary_arg, name): - """Broadcast a listable secondary_arg to that of states.""" - if _is_list_like(secondary_arg): - if len(secondary_arg) != len(states): - raise ValueError("Argument `%s` was a list of different length ({}) than " - "`states` ({})".format(name, len(states))) - else: - secondary_arg = [secondary_arg] * len(states) - - return secondary_arg -- GitLab From cfa4ad28b32dc8a863461efda8fc13d2c8d00724 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 6 Mar 2018 12:04:06 -0800 Subject: [PATCH 469/884] Layers bind to a graph when first called, not at __init__. PiperOrigin-RevId: 188059096 --- tensorflow/python/layers/base_test.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 91b8988d31..1ee9ec7f7a 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -643,6 +643,16 @@ class BaseLayerTest(test.TestCase): self.assertEqual(len(layer.get_losses_for([intermediate_inputs])), 1) self.assertEqual(len(layer.get_losses_for([outputs])), 0) + def testLayerGraphSetInFirstApply(self): + with ops.Graph().as_default(): + layer = core_layers.Dense(1) # Graph at construction time is ignored + with ops.Graph().as_default(): + layer.apply(constant_op.constant([[1]])) + # layer is now bound to second Graph + with ops.Graph().as_default(), self.assertRaisesRegexp( + ValueError, 'Input graph and Layer graph are not the same'): + layer.apply(constant_op.constant([[1]])) + if __name__ == '__main__': test.main() -- GitLab From a8bd3677077ffbcae4416b5a18b50d128cbf3a46 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Tue, 6 Mar 2018 12:06:01 -0800 Subject: [PATCH 470/884] keras: Fix typo PiperOrigin-RevId: 188059457 --- tensorflow/python/keras/_impl/keras/engine/network.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 93d97d6474..143efd97a0 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -99,7 +99,7 @@ class Network(base_layer.Layer): self._losses = [] # Used in symbolic mode only. self._scope = None # Never used. self._reuse = None # Never used. 
- if context.in_eager_mode: + if context.in_eager_mode(): self._graph = None else: self._graph = ops.get_default_graph() # Used in symbolic mode only. -- GitLab From aa129d523f27739c98032fb08346def395b1afda Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 6 Mar 2018 12:15:47 -0800 Subject: [PATCH 471/884] Add HLO evaluator support for Gather This isn't optimal -- it copies element by element -- but I figured, at least for bringup, it will be helpful to have the HLO evaluator follow the spec closely. PiperOrigin-RevId: 188061274 --- tensorflow/compiler/xla/literal_util.cc | 27 +- tensorflow/compiler/xla/literal_util.h | 5 + .../compiler/xla/service/hlo_evaluator.cc | 334 ++++++++++++++++++ .../compiler/xla/service/hlo_evaluator.h | 2 + .../xla/service/hlo_evaluator_test.cc | 201 +++++++++++ tensorflow/compiler/xla/shape_util.h | 16 + tensorflow/compiler/xla/tests/BUILD | 1 + .../xla/tests/hlo_verified_test_base.cc | 26 +- .../xla/tests/hlo_verified_test_base.h | 2 + tensorflow/compiler/xla/util.h | 5 + 10 files changed, 609 insertions(+), 10 deletions(-) diff --git a/tensorflow/compiler/xla/literal_util.cc b/tensorflow/compiler/xla/literal_util.cc index 1d1418fc2f..d247aeb41f 100644 --- a/tensorflow/compiler/xla/literal_util.cc +++ b/tensorflow/compiler/xla/literal_util.cc @@ -248,6 +248,28 @@ Status Literal::CopySliceFromInternal( return Status::OK(); } +Status Literal::CopyElementFrom(const Literal& src_literal, + tensorflow::gtl::ArraySlice src_index, + tensorflow::gtl::ArraySlice dest_index) { + DCHECK_EQ(shape().element_type(), src_literal.shape().element_type()); + const int64 src_linear_index = IndexUtil::MultidimensionalIndexToLinearIndex( + src_literal.shape(), src_index); + const int64 dest_linear_index = + IndexUtil::MultidimensionalIndexToLinearIndex(shape(), dest_index); + const int64 primitive_size = + ShapeUtil::ByteSizeOfPrimitiveType(shape().element_type()); + + char* dest_address = + static_cast(untyped_data()) + dest_linear_index * 
primitive_size; + const char* source_address = + static_cast(src_literal.untyped_data()) + + src_linear_index * primitive_size; + if (dest_address != source_address) { + memcpy(dest_address, source_address, primitive_size); + } + return Status::OK(); +} + std::vector Literal::DecomposeTuple() { CHECK(ShapeUtil::IsTuple(shape())); std::vector elements; @@ -811,9 +833,10 @@ std::unique_ptr Literal::Slice( DimensionVector result_dimensions; for (int64 dnum = 0; dnum < ShapeUtil::Rank(shape()); ++dnum) { CHECK_GE(start_indices[dnum], 0); - CHECK_LE(limit_indices[dnum], shape().dimensions(dnum)); + CHECK_LE(limit_indices[dnum], shape().dimensions(dnum)) + << "dnum = " << dnum; int64 dimension = limit_indices[dnum] - start_indices[dnum]; - CHECK_GE(dimension, 0); + CHECK_GE(dimension, 0) << "dnum = " << dnum; result_dimensions.push_back(dimension); } const auto result_shape = diff --git a/tensorflow/compiler/xla/literal_util.h b/tensorflow/compiler/xla/literal_util.h index cdc5d807e0..d525487733 100644 --- a/tensorflow/compiler/xla/literal_util.h +++ b/tensorflow/compiler/xla/literal_util.h @@ -262,6 +262,11 @@ class Literal { tensorflow::gtl::ArraySlice dest_base, tensorflow::gtl::ArraySlice copy_size); + // Copies one element from src_literal[src_index] to (*this)[dest_index]. + Status CopyElementFrom(const Literal& src_literal, + tensorflow::gtl::ArraySlice src_index, + tensorflow::gtl::ArraySlice dest_index); + // Returns a vector containing the tuple elements of this Literal as separate // Literals. This Literal must be tuple-shaped and can be a nested tuple. The // elements are moved into the new Literals; no data is copied. 
Upon return diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.cc b/tensorflow/compiler/xla/service/hlo_evaluator.cc index 534433be7b..a839f8066e 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator.cc @@ -2466,6 +2466,340 @@ Status HloEvaluator::HandleTuple(HloInstruction* tuple) { return Status::OK(); } +// Returns a ShapeUtil::IndexIterationSpace that iterates over the output +// gather dimensions while keeping the rest of the output dimensions clamped to +// 0. +ShapeUtil::IndexIterationSpace IterationSpaceForOutputGatherIndices( + const Shape& output_shape, const GatherDimensionNumbers& dim_numbers) { + int64 output_rank = output_shape.dimensions_size(); + std::vector index_base(output_rank, 0); + std::vector index_count; + index_count.reserve(output_rank); + for (int64 i = 0; i < output_rank; i++) { + bool is_output_gather_dim = + !c_binary_search(dim_numbers.output_window_dims(), i); + index_count.push_back(is_output_gather_dim ? output_shape.dimensions(i) + : 1); + } + + return {std::move(index_base), std::move(index_count), + std::vector(output_rank, 1)}; +} + +// Return a ShapeUtil::IndexIterationSpace that iterates over the output window +// dimensions while keeping the rest of the output dimensions clamped to 0.
+ShapeUtil::IndexIterationSpace IterationSpaceForOutputWindowIndices( + int64 output_rank, ArraySlice window_bounds, + const GatherDimensionNumbers& dim_numbers) { + std::vector index_base(output_rank, 0); + std::vector index_count(output_rank, 1); + int64 window_bounds_idx = 0; + for (int64 i = 0; i < output_rank; i++) { + bool is_output_window_dim = + c_binary_search(dim_numbers.output_window_dims(), i); + if (is_output_window_dim) { + while (c_binary_search(dim_numbers.elided_window_dims(), + window_bounds_idx)) { + window_bounds_idx++; + } + index_count[i] = window_bounds[window_bounds_idx++]; + } + } + + return {std::move(index_base), std::move(index_count), + std::vector(output_rank, 1)}; +} + +// This functor computes the contribution of gather_indices to an input index +// corresponding to an output index. That is, given an output index I, it picks +// out the gather output indices in I and uses them to look up a gather index, +// G, from the gather indices tensor, and expands G into the input space +// according to gather_dims_to_operand_dims. +class OutputGatherIndexToInputIndex { + public: + // The constructor does some setup work that is amortized across all + // iterations. 
+ explicit OutputGatherIndexToInputIndex( + const GatherDimensionNumbers* dim_numbers, const Shape& input_shape, + const Shape& output_shape, const Literal* gather_indices) + : dim_numbers_(*dim_numbers), gather_indices_(*gather_indices) { + for (int64 i = 0; i < output_shape.dimensions_size(); i++) { + output_dim_is_gather_dims_.push_back( + !c_binary_search(dim_numbers_.output_window_dims(), i)); + } + + for (int64 i = 0; i < input_shape.dimensions_size(); i++) { + int64 index_of_input_dim_in_index_vector = + std::distance(dim_numbers_.gather_dims_to_operand_dims().begin(), + c_find(dim_numbers_.gather_dims_to_operand_dims(), i)); + if (index_of_input_dim_in_index_vector == + dim_numbers_.gather_dims_to_operand_dims_size()) { + input_dim_value_to_index_vector_.push_back(-1); + } else { + input_dim_value_to_index_vector_.push_back( + index_of_input_dim_in_index_vector); + } + } + + index_vector_index_.resize(gather_indices_.shape().dimensions_size()); + input_index_.resize(input_shape.dimensions_size()); + int64 index_vector_size = + gather_indices_.shape().dimensions(dim_numbers_.index_vector_dim()); + index_vector_.resize(index_vector_size); + } + + // Returns the contribution of gather_indices to the input index corresponding + // to output_index. See gather_inner_loop_body. + // + // This is conceptually a stateless transformation from output_index to the + // gather input index, but: + // + // - Instead of allocating memory to represent the gather input index on + // every invocation we reuse the same storage for the result + // (input_index_), mutating it in place. + // - Instead of allocating buffers for temporary values like + // index_vector_index_ and index_vector on every invocation, we reuse the + // same storage for all invocations. + // + // This returns an arrayslice into memory owned by the class. 
+ StatusOr> operator()(ArraySlice output_index) { + PropagateOutputIndexGatherDimsToIndexVectorIndex(output_index); + TF_RETURN_IF_ERROR(FetchIndexVector()); + PropagateIndexVectorToInputIndex(); + return ArraySlice(input_index_); + } + + private: + // Propagates the gather index dimensions from the output index into + // index_vector_index_ by mutating index_vector_index_ in place. Does not + // update the dim_numbers.index_vector_dim() dimension -- that's the dimension + // we iterate over in FetchIndexVector. + void PropagateOutputIndexGatherDimsToIndexVectorIndex( + ArraySlice output_index) { + int64 index_vector_index_i = 0; + for (int64 i = 0, e = output_index.size(); i < e; i++) { + if (!output_dim_is_gather_dims_[i]) { + continue; + } + + if (index_vector_index_i == dim_numbers_.index_vector_dim()) { + index_vector_index_i++; + } + + index_vector_index_[index_vector_index_i++] = output_index[i]; + } + } + + // Populates index_vector_ by iterating over gather_indices_ according to + // index_vector_index_. + Status FetchIndexVector() { + int64 index_vector_dim = dim_numbers_.index_vector_dim(); + for (int64 i = 0, e = index_vector_.size(); i < e; i++) { + index_vector_index_[index_vector_dim] = i; + TF_ASSIGN_OR_RETURN(index_vector_[i], gather_indices_.GetIntegralAsS64( + index_vector_index_)); + } + return Status::OK(); + } + + // Populates input_index_. + void PropagateIndexVectorToInputIndex() { + for (int64 i = 0, e = input_index_.size(); i < e; i++) { + if (input_dim_value_to_index_vector_[i] != -1) { + input_index_[i] = index_vector_[input_dim_value_to_index_vector_[i]]; + } + + // If input_dim_value_to_index_vector_[i] == -1 then input_index_[i] + // remains 0, as set by the constructor. + } + } + + // input_dim_value_to_index_vector_[i] tells us how to compute dimension i of + // the input index from the index vector. See + // PropagateIndexVectorToInputIndex. 
+ std::vector input_dim_value_to_index_vector_; + + // output_dim_is_gather_dims_[i] is true iff the output index i is a gather + // dimension. + std::vector output_dim_is_gather_dims_; + + // The buffer into which we construct an index into gather_indices_ to fetch + // the index vector. + std::vector index_vector_index_; + + // The index vector fetched from gather_indices_. + std::vector index_vector_; + + // The result computed by this functor. operator() returns an ArraySlice into + // this vector. + std::vector input_index_; + + const GatherDimensionNumbers& dim_numbers_; + const Literal& gather_indices_; +}; + +// This functor computes the contribution of the window indices in an output +// index to an input index. That is, given an output index I it picks out the +// output window indices in I and expands it into a window index into the input +// shape. +class OutputWindowIndexToInputIndex { + public: + // The constructor does some setup work that is amortized across all + // iterations. + explicit OutputWindowIndexToInputIndex( + const GatherDimensionNumbers& dim_numbers, const Shape& input_shape, + const Shape& output_shape) { + std::vector window_index_to_output_index; + int64 output_index_count = 0; + for (int64 i = 0; i < output_shape.dimensions_size(); i++) { + if (c_binary_search(dim_numbers.output_window_dims(), i)) { + window_index_to_output_index.push_back(output_index_count++); + } else { + output_index_count++; + } + } + + int64 window_dim_count = 0; + for (int64 i = 0; i < input_shape.dimensions_size(); i++) { + if (c_binary_search(dim_numbers.elided_window_dims(), i)) { + input_dim_value_to_output_index_.push_back(-1); + } else { + input_dim_value_to_output_index_.push_back( + window_index_to_output_index[window_dim_count++]); + } + } + + input_index_.resize(input_shape.dimensions_size()); + } + + // Returns the contribution of the window indices to the input index + // corresponding to output_index. See gather_inner_loop_body. 
+ // + // This is conceptually a stateless transformation from output_index to the + // window input index, but instead of allocating memory to represent the + // gather input index on every invocation we reuse the same storage for the + // result (input_index_), mutating it in place. + // + // This returns an arrayslice into memory owned by the class. + StatusOr> operator()(ArraySlice output_index) { + PropagateOutputIndexWindowDimsToInputIndex(output_index); + return ArraySlice(input_index_); + } + + private: + // Propagates window dimensions from the output index to input_index_ by + // mutating input_index_ in place. + void PropagateOutputIndexWindowDimsToInputIndex( + ArraySlice output_index) { + for (int64 i = 0, e = input_index_.size(); i < e; i++) { + if (input_dim_value_to_output_index_[i] != -1) { + input_index_[i] = output_index[input_dim_value_to_output_index_[i]]; + } + + // If input_dim_value_to_output_index_[i] == -1 then input_index_[i] + // remains 0, as set by the constructor. + } + } + + // input_dim_value_to_output_index_[i] tells us how to compute dimension i of + // the input index from the output index. See + // PropagateOutputIndexWindowDimsToInputIndex. + std::vector input_dim_value_to_output_index_; + + // The result computed by this functor. operator() returns an ArraySlice into + // this vector. + std::vector input_index_; +}; + +// Reshapes the gather indices input to have a trailing degenerate `1` dimension +// if necessary. Hands over the ownership of the newly created literal (if +// there is one) to `reshaped_gather_indices`.
+static StatusOr> ReshapedGatherIndices( + int64 index_vector_dim, const Literal& gather_indices, + std::unique_ptr* reshaped_gather_indices) { + if (gather_indices.shape().dimensions_size() != index_vector_dim) { + return std::cref(gather_indices); + } + + std::vector new_shape(gather_indices.shape().dimensions().begin(), + gather_indices.shape().dimensions().end()); + new_shape.push_back(1); + TF_ASSIGN_OR_RETURN(*reshaped_gather_indices, + gather_indices.Reshape(new_shape)); + return std::cref(**reshaped_gather_indices); +} + +Status HloEvaluator::HandleGather(HloInstruction* gather) { + std::unique_ptr result = Literal::CreateFromShape(gather->shape()); + const Shape& shape = gather->shape(); + const GatherDimensionNumbers& dim_numbers = + gather->gather_dimension_numbers(); + const Literal& operand = GetEvaluatedLiteralFor(gather->operand(0)); + std::unique_ptr reshaped_gather_indices; + TF_ASSIGN_OR_RETURN( + const Literal& gather_indices, + ReshapedGatherIndices(dim_numbers.index_vector_dim(), + GetEvaluatedLiteralFor(gather->operand(1)), + &reshaped_gather_indices)); + + // We iterate over the gather dimensions in the output shape in an outer loop + // nest, and iterate over the window dimensions in the output shape in an + // inner loop nest. + + ShapeUtil::IndexIterationSpace gather_indices_iteration_space = + IterationSpaceForOutputGatherIndices(shape, dim_numbers); + ShapeUtil::IndexIterationSpace window_indices_iteration_space = + IterationSpaceForOutputWindowIndices( + shape.dimensions_size(), gather->gather_window_bounds(), dim_numbers); + + // Scratch buffers that hold an index in the output shape and the + // corresponding index in the input shape. 
+ std::vector input_index(operand.shape().dimensions_size()); + std::vector output_index(gather->shape().dimensions_size()); + + OutputGatherIndexToInputIndex output_gather_index_to_input_index( + &gather->gather_dimension_numbers(), /*input_shape=*/operand.shape(), + /*output_shape=*/shape, &gather_indices); + OutputWindowIndexToInputIndex output_window_index_to_input_index( + gather->gather_dimension_numbers(), /*input_shape=*/operand.shape(), + /*output_shape=*/shape); + + auto gather_inner_loop_body = + [&](ArraySlice output_window_index, + ArraySlice input_gather_index, + ArraySlice output_gather_index) -> StatusOr { + TF_ASSIGN_OR_RETURN( + ArraySlice input_window_index, + output_window_index_to_input_index(output_window_index)); + for (int i = 0, e = output_index.size(); i < e; i++) { + output_index[i] = output_gather_index[i] + output_window_index[i]; + } + for (int i = 0, e = input_index.size(); i < e; i++) { + input_index[i] = input_gather_index[i] + input_window_index[i]; + } + TF_RETURN_IF_ERROR( + result->CopyElementFrom(operand, input_index, output_index)); + return true; + }; + + auto gather_outer_loop_body = + [&](ArraySlice output_gather_index) -> StatusOr { + TF_ASSIGN_OR_RETURN( + ArraySlice input_gather_index, + output_gather_index_to_input_index(output_gather_index)); + TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus( + shape, window_indices_iteration_space, + std::bind(gather_inner_loop_body, std::placeholders::_1, + input_gather_index, output_gather_index))); + return true; + }; + + TF_RETURN_IF_ERROR(ShapeUtil::ForEachIndexWithStatus( + shape, gather_indices_iteration_space, gather_outer_loop_body)); + evaluated_[gather] = std::move(result); + return Status::OK(); +} + Status HloEvaluator::HandleGetTupleElement(HloInstruction* get_tuple_element) { const auto result_shape = get_tuple_element->shape(); const int64 index = get_tuple_element->tuple_index(); diff --git a/tensorflow/compiler/xla/service/hlo_evaluator.h 
b/tensorflow/compiler/xla/service/hlo_evaluator.h index 8a27cf9a3a..410e5ce7af 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator.h +++ b/tensorflow/compiler/xla/service/hlo_evaluator.h @@ -152,6 +152,8 @@ class HloEvaluator : public DfsHloVisitorWithDefault { Status HandleTuple(HloInstruction* tuple) override; + Status HandleGather(HloInstruction* gather) override; + Status HandleGetTupleElement(HloInstruction* get_tuple_element) override; Status HandleCopy(HloInstruction* copy) override; diff --git a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc index 97765d6590..685cacd7f7 100644 --- a/tensorflow/compiler/xla/service/hlo_evaluator_test.cc +++ b/tensorflow/compiler/xla/service/hlo_evaluator_test.cc @@ -1729,6 +1729,207 @@ TEST_P(HloEvaluatorTest, EvaluateWithSubstitutionsWithConstantOperand) { *result.ValueOrDie()); } +TEST_P(HloEvaluatorTest, EvaluateGather_TensorFlowGatherV1) { + const char* hlo_text = R"( +HloModule TensorFlowGatherV1 + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + ROOT gather = s32[2,3] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1, 3} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = Literal::CreateR1({0, 2}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR2({{1, 2, 3}, {7, 8, 9}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + +TEST_P(HloEvaluatorTest, EvaluateGather_TensorFlowGatherV2) { + const char* hlo_text = R"( +HloModule TensorFlowGatherV2 + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + ROOT gather = s32[3,2] gather(operand, indices), + output_window_dims={0}, + elided_window_dims={1}, + gather_dims_to_operand_dims={1}, + index_vector_dim=1, + window_bounds={3, 
1} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = Literal::CreateR1({0, 2}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR2({{1, 3}, {4, 6}, {7, 9}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + +TEST_P(HloEvaluatorTest, EvaluateGather_TensorFlowGatherMultipleBatchDims) { + const char* hlo_text = R"( +HloModule TensorFlowGatherMultipleBatchDims + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2,2] parameter(1) + ROOT gather = s32[2,3,2] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={1}, + gather_dims_to_operand_dims={1}, + index_vector_dim=2, + window_bounds={3, 1} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = + Literal::CreateR2({{0, 2}, {2, 1}}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR3( + {{{1, 3}, {4, 6}, {7, 9}}, {{3, 2}, {6, 5}, {9, 8}}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + +TEST_P(HloEvaluatorTest, EvaluateGather_TensorFlowGatherNd) { + const char* hlo_text = R"( +HloModule TensorFlowGatherNd + +ENTRY main { + operand = s32[3,3,2] parameter(0) + indices = s32[2,2] parameter(1) + ROOT gather = s32[2,2] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0,1}, + gather_dims_to_operand_dims={0,1}, + index_vector_dim=1, + window_bounds={1,1,2} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = + Literal::CreateR3({{{-1, 1}, {-2, 2}, {-3, 3}}, // + {{-4, 4}, {-5, 5}, {-6, 6}}, // + {{-7, 7}, {-8, 8}, {-9, 9}}}); + std::unique_ptr gather_indices = + Literal::CreateR2({{0, 0}, {1, 0}}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR2({{-1, 1}, {-4, 4}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + +TEST_P(HloEvaluatorTest, + EvaluateGather_TensorFlowGatherNdNonDefaultIndexVectorDim) { 
+ const char* hlo_text = R"( +HloModule TensorFlowGatherNd + +ENTRY main { + operand = s32[3,3,2] parameter(0) + indices = s32[2,2] parameter(1) + ROOT gather = s32[2,2] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0,1}, + gather_dims_to_operand_dims={0,1}, + index_vector_dim=0, + window_bounds={1,1,2} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = + Literal::CreateR3({{{-1, 1}, {-2, 2}, {-3, 3}}, // + {{-4, 4}, {-5, 5}, {-6, 6}}, // + {{-7, 7}, {-8, 8}, {-9, 9}}}); + std::unique_ptr gather_indices = + Literal::CreateR2({{0, 0}, {1, 0}}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR2({{-2, 2}, {-1, 1}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + +TEST_P(HloEvaluatorTest, EvaluateGather_DynamicSlice) { + const char* hlo_text = R"( +HloModule DynamicSlice + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2] parameter(1) + ROOT gather = s32[1,1] gather(operand, indices), + output_window_dims={0,1}, + elided_window_dims={}, + gather_dims_to_operand_dims={0,1}, + index_vector_dim=0, + window_bounds={1,1} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = Literal::CreateR1({1, 1}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR2({{5}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + +TEST_P(HloEvaluatorTest, EvaluateGather_BatchDynamicSlice) { + const char* hlo_text = R"( +HloModule BatchDynamicSlice + +ENTRY main { + operand = s32[3,3] parameter(0) + indices = s32[2,2] parameter(1) + ROOT gather = s32[2,1,1] gather(operand, indices), + output_window_dims={1,2}, + elided_window_dims={}, + gather_dims_to_operand_dims={0,1}, + index_vector_dim=0, + window_bounds={1,1} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = + Literal::CreateR2({{1, 2, 3}, {4, 5, 6}, {7, 8, 9}}); + std::unique_ptr gather_indices = + Literal::CreateR2({{2, 1}, {1, 
1}}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR3({{{8}}, {{5}}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + +TEST_P(HloEvaluatorTest, EvaluateGather_ZeroDimBounds) { + const char* hlo_text = R"( +HloModule TensorFlowGatherV1 + +ENTRY main { + operand = s32[3,0] parameter(0) + indices = s32[2] parameter(1) + ROOT gather = s32[2,0] gather(operand, indices), + output_window_dims={1}, + elided_window_dims={0}, + gather_dims_to_operand_dims={0}, + index_vector_dim=1, + window_bounds={1, 0} +} +)"; + ParseAndVerifyModule(hlo_text); + std::unique_ptr operand = Literal::CreateR2({{}, {}, {}}); + std::unique_ptr gather_indices = Literal::CreateR1({0, 2}); + LiteralTestUtil::ExpectEqual( + *Literal::CreateR2({{}, {}}), + *Evaluate({operand.get(), gather_indices.get()})); +} + INSTANTIATE_TEST_CASE_P(HloEvaluatorTest_Instantiation, HloEvaluatorTest, ::testing::ValuesIn(use_bf16_params)); diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index fb66f69709..92b365e072 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -612,6 +612,22 @@ class ShapeUtil { return Status::OK(); } + // Simple ergonomic wrapper around ShapeUtil::ForEachIndexWithStatus. 
+ struct IndexIterationSpace { + std::vector index_base; + std::vector index_count; + std::vector index_incr; + }; + + template + static Status ForEachIndexWithStatus( + const Shape& shape, const IndexIterationSpace& iteration_space, + FnTy&& function) { + return ShapeUtil::ForEachIndexWithStatus( + shape, iteration_space.index_base, iteration_space.index_count, + iteration_space.index_incr, std::forward(function)); + } + template static void ForEachIndex(const Shape& shape, tensorflow::gtl::ArraySlice base, diff --git a/tensorflow/compiler/xla/tests/BUILD b/tensorflow/compiler/xla/tests/BUILD index 1b2008accd..5fb38d65f1 100644 --- a/tensorflow/compiler/xla/tests/BUILD +++ b/tensorflow/compiler/xla/tests/BUILD @@ -139,6 +139,7 @@ cc_library( "//tensorflow/compiler/xla:status_macros", "//tensorflow/compiler/xla/service:hlo", "//tensorflow/compiler/xla/service:hlo_verifier", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", "//tensorflow/core:test", ], diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc index 506091ddd8..641907acf2 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.cc @@ -18,6 +18,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_verifier.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/test.h" @@ -40,18 +41,22 @@ void HloVerifiedTestBase::TearDown() { << "TearDown called more than once; it should be called exactly once."; tear_down_called_ = true; if (module_) { - HloVerifier verifier; - xla::StatusOr mutated = verifier.Run(module_.get()); - if (!mutated.ok()) { - ADD_FAILURE() << "HloVerifier failed: " << mutated.status(); - } else { - EXPECT_FALSE(mutated.ValueOrDie()) - << "HloVerifier should never mutate the HloModule"; - } + VerifyModule(); } HloTestBase::TearDown(); } +void HloVerifiedTestBase::VerifyModule() { + HloVerifier verifier; + xla::StatusOr mutated = verifier.Run(module_.get()); + if (!mutated.ok()) { + ADD_FAILURE() << "HloVerifier failed: " << mutated.status(); + } else { + EXPECT_FALSE(mutated.ValueOrDie()) + << "HloVerifier should never mutate the HloModule"; + } +} + HloModule& HloVerifiedTestBase::module() { if (!module_) { module_ = CreateNewModule(); @@ -59,4 +64,9 @@ HloModule& HloVerifiedTestBase::module() { return *module_; } +void HloVerifiedTestBase::ParseAndVerifyModule(const char* hlo_text) { + CHECK(!module_) << "Called ParseModule when test already has a module."; + TF_ASSERT_OK_AND_ASSIGN(module_, tools::Parse(hlo_text)); + VerifyModule(); +} } // namespace xla diff --git a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h index 492688bf7d..c0cb12bc93 100644 --- a/tensorflow/compiler/xla/tests/hlo_verified_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_verified_test_base.h @@ -44,6 +44,7 @@ class HloVerifiedTestBase : public HloTestBase { // Returns the default HloModule, lazily creating it if necessary via // HloTestBase::CreateNewModule(). 
HloModule& module(); + void ParseAndVerifyModule(const char* hlo_text); // Sets the shape-size function used during hlo verification. If this isn't // called, a default ShapeVerifier is used instead. @@ -55,6 +56,7 @@ class HloVerifiedTestBase : public HloTestBase { std::unique_ptr module_; // Lazily populated. Access via module(). std::unique_ptr shape_verifier_; bool tear_down_called_ = false; + void VerifyModule(); }; } // namespace xla diff --git a/tensorflow/compiler/xla/util.h b/tensorflow/compiler/xla/util.h index 82e5a59da0..98467cd650 100644 --- a/tensorflow/compiler/xla/util.h +++ b/tensorflow/compiler/xla/util.h @@ -494,6 +494,11 @@ template auto c_find_if(const C& c, Pred&& pred) -> decltype(std::begin(c)) { return std::find_if(std::begin(c), std::end(c), std::forward(pred)); } + +template +auto c_find(const C& c, Value&& value) -> decltype(std::begin(c)) { + return std::find(std::begin(c), std::end(c), std::forward(value)); +} } // namespace xla #define XLA_LOG_LINES(SEV, STRING) \ -- GitLab From e28aa1b817c179976b0535dd321c0dfde506725f Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Tue, 6 Mar 2018 12:33:50 -0800 Subject: [PATCH 472/884] Create OSS-compatible TF Lite portable test suite rule Adding the new rule tflite_portable_test_suite to the bottom of a package in TF Lite will indicate that all previous cc_test rules in the package are supposed to be portable, unless excluded by a tag. Outside of Google, tflite_portable_test_suite is a no-op, which may change in the future as mobile testing infrastructure improves. 
PiperOrigin-RevId: 188063712 --- tensorflow/contrib/lite/special_rules.bzl | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 tensorflow/contrib/lite/special_rules.bzl diff --git a/tensorflow/contrib/lite/special_rules.bzl b/tensorflow/contrib/lite/special_rules.bzl new file mode 100644 index 0000000000..54083c4918 --- /dev/null +++ b/tensorflow/contrib/lite/special_rules.bzl @@ -0,0 +1,6 @@ +"""External versions of build rules that differ outside of Google.""" + +def tflite_portable_test_suite(**kwargs): + """This is a no-op outside of Google.""" + _ignore = [kwargs] + pass -- GitLab From b7d97351198ee29a82a88c73e5d531baf07da211 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Tue, 6 Mar 2018 13:06:53 -0800 Subject: [PATCH 473/884] Improvement to eager linear regression benchmark Before: entry { name: "EagerLinearRegressionBenchmark.eager_train_cpu" iters: 2000 wall_time: 2.45178794861 extras { key: "examples_per_sec" value { double_value: 52206.7987456 } } } After: entry { name: "EagerLinearRegressionBenchmark.eager_train_cpu" iters: 2000 wall_time: 1.9873790741 extras { key: "examples_per_sec" value { double_value: 64406.4344182 } } } PiperOrigin-RevId: 188068838 --- .../linear_regression/linear_regression.py | 2 +- tensorflow/python/eager/backprop.py | 23 +------- tensorflow/python/eager/context.py | 25 +++++++++ tensorflow/python/eager/pywrap_tfe_src.cc | 53 ++++++++++++------- tensorflow/python/framework/tensor_shape.py | 12 ++++- tensorflow/python/framework/test_util.py | 1 + tensorflow/python/layers/base.py | 15 +++--- tensorflow/python/layers/core.py | 3 +- tensorflow/python/ops/math_grad.py | 15 ++++-- tensorflow/python/ops/math_ops.py | 16 +++++- tensorflow/python/ops/nn_ops.py | 5 +- .../python/ops/resource_variable_ops.py | 19 +++++++ .../python/training/gradient_descent.py | 7 ++- 13 files changed, 137 insertions(+), 59 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py 
b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py index 157a6360ea..6ab847cb78 100644 --- a/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py +++ b/tensorflow/contrib/eager/python/examples/linear_regression/linear_regression.py @@ -54,7 +54,7 @@ class LinearModel(tf.keras.Model): def mean_square_loss(model, xs, ys): - return tf.reduce_mean(tf.square(model(xs) - ys)) + return tf.reduce_mean(tf.square(tf.subtract(model(xs), ys))) def fit(model, dataset, optimizer, verbose=False, logdir=None): diff --git a/tensorflow/python/eager/backprop.py b/tensorflow/python/eager/backprop.py index 14bcc60006..88de1a951f 100644 --- a/tensorflow/python/eager/backprop.py +++ b/tensorflow/python/eager/backprop.py @@ -18,7 +18,6 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -import collections import functools import operator import threading @@ -43,26 +42,6 @@ from tensorflow.python.util import nest from tensorflow.python.util import tf_inspect -class _TensorCache(object): - """Simple cache which evicts items based on length in a FIFO manner.""" - - def __init__(self, max_items=256): - self._data = collections.OrderedDict() - self._max_items = max_items if max_items else 256 - - def put(self, key, value): - self._data[key] = value - - if len(self._data) > self._max_items: - self._data.popitem(last=False) - - def get(self, key): - return self._data.get(key, None) - - def flush(self): - self._data = {} - - _op_attr_type_cache = {} @@ -622,7 +601,7 @@ def _num_elements(grad): raise ValueError("`grad` not a Tensor or IndexedSlices.") -_zeros_cache = _TensorCache() +_zeros_cache = context._TensorCache() # pylint: disable=protected-access def _fast_fill(value, shape, dtype): diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 0e9c21b221..fb27ab65fa 100644 --- a/tensorflow/python/eager/context.py +++ 
b/tensorflow/python/eager/context.py @@ -54,6 +54,26 @@ DEVICE_PLACEMENT_SILENT_FOR_INT32 = ( pywrap_tensorflow.TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32) +class _TensorCache(object): + """Simple cache which evicts items based on length in a FIFO manner.""" + + def __init__(self, max_items=256): + self._data = collections.OrderedDict() + self._max_items = max_items if max_items else 256 + + def put(self, key, value): + self._data[key] = value + + if len(self._data) > self._max_items: + self._data.popitem(last=False) + + def get(self, key): + return self._data.get(key, None) + + def flush(self): + self._data = {} + + # TODO(agarwal): better name ? class _EagerContext(threading.local): """Thread local eager context.""" @@ -67,6 +87,7 @@ class _EagerContext(threading.local): self.recording_summaries = False self.summary_writer_resource = None self.scalar_cache = {} + self.ones_rank_cache = _TensorCache() ContextStackEntry = collections.namedtuple( @@ -251,6 +272,10 @@ class Context(object): """Per-device cache for scalars.""" return self._eager_context.scalar_cache + def ones_rank_cache(self): + """Cache of all-ones shape tensors (`[1] * rank`), keyed by rank.""" + return self._eager_context.ones_rank_cache + @property def scope_name(self): """Returns scope name for the current thread.""" diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 27c9d05081..9146e2bb95 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -93,6 +93,34 @@ Py_ssize_t TensorShapeNumDims(PyObject* value) { return size; } +bool IsInteger(PyObject* py_value) { +#if PY_MAJOR_VERSION >= 3 + return PyLong_Check(py_value); +#else + return PyInt_Check(py_value); +#endif +} + +bool ParseDimensionValue(const string& key, PyObject* py_value, + TF_Status* status, int64_t* value) { + if (IsInteger(py_value)) { + return ParseInt64Value(key, py_value, status, value); + } + + tensorflow::Safe_PyObjectPtr dimension_value( 
PyObject_GetAttrString(py_value, "_value")); + if (dimension_value == nullptr) { + TF_SetStatus( + status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat("Expecting a Dimension for attr ", key, + ", got ", py_value->ob_type->tp_name) + .c_str()); + return false; + } + + return ParseInt64Value(key, dimension_value.get(), status, value); +} + bool ParseStringValue(const string& key, PyObject* py_value, TF_Status* status, const char** value) { if (PyBytes_Check(py_value)) { @@ -119,14 +147,6 @@ bool ParseBoolValue(const string& key, PyObject* py_value, TF_Status* status, return true; } -bool IsInteger(PyObject* py_value) { -#if PY_MAJOR_VERSION >= 3 - return PyLong_Check(py_value); -#else - return PyInt_Check(py_value); -#endif -} - // The passed in py_value is expected to be an object of the python type // dtypes.DType or an int. bool ParseTypeValue(const string& key, PyObject* py_value, TF_Status* status, @@ -135,7 +155,8 @@ bool ParseTypeValue(const string& key, PyObject* py_value, TF_Status* status, return ParseIntValue(key, py_value, status, value); } - PyObject* py_type_enum = PyObject_GetAttrString(py_value, "_type_enum"); + tensorflow::Safe_PyObjectPtr py_type_enum( + PyObject_GetAttrString(py_value, "_type_enum")); if (py_type_enum == nullptr) { TF_SetStatus( status, TF_INVALID_ARGUMENT, @@ -145,13 +166,7 @@ bool ParseTypeValue(const string& key, PyObject* py_value, TF_Status* status, return false; } - if (!ParseIntValue(key, py_type_enum, status, value)) { - Py_DECREF(py_type_enum); - return false; - } - - Py_DECREF(py_type_enum); - return true; + return ParseIntValue(key, py_type_enum.get(), status, value); } bool SetOpAttrList( @@ -240,7 +255,8 @@ bool SetOpAttrList( auto inner_py_value = PySequence_ITEM(py_value, j); if (inner_py_value == Py_None) { *offset = -1; - } else if (!ParseInt64Value(key, inner_py_value, status, offset)) { + } else if (!ParseDimensionValue(key, inner_py_value, status, + offset)) { return false; } ++offset; @@ -424,7 +440,8 @@ 
bool SetOpAttrScalar( auto inner_py_value = PySequence_ITEM(py_value, i); if (inner_py_value == Py_None) { dims[i] = -1; - } else if (!ParseInt64Value(key, inner_py_value, status, &dims[i])) { + } else if (!ParseDimensionValue(key, inner_py_value, status, + &dims[i])) { return false; } } diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index 222071cb9e..6f2ab8408e 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -456,6 +456,7 @@ class TensorShape(object): else: # Got a list of dimensions self._dims = [as_dimension(d) for d in dims_iter] + self._ndims = None def __repr__(self): return "TensorShape(%r)" % self._dims @@ -473,19 +474,26 @@ class TensorShape(object): """Returns a list of Dimensions, or None if the shape is unspecified.""" return self._dims + @dims.setter + def dims(self, dims): + self._dims = dims + self._ndims = None + @property def ndims(self): """Returns the rank of this shape, or None if it is unspecified.""" if self._dims is None: return None else: - return len(self._dims) + if self._ndims is None: + self._ndims = len(self._dims) + return self._ndims def __len__(self): """Returns the rank of this shape, or raises ValueError if unspecified.""" if self._dims is None: raise ValueError("Cannot take the length of Shape with unknown rank.") - return len(self._dims) + return self.ndims def __bool__(self): """Returns True if this shape contains non-zero information.""" diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 78252e4518..1c8398e686 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -472,6 +472,7 @@ def assert_no_new_tensors(f): # Make an effort to clear caches, which would otherwise look like leaked # Tensors. 
backprop._zeros_cache.flush() + context.get_default_context().ones_rank_cache().flush() context.get_default_context().scalar_cache().clear() gc.collect() tensors_after = [ diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index c6d16a3bc0..15f72786de 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -129,10 +129,10 @@ class Layer(checkpointable.CheckpointableBase): self._reuse = kwargs.get('_reuse') self._graph = None # Will be set at build time. self._dtype = None if dtype is None else dtypes.as_dtype(dtype).name - call_fn_args = estimator_util.fn_args(self.call) - self._compute_previous_mask = ('mask' in call_fn_args or + self._call_fn_args = estimator_util.fn_args(self.call) + self._compute_previous_mask = ('mask' in self._call_fn_args or hasattr(self, 'compute_mask')) - self._call_has_scope_arg = 'scope' in call_fn_args + self._call_has_scope_arg = 'scope' in self._call_fn_args # These lists will be filled via successive calls # to self._add_inbound_node(). @@ -642,8 +642,9 @@ class Layer(checkpointable.CheckpointableBase): if (not hasattr(self, '_compute_previous_mask') or self._compute_previous_mask): previous_mask = _collect_previous_mask(inputs) - if ('mask' in estimator_util.fn_args(self.call) and - 'mask' not in kwargs and + if not hasattr(self, '_call_fn_args'): + self._call_fn_args = estimator_util.fn_args(self.call) + if ('mask' in self._call_fn_args and 'mask' not in kwargs and not _is_all_none(previous_mask)): # The previous layer generated a mask, and mask was not explicitly pass # to __call__, hence we set previous_mask as the default value. @@ -699,7 +700,9 @@ class Layer(checkpointable.CheckpointableBase): # TODO(agarwal): Fix the sub-classes and avoid this complexity. 
call_has_scope_arg = self._call_has_scope_arg except AttributeError: - call_has_scope_arg = 'scope' in estimator_util.fn_args(self.call) + self._call_fn_args = estimator_util.fn_args(self.call) + self._call_has_scope_arg = 'scope' in self._call_fn_args + call_has_scope_arg = self._call_has_scope_arg if call_has_scope_arg: kwargs['scope'] = scope # Check input assumptions set after layer building, e.g. input shape. diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py index 6970bf9234..bdbbc59eaf 100644 --- a/tensorflow/python/layers/core.py +++ b/tensorflow/python/layers/core.py @@ -35,6 +35,7 @@ from tensorflow.python.layers import utils from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import gen_math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops from tensorflow.python.ops import standard_ops @@ -159,7 +160,7 @@ class Dense(base.Layer): output_shape = shape[:-1] + [self.units] outputs.set_shape(output_shape) else: - outputs = standard_ops.matmul(inputs, self.kernel) + outputs = gen_math_ops.mat_mul(inputs, self.kernel) if self.use_bias: outputs = nn.bias_add(outputs, self.bias) if self.activation is not None: diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index 51e19b4ad3..55dd0c0e0d 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -52,10 +52,18 @@ def _SumGrad(op, grad): if axes is not None: rank = len(input_0_shape) if np.array_equal(axes, np.arange(rank)): # Reduce all dims. 
- grad = array_ops.reshape(grad, [1] * rank) + if context.in_graph_mode(): + new_shape = [1] * rank + else: + ctx = context.context() + new_shape = ctx.ones_rank_cache().get(rank) + if new_shape is None: + new_shape = constant_op.constant([1] * rank, dtype=dtypes.int32) + ctx.ones_rank_cache().put(rank, new_shape) + grad = array_ops.reshape(grad, new_shape) # If shape is not fully defined (but rank is), we use Shape. if None not in input_0_shape: - input_shape = input_0_shape + input_shape = constant_op.constant(input_0_shape, dtype=dtypes.int32) else: input_shape = array_ops.shape(op.inputs[0]) return [array_ops.tile(grad, input_shape), None] @@ -338,7 +346,8 @@ def _SquareGrad(op, grad): # Added control dependencies to prevent 2*x from being computed too early. with ops.control_dependencies([grad]): x = math_ops.conj(x) - return math_ops.multiply(grad, math_ops.multiply(x, 2.0)) + y = constant_op.constant(2.0, dtype=x.dtype) + return math_ops.multiply(grad, math_ops.multiply(x, y)) @ops.RegisterGradient("Sqrt") diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 14d6862919..c019a5851f 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -176,6 +176,11 @@ arg_max = deprecation.deprecated(None, "Use `argmax` instead")(arg_max) # pylin arg_min = deprecation.deprecated(None, "Use `argmin` instead")(arg_min) # pylint: disable=used-before-assignment +# This is set by resource_variable_ops.py. 
It is included in this way since +# there is a circular dependency between math_ops and resource_variable_ops +_resource_variable_type = None + + def _set_doc(doc): def _decorator(func): @@ -2002,8 +2007,15 @@ def matmul(a, if transpose_b and adjoint_b: raise ValueError("Only one of transpose_b and adjoint_b can be True.") - a = ops.convert_to_tensor(a, name="a") - b = ops.convert_to_tensor(b, name="b") + if context.in_graph_mode(): + a = ops.convert_to_tensor(a, name="a") + b = ops.convert_to_tensor(b, name="b") + else: + if not isinstance(a, (ops.EagerTensor, _resource_variable_type)): + a = ops.convert_to_tensor(a, name="a") + if not isinstance(b, (ops.EagerTensor, _resource_variable_type)): + b = ops.convert_to_tensor(b, name="b") + # TODO(apassos) remove _shape_tuple here when it is not needed. a_shape = a._shape_tuple() # pylint: disable=protected-access b_shape = b._shape_tuple() # pylint: disable=protected-access diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 852ab365bb..66a05f2228 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1504,8 +1504,9 @@ def bias_add(value, bias, data_format=None, name=None): A `Tensor` with the same type as `value`. 
""" with ops.name_scope(name, "BiasAdd", [value, bias]) as name: - value = ops.convert_to_tensor(value, name="input") - bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") + if context.in_graph_mode(): + value = ops.convert_to_tensor(value, name="input") + bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") return gen_nn_ops.bias_add(value, bias, data_format=data_format, name=name) diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 6c5d692e82..5b8af8054c 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import gen_array_ops from tensorflow.python.ops import gen_resource_variable_ops from tensorflow.python.ops import gen_state_ops +from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables # go/tf-wildcard-import # pylint: disable=wildcard-import @@ -483,6 +484,7 @@ class ResourceVariable(variables.Variable): # all in graph mode. 
self._handle_deleter = EagerResourceDeleter( handle=self._handle, handle_device=self._handle.device) + self._cached_shape_as_list = None def _init_from_proto(self, variable_def, import_scope=None): """Initializes from `VariableDef` proto.""" @@ -529,6 +531,7 @@ class ResourceVariable(variables.Variable): self._graph_element = g.get_tensor_by_name( self._handle.op.name + "/Read/ReadVariableOp:0") self._constraint = None + self._cached_shape_as_list = None def __nonzero__(self): return self.__bool__() @@ -561,6 +564,20 @@ class ResourceVariable(variables.Variable): """The shape of this variable.""" return self._shape + def _shape_as_list(self): + if self._cached_shape_as_list: + return self._cached_shape_as_list + if self.shape.ndims is None: + return None + self._cached_shape_as_list = [dim.value for dim in self.shape.dims] + return self._cached_shape_as_list + + def _shape_tuple(self): + shape = self._shape_as_list() + if shape is None: + return None + return tuple(shape) + @property def create(self): """The op responsible for initializing this variable.""" @@ -934,6 +951,7 @@ class ResourceVariable(variables.Variable): pywrap_tensorflow.TFE_Py_RegisterResourceVariableType(ResourceVariable) +math_ops._resource_variable_type = ResourceVariable # pylint: disable=protected-access def _dense_var_to_tensor(var, dtype=None, name=None, as_ref=False): @@ -985,6 +1003,7 @@ class _UnreadVariable(ResourceVariable): def set_shape(self, shape): self._shape = shape + self._cached_shape_as_list = None @property def op(self): diff --git a/tensorflow/python/training/gradient_descent.py b/tensorflow/python/training/gradient_descent.py index 380e14e024..538164adb6 100644 --- a/tensorflow/python/training/gradient_descent.py +++ b/tensorflow/python/training/gradient_descent.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.eager import context from tensorflow.python.framework import 
ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import resource_variable_ops @@ -43,6 +44,7 @@ class GradientDescentOptimizer(optimizer.Optimizer): """ super(GradientDescentOptimizer, self).__init__(use_locking, name) self._learning_rate = learning_rate + self._learning_rate_tensor = None def _apply_dense(self, grad, var): return training_ops.apply_gradient_descent( @@ -69,5 +71,6 @@ class GradientDescentOptimizer(optimizer.Optimizer): return var.scatter_sub(delta, use_locking=self._use_locking) def _prepare(self): - self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate, - name="learning_rate") + if context.in_graph_mode() or self._learning_rate_tensor is None: + self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate, + name="learning_rate") -- GitLab From 77fbbdf3793ecb1037644d865e60814b9f5bc39c Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Tue, 6 Mar 2018 13:07:53 -0800 Subject: [PATCH 474/884] disabling timing out test on msan PiperOrigin-RevId: 188068963 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 10cb05ece1..22bcf90dd4 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -171,6 +171,7 @@ py_test( "no_cuda_on_cpu_tap", "no_oss", "no_pip", + "nomsan", ], deps = [ ":dataset_serialization_test", -- GitLab From 77e20d1b1912febfba568cb2ea3f9df7d3066e5c Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Tue, 6 Mar 2018 13:08:34 -0800 Subject: [PATCH 475/884] disabling flaky test in msan PiperOrigin-RevId: 188069046 --- tensorflow/contrib/timeseries/python/timeseries/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index fff972c1f3..0ce7b0bb91 100644 
--- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -425,6 +425,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 + "nomsan", ], deps = [ ":feature_keys", -- GitLab From abac588e745fab66200741f45e9343b71820a311 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Tue, 6 Mar 2018 13:20:36 -0800 Subject: [PATCH 476/884] Fix the include for cuda_runtime_api.h --- tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc | 2 +- .../contrib/tensorrt/resources/trt_int8_calibrator.cc | 3 ++- .../contrib/tensorrt/resources/trt_int8_calibrator.h | 9 +++++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc index b78ff18a8d..d4be96a424 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -25,7 +25,7 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda_runtime_api.h" +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" namespace tensorflow { diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc index 1a842cf993..1ae6347220 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -23,7 +23,7 @@ limitations under the License. 
#if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda_runtime_api.h" +#include "cuda/include/cuda_runtime_api.h" namespace tensorflow { namespace tensorrt { @@ -120,5 +120,6 @@ TRTInt8Calibrator::~TRTInt8Calibrator() { } // namespace tensorrt } // namespace tensorflow + #endif #endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h index aaf93ef733..4e7b74d620 100644 --- a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -24,8 +24,10 @@ limitations under the License. #if GOOGLE_CUDA #if GOOGLE_TENSORRT -#include "cuda_runtime_api.h" + +#include "cuda/include/cuda_runtime_api.h" #include "tensorrt/include/NvInfer.h" + namespace tensorflow { namespace tensorrt { // This class provides a 1 element queue to match TFs push model to @@ -61,8 +63,11 @@ struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { bool batch_is_set_; string engine_name_; }; + } // namespace tensorrt } // namespace tensorflow -#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ + #endif #endif + +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ -- GitLab From ad08baa5c27ab063596116a178ccff7d3796df65 Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Tue, 6 Mar 2018 13:12:17 -0800 Subject: [PATCH 477/884] IteratorContext might be dead while GetNext is being called for the ThreadPoolDataset. Making sure we don't capture that. 
PiperOrigin-RevId: 188069516 --- tensorflow/contrib/data/kernels/threadpool_dataset_op.cc | 8 ++------ tensorflow/core/framework/dataset.h | 8 ++++++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc b/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc index 4b3edde85f..63e19ae3f8 100644 --- a/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc +++ b/tensorflow/contrib/data/kernels/threadpool_dataset_op.cc @@ -166,14 +166,10 @@ class ThreadPoolDatasetOp : public UnaryDatasetOpKernel { params.runner = [pool](std::function c) { pool->Schedule(std::move(c)); }; - params.stats_aggregator_getter = [ctx]() { - return ctx->stats_aggregator(); - }; + params.stats_aggregator_getter = ctx->stats_aggregator_getter(); params.lib = ctx->lib(); params.function_library = ctx->function_library(); - params.allocator_getter = [ctx](AllocatorAttributes attrs) { - return ctx->allocator(attrs); - }; + params.allocator_getter = ctx->allocator_getter(); IteratorContext threadpool_ctx(params); return input_impl_->GetNext(&threadpool_ctx, out_tensors, end_of_sequence); diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h index 6ab23d92a4..beaf0adbc5 100644 --- a/tensorflow/core/framework/dataset.h +++ b/tensorflow/core/framework/dataset.h @@ -305,6 +305,14 @@ class IteratorContext { return params_.allocator_getter(attrs); } + std::function allocator_getter() { + return params_.allocator_getter; + } + + std::function()> stats_aggregator_getter() { + return params_.stats_aggregator_getter; + } + private: Params params_; }; -- GitLab From 1d64f9038084095bf92a8ca120d7e1f34ec24ac9 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 6 Mar 2018 13:38:56 -0800 Subject: [PATCH 478/884] Add TF_TryEvaluateConstant to the C API and have smart_cond call it. This effectively plumbs EvaluateConstantTensor to smart_cond. 
This makes smart_cond even smarter by trying to evaluate the predicate if it can't statically infer it. PiperOrigin-RevId: 188073244 --- tensorflow/c/c_api.cc | 20 +++++++++ tensorflow/c/c_api.h | 17 +++++-- tensorflow/python/client/tf_session.i | 2 + tensorflow/python/client/tf_session_helper.cc | 15 +++++++ tensorflow/python/client/tf_session_helper.h | 5 +++ tensorflow/python/framework/smart_cond.py | 12 +++++ .../python/framework/smart_cond_test.py | 44 +++++++++++++++++-- 7 files changed, 107 insertions(+), 8 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 85f1d1639b..3d0e886476 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -30,6 +30,7 @@ limitations under the License. #endif #include "tensorflow/c/c_api_internal.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/eval_const_tensor.h" #include "tensorflow/core/common_runtime/shape_refiner.h" #include "tensorflow/core/framework/allocation_description.pb.h" #include "tensorflow/core/framework/log_memory.h" @@ -73,6 +74,7 @@ using tensorflow::NodeBuilder; using tensorflow::NodeDef; using tensorflow::OpDef; using tensorflow::OpRegistry; +using tensorflow::OutputTensor; using tensorflow::PartialTensorShape; using tensorflow::RunMetadata; using tensorflow::RunOptions; @@ -2682,6 +2684,24 @@ void TF_SessionPRun(TF_Session* session, const char* handle, output_values, target_names, nullptr, status); } +unsigned char TF_TryEvaluateConstant(TF_Graph* graph, TF_Output output, + TF_Tensor** result, TF_Status* status) { + *result = nullptr; + mutex_lock l(graph->mu); + OutputTensor tensor(&output.oper->node, output.index); + bool evaluated; + Tensor result_tensor; + status->status = EvaluateConstantTensor( + tensor, graph->refiner, *graph->graph.op_registry(), + graph->graph.versions().producer(), &evaluated, &result_tensor); + if (evaluated) { + DCHECK(status->status.ok()); + *result = TF_TensorFromTensor(result_tensor, 
status); + if (!status->status.ok()) evaluated = false; + } + return evaluated; +} + TF_ApiDefMap* TF_NewApiDefMap(TF_Buffer* op_list_buffer, TF_Status* status) { tensorflow::OpList op_list; if (!op_list.ParseFromArray(op_list_buffer->data, op_list_buffer->length)) { diff --git a/tensorflow/c/c_api.h b/tensorflow/c/c_api.h index ad592ef709..b32f574628 100644 --- a/tensorflow/c/c_api.h +++ b/tensorflow/c/c_api.h @@ -1275,13 +1275,22 @@ TF_CAPI_EXPORT extern void TF_FunctionGetAttrValueProto( // Deleting a function does not remove it from any graphs it was copied to. TF_CAPI_EXPORT extern void TF_DeleteFunction(TF_Function* func); +// Attempts to evaluate `output`. This will only be possible if `output` doesn't +// depend on any graph inputs (this function is safe to call if this isn't the +// case though). +// +// If the evaluation is successful, this function returns true and `output`'s +// value is returned in `result`. Otherwise returns false. An error status is +// returned if something is wrong with the graph or input. Note that this may +// return false even if no error status is set. +TF_CAPI_EXPORT extern unsigned char TF_TryEvaluateConstant(TF_Graph* graph, + TF_Output output, + TF_Tensor** result, + TF_Status* status); + // TODO(josh11b): Register OpDef, available to all operations added // to this graph. -// The following two may both benefit from a subgraph-definition API -// that re-uses most of the graph-definition API. -// TODO(andydavis): Add functions to a graph. - // -------------------------------------------------------------------------- // API for driving Graph execution. 
diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index f305cd271f..53557acaa1 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -720,6 +720,8 @@ def TF_Reset(target, containers=None, config=None): } %unignore SetRequireShapeInferenceFns; +%unignore TF_TryEvaluateConstant_wrapper; +%noexception TF_TryEvaluateConstant_wrapper; %include "tensorflow/python/client/tf_session_helper.h" diff --git a/tensorflow/python/client/tf_session_helper.cc b/tensorflow/python/client/tf_session_helper.cc index 361dbc22b0..a8ab91749a 100644 --- a/tensorflow/python/client/tf_session_helper.cc +++ b/tensorflow/python/client/tf_session_helper.cc @@ -493,4 +493,19 @@ std::vector TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper( return input_strs; } +PyObject* TF_TryEvaluateConstant_wrapper(TF_Graph* graph, TF_Output output, + TF_Status* status) { + TF_Tensor* result_tensor; + bool evaluated = + TF_TryEvaluateConstant(graph, output, &result_tensor, status); + if (!evaluated || TF_GetCode(status) != TF_OK) Py_RETURN_NONE; + + Safe_TF_TensorPtr safe_result_tensor(result_tensor); + PyObject* out; + Status s = TF_TensorToPyArray(std::move(safe_result_tensor), &out); + Set_TF_Status_from_Status(status, s); + if (!s.ok()) Py_RETURN_NONE; + return out; +} + } // namespace tensorflow diff --git a/tensorflow/python/client/tf_session_helper.h b/tensorflow/python/client/tf_session_helper.h index 29d5b28f40..83318dc178 100644 --- a/tensorflow/python/client/tf_session_helper.h +++ b/tensorflow/python/client/tf_session_helper.h @@ -213,6 +213,11 @@ std::vector TF_GraphGetTensorShape_wrapper(TF_Graph* graph, std::vector TF_ImportGraphDefResultsMissingUnusedInputMappings_wrapper( TF_ImportGraphDefResults* results); +// If evaluation was possible, returns the numpy ndarray of the evaluated +// result. Otherwise returns None. 
+PyObject* TF_TryEvaluateConstant_wrapper(TF_Graph* graph, TF_Output output, + TF_Status* status); + } // namespace tensorflow #endif // TENSORFLOW_PYTHON_CLIENT_TF_SESSION_HELPER_H_ diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py index f97bb01f54..4f2f1db882 100644 --- a/tensorflow/python/framework/smart_cond.py +++ b/tensorflow/python/framework/smart_cond.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python import pywrap_tensorflow as c_api +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util from tensorflow.python.ops import control_flow_ops @@ -74,6 +76,16 @@ def smart_constant_value(pred): pred_value = pred elif isinstance(pred, ops.Tensor): pred_value = tensor_util.constant_value(pred) + # TODO(skyewm): consider folding this into tensor_util.constant_value when + # _USE_C_API is removed (there may be performance and correctness bugs, so I + # wanted to limit the change hidden behind _USE_C_API). 
+ # pylint: disable=protected-access + if pred_value is None and ops._USE_C_API: + with errors.raise_exception_on_not_ok_status() as status: + pred_value = c_api.TF_TryEvaluateConstant_wrapper( + pred.graph._c_graph, pred._as_tf_output(), status) + # pylint: enable=protected-access + else: raise TypeError("`pred` must be a Tensor or a Python bool.") return pred_value diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py index b682506da0..3070355980 100644 --- a/tensorflow/python/framework/smart_cond_test.py +++ b/tensorflow/python/framework/smart_cond_test.py @@ -19,9 +19,11 @@ from __future__ import print_function from tensorflow.python.client import session from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import smart_cond from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -29,7 +31,7 @@ from tensorflow.python.platform import googletest @test_util.with_c_api class SmartCondTest(test_util.TensorFlowTestCase): - def testSmartCondTrue(self): + def testTrue(self): with ops.Graph().as_default(): with session.Session(): x = constant_op.constant(2) @@ -38,7 +40,7 @@ class SmartCondTest(test_util.TensorFlowTestCase): lambda: math_ops.multiply(y, 5)) self.assertEqual(z.eval(), 32) - def testSmartCondFalse(self): + def testFalse(self): with ops.Graph().as_default(): with session.Session(): x = constant_op.constant(4) @@ -47,14 +49,48 @@ class SmartCondTest(test_util.TensorFlowTestCase): lambda: math_ops.multiply(y, 3)) self.assertEqual(z.eval(), 9) - def testSmartCondMissingArg1(self): + def testUnknown(self): + with ops.Graph().as_default(): + with session.Session(): + x = array_ops.placeholder(dtype=dtypes.int32) + y = smart_cond.smart_cond(x > 0, 
lambda: constant_op.constant(1), + lambda: constant_op.constant(2)) + self.assertEqual(y.eval(feed_dict={x: 1}), 1) + self.assertEqual(y.eval(feed_dict={x: -1}), 2) + + def testEval(self): + # Constant expression evaluation only works with the C API enabled. + if not ops._USE_C_API: return + + with ops.Graph().as_default(): + with session.Session(): + x = constant_op.constant(1) + y = constant_op.constant(2) + # x * y > 0 can be evaluated at graph construction time, so the false + # branch shouldn't be evaluated at all. + def raise_exception(): + raise RuntimeError("did not expect to be called") + z = smart_cond.smart_cond(x * y > 0, lambda: constant_op.constant(1), + raise_exception) + self.assertEqual(z.eval(feed_dict={x: 1}), 1) + + def testPlaceholderWithDefault(self): + with ops.Graph().as_default(): + with session.Session(): + x = array_ops.placeholder_with_default(1, shape=()) + y = smart_cond.smart_cond(x > 0, lambda: constant_op.constant(1), + lambda: constant_op.constant(2)) + self.assertEqual(y.eval(), 1) + self.assertEqual(y.eval(feed_dict={x: -1}), 2) + + def testMissingArg1(self): with ops.Graph().as_default(): with session.Session(): x = constant_op.constant(1) with self.assertRaises(TypeError): smart_cond.smart_cond(True, false_fn=lambda: x) - def testSmartCondMissingArg2(self): + def testMissingArg2(self): with ops.Graph().as_default(): with session.Session(): x = constant_op.constant(1) -- GitLab From 3942b2673c1935a56e506ab865d4f0c8d87c0ba5 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Tue, 6 Mar 2018 13:40:16 -0800 Subject: [PATCH 479/884] Fix README formatting. 
PiperOrigin-RevId: 188073454 --- tensorflow/contrib/quantize/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/contrib/quantize/README.md b/tensorflow/contrib/quantize/README.md index 8b0e7bb68f..348c824a40 100644 --- a/tensorflow/contrib/quantize/README.md +++ b/tensorflow/contrib/quantize/README.md @@ -3,8 +3,7 @@ tf.contrib.quantize provides tools for transforming graphs to include ops to model quantization of weights, biases and activations during both training and inference. This is done using the -[fake quantization op] -(https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). +[fake quantization op](https://www.tensorflow.org/versions/r0.12/api_docs/python/array_ops/fake_quantization). Recent literature has shown that fixed point networks provide comparable performance to floating point networks [1]. This is achieved by modeling the -- GitLab From b5a5d4d677ff50cee5b98918497fd24cb54131c6 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Tue, 6 Mar 2018 13:54:33 -0800 Subject: [PATCH 480/884] Fix std::string and unused Status problems --- .../contrib/tensorrt/convert/convert_graph.cc | 7 +- .../contrib/tensorrt/convert/convert_nodes.cc | 75 +++++++++---------- .../contrib/tensorrt/kernels/trt_engine_op.cc | 3 +- 3 files changed, 40 insertions(+), 45 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index 1feaabbfed..ddbdf8dbc6 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -192,7 +192,7 @@ static tensorflow::Status FillSubGraphEdgeSets(ConvertGraphParams* p) { }; tensorflow::Status GetCalibNode(ConvertGraphParams* params) { - FillSubGraphEdgeSets(params); + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); tensorflow::NodeDef trt_node_def; SubGraphParams s(params->graph, 
params->subgraph_node_ids, params->subgraph_inputs, params->subgraph_outputs, @@ -214,13 +214,14 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { auto dst_input = in_edge->dst_input(); VLOG(1) << " update edge " << trt_node->name() << ":" << src_output << " -> " << dst_node->name() << ":" << dst_input; - params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input); + TF_RETURN_IF_ERROR(params->graph.UpdateEdge( + trt_node, src_output, dst_node, dst_input)); } return tensorflow::Status::OK(); } tensorflow::Status ConvertSubGraphToTensorRT(ConvertGraphParams* params) { - FillSubGraphEdgeSets(params); + TF_RETURN_IF_ERROR(FillSubGraphEdgeSets(params)); tensorflow::NodeDef trt_node_def; SubGraphParams s(params->graph, params->subgraph_node_ids, diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 7d81831539..4c00630cfe 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -27,8 +27,7 @@ limitations under the License. #include "tensorflow/contrib/tensorrt/log/trt_logger.h" #include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" #include "tensorflow/contrib/tensorrt/resources/trt_resources.h" -#include "tensorflow/core/framework/graph.pb.h" -#include "tensorflow/core/framework/node_def.pb.h" +#include "tensorflow/core/framework/node_def.pb.h" // NOLINT #include "tensorflow/core/framework/node_def_builder.h" #include "tensorflow/core/framework/tensor_shape.pb.h" // NOLINT #include "tensorflow/core/framework/types.h" @@ -54,6 +53,7 @@ limitations under the License. 
namespace tensorflow { namespace tensorrt { namespace convert { +using ::tensorflow::strings::StrCat; namespace { @@ -69,7 +69,6 @@ inline tensorflow::Status ConvertDType(tensorflow::DataType tf_dtype, case tensorflow::DataType::DT_HALF: *trt_dtype = nvinfer1::DataType::kHALF; break; - default: return tensorflow::errors::InvalidArgument( "Unsupported data type " + tensorflow::DataTypeString(tf_dtype)); @@ -497,7 +496,7 @@ class Converter { TRT_TensorOrWeights output = outputs.at(i); // TODO(jie): tf protobuf seems to be omitting the :0 suffix string output_name = node_def.name(); - if (i != 0) output_name = output_name + ":" + std::to_string(i); + if (i != 0) output_name = StrCat(output_name, ":", i); if (output.is_tensor()) { output.tensor()->setName(output_name.c_str()); } @@ -2227,10 +2226,9 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { subgraph_name_scope = GetCommonNameScope(subgraph_name_scope, node->name()); } // TODO(sami,ben,jie): proper naming! - string calib_op_name = tensorflow::strings::StrCat( - subgraph_name_scope, "my_trt_calib_op_", static_id); - string engine_name = - tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op", static_id); + string calib_op_name = + StrCat(subgraph_name_scope, "my_trt_calib_op_", static_id); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op", static_id); static_id++; auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); auto op_rmgr = trt_rmgr->getManager("TRTCalibOps"); @@ -2258,7 +2256,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { std::vector input_names; std::vector input_dtypes; for (const std::pair& input : s.input_inds) { - VLOG(2) << "parsing input. Node id= "<< input.first; + VLOG(2) << "parsing input. 
Node id= " << input.first; int node_id = input.first; int output_idx = input.second; tensorflow::Node* node = s.graph.FindNodeId(node_id); @@ -2272,9 +2270,8 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { auto op_info_vec = s.graph_properties.GetOutputProperties(node_name); if (static_cast(op_info_vec.size()) < output_idx) return tensorflow::errors::Internal( - "accessing output index of: " + std::to_string(output_idx) + - ", at node: " + node_name + "with output entry from shape_map: " + - std::to_string(op_info_vec.size())); + "accessing output index of: ", output_idx, ", at node: ", node_name, + "with output entry from shape_map: ", op_info_vec.size()); auto op_info = op_info_vec.at(output_idx); @@ -2284,10 +2281,9 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - VLOG(2) << "accessing output index of: " << std::to_string(output_idx) + VLOG(2) << "accessing output index of: " << output_idx << ", at node: " << node_name - << "with output entry from shape_map: " - << std::to_string(op_info_vec.size()); + << "with output entry from shape_map: " << op_info_vec.size(); // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; @@ -2301,8 +2297,7 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { // TODO(ben,jie): proper way to restore input tensor name? 
auto input_tensor_name = node_name; - if (output_idx != 0) - input_tensor_name = node_name + ":" + std::to_string(output_idx); + if (output_idx != 0) input_tensor_name = StrCat(node_name, ":", output_idx); nvinfer1::ITensor* input_tensor = converter.network()->addInput( input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); @@ -2341,11 +2336,12 @@ tensorflow::Status InjectCalibrationNode(tensorrt::convert::SubGraphParams& s) { s.output_edge_map->insert( {trt_engine_op_output_idx == 0 ? engine_name - : engine_name + ":" + std::to_string(trt_engine_op_output_idx), + : StrCat(engine_name, ":", trt_engine_op_output_idx), {output_idx, tensor_name}}); trt_engine_op_output_idx++; - if (output_idx != 0) - tensor_name = tensor_name + ":" + std::to_string(output_idx); + if (output_idx != 0) { + tensor_name = StrCat(tensor_name, ":", output_idx); + } VLOG(1) << "output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); @@ -2451,9 +2447,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( } static int static_id = 0; // TODO(sami,ben,jie): proper naming! 
- string engine_name = - tensorflow::strings::StrCat(subgraph_name_scope, "my_trt_op"); - engine_name = tensorflow::strings::StrCat(engine_name, static_id++); + string engine_name = StrCat(subgraph_name_scope, "my_trt_op"); + engine_name = StrCat(engine_name, static_id++); auto trt_rmgr = tensorflow::tensorrt::TRTResourceManager::instance(); auto weight_rmgr = trt_rmgr->getManager("WeightStore"); auto ws = new tensorflow::tensorrt::TRTWeightStore(); @@ -2474,8 +2469,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // here it should be the input tensor name -> matching the binding // insert original node name without port auto tensor_name = node_name; - if (output_idx != 0) - tensor_name = tensor_name + ":" + std::to_string(output_idx); + if (output_idx != 0) { + tensor_name = StrCat(tensor_name, ":", output_idx); + } VLOG(2) << "input name: " << node_name << " tensor_name: " << tensor_name << " idx: " << output_idx; @@ -2499,10 +2495,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( s.graph_properties.GetOutputProperties(shape_inference_node_name); if (static_cast(op_info_vec.size()) <= shape_inference_output_idx) return tensorflow::errors::Internal( - "accessing output index of: " + - std::to_string(shape_inference_output_idx) + ", at node: " + - shape_inference_node_name + " with output entry from shape_map: " + - std::to_string(op_info_vec.size())); + "accessing output index of: ", shape_inference_output_idx, + ", at node: ", shape_inference_node_name, + " with output entry from shape_map: ", op_info_vec.size()); auto op_info = op_info_vec.at(shape_inference_output_idx); tensorflow::DataType tf_dtype = op_info.dtype(); @@ -2511,10 +2506,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( nvinfer1::DataType dtype(nvinfer1::DataType::kFLOAT); TF_CHECK_OK(ConvertDType(tf_dtype, &dtype)); - VLOG(2) << "Accessing output index of: " << std::to_string(output_idx) + VLOG(2) << "Accessing output index of: " << output_idx << ", at node: " << 
node_name - << " with output entry from shape_map: " - << std::to_string(op_info_vec.size()); + << " with output entry from shape_map: " << op_info_vec.size(); // TODO(ben,jie): update TRT input format/dimension nvinfer1::DimsCHW input_dim_psuedo_chw; for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; @@ -2532,8 +2526,9 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( // TODO(ben,jie): proper way to restore input tensor name? auto input_tensor_name = node_name; - if (output_idx != 0) - input_tensor_name = node_name + ":" + std::to_string(output_idx); + if (output_idx != 0) { + input_tensor_name = StrCat(node_name, ":", output_idx); + } input_names.push_back(input_tensor_name); nvinfer1::ITensor* input_tensor = converter.network()->addInput( @@ -2573,13 +2568,11 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( s.output_edge_map->insert( {trt_engine_op_output_idx == 0 ? engine_name - : tensorflow::strings::StrCat(engine_name, ":", - trt_engine_op_output_idx), + : StrCat(engine_name, ":", trt_engine_op_output_idx), {output_idx, tensor_name}}); trt_engine_op_output_idx++; if (output_idx != 0) - tensorflow::strings::StrAppend(&tensor_name, ":", - std::to_string(output_idx)); + tensorflow::strings::StrAppend(&tensor_name, ":", output_idx); VLOG(2) << "Output tensor name: " << tensor_name; output_names.push_back(tensor_name); auto tensor_or_weights = converter.get_tensor(tensor_name); @@ -2627,8 +2620,8 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( engine_plan_string = string(engine_plan_data, engine_plan_data + engine_plan->size()); } - weight_rmgr->Delete(engine_name, - engine_name); + TF_RETURN_IF_ERROR(weight_rmgr->Delete( + engine_name, engine_name)); LOG(INFO) << "finished engine " << engine_name; // Build the TRT op @@ -2636,7 +2629,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( std::vector income_edges; VLOG(2) << "input edge size: " << input_names.size(); for (size_t i = 0; i < input_names.size(); ++i) { - VLOG(2) << 
"input edges: " << std::to_string(i) << " " << input_names.at(i); + VLOG(2) << "input edges: " << i << " " << input_names.at(i); int output_idx = s.input_inds.at(i).second; // we wired up the input here already, it is redundant to do it again in // ConvertSubGraphToTensorRT(convert_graph.cc) diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 445b2bdbde..3f98e64265 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -26,9 +26,10 @@ limitations under the License. namespace tensorflow { static ::tensorflow::tensorrt::Logger logger; - +namespace gpu = ::perftools::gputools; using IRuntime = nvinfer1::IRuntime; using Dims = nvinfer1::Dims; + namespace tensorrt { TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { -- GitLab From e79a3d99e43b797036d0e35ab9b332e371108a5d Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Tue, 6 Mar 2018 13:51:07 -0800 Subject: [PATCH 481/884] Docs: Fix prefix for the fancy-linker. 
PiperOrigin-RevId: 188075262 --- tensorflow/contrib/bayesflow/python/ops/custom_grad.py | 2 +- tensorflow/contrib/bayesflow/python/ops/layers.py | 2 +- tensorflow/contrib/bayesflow/python/ops/optimizers.py | 2 +- .../contrib/estimator/python/estimator/extenders.py | 10 +++++----- .../python/learn/utils/saved_model_export_utils.py | 8 ++++---- tensorflow/contrib/tpu/python/tpu/tpu_config.py | 2 +- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/feature_column/feature_column.py | 10 +++++----- tensorflow/python/ops/array_ops.py | 4 ++-- tensorflow/python/ops/resource_variable_ops.py | 2 +- tensorflow/python/training/supervisor.py | 2 +- 11 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/ops/custom_grad.py b/tensorflow/contrib/bayesflow/python/ops/custom_grad.py index ca1ecb9c40..c8218c57cc 100644 --- a/tensorflow/contrib/bayesflow/python/ops/custom_grad.py +++ b/tensorflow/contrib/bayesflow/python/ops/custom_grad.py @@ -14,7 +14,7 @@ # ============================================================================== """Functions for specifying custom gradients. -See ${python/contrib.bayesflow.custom_gradient}. +See @{tf.contrib.bayesflow.custom_grad.custom_gradient}. """ from __future__ import absolute_import diff --git a/tensorflow/contrib/bayesflow/python/ops/layers.py b/tensorflow/contrib/bayesflow/python/ops/layers.py index a742b7c1aa..610613dca5 100644 --- a/tensorflow/contrib/bayesflow/python/ops/layers.py +++ b/tensorflow/contrib/bayesflow/python/ops/layers.py @@ -14,7 +14,7 @@ # ============================================================================== """Probabilistic neural layers. -See ${python/contrib.bayesflow.layers}. +See @{tf.contrib.bayesflow.layers}. 
""" from __future__ import absolute_import diff --git a/tensorflow/contrib/bayesflow/python/ops/optimizers.py b/tensorflow/contrib/bayesflow/python/ops/optimizers.py index fb70628d10..bff6bb7948 100644 --- a/tensorflow/contrib/bayesflow/python/ops/optimizers.py +++ b/tensorflow/contrib/bayesflow/python/ops/optimizers.py @@ -14,7 +14,7 @@ # ============================================================================== """Probabilistic optimizer modules. -See ${python/contrib.bayesflow.optimizers}. +See @{tf.contrib.bayesflow.optimizers}. """ from __future__ import absolute_import diff --git a/tensorflow/contrib/estimator/python/estimator/extenders.py b/tensorflow/contrib/estimator/python/estimator/extenders.py index c99bf8badb..2b6881b814 100644 --- a/tensorflow/contrib/estimator/python/estimator/extenders.py +++ b/tensorflow/contrib/estimator/python/estimator/extenders.py @@ -33,7 +33,7 @@ _VALID_METRIC_FN_ARGS = set(['features', 'labels', 'predictions', 'config']) def add_metrics(estimator, metric_fn): - """Creates a new ${tf.estimator.Estimator} which has given metrics. + """Creates a new @{tf.estimator.Estimator} which has given metrics. Example: @@ -60,7 +60,7 @@ def add_metrics(estimator, metric_fn): ``` Args: - estimator: A ${tf.estimator.Estimator} object. + estimator: A @{tf.estimator.Estimator} object. metric_fn: A function which should obey the following signature: - Args: can only have following four arguments in any order: * predictions: Predictions `Tensor` or dict of `Tensor` created by given @@ -78,7 +78,7 @@ def add_metrics(estimator, metric_fn): function, namely a `(metric_tensor, update_op)` tuple. Returns: - A new ${tf.estimator.Estimator} which has a union of original metrics with + A new @{tf.estimator.Estimator} which has a union of original metrics with given ones. """ _verify_metric_fn_args(metric_fn) @@ -161,14 +161,14 @@ def forward_features(estimator, keys=None): ``` Args: - estimator: A ${tf.estimator.Estimator} object. 
+ estimator: A @{tf.estimator.Estimator} object. keys: a `string` or a `list` of `string`. If it is `None`, all of the `features` in `dict` is forwarded to the `predictions`. If it is a `string`, only given key is forwarded. If it is a `list` of strings, all the given `keys` are forwarded. Returns: - A new ${tf.estimator.Estimator} which forwards features to predictions. + A new @{tf.estimator.Estimator} which forwards features to predictions. Raises: ValueError: diff --git a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py index 213619a187..c7cdb41312 100644 --- a/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py +++ b/tensorflow/contrib/learn/python/learn/utils/saved_model_export_utils.py @@ -414,7 +414,7 @@ def make_export_strategy(serving_input_fn, `InputFnOps`. default_output_alternative_key: the name of the head to serve when an incoming serving request does not explicitly request a specific head. - Must be `None` if the estimator inherits from ${tf.estimator.Estimator} + Must be `None` if the estimator inherits from @{tf.estimator.Estimator} or for single-headed models. assets_extra: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination @@ -452,7 +452,7 @@ def make_export_strategy(serving_input_fn, The string path to the exported directory. Raises: - ValueError: If `estimator` is a ${tf.estimator.Estimator} instance + ValueError: If `estimator` is a @{tf.estimator.Estimator} instance and `default_output_alternative_key` was specified. """ if isinstance(estimator, core_estimator.Estimator): @@ -503,7 +503,7 @@ def make_parsing_export_strategy(feature_columns, that must be provided at serving time (excluding labels!). default_output_alternative_key: the name of the head to serve when an incoming serving request does not explicitly request a specific head. 
- Must be `None` if the estimator inherits from ${tf.estimator.Estimator} + Must be `None` if the estimator inherits from @{tf.estimator.Estimator} or for single-headed models. assets_extra: A dict specifying how to populate the assets.extra directory within the exported SavedModel. Each key should give the destination @@ -765,7 +765,7 @@ def extend_export_strategy(base_export_strategy, The string path to the SavedModel indicated by post_export_fn. Raises: - ValueError: If `estimator` is a ${tf.estimator.Estimator} instance + ValueError: If `estimator` is a @{tf.estimator.Estimator} instance and `default_output_alternative_key` was specified or if post_export_fn does not return a valid directory. RuntimeError: If unable to create temporary or final export directory. diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_config.py b/tensorflow/contrib/tpu/python/tpu/tpu_config.py index 7ceb4069cf..009326e3d0 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_config.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_config.py @@ -66,7 +66,7 @@ class TPUConfig( cores. This is required by model-parallelism which enables partitioning the model to multiple cores. For example, [2, 2, 1] means the model is partitioned across 4 cores which span two cores in both x and y - coordinates. Please refer to ${tf.contrib.tpu.TopologyProto} for the + coordinates. Please refer to @{tf.contrib.tpu.Topology} for the geometry of a TPU mesh. per_host_input_for_training: If `True`, `input_fn` is invoked Per-Host rather than Per-Core. With Per-Host input pipeline deployment, `input_fn` diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 60351471f1..3e20fc2c74 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -721,7 +721,7 @@ class Estimator(object): """Creates the global step tensor in graph. 
The global step tensor must be an integer type with name 'global_step' and - be added to the collection ${tf.GraphKeys.GLOBAL_STEP}. + be added to the collection @{tf.GraphKeys.GLOBAL_STEP}. Args: graph: The graph in which to create the global step tensor. diff --git a/tensorflow/python/feature_column/feature_column.py b/tensorflow/python/feature_column/feature_column.py index c416881c31..85971c91bf 100644 --- a/tensorflow/python/feature_column/feature_column.py +++ b/tensorflow/python/feature_column/feature_column.py @@ -16,7 +16,7 @@ FeatureColumns provide a high level abstraction for ingesting and representing features. FeatureColumns are also the primary way of encoding features for -canned ${tf.estimator.Estimator}s. +canned @{tf.estimator.Estimator}s. When using FeatureColumns with `Estimators`, the type of feature column you should choose depends on (1) the feature type and (2) the model type. @@ -1626,7 +1626,7 @@ class _FeatureColumn(object): It is used for get_parsing_spec for `tf.parse_example`. Returned spec is a dict from keys ('string') to `VarLenFeature`, `FixedLenFeature`, and other - supported objects. Please check documentation of ${tf.parse_example} for all + supported objects. Please check documentation of @{tf.parse_example} for all supported spec objects. Let's say a Feature column depends on raw feature ('raw') and another @@ -1677,7 +1677,7 @@ class _DenseColumn(_FeatureColumn): weight_collections: List of graph collections to which Variables (if any will be created) are added. trainable: If `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see ${tf.Variable}). + `GraphKeys.TRAINABLE_VARIABLES` (see @{tf.Variable}). Returns: `Tensor` of shape [batch_size] + `_variable_shape`. @@ -1735,7 +1735,7 @@ class _CategoricalColumn(_FeatureColumn): WARNING: Do not subclass this layer unless you know what you are doing: the API is subject to future changes. 
- A categorical feature typically handled with a ${tf.SparseTensor} of IDs. + A categorical feature typically handled with a @{tf.SparseTensor} of IDs. """ __metaclass__ = abc.ABCMeta @@ -1770,7 +1770,7 @@ class _CategoricalColumn(_FeatureColumn): weight_collections: List of graph collections to which variables (if any will be created) are added. trainable: If `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see ${tf.get_variable}). + `GraphKeys.TRAINABLE_VARIABLES` (see @{tf.get_variable}). """ pass diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index bd1e84ec82..9108fe759b 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -794,8 +794,8 @@ def _SliceHelperVar(var, slice_spec): """Creates a slice helper object given a variable. This allows creating a sub-tensor from part of the current contents - of a variable. See ${tf.Tensor$`Tensor.__getitem__`} - for detailed examples of slicing. + of a variable. See @{tf.Tensor.__getitem__} for detailed examples + of slicing. This function in addition also allows assignment to a sliced range. This is similar to `__setitem__` functionality in Python. However, diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index 5b8af8054c..d0578f8205 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -153,7 +153,7 @@ def shape_safe_assign_variable_handle(handle, shape, value, name=None): class ResourceVariable(variables.Variable): """Variable based on resource handles. - See the ${variables} documentation for more details. + See the @{$python/state_ops$`Variables`} documentation for more details. A `ResourceVariable` allows you to maintain state across subsequent calls to session.run. 
diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py index d2ad34773e..86d2f1ab0a 100644 --- a/tensorflow/python/training/supervisor.py +++ b/tensorflow/python/training/supervisor.py @@ -45,7 +45,7 @@ class Supervisor(object): """A training helper that checkpoints models and computes summaries. This class is deprecated. Please use - ${tf.train.MonitoredTrainingSession} instead. + @{tf.train.MonitoredTrainingSession} instead. The Supervisor is a small wrapper around a `Coordinator`, a `Saver`, and a `SessionManager` that takes care of common needs of TensorFlow -- GitLab From 8234fd66e1112e40bdf381aed47da13c76759ed4 Mon Sep 17 00:00:00 2001 From: Martin Wicke Date: Tue, 6 Mar 2018 14:03:16 -0800 Subject: [PATCH 482/884] Automated g4 rollback of changelist 185842713 PiperOrigin-RevId: 188077387 --- .../estimator/python/estimator/head_test.py | 14 ++++---- .../python/estimator/multi_head_test.py | 4 +-- .../python/learn/estimators/head_test.py | 4 +-- .../metrics/python/ops/metric_ops_test.py | 22 ++++++------ .../python/estimator/canned/baseline_test.py | 6 ++-- .../estimator/canned/dnn_testing_utils.py | 2 +- .../python/estimator/canned/head_test.py | 10 +++--- .../estimator/canned/linear_testing_utils.py | 2 +- .../python/kernel_tests/metrics_test.py | 35 +++++-------------- tensorflow/python/ops/metrics_impl.py | 6 ++-- 10 files changed, 43 insertions(+), 62 deletions(-) diff --git a/tensorflow/contrib/estimator/python/estimator/head_test.py b/tensorflow/contrib/estimator/python/estimator/head_test.py index 76d050cb28..dc30dde877 100644 --- a/tensorflow/contrib/estimator/python/estimator/head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/head_test.py @@ -447,7 +447,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. 
keys.AUC: 0.3333, - keys.AUC_PR: 0.5972, + keys.AUC_PR: 0.7639, } self._test_eval( head=head, @@ -479,7 +479,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.5972, + keys.AUC_PR: 0.7639, } self._test_eval( head=head, @@ -510,7 +510,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.5972, + keys.AUC_PR: 0.7639, } self._test_eval( head=head, @@ -544,7 +544,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.5972, + keys.AUC_PR: 0.7639, } self._test_eval( head=head, @@ -574,7 +574,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.3333, - keys.AUC_PR: 0.5972, + keys.AUC_PR: 0.7639, keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 2. / 4., keys.PRECISION_AT_THRESHOLD % thresholds[0]: 2. / 3., keys.RECALL_AT_THRESHOLD % thresholds[0]: 2. / 3., @@ -622,7 +622,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. keys.AUC: 0.2000, - keys.AUC_PR: 0.5833, + keys.AUC_PR: 0.7833, } # Assert spec contains expected tensors. @@ -1096,7 +1096,7 @@ class MultiLabelHead(test.TestCase): # auc and auc_pr cannot be reliably calculated for only 4 samples, but # this assert tests that the algorithm remains consistent. 
keys.AUC: 0.4977, - keys.AUC_PR: 0.4037, + keys.AUC_PR: 0.6645, } self._test_eval( head=head, diff --git a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py index e47a6788f3..65ea89ba1b 100644 --- a/tensorflow/contrib/estimator/python/estimator/multi_head_test.py +++ b/tensorflow/contrib/estimator/python/estimator/multi_head_test.py @@ -306,8 +306,8 @@ class MultiHeadTest(test.TestCase): # this assert tests that the algorithm remains consistent. keys.AUC + '/head1': 0.1667, keys.AUC + '/head2': 0.3333, - keys.AUC_PR + '/head1': 0.49999964, - keys.AUC_PR + '/head2': 0.33333313, + keys.AUC_PR + '/head1': 0.6667, + keys.AUC_PR + '/head2': 0.5000, } # Assert spec contains expected tensors. diff --git a/tensorflow/contrib/learn/python/learn/estimators/head_test.py b/tensorflow/contrib/learn/python/learn/estimators/head_test.py index 6d5da81b4c..7c2d9bb076 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/head_test.py +++ b/tensorflow/contrib/learn/python/learn/estimators/head_test.py @@ -362,7 +362,7 @@ class MultiLabelHeadTest(test.TestCase): "auc_precision_recall": 0.166667, "auc_precision_recall/class0": 0, "auc_precision_recall/class1": 0., - "auc_precision_recall/class2": 0.49999, + "auc_precision_recall/class2": 1., "labels/actual_label_mean/class0": self._labels[0][0], "labels/actual_label_mean/class1": self._labels[0][1], "labels/actual_label_mean/class2": self._labels[0][2], @@ -748,7 +748,7 @@ class BinaryClassificationHeadTest(test.TestCase): "accuracy/baseline_label_mean": label_mean, "accuracy/threshold_0.500000_mean": 1. / 2, "auc": 1. 
/ 2, - "auc_precision_recall": 0.25, + "auc_precision_recall": 0.749999, "labels/actual_label_mean": label_mean, "labels/prediction_mean": .731059, # softmax "loss": expected_loss, diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py index b387f26c01..33eb655fb6 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops_test.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops_test.py @@ -1802,9 +1802,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.54166603, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.79166, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.54166603, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3) def testAnotherAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1816,9 +1816,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.44365042, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.610317, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.44365042, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3) def testThirdAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1830,9 +1830,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.73611039, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.90277, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.73611039, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-3) def testAllIncorrect(self): inputs = np.random.randint(0, 2, 
size=(100, 1)) @@ -1865,9 +1865,9 @@ class StreamingAUCTest(test.TestCase): auc, update_op = metrics.streaming_auc(predictions, labels, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.49999976, sess.run(update_op), 6) + self.assertAlmostEqual(1, sess.run(update_op), 6) - self.assertAlmostEqual(0.49999976, auc.eval(), 6) + self.assertAlmostEqual(1, auc.eval(), 6) def testWithMultipleUpdates(self): num_samples = 1000 @@ -6888,8 +6888,7 @@ class CohenKappaTest(test.TestCase): # [[0, 25, 0], # [0, 0, 25], # [25, 0, 0]] - # Calculated by v0.19: sklearn.metrics.cohen_kappa_score( - # labels, predictions) + # Calculated by v0.19: sklearn.metrics.cohen_kappa_score(labels, predictions) expect = -0.333333333333 with self.test_session() as sess: @@ -6948,8 +6947,7 @@ class CohenKappaTest(test.TestCase): weights_t: weights[batch_start:batch_end] }) # Calculated by v0.19: sklearn.metrics.cohen_kappa_score( - # labels_np, predictions_np, - # sample_weight=weights_np) + # labels_np, predictions_np, sample_weight=weights_np) expect = 0.289965397924 self.assertAlmostEqual(expect, kappa.eval(), 5) diff --git a/tensorflow/python/estimator/canned/baseline_test.py b/tensorflow/python/estimator/canned/baseline_test.py index 18c955f5a0..96639e88ea 100644 --- a/tensorflow/python/estimator/canned/baseline_test.py +++ b/tensorflow/python/estimator/canned/baseline_test.py @@ -1075,7 +1075,7 @@ class BaselineClassifierEvaluationTest(test.TestCase): metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, metric_keys.MetricKeys.AUC: 0., - metric_keys.MetricKeys.AUC_PR: 0.5, + metric_keys.MetricKeys.AUC_PR: 1., } else: # Multi classes: loss = 1 * -log ( softmax(logits)[label] ) @@ -1136,7 +1136,7 @@ class BaselineClassifierEvaluationTest(test.TestCase): metric_keys.MetricKeys.LABEL_MEAN: 0.5, metric_keys.MetricKeys.ACCURACY_BASELINE: 0.5, metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 0.25, + 
metric_keys.MetricKeys.AUC_PR: 0.75, } else: # Expand logits since batch_size=2 @@ -1212,7 +1212,7 @@ class BaselineClassifierEvaluationTest(test.TestCase): metric_keys.MetricKeys.ACCURACY_BASELINE: ( max(label_mean, 1-label_mean)), metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 0.16666645, + metric_keys.MetricKeys.AUC_PR: 2. / (1. + 2.), } else: # Multi classes: unweighted_loss = 1 * -log ( soft_max(logits)[label] ) diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py index cbae43e4f7..706575985f 100644 --- a/tensorflow/python/estimator/canned/dnn_testing_utils.py +++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py @@ -1041,7 +1041,7 @@ class BaseDNNClassifierEvaluateTest(object): # There is no good way to calculate AUC for only two data points. But # that is what the algorithm returns. metric_keys.MetricKeys.AUC: 0.5, - metric_keys.MetricKeys.AUC_PR: 0.25, + metric_keys.MetricKeys.AUC_PR: 0.75, ops.GraphKeys.GLOBAL_STEP: global_step }, dnn_classifier.evaluate(input_fn=_input_fn, steps=1)) diff --git a/tensorflow/python/estimator/canned/head_test.py b/tensorflow/python/estimator/canned/head_test.py index 23158c76e7..b40758f8fe 100644 --- a/tensorflow/python/estimator/canned/head_test.py +++ b/tensorflow/python/estimator/canned/head_test.py @@ -1563,7 +1563,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, keys.AUC: 0., - keys.AUC_PR: 0.74999905, + keys.AUC_PR: 1., } # Assert spec contains expected tensors. @@ -1641,7 +1641,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, keys.AUC: 0., - keys.AUC_PR: 0.75, + keys.AUC_PR: 1., } # Assert predictions, loss, and metrics. 
@@ -1746,7 +1746,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: 2./2, keys.ACCURACY_BASELINE: 2./2, keys.AUC: 0., - keys.AUC_PR: 0.74999905, + keys.AUC_PR: 1., keys.ACCURACY_AT_THRESHOLD % thresholds[0]: 1., keys.PRECISION_AT_THRESHOLD % thresholds[0]: 1., keys.RECALL_AT_THRESHOLD % thresholds[0]: 1., @@ -2193,7 +2193,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): keys.LABEL_MEAN: expected_label_mean, keys.ACCURACY_BASELINE: 1 - expected_label_mean, keys.AUC: .45454565, - keys.AUC_PR: .21923049, + keys.AUC_PR: .6737757325172424, } # Assert spec contains expected tensors. @@ -2492,7 +2492,7 @@ class BinaryLogisticHeadWithSigmoidCrossEntropyLossTest(test.TestCase): # We cannot reliably calculate AUC with only 4 data points, but the # values should not change because of backwards-compatibility. keys.AUC: 0.5222, - keys.AUC_PR: 0.5119, + keys.AUC_PR: 0.7341, } tol = 1e-2 diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py index e88fcbbd2e..3e9183cf1b 100644 --- a/tensorflow/python/estimator/canned/linear_testing_utils.py +++ b/tensorflow/python/estimator/canned/linear_testing_utils.py @@ -1342,7 +1342,7 @@ class BaseLinearClassifierEvaluationTest(object): metric_keys.MetricKeys.LABEL_MEAN: 1., metric_keys.MetricKeys.ACCURACY_BASELINE: 1, metric_keys.MetricKeys.AUC: 0., - metric_keys.MetricKeys.AUC_PR: 0.5, + metric_keys.MetricKeys.AUC_PR: 1., } else: # Multi classes: loss = 1 * -log ( soft_max(logits)[label] ) diff --git a/tensorflow/python/kernel_tests/metrics_test.py b/tensorflow/python/kernel_tests/metrics_test.py index 59e7afa2dc..ad802f7e1f 100644 --- a/tensorflow/python/kernel_tests/metrics_test.py +++ b/tensorflow/python/kernel_tests/metrics_test.py @@ -1132,9 +1132,9 @@ class AUCTest(test.TestCase): auc, update_op = metrics.auc(labels, predictions, curve='PR') 
sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.54166, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.79166, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.54166, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.79166, auc.eval(), delta=1e-3) def testAnotherAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1146,9 +1146,9 @@ class AUCTest(test.TestCase): auc, update_op = metrics.auc(labels, predictions, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.44365042, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.610317, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.44365042, auc.eval(), delta=1e-3) + self.assertAlmostEqual(0.610317, auc.eval(), delta=1e-3) def testThirdAUCPRSpecialCase(self): with self.test_session() as sess: @@ -1160,26 +1160,9 @@ class AUCTest(test.TestCase): auc, update_op = metrics.auc(labels, predictions, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.73611039, sess.run(update_op), delta=1e-3) + self.assertAlmostEqual(0.90277, sess.run(update_op), delta=1e-3) - self.assertAlmostEqual(0.73611039, auc.eval(), delta=1e-3) - - def testFourthAUCPRSpecialCase(self): - # Create the labels and data. - labels = np.array([ - 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]) - predictions = np.array([ - 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35]) - - with self.test_session() as sess: - auc, _ = metrics.auc( - labels, predictions, curve='PR', num_thresholds=11) - - sess.run(variables.local_variables_initializer()) - # Since this is only approximate, we can't expect a 6 digits match. 
- # Although with higher number of samples/thresholds we should see the - # accuracy improving - self.assertAlmostEqual(0.0, auc.eval(), delta=0.001) + self.assertAlmostEqual(0.90277, auc.eval(), delta=1e-3) def testAllIncorrect(self): inputs = np.random.randint(0, 2, size=(100, 1)) @@ -1205,16 +1188,16 @@ class AUCTest(test.TestCase): self.assertAlmostEqual(1, auc.eval(), 6) - def testRecallOneAndPrecisionOne(self): + def testRecallOneAndPrecisionOneGivesOnePRAUC(self): with self.test_session() as sess: predictions = array_ops.ones([4], dtype=dtypes_lib.float32) labels = array_ops.ones([4]) auc, update_op = metrics.auc(labels, predictions, curve='PR') sess.run(variables.local_variables_initializer()) - self.assertAlmostEqual(0.5, sess.run(update_op), 6) + self.assertAlmostEqual(1, sess.run(update_op), 6) - self.assertAlmostEqual(0.5, auc.eval(), 6) + self.assertAlmostEqual(1, auc.eval(), 6) def np_auc(self, predictions, labels, weights): """Computes the AUC explicitly using Numpy. diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 043c0e30cd..0123162b54 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -672,7 +672,7 @@ def auc(labels, x = fp_rate y = rec else: # curve == 'PR'. - prec = math_ops.div(tp, tp + fp + epsilon) + prec = math_ops.div(tp + epsilon, tp + fp + epsilon) x = rec y = prec if summation_method == 'trapezoidal': @@ -923,8 +923,8 @@ def mean_per_class_accuracy(labels, weights = array_ops.reshape(weights, [-1]) weights = math_ops.to_float(weights) - is_correct *= weights - ones *= weights + is_correct = is_correct * weights + ones = ones * weights update_total_op = state_ops.scatter_add(total, labels, ones) update_count_op = state_ops.scatter_add(count, labels, is_correct) -- GitLab From 4b48598f73deccca2c0eccf21150413378044145 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Tue, 6 Mar 2018 14:07:23 -0800 Subject: [PATCH 483/884] Internal change PiperOrigin-RevId: 188078128 --- tensorflow/contrib/lite/kernels/BUILD | 11 ++++ .../contrib/lite/kernels/test_util_test.cc | 51 +++++++++++++++++++ 2 files changed, 62 insertions(+) create mode 100644 tensorflow/contrib/lite/kernels/test_util_test.cc diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 6bbc0bf9a7..a6be410dc8 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -97,6 +97,17 @@ tf_cc_test( ], ) +tf_cc_test( + name = "test_util_test", + size = "small", + srcs = ["test_util_test.cc"], + deps = [ + ":test_util", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + cc_library( name = "builtin_ops", srcs = [ diff --git a/tensorflow/contrib/lite/kernels/test_util_test.cc b/tensorflow/contrib/lite/kernels/test_util_test.cc new file mode 100644 index 0000000000..1e10e89061 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/test_util_test.cc @@ -0,0 +1,51 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +TEST(TestUtilTest, QuantizeVector) { + std::vector data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; + auto q_data = Quantize(data, /*scale=*/1.0, /*zero_point=*/0); + std::vector expected = {0, 0, 0, 1, 1, 255}; + EXPECT_THAT(q_data, ElementsAreArray(expected)); +} + +TEST(TestUtilTest, QuantizeVectorScalingDown) { + std::vector data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; + auto q_data = Quantize(data, /*scale=*/10.0, /*zero_point=*/0); + std::vector expected = {0, 0, 0, 0, 0, 100}; + EXPECT_THAT(q_data, ElementsAreArray(expected)); +} + +TEST(TestUtilTest, QuantizeVectorScalingUp) { + std::vector data = {-1.0, -0.5, 0.0, 0.5, 1.0, 1000.0}; + auto q_data = Quantize(data, /*scale=*/0.1, /*zero_point=*/0); + std::vector expected = {0, 0, 0, 5, 10, 255}; + EXPECT_THAT(q_data, ElementsAreArray(expected)); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} -- GitLab From b2779a86c2152f2a949be6d743e31e8756fa00ff Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Tue, 6 Mar 2018 14:36:36 -0800 Subject: [PATCH 484/884] tpu_estimator gives us unstable input shapes when inputs are labeled with names. Sorting the input keys solves the issue. 
PiperOrigin-RevId: 188082738 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index a7991eb1f4..f3c2a510fd 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -784,7 +784,8 @@ class _InputPipeline(object): def _extract_key_names(tensor_or_dict): if tensor_or_dict is None: return [] - return tensor_or_dict.keys() if isinstance(tensor_or_dict, dict) else [] + return sorted(tensor_or_dict.keys()) if isinstance( + tensor_or_dict, dict) else [] # Extract structure. has_labels = labels is not None -- GitLab From fb6cebf5e8444c180713c5c3a71c640e30de1c6d Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 6 Mar 2018 14:41:02 -0800 Subject: [PATCH 485/884] Add link to tflite codelab PiperOrigin-RevId: 188083446 --- tensorflow/docs_src/mobile/tflite/demo_android.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/docs_src/mobile/tflite/demo_android.md b/tensorflow/docs_src/mobile/tflite/demo_android.md index 79b567897c..c94b5597a6 100644 --- a/tensorflow/docs_src/mobile/tflite/demo_android.md +++ b/tensorflow/docs_src/mobile/tflite/demo_android.md @@ -8,6 +8,9 @@ You'll need an Android device running Android 5.0 or higher to run the demo. To get you started working with TensorFlow Lite on Android, we'll walk you through building and deploying our TensorFlow demo app in Android Studio. +Note: For a more detailed guide see the +[TFLite Codelab](https://codelabs.developers.google.com/codelabs/tensorflow-for-poets-2-tflite/index.html#0) + It's also possible to build the demo app with Bazel, but we only recommend this for advanced users who are very familiar with the Bazel build environment. 
For more information on that, see our page [on Github](https://github.com/tensorflow/tensorflow/tree/master/tensorflow/contrib/lite#building-tensorflow-lite-and-the-demo-app-from-source). -- GitLab From 86919effa2c1bfb36d0a3accbbbcd1727bf25cb1 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 6 Mar 2018 14:43:10 -0800 Subject: [PATCH 486/884] Fix bug in importing MetaGraphDefs containing nested conds. This change makes CondContext._external_values more consistently store Tensors external this context. These values are then not added to the context when it's imported. This also removes the workaround I added earlier to manually remove the predicate and pivot Tensors from the context, instead adding them to _external_values were they're automatically excluded. PiperOrigin-RevId: 188083780 --- .../python/framework/fake_summary_writer.py | 7 ++- tensorflow/python/ops/control_flow_ops.py | 20 ++++----- tensorflow/python/training/saver_test.py | 43 +++++++++++++++---- 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/tensorflow/contrib/testing/python/framework/fake_summary_writer.py b/tensorflow/contrib/testing/python/framework/fake_summary_writer.py index f2065c6662..15a415df30 100644 --- a/tensorflow/contrib/testing/python/framework/fake_summary_writer.py +++ b/tensorflow/contrib/testing/python/framework/fake_summary_writer.py @@ -18,6 +18,7 @@ from __future__ import division from __future__ import print_function from tensorflow.core.framework import summary_pb2 +from tensorflow.python.framework import test_util from tensorflow.python.summary.writer import writer from tensorflow.python.summary.writer import writer_cache @@ -85,7 +86,11 @@ class FakeSummaryWriter(object): if expected_added_graphs is not None: test_case.assertEqual(expected_added_graphs, self._added_graphs) if expected_added_meta_graphs is not None: - test_case.assertEqual(expected_added_meta_graphs, self._added_meta_graphs) + 
test_case.assertEqual(len(expected_added_meta_graphs), + len(self._added_meta_graphs)) + for expected, actual in zip(expected_added_meta_graphs, + self._added_meta_graphs): + test_util.assert_meta_graph_protos_equal(test_case, expected, actual) if expected_session_logs is not None: test_case.assertEqual(expected_session_logs, self._added_session_logs) diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 689f7cdc8f..1fa25a0429 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -1499,9 +1499,11 @@ class ControlFlowContext(object): if values_def: self._init_values_from_proto(values_def, import_scope=import_scope) else: - # Values that have been already seen in this context. + # The names of tensors that have been already seen in this context. self._values = set() - # Values referenced by but external to this context. + # The keys are the names of tensors referenced by but external to this + # context. Each value is the Tensor that should be used by this context to + # access the key value (e.g. a switch output guarding a cond input value). self._external_values = {} def _init_values_from_proto(self, values_def, import_scope=None): @@ -1688,9 +1690,12 @@ class CondContext(ControlFlowContext): self._pivot = pivot # The predicate tensor in this branch self._branch = branch # 0 or 1 representing this branch - # Values considered to have been already seen in this context. + # Values considered to have been already seen in this context. They are + # not included in this context. self._values.add(pred.name) + self._external_values[pred.name] = pred self._values.add(pivot.name) + self._external_values[pivot.name] = pivot def _init_from_proto(self, context_def, import_scope=None): """Creates a new `CondContext` from protocol buffer. 
@@ -1710,13 +1715,6 @@ class CondContext(ControlFlowContext): self._branch = context_def.branch super(CondContext, self).__init__(values_def=context_def.values_def, import_scope=import_scope) - # The predicate and pivot ops appear in self._values, but don't have self - # set as their control context. The __init__ call above will set self for - # all values, so manually override the predicate and pivot contexts here. - # pylint: disable=protected-access - self._pred.op._set_control_flow_context(self.outer_context) - self._pivot.op._set_control_flow_context(self.outer_context) - # pylint: enable=protected-access @property def pred(self): @@ -1800,6 +1798,7 @@ class CondContext(ControlFlowContext): if self._outer_context: result = self._outer_context.AddValue(val) self._values.add(result.name) + self._external_values[result.name] = result with ops.control_dependencies(None): result = _SwitchRefOrTensor(result, self._pred)[self._branch] if self._outer_context: @@ -1864,6 +1863,7 @@ class CondContext(ControlFlowContext): if self._outer_context: real_val = self._outer_context.AddValue(val) self._values.add(real_val.name) + self._external_values[real_val.name] = real_val real_val = _SwitchRefOrTensor(real_val, self._pred)[self._branch] self._external_values[val.name] = real_val else: diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 7947765449..4fd3b58da1 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -2059,20 +2059,25 @@ class MetaGraphTest(test.TestCase): self._testGraphExtensionRestore(test_dir) self._testRestoreFromTrainGraphWithControlContext(test_dir) - def _testWhileLoopAndGradientSerDes(self, outer_body_fn): - # Build a while loop with `outer_body_fn`, export it, and verify that it can - # be imported and the gradient can be built and run correctly. 
+ def _testGradientSerDes(self, graph_fn): + """Tests that gradients can be computed after exporting and importing. + + Builds a graph, exports it, and verifies that it can be imported and the + gradient can be built and run correctly. + Args: + graph_fn: takes a single float Tensor argument as input, outputs a single + Tensor + """ test_dir = self._get_test_dir("nested_control_flow") filename = os.path.join(test_dir, "metafile") saver_ckpt = os.path.join(test_dir, "saver.ckpt") # Create while loop using `outer_body_fn`. with ops_lib.Graph().as_default(): - var = variables.Variable(0) + var = variables.Variable(0.0) var_name = var.name - _, output = control_flow_ops.while_loop(lambda i, x: i < 5, outer_body_fn, - [0, var]) + output = graph_fn(var) output_name = output.name init_op = variables.global_variables_initializer() @@ -2109,12 +2114,21 @@ class MetaGraphTest(test.TestCase): actual_grad_value = sess.run(grad) self.assertEqual(expected_grad_value, actual_grad_value) + def _testWhileLoopAndGradientSerDes(self, outer_body_fn): + # Build a while loop with `outer_body_fn`, export it, and verify that it can + # be imported and the gradient can be built and run correctly. + # pylint: disable=g-long-lambda + return self._testGradientSerDes( + lambda x: control_flow_ops.while_loop( + lambda i, y: i < 5, outer_body_fn, [0, x])[1]) + # pylint: enable=g-long-lambda + def testNestedWhileLoopsSerDes(self): # Test two simple nested while loops. def body(i, x): _, r = control_flow_ops.while_loop(lambda j, y: j < 3, lambda j, y: (j + 1, y + x), - [0, 0]) + [0, 0.0]) return i + 1, x + r self._testWhileLoopAndGradientSerDes(body) @@ -2127,12 +2141,25 @@ class MetaGraphTest(test.TestCase): lambda: control_flow_ops.while_loop( lambda j, y: j < 3, lambda j, y: (j + 1, y + x), - [0, 0])[1], + [0, 0.0])[1], lambda: x) return i + 1, cond_result # pylint: enable=g-long-lambda self._testWhileLoopAndGradientSerDes(body) + def testNestedCondsSerDes(self): + # Test conds in a cond. 
+ # pylint: disable=g-long-lambda + self._testGradientSerDes(lambda x: control_flow_ops.cond( + x > 0, + lambda: control_flow_ops.cond(x > 3, + lambda: array_ops.identity(x), + lambda: math_ops.multiply(x, 2.0)), + lambda: control_flow_ops.cond(x < -3, + lambda: constant_op.constant(1.0), + lambda: math_ops.multiply(x, -1.0)))) + # pylint: enable=g-long-lambda + def testStrippedOpListDef(self): with self.test_session(): # Creates a graph. -- GitLab From 642320077dafdc8ae11650d90637ade11f9509cc Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Tue, 6 Mar 2018 14:59:33 -0800 Subject: [PATCH 487/884] Revert the changes of ScopedActivateExecutorContext, which requires depending on core:lib which is forbidden --- .../contrib/tensorrt/kernels/trt_engine_op.cc | 21 +++++++------------ 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc index 3f98e64265..b32371b642 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_engine_op.cc @@ -18,7 +18,6 @@ limitations under the License. #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/stream_executor.h" #include "tensorflow/core/platform/types.h" -#include "tensorflow/stream_executor/cuda/cuda_activation.h" #if GOOGLE_CUDA #if GOOGLE_TENSORRT @@ -43,19 +42,15 @@ TRTEngineOp::TRTEngineOp(OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("output_nodes", &output_nodes_)); // TODO(samikama) runtime should be taken from a resourcemanager as well. - // Only engine should be in the op and context and runtime should be taken - // from resourcemanager - // TODO(jie): Relying on TF scheme to limit gpu scope for device placement - // cannot have dependency on //tensorflow/core:gpu_runtimeo - // Copied the function here. 
+ // Only engine should be in the op and context and runtime should be taken + // from resourcemanager + // TODO(jie): cudaSetDevice make sure trt engine is allocated on the same + // gpu where the input/output is also located. int gpu_id = context->device()->tensorflow_gpu_device_info()->gpu_id; - auto result = gpu::MultiPlatformManager::PlatformWithName("CUDA"); - if (!result.ok()) { - LOG(FATAL) << "Could not find Platform with name CUDA"; - } - gpu::Platform* gpu_machine_manager = result.ValueOrDie(); - gpu::cuda::ScopedActivateExecutorContext scoped_activation{ - gpu_machine_manager->ExecutorForDevice(gpu_id).ValueOrDie()}; + cudaSetDevice(gpu_id); + int device; + cudaGetDevice(&device); + if (gpu_id != device) LOG(FATAL) << "set device failed!"; // TODO(samikama) runtime should be taken from a resourcemanager as well. // Only engine should be in the op and context and runtime should be taken -- GitLab From cebb7fc9a406061ff3eea3fe6e2219197265d1d5 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 6 Mar 2018 14:59:21 -0800 Subject: [PATCH 488/884] Introduce API `keras.backend.learning_phase_scope(value)` (internal for now). Primary goal is to avoid side effects when setting the learning phase in eager training. PiperOrigin-RevId: 188086280 --- tensorflow/python/framework/smart_cond.py | 7 +- .../python/keras/_impl/keras/backend.py | 33 +- .../python/keras/_impl/keras/backend_test.py | 16 + .../_impl/keras/engine/training_eager.py | 604 +++++++++--------- 4 files changed, 356 insertions(+), 304 deletions(-) diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py index 4f2f1db882..7bd9f47d5a 100644 --- a/tensorflow/python/framework/smart_cond.py +++ b/tensorflow/python/framework/smart_cond.py @@ -72,7 +72,9 @@ def smart_constant_value(pred): Raises: TypeError: If `pred` is not a Tensor or bool. 
""" - if isinstance(pred, bool): + if pred in {0, 1}: # Accept 1/0 as valid boolean values + pred_value = bool(pred) + elif isinstance(pred, bool): pred_value = pred elif isinstance(pred, ops.Tensor): pred_value = tensor_util.constant_value(pred) @@ -87,5 +89,6 @@ def smart_constant_value(pred): # pylint: enable=protected-access else: - raise TypeError("`pred` must be a Tensor or a Python bool.") + raise TypeError("`pred` must be a Tensor, or a Python bool, or 1 or 0. " + "Found instead: %s" % pred) return pred_value diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index 2b75666b9e..3d539f9a76 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -55,10 +55,10 @@ from tensorflow.python.ops import tensor_array_grad # pylint: disable=unused-im from tensorflow.python.ops import tensor_array_ops from tensorflow.python.ops import variables as variables_module from tensorflow.python.training import moving_averages +from tensorflow.python.util import tf_contextlib from tensorflow.python.util import tf_inspect from tensorflow.python.util.tf_export import tf_export - py_all = all py_sum = sum @@ -369,13 +369,42 @@ def set_learning_phase(value): """ global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned if value not in {0, 1}: - raise ValueError('Expected learning phase to be ' '0 or 1.') + raise ValueError('Expected learning phase to be 0 or 1.') if context.in_eager_mode(): _GRAPH_LEARNING_PHASES['eager'] = value else: _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = value +@tf_contextlib.contextmanager +def learning_phase_scope(value): + """Provides a scope within which the learning phase is equal to `value`. + + The learning phase gets restored to its original value upon exiting the scope. + + Arguments: + value: Learning phase value, either 0 or 1 (integers). + + Yields: + The provided value. 
+ + Raises: + ValueError: if `value` is neither `0` nor `1`. + """ + if value not in {0, 1}: + raise ValueError('Expected learning phase to be 0 or 1.') + previous_value = learning_phase() + try: + set_learning_phase(value) + yield value + finally: + # Restore learning phase to initial value. + if context.in_eager_mode(): + _GRAPH_LEARNING_PHASES['eager'] = previous_value + else: + _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = previous_value + + @tf_export('keras.backend.get_session') def get_session(): """Returns the TF session to be used by the backend. diff --git a/tensorflow/python/keras/_impl/keras/backend_test.py b/tensorflow/python/keras/_impl/keras/backend_test.py index f29ca49378..fb4b2a0e1d 100644 --- a/tensorflow/python/keras/_impl/keras/backend_test.py +++ b/tensorflow/python/keras/_impl/keras/backend_test.py @@ -128,6 +128,22 @@ class BackendUtilsTest(test.TestCase): sess.run(variables.global_variables_initializer()) sess.run(y, feed_dict={x: np.random.random((2, 3))}) + def test_learning_phase_scope(self): + with self.test_session(): + initial_learning_phase = keras.backend.learning_phase() + with keras.backend.learning_phase_scope(1) as lp: + self.assertEqual(lp, 1) + self.assertEqual(keras.backend.learning_phase(), 1) + self.assertEqual(keras.backend.learning_phase(), initial_learning_phase) + with keras.backend.learning_phase_scope(0) as lp: + self.assertEqual(lp, 0) + self.assertEqual(keras.backend.learning_phase(), 0) + self.assertEqual(keras.backend.learning_phase(), initial_learning_phase) + with self.assertRaises(ValueError): + with keras.backend.learning_phase_scope(None): + pass + self.assertEqual(keras.backend.learning_phase(), initial_learning_phase) + def test_int_shape(self): x = keras.backend.placeholder(shape=(3, 4)) self.assertEqual(keras.backend.int_shape(x), (3, 4)) diff --git a/tensorflow/python/keras/_impl/keras/engine/training_eager.py b/tensorflow/python/keras/_impl/keras/engine/training_eager.py index 75c96e6916..67858a578c 
100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_eager.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_eager.py @@ -26,7 +26,7 @@ import numpy as np from tensorflow.python.eager.backprop import GradientTape from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_util -from tensorflow.python.keras._impl.keras import backend as K +from tensorflow.python.keras._impl.keras import backend from tensorflow.python.keras._impl.keras import callbacks as cbks from tensorflow.python.keras._impl.keras import losses from tensorflow.python.keras._impl.keras import metrics as metrics_module @@ -60,7 +60,7 @@ def _get_metrics_info(metric, internal_output_shapes=None, loss_func=None): def _eager_loss_fn(outputs, targets, loss_fn, output_name): - with K.name_scope(output_name + '_loss'): + with backend.name_scope(output_name + '_loss'): loss = loss_fn(targets, outputs) return loss @@ -88,7 +88,7 @@ def _eager_metrics_fn(model, outputs, targets): output_metrics = model.nested_metrics[i] for nested_output_metric in output_metrics: metric_name, metric_fn = _get_metrics_info( - nested_output_metric, K.int_shape(model.outputs[i]), + nested_output_metric, backend.int_shape(model.outputs[i]), model.loss_functions[i]) if len(model.output_names) > 1: @@ -96,10 +96,10 @@ def _eager_metrics_fn(model, outputs, targets): if metric_name not in model.metrics_names: model.metrics_names.append(metric_name) - with K.name_scope(metric_name): + with backend.name_scope(metric_name): metric_result = metric_fn(outputs[i], targets[i]) metric_names.append(metric_name) - metric_results.append(K.mean(metric_result)) + metric_results.append(backend.mean(metric_result)) return metric_names, metric_results @@ -137,7 +137,7 @@ def _model_loss(model, inputs, targets, sample_weights=None, training=False): targets = [targets] loss_metrics = [] - with K.name_scope('loss'): + with backend.name_scope('loss'): for i, loss_fn in 
enumerate(model.loss_functions): if sample_weights: weights = sample_weights[i] @@ -149,10 +149,10 @@ def _model_loss(model, inputs, targets, sample_weights=None, training=False): mask = outs[i]._keras_mask weighted_masked_fn = training_utils.weighted_masked_objective(loss_fn) - with K.name_scope(model.output_names[i] + '_loss'): + with backend.name_scope(model.output_names[i] + '_loss'): output_loss = weighted_masked_fn( outs[i], targets[i], weights, mask=mask) - loss_metrics.append(K.mean(output_loss)) + loss_metrics.append(backend.mean(output_loss)) loss_weight = model.loss_weights_list[i] if total_loss is None: @@ -160,7 +160,7 @@ def _model_loss(model, inputs, targets, sample_weights=None, training=False): else: total_loss += loss_weight * output_loss - total_loss = K.mean(total_loss) + total_loss = backend.mean(total_loss) # Add regularization losses custom_losses = [] for layer in model.layers: @@ -197,24 +197,24 @@ def _process_single_batch(model, Raises: ValueError: If the model has no loss to optimize. """ - K.set_learning_phase(training) - with GradientTape() as tape: - outs, loss, loss_metrics = _model_loss(model, inputs, targets, - sample_weights=sample_weights, - training=training) - if loss is None: - raise ValueError('The model cannot be run ' - 'because it has no loss to optimize.') - if training: - if not model._collected_trainable_weights: - logging.warning('The list of trainable weights is empty. 
Make sure that ' - 'you are not setting model.trainable to False before ' - 'compiling the model.') - else: - grads = tape.gradient(loss, model._collected_trainable_weights) - model.optimizer.apply_gradients(zip(grads, - model._collected_trainable_weights)) - return outs, loss, loss_metrics + with backend.learning_phase_scope(1 if training else 0): + with GradientTape() as tape: + outs, loss, loss_metrics = _model_loss(model, inputs, targets, + sample_weights=sample_weights, + training=training) + if loss is None: + raise ValueError('The model cannot be run ' + 'because it has no loss to optimize.') + if training: + if not model._collected_trainable_weights: + logging.warning('The list of trainable weights is empty. Make sure that' + ' you are not setting model.trainable to False before ' + 'compiling the model.') + else: + grads = tape.gradient(loss, model._collected_trainable_weights) + model.optimizer.apply_gradients(zip(grads, + model._collected_trainable_weights)) + return outs, loss, loss_metrics def train_on_batch(model, inputs, targets, sample_weights=None): @@ -230,11 +230,11 @@ def train_on_batch(model, inputs, targets, sample_weights=None): total loss and the loss associated with each output. """ inputs = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs] + ops.convert_to_tensor(val, dtype=backend.floatx()) for val in inputs] targets = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets] + ops.convert_to_tensor(val, dtype=backend.floatx()) for val in targets] sample_weights = [ - ops.convert_to_tensor(val, dtype=K.floatx()) + ops.convert_to_tensor(val, dtype=backend.floatx()) if val is not None else None for val in sample_weights] outs, loss, _ = _process_single_batch( model, inputs, targets, sample_weights=sample_weights, training=True) @@ -260,11 +260,11 @@ def test_on_batch(model, inputs, targets, sample_weights=None): total loss, loss and metrics associated with each output. 
""" inputs = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs] + ops.convert_to_tensor(val, dtype=backend.floatx()) for val in inputs] targets = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets] + ops.convert_to_tensor(val, dtype=backend.floatx()) for val in targets] sample_weights = [ - ops.convert_to_tensor(val, dtype=K.floatx()) + ops.convert_to_tensor(val, dtype=backend.floatx()) if val is not None else None for val in sample_weights] outs, loss, loss_metrics = _process_single_batch( model, inputs, targets, sample_weights=sample_weights, training=False) @@ -329,181 +329,182 @@ def fit_loop( ValueError: In case of invalid argument values. """ # Required for Eager mode - K.set_learning_phase(True) - - do_validation = False - if val_inputs: - do_validation = True - if (verbose and inputs and hasattr(inputs[0], 'shape') and - hasattr(val_inputs[0], 'shape')): - print('Train on %d samples, validate on %d samples' % - (inputs[0].shape[0], val_inputs[0].shape[0])) - if validation_steps: - if steps_per_epoch is None: - raise ValueError('Can only use `validation_steps` when doing step-wise ' - 'training, i.e. `steps_per_epoch` must be set.') - do_validation = True - - out_labels = model.metrics_names - if do_validation: - callback_metrics = copy.copy(out_labels) + [ - 'val_' + n for n in out_labels - ] - else: - callback_metrics = copy.copy(out_labels) + with backend.learning_phase_scope(1): + do_validation = False + if val_inputs: + do_validation = True + if (verbose and inputs and hasattr(inputs[0], 'shape') and + hasattr(val_inputs[0], 'shape')): + print('Train on %d samples, validate on %d samples' % + (inputs[0].shape[0], val_inputs[0].shape[0])) + if validation_steps: + if steps_per_epoch is None: + raise ValueError('Can only use `validation_steps` when doing step-wise ' + 'training, i.e. 
`steps_per_epoch` must be set.') + do_validation = True + + out_labels = model.metrics_names + if do_validation: + callback_metrics = copy.copy(out_labels) + [ + 'val_' + n for n in out_labels + ] + else: + callback_metrics = copy.copy(out_labels) - if sample_weights: - feed_data = inputs + targets + sample_weights - else: - feed_data = inputs + targets - num_train_samples = training_utils.check_num_samples( - feed_data, - batch_size=batch_size, - steps=steps_per_epoch, - steps_name='steps_per_epoch') - - if num_train_samples is not None: - index_array = np.arange(num_train_samples) - - model.history = cbks.History() - callbacks = [cbks.BaseLogger()] + (callbacks or []) + [model.history] - if verbose: - if steps_per_epoch is not None: - count_mode = 'steps' + if sample_weights: + feed_data = inputs + targets + sample_weights else: - count_mode = 'samples' - callbacks += [cbks.ProgbarLogger(count_mode)] - callbacks = cbks.CallbackList(callbacks) - - # it's possible to callback a different model than self - # (used by Sequential models) - if hasattr(model, 'callback_model') and model.callback_model: - callback_model = model.callback_model - else: - callback_model = model - - callbacks.set_model(callback_model) - - callbacks.set_params({ - 'batch_size': batch_size, - 'epochs': epochs, - 'steps': steps_per_epoch, - 'samples': num_train_samples, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics or [], - }) - callbacks.on_train_begin() - callback_model.stop_training = False - for cbk in callbacks: - if not val_inputs: - cbk.validation_data = [] - elif val_sample_weights: - cbk.validation_data = val_inputs + val_targets + val_sample_weights + feed_data = inputs + targets + num_train_samples = training_utils.check_num_samples( + feed_data, + batch_size=batch_size, + steps=steps_per_epoch, + steps_name='steps_per_epoch') + + if num_train_samples is not None: + index_array = np.arange(num_train_samples) + + model.history = cbks.History() + 
callbacks = [cbks.BaseLogger()] + (callbacks or []) + [model.history] + if verbose: + if steps_per_epoch is not None: + count_mode = 'steps' + else: + count_mode = 'samples' + callbacks += [cbks.ProgbarLogger(count_mode)] + callbacks = cbks.CallbackList(callbacks) + + # it's possible to callback a different model than self + # (used by Sequential models) + if hasattr(model, 'callback_model') and model.callback_model: + callback_model = model.callback_model else: - cbk.validation_data = val_inputs + val_targets - - for epoch in range(initial_epoch, epochs): - callbacks.on_epoch_begin(epoch) - epoch_logs = {} - if shuffle == 'batch': - index_array = model._batch_shuffle(index_array, batch_size) - elif shuffle: - np.random.shuffle(index_array) - - batches = make_batches(num_train_samples, batch_size) - - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - try: - inputs_batch = slice_arrays(inputs, batch_ids) - targets_batch = slice_arrays(targets, batch_ids) + callback_model = model + + callbacks.set_model(callback_model) + + callbacks.set_params({ + 'batch_size': batch_size, + 'epochs': epochs, + 'steps': steps_per_epoch, + 'samples': num_train_samples, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': callback_metrics or [], + }) + callbacks.on_train_begin() + callback_model.stop_training = False + for cbk in callbacks: + if not val_inputs: + cbk.validation_data = [] + elif val_sample_weights: + cbk.validation_data = val_inputs + val_targets + val_sample_weights + else: + cbk.validation_data = val_inputs + val_targets + + for epoch in range(initial_epoch, epochs): + callbacks.on_epoch_begin(epoch) + epoch_logs = {} + if shuffle == 'batch': + index_array = model._batch_shuffle(index_array, batch_size) + elif shuffle: + np.random.shuffle(index_array) + + batches = make_batches(num_train_samples, batch_size) + + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids 
= index_array[batch_start:batch_end] + try: + inputs_batch = slice_arrays(inputs, batch_ids) + targets_batch = slice_arrays(targets, batch_ids) + if sample_weights: + sample_weights_batch = slice_arrays(sample_weights, batch_ids) + else: + sample_weights_batch = None + except TypeError: + raise TypeError('TypeError while preparing batch. ' + 'If using HDF5 input data, ' + 'pass shuffle="batch".') + batch_logs = {} + batch_logs['batch'] = batch_index + batch_logs['size'] = len(batch_ids) + + callbacks.on_batch_begin(batch_index, batch_logs) + + inputs_batch = [ + ops.convert_to_tensor(val, dtype=backend.floatx()) + for val in inputs_batch] + targets_batch = [ + ops.convert_to_tensor(val, dtype=backend.floatx()) + for val in targets_batch] if sample_weights: - sample_weights_batch = slice_arrays(sample_weights, batch_ids) - else: - sample_weights_batch = None - except TypeError: - raise TypeError('TypeError while preparing batch. ' - 'If using HDF5 input data, ' - 'pass shuffle="batch".') - batch_logs = {} - batch_logs['batch'] = batch_index - batch_logs['size'] = len(batch_ids) - - callbacks.on_batch_begin(batch_index, batch_logs) - - inputs_batch = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] - targets_batch = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets_batch] - if sample_weights: - sample_weights_batch = [ - ops.convert_to_tensor(val, dtype=K.floatx()) - if val is not None else None - for val in sample_weights_batch] - - outs, loss, loss_metrics = _process_single_batch( - model, - inputs_batch, - targets_batch, - sample_weights=sample_weights_batch, - training=True) - - if not isinstance(outs, list): - outs = [outs] - - for l, o in zip(out_labels, outs): - batch_logs[l] = o - # Required for Eager mode - metrics_names, metrics_results = _eager_metrics_fn( - model, outs, targets_batch) - batch_logs['loss'] = tensor_util.constant_value(K.mean(loss)) - - # TODO(anjalisridhar): Move this to compile to avoid duplicate 
code. - # In graph mode we set the metric names in compile. However in - # Eager mode we calculate the metrics for each batch in fit_loop. - # We could calculate the metric names and functions in compile. - # This would avoid setting the callback parameters separately. - # We need to do this for the first iteration alone - for m in metrics_names: - if m not in callback_metrics: - callback_metrics.append(m) - - callbacks.set_params({ - 'batch_size': batch_size, - 'epochs': epochs, - 'steps': steps_per_epoch, - 'samples': num_train_samples, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics or [], - }) - - for k, v in zip(model.metrics_names, - [K.mean(loss)] + loss_metrics + metrics_results): - batch_logs[k] = tensor_util.constant_value(v) - - callbacks.on_batch_end(batch_index, batch_logs) + sample_weights_batch = [ + ops.convert_to_tensor(val, dtype=backend.floatx()) + if val is not None else None + for val in sample_weights_batch] + + outs, loss, loss_metrics = _process_single_batch( + model, + inputs_batch, + targets_batch, + sample_weights=sample_weights_batch, + training=True) + + if not isinstance(outs, list): + outs = [outs] + + for l, o in zip(out_labels, outs): + batch_logs[l] = o + # Required for Eager mode + metrics_names, metrics_results = _eager_metrics_fn( + model, outs, targets_batch) + batch_logs['loss'] = tensor_util.constant_value(backend.mean(loss)) + + # TODO(anjalisridhar): Move this to compile to avoid duplicate code. + # In graph mode we set the metric names in compile. However in + # Eager mode we calculate the metrics for each batch in fit_loop. + # We could calculate the metric names and functions in compile. + # This would avoid setting the callback parameters separately. 
+ # We need to do this for the first iteration alone + for m in metrics_names: + if m not in callback_metrics: + callback_metrics.append(m) + + callbacks.set_params({ + 'batch_size': batch_size, + 'epochs': epochs, + 'steps': steps_per_epoch, + 'samples': num_train_samples, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': callback_metrics or [], + }) + + for k, v in zip(model.metrics_names, + [backend.mean(loss)] + loss_metrics + metrics_results): + batch_logs[k] = tensor_util.constant_value(v) + + callbacks.on_batch_end(batch_index, batch_logs) + if callback_model.stop_training: + break + + if batch_index == len(batches) - 1: # Last batch. + if do_validation: + val_outs = test_loop( + model, val_inputs, val_targets, + sample_weights=val_sample_weights, + batch_size=batch_size, + verbose=0) + if not isinstance(val_outs, list): + val_outs = [val_outs] + # Same labels assumed. + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + callbacks.on_epoch_end(epoch, epoch_logs) if callback_model.stop_training: break - - if batch_index == len(batches) - 1: # Last batch. - if do_validation: - val_outs = test_loop( - model, val_inputs, val_targets, - sample_weights=val_sample_weights, - batch_size=batch_size, - verbose=0) - if not isinstance(val_outs, list): - val_outs = [val_outs] - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - callbacks.on_epoch_end(epoch, epoch_logs) - if callback_model.stop_training: - break - callbacks.on_train_end() - return model.history + callbacks.on_train_end() + return model.history def test_loop(model, inputs, targets, @@ -530,66 +531,68 @@ def test_loop(model, inputs, targets, and/or metrics). The attribute `model.metrics_names` will give you the display labels for the scalar outputs. 
""" - K.set_learning_phase(False) - feed_data = inputs + targets - if sample_weights: - feed_data += sample_weights - num_samples = training_utils.check_num_samples( - feed_data, batch_size=batch_size, steps=steps, steps_name='steps') - outs = [] - if verbose == 1: - progbar = Progbar(target=num_samples) - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - inputs_batch = slice_arrays(inputs, batch_ids) - targets_batch = slice_arrays(targets, batch_ids) + with backend.learning_phase_scope(0): + feed_data = inputs + targets if sample_weights: - sample_weights_batch = slice_arrays(sample_weights, batch_ids) - else: - sample_weights_batch = None + feed_data += sample_weights + num_samples = training_utils.check_num_samples( + feed_data, batch_size=batch_size, steps=steps, steps_name='steps') + outs = [] + if verbose == 1: + progbar = Progbar(target=num_samples) + batches = make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + inputs_batch = slice_arrays(inputs, batch_ids) + targets_batch = slice_arrays(targets, batch_ids) + if sample_weights: + sample_weights_batch = slice_arrays(sample_weights, batch_ids) + else: + sample_weights_batch = None - inputs_batch = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] - targets_batch = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in targets_batch] - if sample_weights: - sample_weights_batch = [ - ops.convert_to_tensor(val, dtype=K.floatx()) - if val is not None else None - for val in sample_weights_batch] - - loss_outs, loss, loss_metrics = _model_loss( - model, - inputs_batch, - targets_batch, - sample_weights=sample_weights_batch, - training=False) - _, metrics_results = _eager_metrics_fn(model, 
loss_outs, targets_batch) - batch_outs = [] - for _, v in zip(model.metrics_names, - [K.mean(loss)] + loss_metrics + metrics_results): - batch_outs.append(tensor_util.constant_value(v)) - - if isinstance(batch_outs, list): - if batch_index == 0: - for batch_out in enumerate(batch_outs): + inputs_batch = [ + ops.convert_to_tensor(val, dtype=backend.floatx()) + for val in inputs_batch] + targets_batch = [ + ops.convert_to_tensor(val, dtype=backend.floatx()) + for val in targets_batch] + if sample_weights: + sample_weights_batch = [ + ops.convert_to_tensor(val, dtype=backend.floatx()) + if val is not None else None + for val in sample_weights_batch] + + loss_outs, loss, loss_metrics = _model_loss( + model, + inputs_batch, + targets_batch, + sample_weights=sample_weights_batch, + training=False) + _, metrics_results = _eager_metrics_fn(model, loss_outs, targets_batch) + batch_outs = [] + for _, v in zip(model.metrics_names, + [backend.mean(loss)] + loss_metrics + metrics_results): + batch_outs.append(tensor_util.constant_value(v)) + + if isinstance(batch_outs, list): + if batch_index == 0: + for batch_out in enumerate(batch_outs): + outs.append(0.) + for i, batch_out in enumerate(batch_outs): + outs[i] += batch_out * len(batch_ids) + else: + if batch_index == 0: outs.append(0.) - for i, batch_out in enumerate(batch_outs): - outs[i] += batch_out * len(batch_ids) - else: - if batch_index == 0: - outs.append(0.) - outs[0] += batch_outs * len(batch_ids) + outs[0] += batch_outs * len(batch_ids) - if verbose == 1: - progbar.update(batch_end) - for i in range(len(outs)): - outs[i] /= num_samples - if len(outs) == 1: - return outs[0] - return outs + if verbose == 1: + progbar.update(batch_end) + for i in range(len(outs)): + outs[i] /= num_samples + if len(outs) == 1: + return outs[0] + return outs def predict_loop(model, inputs, @@ -612,49 +615,50 @@ def predict_loop(model, inputs, or list of arrays of predictions (if the model has multiple outputs). 
""" - K.set_learning_phase(False) - num_samples = training_utils.check_num_samples( - inputs, batch_size, steps, 'steps') - if verbose == 1: - if steps is not None: - progbar = Progbar(target=steps) - else: - progbar = Progbar(target=num_samples) + with backend.learning_phase_scope(0): + num_samples = training_utils.check_num_samples( + inputs, batch_size, steps, 'steps') + if verbose == 1: + if steps is not None: + progbar = Progbar(target=steps) + else: + progbar = Progbar(target=num_samples) - outs = [] - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - inputs_batch = slice_arrays(inputs, batch_ids) + outs = [] + batches = make_batches(num_samples, batch_size) + index_array = np.arange(num_samples) + for batch_index, (batch_start, batch_end) in enumerate(batches): + batch_ids = index_array[batch_start:batch_end] + inputs_batch = slice_arrays(inputs, batch_ids) - inputs_batch = [ - ops.convert_to_tensor(val, dtype=K.floatx()) for val in inputs_batch] + inputs_batch = [ + ops.convert_to_tensor(val, dtype=backend.floatx()) + for val in inputs_batch] - if len(inputs_batch) == 1: - if model._expects_training_arg: - batch_outs = model.call(inputs_batch[0], training=False) - else: - batch_outs = model.call(inputs_batch[0]) - else: - if model._expects_training_arg: - batch_outs = model.call(inputs_batch, training=False) + if len(inputs_batch) == 1: + if model._expects_training_arg: + batch_outs = model.call(inputs_batch[0], training=False) + else: + batch_outs = model.call(inputs_batch[0]) else: - batch_outs = model.call(inputs_batch) - - if not isinstance(batch_outs, list): - batch_outs = [batch_outs] - if batch_index == 0: - # Pre-allocate the results arrays. 
- for batch_out in batch_outs: - dims = batch_out.shape[1:].dims - dims_list = [d.value for d in dims] - shape = (num_samples,) + tuple(dims_list) - outs.append(np.zeros(shape, dtype=batch_out.dtype.as_numpy_dtype)) - for i, batch_out in enumerate(batch_outs): - outs[i][batch_start:batch_end] = batch_out - if verbose == 1: - progbar.update(batch_end) - if len(outs) == 1: - return outs[0] - return outs + if model._expects_training_arg: + batch_outs = model.call(inputs_batch, training=False) + else: + batch_outs = model.call(inputs_batch) + + if not isinstance(batch_outs, list): + batch_outs = [batch_outs] + if batch_index == 0: + # Pre-allocate the results arrays. + for batch_out in batch_outs: + dims = batch_out.shape[1:].dims + dims_list = [d.value for d in dims] + shape = (num_samples,) + tuple(dims_list) + outs.append(np.zeros(shape, dtype=batch_out.dtype.as_numpy_dtype)) + for i, batch_out in enumerate(batch_outs): + outs[i][batch_start:batch_end] = batch_out + if verbose == 1: + progbar.update(batch_end) + if len(outs) == 1: + return outs[0] + return outs -- GitLab From 9bac59bc68c5f9b7fd9d3b28f118dfd0c78c5fed Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 15:07:27 -0800 Subject: [PATCH 489/884] Add Kullback-Leibler for Independent distribution(s). 
PiperOrigin-RevId: 188087902 --- .../python/kernel_tests/independent_test.py | 95 +++++++++++++++++++ .../distributions/python/ops/independent.py | 56 +++++++++++ 2 files changed, 151 insertions(+) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/independent_test.py b/tensorflow/contrib/distributions/python/kernel_tests/independent_test.py index 06318ca09d..6a69f9e60b 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/independent_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/independent_test.py @@ -27,6 +27,7 @@ from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import bernoulli as bernoulli_lib +from tensorflow.python.ops.distributions import kullback_leibler from tensorflow.python.ops.distributions import normal as normal_lib from tensorflow.python.platform import test from tensorflow.python.platform import tf_logging @@ -126,6 +127,100 @@ class ProductDistributionTest(test.TestCase): self.assertAllClose(sample_entropy_, actual_entropy_, rtol=0.01, atol=0.) self.assertAllClose(loc, actual_mode_, rtol=1e-6, atol=0.) 
+ def testKLRaises(self): + ind1 = independent_lib.Independent( + distribution=normal_lib.Normal( + loc=np.float32([-1., 1]), + scale=np.float32([0.1, 0.5])), + reinterpreted_batch_ndims=1) + ind2 = independent_lib.Independent( + distribution=normal_lib.Normal( + loc=np.float32(-1), + scale=np.float32(0.5)), + reinterpreted_batch_ndims=0) + + with self.assertRaisesRegexp( + ValueError, "Event shapes do not match"): + kullback_leibler.kl_divergence(ind1, ind2) + + ind1 = independent_lib.Independent( + distribution=normal_lib.Normal( + loc=np.float32([-1., 1]), + scale=np.float32([0.1, 0.5])), + reinterpreted_batch_ndims=1) + ind2 = independent_lib.Independent( + distribution=mvn_diag_lib.MultivariateNormalDiag( + loc=np.float32([-1., 1]), + scale_diag=np.float32([0.1, 0.5])), + reinterpreted_batch_ndims=0) + + with self.assertRaisesRegexp( + NotImplementedError, "different event shapes"): + kullback_leibler.kl_divergence(ind1, ind2) + + def testKLScalarToMultivariate(self): + normal1 = normal_lib.Normal( + loc=np.float32([-1., 1]), + scale=np.float32([0.1, 0.5])) + ind1 = independent_lib.Independent( + distribution=normal1, reinterpreted_batch_ndims=1) + + normal2 = normal_lib.Normal( + loc=np.float32([-3., 3]), + scale=np.float32([0.3, 0.3])) + ind2 = independent_lib.Independent( + distribution=normal2, reinterpreted_batch_ndims=1) + + normal_kl = kullback_leibler.kl_divergence(normal1, normal2) + ind_kl = kullback_leibler.kl_divergence(ind1, ind2) + self.assertAllClose( + self.evaluate(math_ops.reduce_sum(normal_kl, axis=-1)), + self.evaluate(ind_kl)) + + def testKLIdentity(self): + normal1 = normal_lib.Normal( + loc=np.float32([-1., 1]), + scale=np.float32([0.1, 0.5])) + # This is functionally just a wrapper around normal1, + # and doesn't change any outputs. 
+ ind1 = independent_lib.Independent( + distribution=normal1, reinterpreted_batch_ndims=0) + + normal2 = normal_lib.Normal( + loc=np.float32([-3., 3]), + scale=np.float32([0.3, 0.3])) + # This is functionally just a wrapper around normal2, + # and doesn't change any outputs. + ind2 = independent_lib.Independent( + distribution=normal2, reinterpreted_batch_ndims=0) + + normal_kl = kullback_leibler.kl_divergence(normal1, normal2) + ind_kl = kullback_leibler.kl_divergence(ind1, ind2) + self.assertAllClose( + self.evaluate(normal_kl), self.evaluate(ind_kl)) + + def testKLMultivariateToMultivariate(self): + # (1, 1, 2) batch of MVNDiag + mvn1 = mvn_diag_lib.MultivariateNormalDiag( + loc=np.float32([[[[-1., 1, 3.], [2., 4., 3.]]]]), + scale_diag=np.float32([[[0.2, 0.1, 5.], [2., 3., 4.]]])) + ind1 = independent_lib.Independent( + distribution=mvn1, reinterpreted_batch_ndims=2) + + # (1, 1, 2) batch of MVNDiag + mvn2 = mvn_diag_lib.MultivariateNormalDiag( + loc=np.float32([[[[-2., 3, 2.], [1., 3., 2.]]]]), + scale_diag=np.float32([[[0.1, 0.5, 3.], [1., 2., 1.]]])) + + ind2 = independent_lib.Independent( + distribution=mvn2, reinterpreted_batch_ndims=2) + + mvn_kl = kullback_leibler.kl_divergence(mvn1, mvn2) + ind_kl = kullback_leibler.kl_divergence(ind1, ind2) + self.assertAllClose( + self.evaluate(math_ops.reduce_sum(mvn_kl, axis=[-1, -2])), + self.evaluate(ind_kl)) + def _testMnistLike(self, static_shape): sample_shape = [4, 5] batch_shape = [10] diff --git a/tensorflow/contrib/distributions/python/ops/independent.py b/tensorflow/contrib/distributions/python/ops/independent.py index cbce005013..7dcb3e3ac4 100644 --- a/tensorflow/contrib/distributions/python/ops/independent.py +++ b/tensorflow/contrib/distributions/python/ops/independent.py @@ -28,6 +28,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import distribution as distribution_lib 
+from tensorflow.python.ops.distributions import kullback_leibler class Independent(distribution_lib.Distribution): @@ -254,3 +255,58 @@ class Independent(distribution_lib.Distribution): else: which_maximum = np.maximum return which_maximum(0, ndims - 1) + + +@kullback_leibler.RegisterKL(Independent, Independent) +def _kl_independent(a, b, name="kl_independent"): + """Batched KL divergence `KL(a || b)` for Independent distributions. + + We can leverage the fact that + ``` + KL(Independent(a) || Independent(b)) = sum(KL(a || b)) + ``` + where the sum is over the `reinterpreted_batch_ndims`. + + Args: + a: Instance of `Independent`. + b: Instance of `Independent`. + name: (optional) name to use for created ops. Default "kl_independent". + + Returns: + Batchwise `KL(a || b)`. + + Raises: + ValueError: If the event space for `a` and `b`, or their underlying + distributions don't match. + """ + p = a.distribution + q = b.distribution + + # The KL between any two (non)-batched distributions is a scalar. + # Given that the KL between two factored distributions is the sum, i.e. + # KL(p1(x)p2(y) || q1(x)q2(y)) = KL(p1 || q1) + KL(q1 || q2), we compute + # KL(p || q) and do a `reduce_sum` on the reinterpreted batch dimensions. 
+ if a.event_shape.is_fully_defined() and b.event_shape.is_fully_defined(): + if a.event_shape == b.event_shape: + if p.event_shape == q.event_shape: + num_reduce_dims = a.event_shape.ndims - p.event_shape.ndims + reduce_dims = [-i - 1 for i in range(0, num_reduce_dims)] + + return math_ops.reduce_sum( + kullback_leibler.kl_divergence(p, q, name=name), axis=reduce_dims) + else: + raise NotImplementedError("KL between Independents with different " + "event shapes not supported.") + else: + raise ValueError("Event shapes do not match.") + else: + with ops.control_dependencies([ + check_ops.assert_equal(a.event_shape_tensor(), b.event_shape_tensor()), + check_ops.assert_equal(p.event_shape_tensor(), q.event_shape_tensor()) + ]): + num_reduce_dims = ( + array_ops.shape(a.event_shape_tensor()[0]) - + array_ops.shape(p.event_shape_tensor()[0])) + reduce_dims = math_ops.range(-num_reduce_dims - 1, -1, 1) + return math_ops.reduce_sum( + kullback_leibler.kl_divergence(p, q, name=name), axis=reduce_dims) -- GitLab From 323af99527662ba93f54f71cc59224bed8adc596 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Tue, 6 Mar 2018 15:14:11 -0800 Subject: [PATCH 490/884] Fix c++ and python formatting --- .../contrib/tensorrt/convert/convert_graph.cc | 11 ++-- .../contrib/tensorrt/convert/convert_graph.h | 2 + .../contrib/tensorrt/convert/convert_nodes.h | 6 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 2 +- tensorflow/contrib/tensorrt/log/trt_logger.cc | 2 +- .../contrib/tensorrt/python/trt_convert.py | 18 +++--- .../contrib/tensorrt/test/test_tftrt.py | 57 ++++++++++--------- 7 files changed, 55 insertions(+), 43 deletions(-) diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.cc b/tensorflow/contrib/tensorrt/convert/convert_graph.cc index ddbdf8dbc6..eea8c8efa2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.cc @@ -134,9 +134,10 @@ std::unordered_map> 
BuildTensorNameMap( // TODO(sami): convert references to pointers struct ConvertGraphParams { ConvertGraphParams( - tensorflow::Graph& inp_graph, const std::vector& output_node_names, - const std::set& subgraph_node_id_numbers, size_t max_supported_batch_size, - size_t max_consumed_workspace_size_bytes, + tensorflow::Graph& inp_graph, + const std::vector& output_node_names, + const std::set& subgraph_node_id_numbers, + size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, const tensorflow::grappler::GraphProperties& current_graph_properties, std::unordered_map>* output_edges, int engine_precision_mode) @@ -214,8 +215,8 @@ tensorflow::Status GetCalibNode(ConvertGraphParams* params) { auto dst_input = in_edge->dst_input(); VLOG(1) << " update edge " << trt_node->name() << ":" << src_output << " -> " << dst_node->name() << ":" << dst_input; - TF_RETURN_IF_ERROR(params->graph.UpdateEdge( - trt_node, src_output, dst_node, dst_input)); + TF_RETURN_IF_ERROR( + params->graph.UpdateEdge(trt_node, src_output, dst_node, dst_input)); } return tensorflow::Status::OK(); } diff --git a/tensorflow/contrib/tensorrt/convert/convert_graph.h b/tensorflow/contrib/tensorrt/convert/convert_graph.h index 4cdc768a42..e1596e89e2 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_graph.h +++ b/tensorflow/contrib/tensorrt/convert/convert_graph.h @@ -27,6 +27,7 @@ limitations under the License. 
namespace tensorflow { namespace tensorrt { namespace convert { + // This method converts an already generated calibration graph which was used in // calibration runs to an inference graph tensorflow::Status ConvertCalibGraphToInferGraph( @@ -41,6 +42,7 @@ tensorflow::Status ConvertGraphDefToTensorRT( const std::vector& output_names, size_t max_batch_size, size_t max_workspace_size_bytes, tensorflow::GraphDef* new_graph_def, int precision_mode, int minimum_segment_size); + } // namespace convert } // namespace tensorrt } // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.h b/tensorflow/contrib/tensorrt/convert/convert_nodes.h index 518798c0ad..954a1e72f8 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.h +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.h @@ -33,9 +33,11 @@ limitations under the License. namespace tensorflow { namespace tensorrt { namespace convert { + const int FP32MODE = 0; const int FP16MODE = 1; const int INT8MODE = 2; + struct SubGraphParams { SubGraphParams( tensorflow::Graph& inp_graph, @@ -45,7 +47,8 @@ struct SubGraphParams { size_t max_supported_batch_size, size_t max_consumed_workspace_size_bytes, const tensorflow::grappler::GraphProperties& current_graph_properties, std::unordered_map>* output_edges, - tensorflow::NodeDef* constructed_trt_node, int engine_precision_mode = FP32MODE) + tensorflow::NodeDef* constructed_trt_node, + int engine_precision_mode = FP32MODE) : graph(inp_graph), subgraph_node_ids(subgraph_node_id_numbers), input_inds(input_indices), @@ -68,6 +71,7 @@ struct SubGraphParams { tensorflow::NodeDef* trt_node; const int precision_mode; }; + // TODO(sami): Replace references with const reference or pointers tensorflow::Status ConvertSubGraphToTensorRTNodeDef(SubGraphParams& params); tensorflow::Status InjectCalibrationNode(SubGraphParams& params); diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc 
index d4be96a424..aea44fd8a2 100644 --- a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -120,7 +120,7 @@ void TRTCalibOp::Compute(tensorflow::OpKernelContext* ctx) { ->stream() ->implementation() ->CudaStreamMemberHack())); - calib_res->calibrator_->setBatch(input_data,*stream); + calib_res->calibrator_->setBatch(input_data, *stream); VLOG(2) << "Passed calibration data"; // TODO(aaroey): make sure we wait for the completion of calibration on the // last batch in future PR. diff --git a/tensorflow/contrib/tensorrt/log/trt_logger.cc b/tensorflow/contrib/tensorrt/log/trt_logger.cc index 83ae5db1d9..dda0dc9e71 100644 --- a/tensorflow/contrib/tensorrt/log/trt_logger.cc +++ b/tensorflow/contrib/tensorrt/log/trt_logger.cc @@ -27,7 +27,7 @@ void Logger::log(Severity severity, const char* msg) { // Suppress info-level messages switch (severity) { case Severity::kINFO: { // Mark TRT info messages as debug! - VLOG(2) << name_ << " " < Date: Tue, 6 Mar 2018 15:20:58 -0800 Subject: [PATCH 491/884] Remove clipping on BoundedTensorSpec range. PiperOrigin-RevId: 188089885 --- tensorflow/python/framework/tensor_spec.py | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/framework/tensor_spec.py b/tensorflow/python/framework/tensor_spec.py index a0411bc3d9..27a9ab8c60 100644 --- a/tensorflow/python/framework/tensor_spec.py +++ b/tensorflow/python/framework/tensor_spec.py @@ -166,16 +166,8 @@ class BoundedTensorSpec(TensorSpec): @classmethod def from_spec(cls, spec): dtype = dtypes.as_dtype(spec.dtype) - if dtype in [dtypes.float64, dtypes.float32]: - # Avoid under/over-flow for `dtype.maximum - dtype.minimum`. 
- low = dtype.min / 2 - high = dtype.max / 2 - else: - low = dtype.min - high = dtype.max - - minimum = getattr(spec, "minimum", low) - maximum = getattr(spec, "maximum", high) + minimum = getattr(spec, "minimum", dtype.min) + maximum = getattr(spec, "maximum", dtype.max) return BoundedTensorSpec(spec.shape, dtype, minimum, maximum, spec.name) @property -- GitLab From 5dac9182ddec67a98199129e09bd2980b0077e65 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Tue, 6 Mar 2018 15:33:21 -0800 Subject: [PATCH 492/884] Fix python formatting and add missing docstrings --- .../contrib/tensorrt/python/__init__.py | 2 +- .../contrib/tensorrt/python/trt_convert.py | 34 +++++++++++++------ .../contrib/tensorrt/test/test_tftrt.py | 8 ++--- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 3941d150d1..0b2321b5fc 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -20,6 +20,6 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.tensorrt.python.ops import trt_engine_op -from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph +from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph # pylint: enable=unused-import,line-too-long diff --git a/tensorflow/contrib/tensorrt/python/trt_convert.py b/tensorflow/contrib/tensorrt/python/trt_convert.py index 861b316f48..666220d78c 100644 --- a/tensorflow/contrib/tensorrt/python/trt_convert.py +++ b/tensorflow/contrib/tensorrt/python/trt_convert.py @@ -20,15 +20,17 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long import six as _six +from tensorflow.contrib.tensorrt.wrap_conversion import calib_convert 
+from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert from tensorflow.core.framework import graph_pb2 +from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import errors from tensorflow.python.framework import errors_impl as _impl -from tensorflow.contrib.tensorrt.wrap_conversion import trt_convert, calib_convert -from tensorflow.python.util import compat -from tensorflow.python.grappler import tf_optimizer -from tensorflow.core.protobuf import rewriter_config_pb2 from tensorflow.python.framework import meta_graph from tensorflow.python.framework import ops +from tensorflow.python.grappler import tf_optimizer +from tensorflow.python.util import compat +# pylint: enable=unused-import,line-too-long # TODO(skama): get outputs from session when implemented as c++ @@ -41,17 +43,20 @@ def create_inference_graph(input_graph_def, minimum_segment_size=3): """Python wrapper for the TRT transormation. - Args: input_graph_def: GraphDef object containing a model to be transformed. - outputs: List of tensors or node names for the model outputs. + outputs: list of tensors or node names for the model outputs. max_batch_size: max size for the input batch max_workspace_size_bytes: parameter to control memory allocation (in Bytes) + precision_mode: one of 'FP32', 'FP16' and 'INT8' + minimum_segment_size: the minimum number of nodes required for a subgraph to + be replaced by TRTEngineOp. Returns: New GraphDef with TRTEngineOps placed in graph replacing subgraphs. Raises: + ValueError: if the provided precision mode is invalid. RuntimeError: if the returned status message is malformed. """ supported_precision_modes = {"FP32": 0, "FP16": 1, "INT8": 2} @@ -116,8 +121,15 @@ def create_inference_graph(input_graph_def, def calib_graph_to_infer_graph(calibration_graph_def): - """Convert an existing calibration graph containing calibration data - to inference graph""" + """Convert an existing calibration graph to inference graph. 
+ + Args: + calibration_graph_def: the calibration GraphDef object with calibration data + Returns: + New GraphDef with TRTEngineOps placed in graph replacing calibration nodes. + Raises: + RuntimeError: if the returned status message is malformed. + """ def py2string(inp): return inp @@ -134,16 +146,18 @@ def calib_graph_to_infer_graph(calibration_graph_def): out = calib_convert(graph_str) status = to_string(out[0]) output_graph_def_string = out[1] - del graph_str #save some memory + del graph_str # Save some memory if len(status) < 2: raise _impl.UnknownError(None, None, status) if status[:2] != "OK": msg = status.split(";") if len(msg) == 1: raise RuntimeError("Status message is malformed {}".format(status)) + # pylint: disable=protected-access raise _impl._make_specific_exception(None, None, ";".join(msg[1:]), int(msg[0])) + # pylint: enable=protected-access output_graph_def = graph_pb2.GraphDef() output_graph_def.ParseFromString(output_graph_def_string) - del output_graph_def_string #save some memory + del output_graph_def_string # Save some memory return output_graph_def diff --git a/tensorflow/contrib/tensorrt/test/test_tftrt.py b/tensorflow/contrib/tensorrt/test/test_tftrt.py index a5cfb9b167..0b661bd536 100644 --- a/tensorflow/contrib/tensorrt/test/test_tftrt.py +++ b/tensorflow/contrib/tensorrt/test/test_tftrt.py @@ -60,7 +60,7 @@ def get_simple_graph_def(): def run_graph(gdef, dumm_inp): - """Run given graphdef once""" + """Run given graphdef once.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() @@ -76,11 +76,9 @@ def run_graph(gdef, dumm_inp): # Use real data that is representatitive of the inference dataset -# for calibration. For this test script it is random data - - +# for calibration. For this test script it is random data. 
def run_calibration(gdef, dumm_inp): - """Run given calibration graph multiple times""" + """Run given calibration graph multiple times.""" gpu_options = cpb2.GPUOptions(per_process_gpu_memory_fraction=0.50) ops.reset_default_graph() g = ops.Graph() -- GitLab From 18d97ec74e1f08e7ab2c7700c5355394c8284231 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Tue, 6 Mar 2018 15:44:15 -0800 Subject: [PATCH 493/884] RemoteCall: Cache function handles. Currently, whenever a functional_ops.remote_call(...) is executed against a remote worker, the function will be instantiated each and every time against the remote worker causing a memory leak on both the caller and the callee. Instead, we cache the function handles and reuse them. PiperOrigin-RevId: 188093266 --- tensorflow/core/kernels/function_ops.cc | 28 +++++++++++++++++++++---- tensorflow/core/ops/functional_ops.cc | 1 + 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index a094ebe5e2..e3c78d6b70 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -307,11 +307,25 @@ class RemoteCallOp : public AsyncOpKernel { AttrValueMap attr_values = func_.attr(); FunctionLibraryRuntime::InstantiateOptions instantiate_opts; instantiate_opts.target = target_device; + + FunctionTarget function_target = {target_device, lib}; + FunctionLibraryRuntime::Handle handle; - OP_REQUIRES_OK_ASYNC(ctx, - lib->Instantiate(func_.name(), AttrSlice(&attr_values), - instantiate_opts, &handle), - done); + { + mutex_lock l(mu_); + auto cached_entry = handle_cache_.find(function_target); + if (cached_entry != handle_cache_.end()) { + handle = cached_entry->second; + } else { + OP_REQUIRES_OK_ASYNC( + ctx, + lib->Instantiate(func_.name(), AttrSlice(&attr_values), + instantiate_opts, &handle), + done); + auto insert_result = handle_cache_.insert({function_target, handle}); + CHECK(insert_result.second) << 
"Insert unsuccessful."; + } + } OpInputList arguments; OP_REQUIRES_OK_ASYNC(ctx, ctx->input_list("args", &arguments), done); @@ -346,6 +360,12 @@ class RemoteCallOp : public AsyncOpKernel { private: string target_; NameAttrList func_; + + mutex mu_; + typedef std::pair FunctionTarget; + std::map handle_cache_ + GUARDED_BY(mu_); + TF_DISALLOW_COPY_AND_ASSIGN(RemoteCallOp); }; diff --git a/tensorflow/core/ops/functional_ops.cc b/tensorflow/core/ops/functional_ops.cc index 9e18d20db6..4b21fac80a 100644 --- a/tensorflow/core/ops/functional_ops.cc +++ b/tensorflow/core/ops/functional_ops.cc @@ -47,6 +47,7 @@ REGISTER_OP("RemoteCall") .Attr("Tin: list(type)") .Attr("Tout: list(type)") .Attr("f: func") + .SetIsStateful() .SetShapeFn(shape_inference::UnknownShape); REGISTER_OP("_If") -- GitLab From 2775ac493806fefa4e7c2fd798be5b1f87e01a94 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 15:50:13 -0800 Subject: [PATCH 494/884] Extend tensor_list with basic support for appending to TensorArrays. This allows handling list-type operations on lists that we haven't created, e.g. received as parameters. 
PiperOrigin-RevId: 188094077 --- tensorflow/contrib/py2tf/utils/tensor_list.py | 19 +++++++++++++ .../contrib/py2tf/utils/tensor_list_test.py | 28 +++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/tensorflow/contrib/py2tf/utils/tensor_list.py b/tensorflow/contrib/py2tf/utils/tensor_list.py index b6ff49e2a0..2556f41289 100644 --- a/tensorflow/contrib/py2tf/utils/tensor_list.py +++ b/tensorflow/contrib/py2tf/utils/tensor_list.py @@ -18,7 +18,26 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import ops from tensorflow.python.ops import list_ops +from tensorflow.python.ops import tensor_array_ops + + +def dynamic_list_append(target, element): + """Converts a list append call inline.""" + if isinstance(target, tensor_array_ops.TensorArray): + return target.write(target.size(), element) + # TODO(mdan): What's the right way to check this? + # TODO(mdan): We may not need this branch. + # It may be possible to use TensorList alone if the loop body will not + # require wrapping it, although we'd have to think about an autoboxing + # mechanism for lists received as parameter. + if isinstance(target, ops.Tensor): + return list_ops.tensor_list_push_back(target, element) + + # Python targets (including TensorList): fallback to their original append. 
+ target.append(element) + return target class TensorList(object): diff --git a/tensorflow/contrib/py2tf/utils/tensor_list_test.py b/tensorflow/contrib/py2tf/utils/tensor_list_test.py index b5e554a162..110e4d105e 100644 --- a/tensorflow/contrib/py2tf/utils/tensor_list_test.py +++ b/tensorflow/contrib/py2tf/utils/tensor_list_test.py @@ -21,13 +21,41 @@ from __future__ import print_function from tensorflow.contrib.py2tf.utils import tensor_list as tl from tensorflow.python.client.session import Session from tensorflow.python.eager import context +from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework.constant_op import constant +from tensorflow.python.ops import list_ops +from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test class TensorListTest(test.TestCase): + def _shape(self, shape_tuple): + return constant(shape_tuple, dtypes.int32) + + def test_dynamic_list_append(self): + l = [] + l = tl.dynamic_list_append(l, 1) + self.assertListEqual(l, [1]) + + l = list_ops.empty_tensor_list(self._shape(()), dtypes.int32) + l = tl.dynamic_list_append(l, 1) + s = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32) + with self.test_session() as sess: + self.assertAllEqual(sess.run(s), [1]) + + l = tensor_array_ops.TensorArray(dtypes.int32, size=0, dynamic_size=True) + l = tl.dynamic_list_append(l, 1) + s = l.stack() + with self.test_session() as sess: + self.assertAllEqual(sess.run(s), [1]) + + l = tl.TensorList(self._shape(()), dtypes.int32) + l = tl.dynamic_list_append(l, 1) + with self.test_session() as sess: + self.assertAllEqual(sess.run(l[0]), 1) + def test_list_append_python(self): with context.eager_mode(): a = constant(3.0) -- GitLab From ebc3077a2a39157d96cf85c5296e4efe98b20c1e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 16:18:11 -0800 Subject: [PATCH 495/884] Update ops-related pbtxt files. 
PiperOrigin-RevId: 188098602 --- .../core/ops/compat/ops_history.v1.pbtxt | 32 +++++++++++++++++++ tensorflow/core/ops/ops.pbtxt | 1 + 2 files changed, 33 insertions(+) diff --git a/tensorflow/core/ops/compat/ops_history.v1.pbtxt b/tensorflow/core/ops/compat/ops_history.v1.pbtxt index 35c49658b3..18b8bc5495 100644 --- a/tensorflow/core/ops/compat/ops_history.v1.pbtxt +++ b/tensorflow/core/ops/compat/ops_history.v1.pbtxt @@ -38235,6 +38235,38 @@ op { type: "func" } } +op { + name: "RemoteCall" + input_arg { + name: "target" + type: DT_STRING + } + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + minimum: 1 + } + attr { + name: "f" + type: "func" + } + is_stateful: true +} op { name: "RemoteFusedGraphExecute" input_arg { diff --git a/tensorflow/core/ops/ops.pbtxt b/tensorflow/core/ops/ops.pbtxt index bf7682712c..3d84ab3f25 100644 --- a/tensorflow/core/ops/ops.pbtxt +++ b/tensorflow/core/ops/ops.pbtxt @@ -19541,6 +19541,7 @@ op { name: "f" type: "func" } + is_stateful: true } op { name: "RemoteFusedGraphExecute" -- GitLab From cd8801199275f23d78905c3154a124d56b8e4b0a Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Tue, 6 Mar 2018 16:27:35 -0800 Subject: [PATCH 496/884] Internal change. 
PiperOrigin-RevId: 188100164 --- tensorflow/core/distributed_runtime/rpc/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/core/distributed_runtime/rpc/BUILD b/tensorflow/core/distributed_runtime/rpc/BUILD index dade26abc6..e9d5390c63 100644 --- a/tensorflow/core/distributed_runtime/rpc/BUILD +++ b/tensorflow/core/distributed_runtime/rpc/BUILD @@ -381,6 +381,7 @@ tf_cuda_library( data = [ ":grpc_testlib_server", ], + visibility = ["//tensorflow:__subpackages__"], deps = [ ":grpc_session", ":grpc_testlib_ops", -- GitLab From 721a60801055190dae18fe3e3933950c75fa9d1c Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Tue, 6 Mar 2018 16:27:56 -0800 Subject: [PATCH 497/884] python3 fix PiperOrigin-RevId: 188100221 --- .../python/data/kernel_tests/dataset_constructor_op_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py b/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py index 14627810b5..ea5b41e5d8 100644 --- a/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py +++ b/tensorflow/python/data/kernel_tests/dataset_constructor_op_test.py @@ -263,7 +263,7 @@ class DatasetConstructorTest(test.TestCase): for i in range(3): results = sess.run(get_next) for component, result_component in zip( - (zip(*components[:3])[i] + expected[i]), results): + (list(zip(*components[:3]))[i] + expected[i]), results): if sparse_tensor.is_sparse(component): self.assertSparseValuesEqual(component, result_component) else: -- GitLab From 75e15a2b25f731d7ddf4ffc455a4bf8d1c0fd7ca Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 16:29:33 -0800 Subject: [PATCH 498/884] [XLA] Store the program shape in the HloModuleProto and HloComputationProto. 
PiperOrigin-RevId: 188100425 --- tensorflow/compiler/xla/service/hlo.proto | 6 + .../compiler/xla/service/hlo_computation.cc | 2 +- .../compiler/xla/service/hlo_computation.h | 2 +- tensorflow/compiler/xla/service/hlo_module.cc | 68 ++------- .../compiler/xla/service/hlo_proto_util.cc | 138 +++--------------- .../xla/service/hlo_proto_util_test.cc | 114 +-------------- 6 files changed, 39 insertions(+), 291 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo.proto b/tensorflow/compiler/xla/service/hlo.proto index a43785b4a9..66fd317051 100644 --- a/tensorflow/compiler/xla/service/hlo.proto +++ b/tensorflow/compiler/xla/service/hlo.proto @@ -145,6 +145,9 @@ message HloComputationProto { // The name of the root of the computation. string root_name = 3; + + // The program shape (with layout) of this computation. + xla.ProgramShape program_shape = 4; } // Serialization of HloModule. @@ -155,6 +158,9 @@ message HloModuleProto { // The array of computations is always in a valid dependency order, where // callees appear before their callers. repeated HloComputationProto computations = 3; + + // The program shape (with layout) of the entry computation. + xla.ProgramShape program_shape = 4; } // Serialization of HloOrdering. 
diff --git a/tensorflow/compiler/xla/service/hlo_computation.cc b/tensorflow/compiler/xla/service/hlo_computation.cc index 21e6b2ca73..f99c7cf5e4 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.cc +++ b/tensorflow/compiler/xla/service/hlo_computation.cc @@ -399,6 +399,7 @@ HloComputationProto HloComputation::ToProto() const { proto.add_instructions()->Swap(&instruction_proto); } proto.set_root_name(root_instruction()->name()); + *proto.mutable_program_shape() = ComputeProgramShape(); return proto; } @@ -532,7 +533,6 @@ ProgramShape HloComputation::ComputeProgramShape() const { } *program_shape.mutable_result() = root_instruction_->shape(); - LayoutUtil::ClearLayout(&program_shape); return program_shape; } diff --git a/tensorflow/compiler/xla/service/hlo_computation.h b/tensorflow/compiler/xla/service/hlo_computation.h index 39d864efcb..dd9d346999 100644 --- a/tensorflow/compiler/xla/service/hlo_computation.h +++ b/tensorflow/compiler/xla/service/hlo_computation.h @@ -248,7 +248,7 @@ class HloComputation { ShapeTree* copies_added = nullptr); // Computes and returns the ProgramShape of this computation (shape of - // parameters and result without layout). + // parameters and result with layout). ProgramShape ComputeProgramShape() const; // Return whether `*this` and `other` are functionally equivalent. 
diff --git a/tensorflow/compiler/xla/service/hlo_module.cc b/tensorflow/compiler/xla/service/hlo_module.cc index cb2fe9f874..cdea3d5978 100644 --- a/tensorflow/compiler/xla/service/hlo_module.cc +++ b/tensorflow/compiler/xla/service/hlo_module.cc @@ -213,74 +213,23 @@ HloModuleProto HloModule::ToProto() const { continue; } HloComputationProto computation_proto = computation->ToProto(); + if (computation->name() == entry_computation_->name()) { + *proto.mutable_program_shape() = computation_proto.program_shape(); + } proto.add_computations()->Swap(&computation_proto); } return proto; } -namespace { - -// Construct a ProgramShape matching the shape of the parameters and root of the -// given module's entry computation. -StatusOr ProgramShapeFromProto(const HloModuleProto& module) { - const HloComputationProto* entry_computation = nullptr; - for (const HloComputationProto& computation : module.computations()) { - if (computation.name() == module.entry_computation_name()) { - entry_computation = &computation; - break; - } - } - TF_RET_CHECK(entry_computation != nullptr) - << "No computation with entry computation name" - << module.entry_computation_name(); - - tensorflow::gtl::FlatMap> parameters; - const HloInstructionProto* root = nullptr; - for (const HloInstructionProto& instruction : - entry_computation->instructions()) { - if (instruction.name() == entry_computation->root_name()) { - TF_RET_CHECK(root == nullptr) << "Entry computation has more than " - "one instruction with (root) name " - << instruction.name(); - root = &instruction; - } - if (instruction.opcode() == HloOpcodeString(HloOpcode::kParameter)) { - TF_RET_CHECK(!ContainsKey(parameters, instruction.parameter_number())) - << "Entry computation has more than one parameter instruction " - "with parameter number " - << instruction.parameter_number(); - parameters[instruction.parameter_number()] = {instruction.name(), - &instruction.shape()}; - } - } - TF_RET_CHECK(root != nullptr) - << "Entry computation 
is missing root instruction named " - << entry_computation->root_name(); - - ProgramShape program_shape; - *program_shape.mutable_result() = root->shape(); - for (int64 i = 0; i < parameters.size(); ++i) { - TF_RET_CHECK(ContainsKey(parameters, i)) - << "Entry computation missing parameter number " << i; - const string& name = parameters.at(i).first; - const Shape& shape = *parameters.at(i).second; - *program_shape.add_parameters() = shape; - program_shape.add_parameter_names(name); - } - - return std::move(program_shape); -} - -} // namespace - /* static */ StatusOr> HloModule::CreateFromProto( const HloModuleProto& proto, const HloModuleConfig& module_config, const VersionedComputationHandle& entry_computation_handle) { // The ProgramShape in the passed in module config must match the shapes of // the entry parameters and root. - TF_ASSIGN_OR_RETURN(ProgramShape expected_program_shape, - ProgramShapeFromProto(proto)); + TF_RET_CHECK(proto.has_program_shape()) + << "No program shape found in the proto"; + const auto& expected_program_shape = proto.program_shape(); TF_RET_CHECK(expected_program_shape.parameters_size() == module_config.entry_computation_layout().parameter_count()); for (int i = 0; i < expected_program_shape.parameters_size(); ++i) { @@ -354,8 +303,9 @@ StatusOr> HloModule::CreateFromProto( /* static */ StatusOr HloModule::CreateModuleConfigFromProto( const HloModuleProto& module) { - TF_ASSIGN_OR_RETURN(ProgramShape program_shape, - ProgramShapeFromProto(module)); + TF_RET_CHECK(module.has_program_shape()) + << "No program shape found in the proto"; + const auto& program_shape = module.program_shape(); HloModuleConfig module_config(program_shape); diff --git a/tensorflow/compiler/xla/service/hlo_proto_util.cc b/tensorflow/compiler/xla/service/hlo_proto_util.cc index f75c452082..3460679558 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util.cc +++ b/tensorflow/compiler/xla/service/hlo_proto_util.cc @@ -21,106 +21,6 @@ limitations under the 
License. namespace xla { -namespace { - -// Returns the entry computation of the HLO module in the given HloProto. -StatusOr GetEntryComputation( - const HloProto& hlo_proto) { - if (!hlo_proto.has_hlo_module()) { - return NotFound("HloProto missing HloModuleProto."); - } - - if (hlo_proto.hlo_module().entry_computation_name().empty()) { - return NotFound("HloProto has empty entry computation name."); - } - - const string& entry_computation_name = - hlo_proto.hlo_module().entry_computation_name(); - const HloComputationProto* entry_computation = nullptr; - for (const HloComputationProto& computation : - hlo_proto.hlo_module().computations()) { - if (computation.name() == entry_computation_name) { - if (entry_computation == nullptr) { - entry_computation = &computation; - } else { - return InvalidArgument( - "HloProto has multiple computations with entry computation named " - "%s.", - entry_computation_name.c_str()); - } - } - } - if (entry_computation == nullptr) { - return InvalidArgument("HloProto has no entry computation named %s.", - entry_computation_name.c_str()); - } - return entry_computation; -} - -// Returns the root instruction of the given computation proto. -StatusOr GetRootInstruction( - const HloComputationProto& computation) { - if (computation.root_name().empty()) { - return InvalidArgument("Missing root instruction name."); - } - - const HloInstructionProto* root = nullptr; - for (const HloInstructionProto& instruction : computation.instructions()) { - if (instruction.name() == computation.root_name()) { - if (root == nullptr) { - root = &instruction; - } else { - return InvalidArgument( - "Computation has multiple instructions named %s.", - computation.root_name().c_str()); - } - } - } - if (root == nullptr) { - return InvalidArgument("Computation has no instruction named %s.", - computation.root_name().c_str()); - } - return root; -} - -// Returns the parameters of the given computation. 
Parameter numbers are -// checked for validity and contiguousness. -StatusOr> GetParameters( - const HloComputationProto& computation) { - std::vector parameters; - for (const HloInstructionProto& instruction : computation.instructions()) { - if (instruction.opcode() == HloOpcodeString(HloOpcode::kParameter)) { - parameters.push_back(&instruction); - } - } - - // Verify the uniqueness and validity of the parameter numbers. - tensorflow::gtl::FlatSet parameter_numbers; - for (const HloInstructionProto* parameter : parameters) { - if (parameter->parameter_number() < 0 || - parameter->parameter_number() >= parameters.size()) { - return InvalidArgument( - "Parameter instruction %s has invalid parameter number %lld.", - parameter->name().c_str(), parameter->parameter_number()); - } - if (parameter_numbers.count(parameter->parameter_number()) != 0) { - return InvalidArgument( - "Multiple parameter instructions have parameter number %lld.", - parameter->parameter_number()); - } - parameter_numbers.insert(parameter->parameter_number()); - } - - std::sort(parameters.begin(), parameters.end(), - [](const HloInstructionProto* a, const HloInstructionProto* b) { - return a->parameter_number() < b->parameter_number(); - }); - - return parameters; -} - -} // namespace - HloProto MakeHloProto(const HloModule& module, const BufferAssignment& assignment) { HloOrderingProto proto_ordering = @@ -141,33 +41,33 @@ HloProto MakeHloProto(const HloModule& module) { StatusOr> EntryComputationParameterShapes( const HloProto& hlo_proto) { - TF_ASSIGN_OR_RETURN(const HloComputationProto* entry_computation, - GetEntryComputation(hlo_proto)); - TF_ASSIGN_OR_RETURN(std::vector parameters, - GetParameters(*entry_computation)); + if (!hlo_proto.has_hlo_module()) { + return NotFound("HloProto missing HloModuleProto."); + } + if (!hlo_proto.hlo_module().has_program_shape()) { + return NotFound("HloProto missing program shape."); + } + std::vector parameter_shapes; - for (const HloInstructionProto* 
parameter : parameters) { - if (!parameter->has_shape()) { - return InvalidArgument("Parameter instruction %s is missing shape.", - parameter->name().c_str()); - } - parameter_shapes.push_back(¶meter->shape()); + const auto& program_shape = hlo_proto.hlo_module().program_shape(); + for (const Shape& shape : program_shape.parameters()) { + parameter_shapes.push_back(&shape); } return parameter_shapes; } StatusOr EntryComputationOutputShape(const HloProto& hlo_proto) { - TF_ASSIGN_OR_RETURN(const HloComputationProto* entry_computation, - GetEntryComputation(hlo_proto)); - - TF_ASSIGN_OR_RETURN(const HloInstructionProto* root, - GetRootInstruction(*entry_computation)); - if (!root->has_shape()) { - return InvalidArgument("Instruction %s is missing shape.", - root->name().c_str()); + if (!hlo_proto.has_hlo_module()) { + return NotFound("HloProto missing HloModuleProto."); + } + if (!hlo_proto.hlo_module().has_program_shape()) { + return NotFound("HloProto missing program shape."); + } + if (!hlo_proto.hlo_module().program_shape().has_result()) { + return NotFound("HloProto missing result in its program shape"); } - return &root->shape(); + return &hlo_proto.hlo_module().program_shape().result(); } } // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_proto_util_test.cc b/tensorflow/compiler/xla/service/hlo_proto_util_test.cc index 0c0abf10fa..b9cca13870 100644 --- a/tensorflow/compiler/xla/service/hlo_proto_util_test.cc +++ b/tensorflow/compiler/xla/service/hlo_proto_util_test.cc @@ -29,69 +29,6 @@ namespace { class HloProtoUtilTest : public ::testing::Test {}; -TEST_F(HloProtoUtilTest, ParamsAndOutputShape) { - HloProto hlo_proto; - HloModuleProto* module = hlo_proto.mutable_hlo_module(); - module->set_entry_computation_name("entry"); - HloComputationProto* computation = module->add_computations(); - computation->set_name("entry"); - computation->set_root_name("root"); - - HloInstructionProto* param0 = computation->add_instructions(); - 
param0->set_opcode(HloOpcodeString(HloOpcode::kParameter)); - param0->set_parameter_number(0); - *param0->mutable_shape() = ShapeUtil::MakeShape(F32, {42}); - - HloInstructionProto* param2 = computation->add_instructions(); - param2->set_opcode(HloOpcodeString(HloOpcode::kParameter)); - param2->set_parameter_number(2); - *param2->mutable_shape() = ShapeUtil::MakeShape(S32, {1, 2, 3}); - - HloInstructionProto* param1 = computation->add_instructions(); - param1->set_opcode(HloOpcodeString(HloOpcode::kParameter)); - param1->set_parameter_number(1); - *param1->mutable_shape() = ShapeUtil::MakeShape(F64, {}); - - HloInstructionProto* root = computation->add_instructions(); - root->set_opcode(HloOpcodeString(HloOpcode::kAdd)); - root->set_name("root"); - *root->mutable_shape() = ShapeUtil::MakeShape(U8, {2}); - - VLOG(1) << hlo_proto.DebugString(); - - TF_ASSERT_OK_AND_ASSIGN(std::vector parameter_shapes, - EntryComputationParameterShapes(hlo_proto)); - ASSERT_EQ(parameter_shapes.size(), 3); - EXPECT_TRUE( - ShapeUtil::Equal(*parameter_shapes[0], ShapeUtil::MakeShape(F32, {42}))); - EXPECT_TRUE( - ShapeUtil::Equal(*parameter_shapes[1], ShapeUtil::MakeShape(F64, {}))); - EXPECT_TRUE(ShapeUtil::Equal(*parameter_shapes[2], - ShapeUtil::MakeShape(S32, {1, 2, 3}))); - - TF_ASSERT_OK_AND_ASSIGN(const Shape* output_shape, - EntryComputationOutputShape(hlo_proto)); - EXPECT_TRUE(ShapeUtil::Equal(*output_shape, ShapeUtil::MakeShape(U8, {2}))); -} - -TEST_F(HloProtoUtilTest, ParamsAndOutputShapeNoParameters) { - HloProto hlo_proto; - HloModuleProto* module = hlo_proto.mutable_hlo_module(); - module->set_entry_computation_name("entry"); - HloComputationProto* computation = module->add_computations(); - computation->set_name("entry"); - computation->set_root_name("root"); - - HloInstructionProto* root = computation->add_instructions(); - root->set_opcode(HloOpcodeString(HloOpcode::kAdd)); - root->set_name("root"); - *root->mutable_shape() = ShapeUtil::MakeShape(U8, {2}); - - 
TF_ASSERT_OK_AND_ASSIGN(std::vector parameter_shapes, - EntryComputationParameterShapes(hlo_proto)); - ASSERT_EQ(parameter_shapes.size(), 0); -} - TEST_F(HloProtoUtilTest, ParamsAndOutputShapeMissingModule) { HloProto hlo_proto; @@ -101,60 +38,15 @@ TEST_F(HloProtoUtilTest, ParamsAndOutputShapeMissingModule) { ::testing::HasSubstr("missing HloModuleProto")); } -TEST_F(HloProtoUtilTest, ParamsAndOutputShapeMissingEntryComputation) { +TEST_F(HloProtoUtilTest, MissingProgramShape) { HloProto hlo_proto; HloModuleProto* module = hlo_proto.mutable_hlo_module(); - module->set_entry_computation_name("entry"); - HloComputationProto* computation = module->add_computations(); - computation->set_name("not_entry"); - - auto status = EntryComputationParameterShapes(hlo_proto).status(); - ASSERT_FALSE(status.ok()); - ASSERT_THAT(status.error_message(), - ::testing::HasSubstr("has no entry computation named")); -} - -TEST_F(HloProtoUtilTest, OutputShapeMissingEntryRoot) { - HloProto hlo_proto; - HloModuleProto* module = hlo_proto.mutable_hlo_module(); - module->set_entry_computation_name("entry"); - HloComputationProto* computation = module->add_computations(); - computation->set_name("entry"); - computation->set_root_name("root"); - - auto status = EntryComputationOutputShape(hlo_proto).status(); - ASSERT_FALSE(status.ok()); - ASSERT_THAT(status.error_message(), - ::testing::HasSubstr("has no instruction named")); -} - -TEST_F(HloProtoUtilTest, ParamsShapesMissingParameterNumbers) { - HloProto hlo_proto; - HloModuleProto* module = hlo_proto.mutable_hlo_module(); - module->set_entry_computation_name("entry"); - HloComputationProto* computation = module->add_computations(); - computation->set_name("entry"); - computation->set_root_name("root"); - - HloInstructionProto* param0 = computation->add_instructions(); - param0->set_opcode(HloOpcodeString(HloOpcode::kParameter)); - param0->set_parameter_number(0); - *param0->mutable_shape() = ShapeUtil::MakeShape(F32, {42}); - - 
HloInstructionProto* param2 = computation->add_instructions(); - param2->set_opcode(HloOpcodeString(HloOpcode::kParameter)); - param2->set_parameter_number(2); - *param2->mutable_shape() = ShapeUtil::MakeShape(S32, {1, 2, 3}); - - HloInstructionProto* root = computation->add_instructions(); - root->set_opcode(HloOpcodeString(HloOpcode::kAdd)); - root->set_name("root"); - *root->mutable_shape() = ShapeUtil::MakeShape(U8, {2}); + module->set_name("entry"); auto status = EntryComputationParameterShapes(hlo_proto).status(); ASSERT_FALSE(status.ok()); ASSERT_THAT(status.error_message(), - ::testing::HasSubstr("invalid parameter number")); + ::testing::HasSubstr("missing program shape")); } } // namespace -- GitLab From 7efc16ed02121b92993b3417805cea652bab3c92 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Tue, 6 Mar 2018 16:44:20 -0800 Subject: [PATCH 499/884] Re-enable math_utils_test msan PiperOrigin-RevId: 188102388 --- tensorflow/contrib/timeseries/python/timeseries/BUILD | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index 0ce7b0bb91..fff972c1f3 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -425,7 +425,6 @@ py_test( srcs_version = "PY2AND3", tags = [ "no_pip_gpu", # b/63391119 - "nomsan", ], deps = [ ":feature_keys", -- GitLab From 6e99d56489b4e6c3176fa1199d4270b6439a22fe Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 16:46:54 -0800 Subject: [PATCH 500/884] Add metadata for gathering information about host compute transfers while compiling XLA. 
PiperOrigin-RevId: 188102740 --- tensorflow/compiler/tf2xla/BUILD | 10 +++ .../tf2xla/host_compute_metadata.proto | 38 +++++++++++ tensorflow/compiler/tf2xla/xla_compiler.cc | 63 +++++++++++++++++++ tensorflow/compiler/tf2xla/xla_compiler.h | 24 +++++++ 4 files changed, 135 insertions(+) create mode 100644 tensorflow/compiler/tf2xla/host_compute_metadata.proto diff --git a/tensorflow/compiler/tf2xla/BUILD b/tensorflow/compiler/tf2xla/BUILD index fb82c2601c..eb20ca501c 100644 --- a/tensorflow/compiler/tf2xla/BUILD +++ b/tensorflow/compiler/tf2xla/BUILD @@ -58,6 +58,15 @@ xla_proto_library( ], ) +xla_proto_library( + name = "host_compute_metadata_proto", + srcs = ["host_compute_metadata.proto"], + visibility = ["//visibility:public"], + deps = [ + "//tensorflow/core:protos_all_cc", + ], +) + cc_library( name = "tf2xla", srcs = ["tf2xla.cc"], @@ -149,6 +158,7 @@ cc_library( ":common", ":dump_graph", ":functionalize_control_flow", + ":host_compute_metadata_proto", ":sharding_util", ":tf2xla_util", "//tensorflow/compiler/tf2xla/lib:util", diff --git a/tensorflow/compiler/tf2xla/host_compute_metadata.proto b/tensorflow/compiler/tf2xla/host_compute_metadata.proto new file mode 100644 index 0000000000..43ab371a21 --- /dev/null +++ b/tensorflow/compiler/tf2xla/host_compute_metadata.proto @@ -0,0 +1,38 @@ +syntax = "proto3"; + +package tensorflow.tf2xla; +option cc_enable_arenas = true; +option java_outer_classname = "Tf2XlaProtos"; +option java_multiple_files = true; +option java_package = "org.tensorflow.tf2xla"; + +import "tensorflow/core/framework/tensor_shape.proto"; +import "tensorflow/core/framework/types.proto"; + +// TensorMetadata indicates the type and shape of a Tensor that is +// part of a host compute transfer. +message TensorMetadata { + DataType type = 1; + TensorShapeProto shape = 2; +} + +// HostTransferMetadata describes a transfer either from host to device +// or device to host. 
It has a key that is unique to the computation, +// and metadata about the list of tensors being transferred. +message HostTransferMetadata { + // The key used to identify this transfer. + string key = 1; + + // For each Tensor being transferred, its type and shape. + repeated TensorMetadata metadata = 2; +} + +// HostComputeMetadata describes all the sends and recvs +// from all host compute transfer ops in a computation. +message HostComputeMetadata { + // Metadata about each device_to_host transfer + repeated HostTransferMetadata device_to_host = 1; + + // Metadata about each host_to_device transfer + repeated HostTransferMetadata host_to_device = 2; +} diff --git a/tensorflow/compiler/tf2xla/xla_compiler.cc b/tensorflow/compiler/tf2xla/xla_compiler.cc index 5ec05c4121..0dc5118c9c 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.cc +++ b/tensorflow/compiler/tf2xla/xla_compiler.cc @@ -674,6 +674,14 @@ Status XlaCompiler::CompileGraph(const XlaCompiler::CompileOptions& options, VLOG(2) << "XLA output shape: " << xla::ShapeUtil::HumanString(result->xla_output_shape); + // Copy the host transfer metadata to the result. + for (const auto& send : host_compute_sends_) { + *result->host_compute_metadata.add_device_to_host() = send.second; + } + for (const auto& recv : host_compute_recvs_) { + *result->host_compute_metadata.add_host_to_device() = recv.second; + } + // Tensorflow expects a major-to-minor order of results. 
xla::LayoutUtil::SetToDefaultLayout(&result->xla_output_shape); @@ -708,4 +716,59 @@ Status XlaCompiler::GetChannelHandle(const string& key, return Status::OK(); } +namespace { + +void SetTransfer(const string& key, const std::vector<DataType>& types, + const std::vector<TensorShape>& shapes, + tf2xla::HostTransferMetadata* transfer) { + transfer->set_key(key); + CHECK(types.size() == shapes.size()); + for (int i = 0; i < types.size(); ++i) { + tf2xla::TensorMetadata* metadata = transfer->add_metadata(); + metadata->set_type(types[i]); + shapes[i].AsProto(metadata->mutable_shape()); + } +} + +} // namespace + +Status XlaCompiler::SetDeviceToHostMetadata( + const string& key, const std::vector<DataType>& types, + const std::vector<TensorShape>& shapes) { + if (host_compute_sends_.find(key) != host_compute_sends_.end()) { + return errors::InvalidArgument( + "Duplicate calls to SetDeviceToHostMetadata with key ", key); + } + tf2xla::HostTransferMetadata& transfer = host_compute_sends_[key]; + SetTransfer(key, types, shapes, &transfer); + return Status::OK(); +} + +Status XlaCompiler::GetDeviceToHostShapes( + const string& key, std::vector<TensorShape>* shapes) const { + const auto iter = host_compute_sends_.find(key); + if (iter == host_compute_sends_.end()) { + return errors::InvalidArgument( + "No host compute send shapes registered for key ", key); + } + shapes->clear(); + for (int i = 0; i < iter->second.metadata_size(); ++i) { + TensorShape shape(iter->second.metadata(i).shape()); + shapes->push_back(shape); + } + return Status::OK(); +} + +Status XlaCompiler::SetHostToDeviceMetadata( + const string& key, const std::vector<DataType>& types, + const std::vector<TensorShape>& shapes) { + if (host_compute_recvs_.find(key) != host_compute_recvs_.end()) { + return errors::InvalidArgument( + "Duplicate calls to SetHostToDeviceMetadata with key ", key); + } + tf2xla::HostTransferMetadata& transfer = host_compute_recvs_[key]; + SetTransfer(key, types, shapes, &transfer); + return Status::OK(); +} + } // namespace tensorflow diff --git 
a/tensorflow/compiler/tf2xla/xla_compiler.h b/tensorflow/compiler/tf2xla/xla_compiler.h index c4449bc4be..a70d2637e0 100644 --- a/tensorflow/compiler/tf2xla/xla_compiler.h +++ b/tensorflow/compiler/tf2xla/xla_compiler.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILER_H_ #define TENSORFLOW_COMPILER_TF2XLA_XLA_COMPILER_H_ +#include "tensorflow/compiler/tf2xla/host_compute_metadata.pb.h" #include "tensorflow/compiler/tf2xla/xla_compilation_device.h" #include "tensorflow/compiler/xla/client/local_client.h" #include "tensorflow/core/common_runtime/device.h" @@ -216,6 +217,10 @@ class XlaCompiler { // containing both constant and non-constant results. std::vector outputs; + // TensorFlow shapes and types of sends/recvs from HostCompute Ops to their + // matching RecvAtHost/SendFromHost Ops in the outer graph. + tf2xla::HostComputeMetadata host_compute_metadata; + // Resources whose values were updated by the computation, ordered // by return value position. Resource updates follow the non-constant // results in the outputs of XLA computation. @@ -296,6 +301,22 @@ class XlaCompiler { // same XlaCompiler. Status GetChannelHandle(const string& key, xla::ChannelHandle* channel); + // Sets the shapes and types for the device to host transfer associated with + // 'key'. + Status SetDeviceToHostMetadata(const string& key, + const std::vector& types, + const std::vector& shapes); + + // Gets the shapes for the device to host transfer associated with 'key'. + Status GetDeviceToHostShapes(const string& key, + std::vector* shapes) const; + + // Sets the shapes and types for the host to device transfer associated with + // 'key'. 
+ Status SetHostToDeviceMetadata(const string& key, + const std::vector& types, + const std::vector& shapes); + const Options& options() const { return options_; } xla::Client* client() const { return options_.client; } FunctionLibraryRuntime* flib_runtime() const { return flib_runtime_; } @@ -359,6 +380,9 @@ class XlaCompiler { std::unordered_map channels_; + std::unordered_map host_compute_sends_; + std::unordered_map host_compute_recvs_; + TF_DISALLOW_COPY_AND_ASSIGN(XlaCompiler); }; -- GitLab From 9c3cf322a3051339899ffb74c33533f60c0c2d8e Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Tue, 6 Mar 2018 17:19:36 -0800 Subject: [PATCH 501/884] Make graph construction work while graph is being concurrently run. The overall approach is to use Graph._lock to synchronize Session.run calls and construction methods that rely on graph mutation. We don't want to synchronize the actual running of the graph, only the Extend call, so this change exposes an ExtendSession method to the Python API and disables extending automatically in TF_SessionRun. 
PiperOrigin-RevId: 188106818 --- tensorflow/c/c_api.cc | 134 ++++++++++++---------- tensorflow/c/c_api_internal.h | 8 ++ tensorflow/c/python_api.cc | 6 + tensorflow/c/python_api.h | 10 ++ tensorflow/python/client/session.py | 124 +++++++++----------- tensorflow/python/client/session_test.py | 39 +++++++ tensorflow/python/client/tf_session.i | 1 + tensorflow/python/framework/importer.py | 25 ++-- tensorflow/python/framework/ops.py | 44 ++++--- tensorflow/python/ops/control_flow_ops.py | 7 +- tensorflow/python/ops/gradients_impl.py | 11 ++ 11 files changed, 250 insertions(+), 159 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 3d0e886476..e3a95a0577 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -710,6 +710,58 @@ void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output, Status LoadLibrary(const char* library_filename, void** result, const void** buf, size_t* len); +// TODO(josh11b,mrry): Change Session to be able to use a Graph* +// directly, instead of requiring us to serialize to a GraphDef and +// call Session::Extend(). +bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) + EXCLUSIVE_LOCKS_REQUIRED(session->mu) { + if (session->graph != nullptr) { + session->graph->mu.lock(); + const Graph& graph = session->graph->graph; + + status->status = session->graph->sessions[session]; + if (!status->status.ok()) { + session->graph->mu.unlock(); + return false; + } + + const auto num_nodes = graph.num_node_ids(); + if (session->last_num_graph_nodes < num_nodes) { + status->status = tensorflow::ValidateNoCycles(session->graph->graph); + if (!status->status.ok()) { + session->graph->mu.unlock(); + return false; + } + + GraphDef graph_def; + *graph_def.mutable_versions() = graph.versions(); + // Fill graph_def with nodes with ids in the range + // [session->last_num_graph_nodes, num_nodes), that is the nodes + // added since the last TF_SessionRun() call. 
+ for (auto id = session->last_num_graph_nodes; id < num_nodes; ++id) { + Node* const node = graph.FindNodeId(id); + if (node != nullptr && node->IsOp()) { + NodeDef* const node_def = graph_def.add_node(); + *node_def = node->def(); + } + } + *graph_def.mutable_library() = graph.flib_def().ToProto(); + session->graph->mu.unlock(); + status->status = session->session->Extend(graph_def); + if (!status->status.ok()) { + // Contract is we always delete input_values[i]. + return false; + } + // Note: session->session is not modified if Extend() fails, so + // we only set last_num_graph_nodes if it succeeds. + session->last_num_graph_nodes = num_nodes; + } else { + session->graph->mu.unlock(); + } + } + return true; +} + } // namespace tensorflow static void TF_Run_Setup(int noutputs, TF_Tensor** c_outputs, @@ -2410,7 +2462,11 @@ void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx, // TF_Session functions ---------------------------------------------- TF_Session::TF_Session(tensorflow::Session* s, TF_Graph* g) - : session(s), graph(g), last_num_graph_nodes(0), device_mgr(nullptr) { + : session(s), + graph(g), + last_num_graph_nodes(0), + device_mgr(nullptr), + extend_before_run(true) { if (s->LocalDeviceManager(&device_mgr).ok()) { devices = device_mgr->ListDevices(); } @@ -2514,58 +2570,6 @@ void TF_DeleteSession(TF_Session* s, TF_Status* status) { delete s; } -// TODO(josh11b,mrry): Change Session to be able to use a Graph* -// directly, instead of requiring us to serialize to a GraphDef and -// call Session::Extend(). 
-static bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) { - if (session->graph != nullptr) { - mutex_lock session_lock(session->mu); - session->graph->mu.lock(); - const Graph& graph = session->graph->graph; - - status->status = session->graph->sessions[session]; - if (!status->status.ok()) { - session->graph->mu.unlock(); - return false; - } - - const auto num_nodes = graph.num_node_ids(); - if (session->last_num_graph_nodes < num_nodes) { - status->status = tensorflow::ValidateNoCycles(session->graph->graph); - if (!status->status.ok()) { - session->graph->mu.unlock(); - return false; - } - - GraphDef graph_def; - *graph_def.mutable_versions() = graph.versions(); - // Fill graph_def with nodes with ids in the range - // [session->last_num_graph_nodes, num_nodes), that is the nodes - // added since the last TF_SessionRun() call. - for (auto id = session->last_num_graph_nodes; id < num_nodes; ++id) { - Node* const node = graph.FindNodeId(id); - if (node != nullptr && node->IsOp()) { - NodeDef* const node_def = graph_def.add_node(); - *node_def = node->def(); - } - } - *graph_def.mutable_library() = graph.flib_def().ToProto(); - session->graph->mu.unlock(); - status->status = session->session->Extend(graph_def); - if (!status->status.ok()) { - // Contract is we always delete input_values[i]. - return false; - } - // Note: session->session is not modified if Extend() fails, so - // we only set last_num_graph_nodes if it succeeds. 
- session->last_num_graph_nodes = num_nodes; - } else { - session->graph->mu.unlock(); - } - } - return true; -} - void TF_SessionRun(TF_Session* session, const TF_Buffer* run_options, const TF_Output* inputs, TF_Tensor* const* input_values, int ninputs, const TF_Output* outputs, @@ -2575,8 +2579,12 @@ void TF_SessionRun(TF_Session* session, const TF_Buffer* run_options, // TODO(josh11b,mrry): Change Session to be able to use a Graph* // directly, instead of requiring us to serialize to a GraphDef and // call Session::Extend(). - if (!ExtendSessionGraphHelper(session, status)) { - return; + { + mutex_lock l(session->mu); + if (session->extend_before_run && + !tensorflow::ExtendSessionGraphHelper(session, status)) { + return; + } } TF_Run_Setup(noutputs, output_values, status); @@ -2612,8 +2620,12 @@ void TF_SessionPRunSetup(TF_Session* session, const TF_Output* inputs, const char** handle, TF_Status* status) { *handle = nullptr; - if (!ExtendSessionGraphHelper(session, status)) { - return; + { + mutex_lock l(session->mu); + if (session->extend_before_run && + !tensorflow::ExtendSessionGraphHelper(session, status)) { + return; + } } std::vector input_names(ninputs); @@ -2655,8 +2667,12 @@ void TF_SessionPRun(TF_Session* session, const char* handle, // TODO(josh11b,mrry): Change Session to be able to use a Graph* // directly, instead of requiring us to serialize to a GraphDef and // call Session::Extend(). - if (!ExtendSessionGraphHelper(session, status)) { - return; + { + mutex_lock l(session->mu); + if (session->extend_before_run && + !tensorflow::ExtendSessionGraphHelper(session, status)) { + return; + } } TF_Run_Setup(noutputs, output_values, status); diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index 91667056e0..027e2d2b15 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -133,6 +133,12 @@ struct TF_Session { // buffers of a TF_Tensor pinned in device memory. 
const tensorflow::DeviceMgr* device_mgr; // Owned by session. std::vector devices; // Owned by device_mgr. + + // If true, TF_SessionRun and similar methods will call + // ExtendSessionGraphHelper before running the graph (this is the default + // public behavior). Can be set to false if the caller needs to call + // ExtendSessionGraphHelper manually. + bool extend_before_run GUARDED_BY(mu); }; struct TF_ImportGraphDefOptions { @@ -212,6 +218,8 @@ void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output, void RecordMutation(TF_Graph* graph, const TF_Operation& op, const char* mutation_type); +bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status); + } // end namespace tensorflow #endif // TENSORFLOW_C_C_API_INTERNAL_H_ diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index f553142d15..26683f50ec 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -104,4 +104,10 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require) { graph->refiner.set_require_shape_inference_fns(require); } +void ExtendSession(TF_Session* session, TF_Status* status) { + mutex_lock l(session->mu); + session->extend_before_run = false; + ExtendSessionGraphHelper(session, status); +} + } // namespace tensorflow diff --git a/tensorflow/c/python_api.h b/tensorflow/c/python_api.h index 542d70f42c..13b680b3a2 100644 --- a/tensorflow/c/python_api.h +++ b/tensorflow/c/python_api.h @@ -41,6 +41,16 @@ void RemoveAllControlInputs(TF_Graph* graph, TF_Operation* op); // error. The default is true. void SetRequireShapeInferenceFns(TF_Graph* graph, bool require); +// Extends `session` with any new operations added to its associated graph. +// Usually this happens automatically in TF_SessionRun. After this is called, +// TF_SessionRun will no longer extend the session on every call. 
+// +// We expose this here to allow fine-grained synchronization in multi-threaded +// workloads, which is required since the Python implementation depends on the +// above mutation methods. This allows us to prevent modifications to nodes in +// the graph after the session has been made aware of them. +void ExtendSession(TF_Session* session, TF_Status* status); + } // namespace tensorflow #endif // TENSORFLOW_C_PYTHON_API_H_ diff --git a/tensorflow/python/client/session.py b/tensorflow/python/client/session.py index 5737047c4b..924d62992a 100644 --- a/tensorflow/python/client/session.py +++ b/tensorflow/python/client/session.py @@ -1220,19 +1220,12 @@ class BaseSession(SessionInterface): compat.as_bytes(options.SerializeToString())) if options else None run_metadata_ptr = tf_session.TF_NewBuffer() if run_metadata else None try: - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - results = tf_session.TF_SessionRun_wrapper( - self._session, options_ptr, {}, fetch_list, target_list, - run_metadata_ptr, status) - else: - results = tf_session.TF_Run(self._session, options_ptr, {}, - fetch_list, target_list, status, - run_metadata_ptr) - if fetch_handler: - results = fetch_handler.build_results(self, results) - else: - results = results[0] if results else None + results = self._call_tf_sessionrun( + options_ptr, {}, fetch_list, target_list, run_metadata_ptr) + if fetch_handler: + results = fetch_handler.build_results(self, results) + else: + results = results[0] if results else None if run_metadata: proto_data = tf_session.TF_GetBuffer(run_metadata_ptr) run_metadata.ParseFromString(compat.as_bytes(proto_data)) @@ -1253,13 +1246,7 @@ class BaseSession(SessionInterface): assert len(target_list) == 1 def _single_operation_run(): - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - tf_session.TF_SessionRun_wrapper(self._session, None, {}, [], - target_list, None, status) - else: - 
tf_session.TF_Run(self._session, None, {}, [], target_list, status, - None) + self._call_tf_sessionrun(None, {}, [], target_list, None) return _single_operation_run elif isinstance(fetches, ops.Tensor): @@ -1269,13 +1256,7 @@ class BaseSession(SessionInterface): assert not target_list def _single_tensor_run(): - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - results = tf_session.TF_SessionRun_wrapper( - self._session, None, {}, fetch_list, [], None, status) - else: - results = tf_session.TF_Run(self._session, None, {}, fetch_list, [], - status, None) + results = self._call_tf_sessionrun(None, {}, fetch_list, [], None) return results[0] return _single_tensor_run @@ -1283,13 +1264,8 @@ class BaseSession(SessionInterface): # In all other cases, we must use `fetch_handler` to build the # results for us. def _fetch_handler_run(): - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - results = tf_session.TF_SessionRun_wrapper( - self._session, None, {}, fetch_list, target_list, None, status) - else: - results = tf_session.TF_Run(self._session, None, {}, fetch_list, - target_list, status, None) + results = self._call_tf_sessionrun( + None, {}, fetch_list, target_list, None) return fetch_handler.build_results(self, results) return _fetch_handler_run @@ -1329,35 +1305,22 @@ class BaseSession(SessionInterface): fetches = _name_list(fetch_list) targets = _name_list(target_list) - def _run_fn(session, feed_dict, fetch_list, target_list, options, - run_metadata): + def _run_fn(feed_dict, fetch_list, target_list, options, run_metadata): # Ensure any changes to the graph are reflected in the runtime. 
self._extend_graph() - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - return tf_session.TF_SessionRun_wrapper(session, options, feed_dict, - fetch_list, target_list, - run_metadata, status) - else: - return tf_session.TF_Run(session, options, feed_dict, fetch_list, - target_list, status, run_metadata) + return self._call_tf_sessionrun( + options, feed_dict, fetch_list, target_list, run_metadata) - def _prun_fn(session, handle, feed_dict, fetch_list): + def _prun_fn(handle, feed_dict, fetch_list): if target_list: raise RuntimeError('partial_run() requires empty target_list.') - with errors.raise_exception_on_not_ok_status() as status: - if self._created_with_new_api: - return tf_session.TF_SessionPRun_wrapper(session, handle, feed_dict, - fetch_list, status) - else: - return tf_session.TF_PRun(session, handle, feed_dict, fetch_list, - status) + return self._call_tf_sessionprun(handle, feed_dict, fetch_list) if handle is None: - return self._do_call(_run_fn, self._session, feeds, fetches, targets, - options, run_metadata) + return self._do_call(_run_fn, feeds, fetches, targets, options, + run_metadata) else: - return self._do_call(_prun_fn, self._session, handle, feeds, fetches) + return self._do_call(_prun_fn, handle, feeds, fetches) def _do_call(self, fn, *args): try: @@ -1377,23 +1340,23 @@ class BaseSession(SessionInterface): raise type(e)(node_def, op, message) def _extend_graph(self): - # Nothing to do if we're using the new session interface - # TODO(skyewm): remove this function altogether eventually if self._created_with_new_api: - return - - # Ensure any changes to the graph are reflected in the runtime. 
- with self._extend_lock: - if self._graph.version > self._current_version: - # pylint: disable=protected-access - graph_def, self._current_version = self._graph._as_graph_def( - from_version=self._current_version, add_shapes=self._add_shapes) - # pylint: enable=protected-access - + with self._graph._lock: # pylint: disable=protected-access with errors.raise_exception_on_not_ok_status() as status: - tf_session.TF_ExtendGraph(self._session, - graph_def.SerializeToString(), status) - self._opened = True + tf_session.ExtendSession(self._session, status) + else: + # Ensure any changes to the graph are reflected in the runtime. + with self._extend_lock: + if self._graph.version > self._current_version: + # pylint: disable=protected-access + graph_def, self._current_version = self._graph._as_graph_def( + from_version=self._current_version, add_shapes=self._add_shapes) + # pylint: enable=protected-access + + with errors.raise_exception_on_not_ok_status() as status: + tf_session.TF_ExtendGraph(self._session, + graph_def.SerializeToString(), status) + self._opened = True # The threshold to run garbage collection to delete dead tensors. 
_DEAD_HANDLES_THRESHOLD = 10 @@ -1444,6 +1407,27 @@ class BaseSession(SessionInterface): feed_dict[feed_tensor] = np_val return handles + def _call_tf_sessionrun(self, options, feed_dict, fetch_list, target_list, + run_metadata): + with errors.raise_exception_on_not_ok_status() as status: + if self._created_with_new_api: + return tf_session.TF_SessionRun_wrapper( + self._session, options, feed_dict, fetch_list, target_list, + run_metadata, status) + else: + return tf_session.TF_Run( + self._session, options, feed_dict, fetch_list, target_list, + status, run_metadata) + + def _call_tf_sessionprun(self, handle, feed_dict, fetch_list): + with errors.raise_exception_on_not_ok_status() as status: + if self._created_with_new_api: + return tf_session.TF_SessionPRun_wrapper( + self._session, handle, feed_dict, fetch_list, status) + else: + return tf_session.TF_PRun( + self._session, handle, feed_dict, fetch_list, status) + @tf_export('Session') class Session(BaseSession): diff --git a/tensorflow/python/client/session_test.py b/tensorflow/python/client/session_test.py index 490572254b..442a66a68e 100644 --- a/tensorflow/python/client/session_test.py +++ b/tensorflow/python/client/session_test.py @@ -37,6 +37,7 @@ from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors from tensorflow.python.framework import function +from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_util @@ -46,6 +47,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import gen_control_flow_ops +from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops # Import resource_variable_ops for the variables-to-tensor implicit 
conversion. from tensorflow.python.ops import resource_variable_ops # pylint: disable=unused-import @@ -1052,6 +1054,43 @@ class SessionTest(test_util.TensorFlowTestCase): for t in threads: t.join() + def testParallelRunAndBuild(self): + with session.Session() as sess: + c = constant_op.constant(5.0) + stop = threading.Event() + + def run_loop(): + while not stop.is_set(): + self.assertEqual(sess.run(c), 5.0) + + threads = [self.checkedThread(target=run_loop) for _ in range(100)] + for t in threads: + t.start() + + # Do some graph construction. Try to exercise non-trivial paths. + graph = ops.get_default_graph() + gdef = None + for _ in range(10): + x = array_ops.placeholder(dtype=dtypes.float32) + with ops.colocate_with(x): + y = array_ops.placeholder(dtype=dtypes.float32) + with ops.device('/cpu:0'): + z = control_flow_ops.while_loop( + lambda x, y: x < 10, lambda x, y: (x + 1, x * y), [x, y]) + with graph._attr_scope({'_a': attr_value_pb2.AttrValue(b=False)}): + gradients_impl.gradients(z, [x, y]) + if gdef is None: + gdef = graph.as_graph_def() + else: + # NOTE(skyewm): import_graph_def breaks the running threads without + # the C API enabled. This is not a regression so I didn't fix it. 
+ if ops._USE_C_API: + importer.import_graph_def(gdef, name='import') + + stop.set() + for t in threads: + t.join() + def testRunFeedDict(self): with session.Session() as s: x = array_ops.zeros([2]) diff --git a/tensorflow/python/client/tf_session.i b/tensorflow/python/client/tf_session.i index 53557acaa1..e88fc0c01a 100644 --- a/tensorflow/python/client/tf_session.i +++ b/tensorflow/python/client/tf_session.i @@ -722,6 +722,7 @@ def TF_Reset(target, containers=None, config=None): %unignore SetRequireShapeInferenceFns; %unignore TF_TryEvaluateConstant_wrapper; %noexception TF_TryEvaluateConstant_wrapper; +%unignore ExtendSession; %include "tensorflow/python/client/tf_session_helper.h" diff --git a/tensorflow/python/framework/importer.py b/tensorflow/python/framework/importer.py index 6ecc1a40ae..783e9259ad 100644 --- a/tensorflow/python/framework/importer.py +++ b/tensorflow/python/framework/importer.py @@ -475,16 +475,21 @@ def import_graph_def(graph_def, _PopulateTFImportGraphDefOptions(options, prefix, input_map, return_elements) - with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized: - try: - with errors.raise_exception_on_not_ok_status() as status: - results = c_api.TF_GraphImportGraphDefWithResults( - graph._c_graph, serialized, options, status) # pylint: disable=protected-access - except errors.InvalidArgumentError as e: - # Convert to ValueError for backwards compatibility. - raise ValueError(str(e)) - - _ProcessNewOps(graph) + # _ProcessNewOps mutates the new operations. _lock ensures a Session.run + # call cannot occur between creating the TF_Operations in the + # TF_GraphImportGraphDefWithResults call and mutating them in + # _ProcessNewOps. 
+ with graph._lock: # pylint: disable=protected-access + with c_api_util.tf_buffer(graph_def.SerializeToString()) as serialized: + try: + with errors.raise_exception_on_not_ok_status() as status: + results = c_api.TF_GraphImportGraphDefWithResults( + graph._c_graph, serialized, options, status) # pylint: disable=protected-access + except errors.InvalidArgumentError as e: + # Convert to ValueError for backwards compatibility. + raise ValueError(str(e)) + + _ProcessNewOps(graph) # Create _DefinedFunctions for any imported functions. # diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 47d0beca90..2a8319a19f 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -2694,15 +2694,20 @@ class Graph(object): def __init__(self): """Creates a new, empty Graph.""" - # Protects the core state that may be accessed by multiple readers. - # Only state that can be returned via public accessors (`as_graph_def()`, - # `get_operations()`, `as_graph_element()`, `get_collection()`, and - # `get_collection_ref()`) is by the lock. Thread-safety is provided on a - # best-effort basis to support buggy programs, and is not guaranteed by the - # public `tf.Graph` API. + # Protects core state that can be returned via public accessors, as well as + # synchronizes Session.run calls with methods that create and mutate ops + # (e.g. Graph.create_op()). This synchronization is necessary because it's + # illegal to modify an operation after it's been run. Thread-safety is + # provided on a best-effort basis to support buggy programs, and is not + # guaranteed by the public `tf.Graph` API. + # + # The lock must be reentrant because create_op can be called recursively due + # to control flow. Without a reentrant lock, many methods would also need a + # "locked" version or parameter (including generated code). + # # NOTE(mrry): This does not protect the various stacks. 
A warning will # be reported if these are used from multiple threads - self._lock = threading.Lock() + self._lock = threading.RLock() self._nodes_by_id = dict() # GUARDED_BY(self._lock) self._next_id_counter = 0 # GUARDED_BY(self._lock) self._nodes_by_name = dict() # GUARDED_BY(self._lock) @@ -3271,17 +3276,20 @@ class Graph(object): input_ops = set([t.op for t in inputs]) control_inputs = self._control_dependencies_for_inputs(input_ops) - ret = Operation( - node_def, - self, - inputs=inputs, - output_types=dtypes, - control_inputs=control_inputs, - input_types=input_types, - original_op=self._default_original_op, - op_def=op_def) - self._create_op_helper(ret, compute_shapes=compute_shapes, - compute_device=compute_device) + # _create_op_helper mutates the new Operation. _lock ensures a Session.run + # call cannot occur between creating and mutating the op. + with self._lock: + ret = Operation( + node_def, + self, + inputs=inputs, + output_types=dtypes, + control_inputs=control_inputs, + input_types=input_types, + original_op=self._default_original_op, + op_def=op_def) + self._create_op_helper(ret, compute_shapes=compute_shapes, + compute_device=compute_device) return ret def _create_op_from_tf_operation(self, c_op, compute_device=True): diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 1fa25a0429..4e524846cc 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -2933,8 +2933,11 @@ class WhileContext(ControlFlowContext): loop_vars = ops.convert_n_to_tensor_or_indexed_slices(loop_vars) try: self.Enter() - original_body_result, exit_vars = self._BuildLoop( - pred, body, original_loop_vars, loop_vars, shape_invariants) + # _BuildLoop calls _update_input in several places. _lock ensures a + # Session.run call cannot occur between creating and mutating new ops. 
+ with ops.get_default_graph()._lock: # pylint: disable=protected-access + original_body_result, exit_vars = self._BuildLoop( + pred, body, original_loop_vars, loop_vars, shape_invariants) finally: self.Exit() diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index be61014395..b678090542 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -480,6 +480,17 @@ def gradients(ys, RuntimeError: if called in Eager mode. """ + # Creating the gradient graph for control flow mutates Operations. _lock + # ensures a Session.run call cannot occur between creating and mutating new + # ops. + with ops.get_default_graph()._lock: # pylint: disable=protected-access + return _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, + gate_gradients, aggregation_method, stop_gradients) + + +def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, + gate_gradients, aggregation_method, stop_gradients): + """Implementation of gradients().""" if context.in_eager_mode(): raise RuntimeError("tf.gradients not supported in EAGER mode. Use " "functions in tf.contrib.eager.backprop instead.") -- GitLab From 77aface145e4785a05106a049b552b42d984ca1a Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Tue, 6 Mar 2018 17:39:01 -0800 Subject: [PATCH 502/884] Fix build. 
PiperOrigin-RevId: 188109002 --- tensorflow/contrib/lite/java/src/main/native/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/lite/java/src/main/native/BUILD b/tensorflow/contrib/lite/java/src/main/native/BUILD index 15806d57c8..3571182ca9 100644 --- a/tensorflow/contrib/lite/java/src/main/native/BUILD +++ b/tensorflow/contrib/lite/java/src/main/native/BUILD @@ -11,6 +11,7 @@ licenses(["notice"]) # Apache 2.0 cc_library( name = "native_framework_only", srcs = [ + "duration_utils_jni.cc", "exception_jni.cc", "nativeinterpreterwrapper_jni.cc", "tensor_jni.cc", -- GitLab From 7f0915562571512f369119f2b5a467e65e478445 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Tue, 6 Mar 2018 17:53:08 -0800 Subject: [PATCH 503/884] Remove dead code. We're guaranteed to have CURLE_OK because we return early above. PiperOrigin-RevId: 188110480 --- tensorflow/core/platform/cloud/curl_http_request.cc | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index b4e1193c21..35bdcba737 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -529,16 +529,9 @@ Status CurlHttpRequest::Send() { case 201: // Created case 204: // No Content case 206: // Partial Content - if (curl_result != CURLE_OK) { - // This means the server executed the request successfully, but then - // something went wrong during the transmission of the response. - result = errors::Unavailable(response_to_error_message( - response_code_, GetResponse(), response_to_error_limit_, - curl_result, error_buffer)); - } else { - result = Status::OK(); - } + result = Status::OK(); break; + case 416: // Requested Range Not Satisfiable // The requested range had no overlap with the available range. 
// This doesn't indicate an error, but this does mean an empty response -- GitLab From 1220eb82cca62e792347a222bdcc976842ba215d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 18:06:08 -0800 Subject: [PATCH 504/884] Adding support for subscripts to qualified names. This also removes the QN copy constructor and adds an assert to ensure that the no attribute/no subscript QN constructor does not receive any strings with '.', '[', or ']'. Additionally this changes the self.qn construction to be a tuple of (base QN, attribute/subscript) instead of a concatenation of the base QN and attribute/subscript so that the has_attr and has_subscript fields are handled properly. Constant subscripts are not yet supported. PiperOrigin-RevId: 188111933 --- .../contrib/py2tf/pyct/ast_util_test.py | 12 +- tensorflow/contrib/py2tf/pyct/qual_names.py | 83 +++++++++--- .../contrib/py2tf/pyct/qual_names_test.py | 122 +++++++++++++++--- 3 files changed, 178 insertions(+), 39 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/ast_util_test.py b/tensorflow/contrib/py2tf/pyct/ast_util_test.py index e0b00c1781..a871ccad6f 100644 --- a/tensorflow/contrib/py2tf/pyct/ast_util_test.py +++ b/tensorflow/contrib/py2tf/pyct/ast_util_test.py @@ -33,15 +33,15 @@ class AstUtilTest(test.TestCase): ast.Name('b', ast.Load()), ast.Attribute(ast.Name('b', None), 'c', ast.Store()), ast.Attribute( - ast.Attribute(ast.Name('b', None), 'c', ast.Load()), 'd', - None) + ast.Attribute(ast.Name('b', None), 'c', ast.Load()), 'd', None) ], None) node = qual_names.resolve(node) node = ast_util.rename_symbols( - node, - { - qual_names.QN('a'): qual_names.QN('renamed_a'), - qual_names.QN('b.c'): qual_names.QN('renamed_b_c'), + node, { + qual_names.QN('a'): + qual_names.QN('renamed_a'), + qual_names.QN(qual_names.QN('b'), attr='c'): + qual_names.QN('renamed_b_c'), }) self.assertEqual(node.elts[0].id, 'renamed_a') diff --git a/tensorflow/contrib/py2tf/pyct/qual_names.py 
b/tensorflow/contrib/py2tf/pyct/qual_names.py index 8717ee6cff..2ffda03868 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names.py +++ b/tensorflow/contrib/py2tf/pyct/qual_names.py @@ -33,26 +33,41 @@ from tensorflow.contrib.py2tf.pyct import anno class QN(object): """Represents a qualified name.""" - def __init__(self, base, attr=None): - if attr: + def __init__(self, base, attr=None, subscript=None): + if attr is not None and subscript is not None: + raise ValueError('A QN can only be either an attr or a subscript, not ' + 'both: attr={}, subscript={}.'.format(attr, subscript)) + self._has_attr = False + self._has_subscript = False + if attr is not None: if not isinstance(base, QN): raise ValueError('For attribute QNs, base must be a QN.') self._parent = base - self.qn = base.qn + (attr,) + # TODO(mdan): Get rid of the tuple - it can only have 1 or 2 elements now. + self.qn = (base, attr) + self._has_attr = True + elif subscript is not None: + if not isinstance(base, QN): + raise ValueError('For subscript QNs, base must be a QN.') + self._parent = base + self.qn = (base, subscript) + self._has_subscript = True else: - if isinstance(base, QN): - if base.is_composite(): - self._parent = base.parent - else: - self._parent = None - self.qn = base.qn - else: - self._parent = None - self.qn = tuple(base.split('.')) + if not isinstance(base, str): + raise ValueError('For simple QNs, base must be a string.') + assert '.' 
not in base and '[' not in base and ']' not in base + self._parent = None + self.qn = (base,) def is_composite(self): return len(self.qn) > 1 + def has_subscript(self): + return self._has_subscript + + def has_attr(self): + return self._has_attr + @property def parent(self): if self._parent is None: @@ -60,24 +75,41 @@ class QN(object): return self._parent def __hash__(self): - return hash(self.qn) + return hash(self.qn + (self._has_attr, self._has_subscript)) def __eq__(self, other): - return self.qn == other.qn + return (isinstance(other, QN) and self.qn == other.qn and + self.has_subscript() == other.has_subscript() and + self.has_attr() == other.has_attr()) def __str__(self): - return '.'.join(self.qn) + if self.has_subscript(): + return str(self.qn[0]) + '[' + str(self.qn[1]) + ']' + if self.has_attr(): + return '.'.join(map(str, self.qn)) + else: + return str(self.qn[0]) def __repr__(self): return str(self) def ssf(self): """Simple symbol form.""" - return '_'.join(self.qn) + ssfs = [n.ssf() if isinstance(n, QN) else n for n in self.qn] + ssf_string = '' + for i in range(0, len(self.qn) - 1): + if self.has_subscript(): + delimiter = '_sub_' + else: + delimiter = '_' + ssf_string += ssfs[i] + delimiter + return ssf_string + ssfs[-1] def ast(self): # The caller must adjust the context appropriately. 
- if self.is_composite(): + if self.has_subscript(): + return gast.Subscript(self.parent.ast(), str(self.qn[-1]), None) + if self.has_attr(): return gast.Attribute(self.parent.ast(), self.qn[-1], None) return gast.Name(self.qn[0], None, None) @@ -96,7 +128,22 @@ class QnResolver(gast.NodeTransformer): def visit_Attribute(self, node): self.generic_visit(node) anno.setanno(node, anno.Basic.QN, - QN(anno.getanno(node.value, anno.Basic.QN), node.attr)) + QN(anno.getanno(node.value, anno.Basic.QN), attr=node.attr)) + return node + + def visit_Subscript(self, node): + if not isinstance(node.slice, gast.Index): + raise NotImplementedError('range and multi-dimensional indexing are not' + ' yet supported') + self.generic_visit(node) + if isinstance(node.slice.value, gast.Num) or isinstance( + node.slice.value, gast.Str): + raise NotImplementedError('constant subscripts are not yet supported') + else: + subscript = anno.getanno(node.slice.value, anno.Basic.QN) + anno.setanno(node, anno.Basic.QN, + QN(anno.getanno(node.value, anno.Basic.QN), + subscript=subscript)) return node diff --git a/tensorflow/contrib/py2tf/pyct/qual_names_test.py b/tensorflow/contrib/py2tf/pyct/qual_names_test.py index 1b1eee2dec..9eaaaf9d4c 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names_test.py +++ b/tensorflow/contrib/py2tf/pyct/qual_names_test.py @@ -22,14 +22,15 @@ import textwrap from tensorflow.contrib.py2tf.pyct import anno from tensorflow.contrib.py2tf.pyct import parser -from tensorflow.contrib.py2tf.pyct import qual_names +from tensorflow.contrib.py2tf.pyct.qual_names import QN +from tensorflow.contrib.py2tf.pyct.qual_names import resolve from tensorflow.python.platform import test class QNTest(test.TestCase): def test_basic(self): - a = qual_names.QN('a') + a = QN('a') self.assertEqual(a.qn, ('a',)) self.assertEqual(str(a), 'a') self.assertEqual(a.ssf(), 'a') @@ -38,8 +39,8 @@ class QNTest(test.TestCase): with self.assertRaises(ValueError): _ = a.parent - a_b = qual_names.QN(a, 'b') 
- self.assertEqual(a_b.qn, ('a', 'b')) + a_b = QN(a, attr='b') + self.assertEqual(a_b.qn, (a, 'b')) self.assertEqual(str(a_b), 'a.b') self.assertEqual(a_b.ssf(), 'a_b') self.assertEqual(a_b.ast().value.id, 'a') @@ -47,13 +48,47 @@ class QNTest(test.TestCase): self.assertTrue(a_b.is_composite()) self.assertEqual(a_b.parent.qn, ('a',)) - a2 = qual_names.QN(a) + def test_subscripts(self): + a = QN('a') + b = QN('b') + a_sub_b = QN(a, subscript=b) + self.assertEqual(a_sub_b.qn, (a, b)) + self.assertEqual(str(a_sub_b), 'a[b]') + self.assertEqual(a_sub_b.ssf(), 'a_sub_b') + self.assertEqual(a_sub_b.ast().value.id, 'a') + self.assertEqual(a_sub_b.ast().slice, 'b') + self.assertTrue(a_sub_b.is_composite()) + self.assertTrue(a_sub_b.has_subscript()) + self.assertEqual(a_sub_b.parent.qn, ('a',)) + + c = QN('c') + b_sub_c = QN(b, subscript=c) + a_sub_b_sub_c = QN(a, subscript=b_sub_c) + self.assertEqual(a_sub_b_sub_c.qn, (a, b_sub_c)) + self.assertTrue(a_sub_b.is_composite()) + self.assertTrue(a_sub_b_sub_c.is_composite()) + self.assertTrue(a_sub_b.has_subscript()) + self.assertTrue(a_sub_b_sub_c.has_subscript()) + self.assertEqual(b_sub_c.qn, (b, c)) + self.assertEqual(str(a_sub_b_sub_c), 'a[b[c]]') + self.assertEqual(a_sub_b_sub_c.ssf(), 'a_sub_b_sub_c') + self.assertEqual(a_sub_b_sub_c.ast().value.id, 'a') + self.assertEqual(a_sub_b_sub_c.ast().slice, 'b[c]') + self.assertEqual(b_sub_c.ast().slice, 'c') + self.assertEqual(a_sub_b_sub_c.parent.qn, ('a',)) + with self.assertRaises(ValueError): + QN('a', 'b') + + def test_equality(self): + a = QN('a') + a2 = QN('a') + a_b = QN(a, attr='b') self.assertEqual(a2.qn, ('a',)) with self.assertRaises(ValueError): _ = a.parent - a_b2 = qual_names.QN(a_b) - self.assertEqual(a_b2.qn, ('a', 'b')) + a_b2 = QN(a, attr='b') + self.assertEqual(a_b2.qn, (a, 'b')) self.assertEqual(a_b2.parent.qn, ('a',)) self.assertTrue(a2 == a) @@ -65,16 +100,46 @@ class QNTest(test.TestCase): self.assertTrue(a_b2 == a_b) self.assertFalse(a_b2 is a_b) 
self.assertFalse(a_b2 == a) + a_sub_b = QN(a, subscript='b') + a_sub_b2 = QN(a, subscript='b') + self.assertTrue(a_sub_b == a_sub_b2) + self.assertFalse(a_sub_b == a_b) - with self.assertRaises(ValueError): - qual_names.QN('a', 'b') + def test_nested_attrs_subscripts(self): + a = QN('a') + b = QN('b') + c = QN('c') + b_sub_c = QN(b, subscript=c) + a_sub_b_sub_c = QN(a, subscript=b_sub_c) - def test_hashable(self): - d = {qual_names.QN('a'): 'a', qual_names.QN('b'): 'b'} + b_dot_c = QN(b, attr=c) + a_sub__b_dot_c = QN(a, subscript=b_dot_c) + + a_sub_b = QN(a, subscript=b) + a_sub_b__dot_c = QN(a_sub_b, attr=c) + + a_dot_b = QN(a, attr=b) + a_dot_b_sub_c = QN(a_dot_b, subscript=c) + + self.assertEqual(str(a_sub_b_sub_c), 'a[b[c]]') + self.assertEqual(str(a_sub__b_dot_c), 'a[b.c]') + self.assertEqual(str(a_sub_b__dot_c), 'a[b].c') + self.assertEqual(str(a_dot_b_sub_c), 'a.b[c]') + + self.assertFalse(a_sub_b_sub_c == a_sub__b_dot_c) + self.assertFalse(a_sub_b_sub_c == a_sub_b__dot_c) + self.assertFalse(a_sub_b_sub_c == a_dot_b_sub_c) - self.assertEqual(d[qual_names.QN('a')], 'a') - self.assertEqual(d[qual_names.QN('b')], 'b') - self.assertTrue(qual_names.QN('c') not in d) + self.assertFalse(a_sub__b_dot_c == a_sub_b__dot_c) + self.assertFalse(a_sub__b_dot_c == a_dot_b_sub_c) + + self.assertFalse(a_sub_b__dot_c == a_dot_b_sub_c) + + def test_hashable(self): + d = {QN('a'): 'a', QN('b'): 'b'} + self.assertEqual(d[QN('a')], 'a') + self.assertEqual(d[QN('b')], 'b') + self.assertTrue(QN('c') not in d) class QNResolverTest(test.TestCase): @@ -90,7 +155,7 @@ class QNResolverTest(test.TestCase): [f, (g.h.i)] j(k, l) """ - nodes = qual_names.resolve(parser.parse_str(textwrap.dedent(samples))) + nodes = resolve(parser.parse_str(textwrap.dedent(samples))) nodes = tuple(n.value for n in nodes.body) self.assertQNStringIs(nodes[0], 'a') @@ -103,6 +168,33 @@ class QNResolverTest(test.TestCase): self.assertQNStringIs(nodes[4].args[0], 'k') self.assertQNStringIs(nodes[4].args[1], 'l') 
+ def test_subscript_resolve(self): + samples = """ + x[i] + x[i.b] + a.b[c] + a.b[x.y] + a[z[c]] + a[b[c[d]]] + a[b].c + a.b.c[d].e.f + a.b[c[d]].e.f + a.b[c[d.e.f].g].h + """ + nodes = resolve(parser.parse_str(textwrap.dedent(samples))) + nodes = tuple(n.value for n in nodes.body) + + self.assertQNStringIs(nodes[0], 'x[i]') + self.assertQNStringIs(nodes[1], 'x[i.b]') + self.assertQNStringIs(nodes[2], 'a.b[c]') + self.assertQNStringIs(nodes[3], 'a.b[x.y]') + self.assertQNStringIs(nodes[4], 'a[z[c]]') + self.assertQNStringIs(nodes[5], 'a[b[c[d]]]') + self.assertQNStringIs(nodes[6], 'a[b].c') + self.assertQNStringIs(nodes[7], 'a.b.c[d].e.f') + self.assertQNStringIs(nodes[8], 'a.b[c[d]].e.f') + self.assertQNStringIs(nodes[9], 'a.b[c[d.e.f].g].h') + if __name__ == '__main__': test.main() -- GitLab From 99cf9f81c178056dfd295e12b4b50e271f8d4bd8 Mon Sep 17 00:00:00 2001 From: Bjarke Hammersholt Roune Date: Tue, 6 Mar 2018 18:13:13 -0800 Subject: [PATCH 505/884] PiperOrigin-RevId: 188112759 --- tensorflow/compiler/xla/tests/reduce_window_test.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index b11b64e40a..8e976e8a31 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -41,7 +41,9 @@ limitations under the License. namespace xla { namespace { -#ifdef XLA_BACKEND_SUPPORTS_BFLOAT16 +// TODO(b/74260408): This test is timing out if bfloat16 is enabled on +// GPU. Last timed out on 2018-03-06. +#if defined(XLA_BACKEND_SUPPORTS_BFLOAT16) && !defined(XLA_TEST_BACKEND_GPU) // Tests both F32 and BF16. 
static std::array use_bfloat16_params{false, true}; #else -- GitLab From 708b43ca30359e6ac5be6241ca323ca20021103c Mon Sep 17 00:00:00 2001 From: HyoukJoong Lee Date: Tue, 6 Mar 2018 19:05:58 -0800 Subject: [PATCH 506/884] Avoid merging colocation sets that include parameter/result buffers PiperOrigin-RevId: 188117187 --- tensorflow/compiler/xla/service/BUILD | 1 + .../compiler/xla/service/buffer_assignment.cc | 8 +- .../xla/service/buffer_assignment_test.cc | 76 +++++++++++++++++++ 3 files changed, 81 insertions(+), 4 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 611b1831ae..0e272e1eea 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -989,6 +989,7 @@ tf_cc_test( "//tensorflow/compiler/xla:xla_data_proto", "//tensorflow/compiler/xla/tests:hlo_test_base", "//tensorflow/compiler/xla/tests:xla_internal_test_main", + "//tensorflow/compiler/xla/tools/parser:hlo_parser", "//tensorflow/core:lib", ], ) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index 0434c0a92b..fb18c9d828 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -1342,10 +1342,10 @@ BufferAssigner::MergeColocatedBufferSets( for (auto& buffer_a : colocated_buffer_sets[i]) { for (auto& buffer_b : colocated_buffer_sets[j]) { // Do not merge if the set includes live outs or entry parameters. - if ((buffer_liveness.MaybeLiveOut(*buffer_a) && - is_entry_parameter(*buffer_b)) || - (buffer_liveness.MaybeLiveOut(*buffer_b) && - is_entry_parameter(*buffer_a))) { + if (buffer_liveness.MaybeLiveOut(*buffer_a) || + is_entry_parameter(*buffer_a) || + buffer_liveness.MaybeLiveOut(*buffer_b) || + is_entry_parameter(*buffer_b)) { return true; } // Do not merge if the buffers interfere with each other. 
diff --git a/tensorflow/compiler/xla/service/buffer_assignment_test.cc b/tensorflow/compiler/xla/service/buffer_assignment_test.cc index 234c725bb9..513a8785bb 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment_test.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment_test.cc @@ -37,6 +37,7 @@ limitations under the License. #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/test_helpers.h" #include "tensorflow/compiler/xla/tests/hlo_test_base.h" +#include "tensorflow/compiler/xla/tools/parser/hlo_parser.h" #include "tensorflow/compiler/xla/types.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/platform/macros.h" @@ -1696,6 +1697,81 @@ TEST_F(WhileBufferAssignmentTest, TwoForwardWhileLoops) { assignment->GetUniqueSlice(while1, {1}).ConsumeValueOrDie()); } +// Tests that two colocated buffer sets are not merged if an entry parameter +// buffer belongs to either of the colocation sets (b/73267882). +// +// %param --> %while.0 --> %mul --> %while.1 --> %broadcast +// +// %while.0 body just forwards the init value, so the loop carried variable +// remains the constant, whereas %while.1 changes the loop carried variable. 
+TEST_F(WhileBufferAssignmentTest, ColocatedBufferWithEntryParameter) { + const Shape r0s32 = ShapeUtil::MakeShape(S32, {}); + + const char* module_str = R"( +HloModule test_module + +%cond.v0 { + %param = s32[] parameter(0) + ROOT %constant = pred[] constant(true) +} + +%cond.v1 { + %param.0 = s32[] parameter(0) + ROOT %constant.0 = pred[] constant(true) +} + +%body.v0 { + ROOT %param.1 = s32[] parameter(0) +} + +%body.v1 { + %param.2 = s32[] parameter(0) + ROOT add = s32[] add(%param.2, %param.2) +} + +ENTRY %test_module { + %param.3 = s32[] parameter(0) + %while.0 = s32[] while(%param.3), condition=%cond.v0, body=%body.v0 + %mul = s32[] multiply(%while.0, %while.0) + %while.1 = s32[] while(%mul), condition=%cond.v1, body=%body.v1 + ROOT %bcast = s32[1024,1024]{1,0} broadcast(s32[] %while.1), dimensions={} +})"; + + TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr module, + tools::Parse(module_str)); + + // Run CopyInsertion and check if the graph constructed above doesn't need + // any copies inserted for BufferAssignment to run. + int64 instruction_count = module->instruction_count(); + CopyInsertion copy_insertion; + ASSERT_IS_OK(copy_insertion.Run(module.get()).status()); + ASSERT_EQ(instruction_count, module->instruction_count()); + + // Get the instructions in the module. + const HloInstruction* bcast = module->entry_computation()->root_instruction(); + const HloInstruction* param = + module->entry_computation()->parameter_instruction(0); + ASSERT_EQ(bcast->opcode(), HloOpcode::kBroadcast); + const HloInstruction* while1 = bcast->operand(0); + ASSERT_EQ(while1->opcode(), HloOpcode::kWhile); + const HloInstruction* while0 = while1->operand(0)->operand(0); + ASSERT_EQ(while0->opcode(), HloOpcode::kWhile); + + // Run buffer assignment. 
+ auto assignment = RunBufferAssignment(module.get()); + TF_ASSERT_OK_AND_ASSIGN(auto slice_param, + assignment->GetUniqueSlice(param, {})); + TF_ASSERT_OK_AND_ASSIGN(auto slice_while0, + assignment->GetUniqueSlice(while0, {})); + TF_ASSERT_OK_AND_ASSIGN(auto slice_while1, + assignment->GetUniqueSlice(while1, {})); + + // The parameter slice is part of the while0's colocation set (init value), + // but not merged into the while1's colocation set. + EXPECT_EQ(slice_param, slice_while0); + EXPECT_NE(slice_param, slice_while1); +} + // Tests that the colocated buffers for while instructions are properly assigned // during buffer assignment such that the result tuple elements are not assigned // to the same buffer. -- GitLab From ecbb8b1ccac295537827dfe1ca25ddb03ca5f22b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 19:41:00 -0800 Subject: [PATCH 507/884] Add helper function for Xor in HLO. RELNOTES: n/a PiperOrigin-RevId: 188119450 --- tensorflow/compiler/xla/client/computation_builder.cc | 8 ++++++++ tensorflow/compiler/xla/client/computation_builder.h | 4 ++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/compiler/xla/client/computation_builder.cc b/tensorflow/compiler/xla/client/computation_builder.cc index 4afef6e448..39d02f0863 100644 --- a/tensorflow/compiler/xla/client/computation_builder.cc +++ b/tensorflow/compiler/xla/client/computation_builder.cc @@ -868,6 +868,14 @@ ComputationDataHandle ComputationBuilder::Or( return BinaryOp(BINOP_OR, lhs, rhs, broadcast_dimensions); } +// TODO(b/65209188): Create a dedicated lowering for Xor +ComputationDataHandle ComputationBuilder::Xor( + const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions) { + return Or(And(Not(lhs), rhs, broadcast_dimensions), + And(lhs, Not(rhs), broadcast_dimensions)); +} + ComputationDataHandle ComputationBuilder::Not( const ComputationDataHandle& operand) { return UnaryOp(UNOP_NOT, 
operand); diff --git a/tensorflow/compiler/xla/client/computation_builder.h b/tensorflow/compiler/xla/client/computation_builder.h index e085fcb3b1..2141ebc206 100644 --- a/tensorflow/compiler/xla/client/computation_builder.h +++ b/tensorflow/compiler/xla/client/computation_builder.h @@ -512,6 +512,10 @@ class ComputationBuilder { const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, tensorflow::gtl::ArraySlice broadcast_dimensions = {}); + ComputationDataHandle Xor( + const ComputationDataHandle& lhs, const ComputationDataHandle& rhs, + tensorflow::gtl::ArraySlice broadcast_dimensions = {}); + ComputationDataHandle Not(const ComputationDataHandle& operand); ComputationDataHandle ShiftLeft( -- GitLab From 4380d6eff899ca2f5e14d4d92f7fcf770b36b099 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 19:57:12 -0800 Subject: [PATCH 508/884] Add basic support for explicit type annotations. This is done by inserting a no-op function call. Note that this is meant as fallback, and we prefer the following alternatives (in their order) for inferring the type: 1. Automatic from context, e.g. the type of a list based on the elements added to it (WIP) 2. 
Type annotations (Python 3.6+ only) PiperOrigin-RevId: 188120527 --- tensorflow/contrib/py2tf/impl/conversion.py | 37 +++++++---- tensorflow/contrib/py2tf/pyct/context.py | 6 +- .../contrib/py2tf/pyct/static_analysis/BUILD | 1 + .../py2tf/pyct/static_analysis/type_info.py | 63 +++++++++++++++++-- .../pyct/static_analysis/type_info_test.py | 25 +++++++- tensorflow/contrib/py2tf/utils/BUILD | 1 + tensorflow/contrib/py2tf/utils/__init__.py | 1 + tensorflow/contrib/py2tf/utils/type_hints.py | 41 ++++++++++++ 8 files changed, 153 insertions(+), 22 deletions(-) create mode 100644 tensorflow/contrib/py2tf/utils/type_hints.py diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index c6f4988375..97ee4ca435 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -41,6 +41,7 @@ from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct.static_analysis import activity from tensorflow.contrib.py2tf.pyct.static_analysis import live_values from tensorflow.contrib.py2tf.pyct.static_analysis import type_info +from tensorflow.contrib.py2tf.utils import type_hints from tensorflow.python.util import tf_inspect @@ -48,7 +49,9 @@ from tensorflow.python.util import tf_inspect class ConversionMap(object): - """ConversionMaps keep track of converting function hierarchies. + """ConversionMap keeps track of converting function hierarchies. + + This object is mutable, and is updated as functions are converted. 
Attributes: recursive: Whether to recusrively convert any functions that the decorator @@ -154,14 +157,20 @@ def entity_to_graph(o, conversion_map, arg_values, arg_types): conversion_map.add_to_cache(o, node) if conversion_map.recursive: - for obj in conversion_map.name_map.keys(): - if obj not in conversion_map.dependency_cache: - if (hasattr(obj, 'im_class') and - getattr(obj, 'im_class') not in conversion_map.partial_types): - # Class members are converted with their objects, unless they're - # only converted partially. - continue - entity_to_graph(obj, conversion_map, {}, {}) + while True: + candidate = None + for obj in conversion_map.name_map.keys(): + if obj not in conversion_map.dependency_cache: + candidate = obj + break + if candidate is None: + break + if (hasattr(candidate, 'im_class') and + getattr(candidate, 'im_class') not in conversion_map.partial_types): + # Class members are converted with their objects, unless they're + # only converted partially. + continue + entity_to_graph(candidate, conversion_map, {}, {}) return node, new_name @@ -169,9 +178,10 @@ def entity_to_graph(o, conversion_map, arg_values, arg_types): def class_to_graph(c, conversion_map): """Specialization of `entity_to_graph` for classes.""" converted_members = {} - members = tf_inspect.getmembers(c, predicate=tf_inspect.ismethod) + method_filter = lambda m: tf_inspect.isfunction(m) or tf_inspect.ismethod(m) + members = tf_inspect.getmembers(c, predicate=method_filter) if not members: - raise ValueError('Cannot convert %s: it has no member methods.') + raise ValueError('Cannot convert %s: it has no member methods.' 
% c) class_namespace = None for _, m in members: @@ -191,7 +201,7 @@ def class_to_graph(c, conversion_map): class_name, bases=[], keywords=[], - body=converted_members.values(), + body=list(converted_members.values()), decorator_list=[]) return node, class_name @@ -233,7 +243,8 @@ def function_to_graph(f, conversion_map, arg_values, arg_types, arg_values=arg_values, arg_types=arg_types, owner_type=owner_type, - recursive=conversion_map.recursive) + recursive=conversion_map.recursive, + type_annotation_func=type_hints.set_element_type) node, deps = node_to_graph(node, ctx, conversion_map.nocompile_decorators) # TODO(mdan): This somewhat duplicates the call rename logic in call_treest.py diff --git a/tensorflow/contrib/py2tf/pyct/context.py b/tensorflow/contrib/py2tf/pyct/context.py index 4fcf2a687d..b34015cfd2 100644 --- a/tensorflow/contrib/py2tf/pyct/context.py +++ b/tensorflow/contrib/py2tf/pyct/context.py @@ -22,6 +22,8 @@ from __future__ import print_function class EntityContext(object): """Contains information about an entity, like source code. + In general, objects of this class should be considered immutable. + Attributes: namer: Namer that matches the contract of all converters. source_code: The entity's source code. @@ -33,8 +35,9 @@ class EntityContext(object): owner_type: The surrounding class type of the function, if present. """ + # TODO(mdan): Remove the default and update tests. 
def __init__(self, namer, source_code, source_file, namespace, arg_values, - arg_types, owner_type, recursive): + arg_types, owner_type, recursive, type_annotation_func=None): self.namer = namer self.source_code = source_code self.source_file = source_file @@ -43,3 +46,4 @@ class EntityContext(object): self.arg_types = {} if arg_types is None else arg_types self.owner_type = owner_type self.recursive = recursive + self.type_annotation_func = type_annotation_func diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD b/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD index fbfce18c60..2799b56a00 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/BUILD @@ -60,6 +60,7 @@ py_test( deps = [ ":static_analysis", "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/contrib/py2tf/utils", "//tensorflow/python:client_testlib", ], ) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py index 8203bda0f9..5556a58c02 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info.py @@ -14,9 +14,29 @@ # ============================================================================== """Type resolution. +This analyzer uses known live values to further infer object types. This +may include for instance constructed objects and object member functions. + +In addition, the analyzer will also process annotations for TF (staged) type +annotations. + Requires annotations generated by LiveValuesResolver. """ +# TODO(mdan): This would be more robust with a CFG. +# Situations with multiple reaching modifications (e.g. modified inside and +# outside a control flow statement) should be more robustly detected and +# analyzed. + +# TODO(mdan): Look into using Python AST's type annotation fields instead. +# It would be desirable to use that mechanism if we can. 
+# Some caveats to consider: We may need to annotate other nodes like +# Attribute. It may also not be feasible for us to faithfully replicate +# PY3's type annotations where it isn't available. It would also require us +# to design rigorous type definitions that can accommodate Python types +# as well as TensorFlow dtypes and shapes. + + from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -29,7 +49,7 @@ from tensorflow.python.util import tf_inspect class Scope(object): - """Encloses symbol value references. + """Tracks symbol value references. Attributes: values: A dict mapping string to gast.Node, containing the value that was @@ -138,11 +158,14 @@ class TypeInfoResolver(transformer.Base): elif isinstance(node.ctx, gast.Load) and self.scope.hasval(qn): # E.g. if we had # a = b - # then for future references to `a` we should have traced_source = `b` - traced_source = self.scope.getval(qn) - if anno.hasanno(traced_source, 'type'): - anno.setanno(node, 'type', anno.getanno(traced_source, 'type')) - anno.setanno(node, 'type_fqn', anno.getanno(traced_source, 'type_fqn')) + # then for future references to `a` we should have definition = `b` + definition = self.scope.getval(qn) + if anno.hasanno(definition, 'type'): + anno.setanno(node, 'type', anno.getanno(definition, 'type')) + anno.setanno(node, 'type_fqn', anno.getanno(definition, 'type_fqn')) + if anno.hasanno(definition, 'element_type'): + anno.setanno(node, 'element_type', + anno.getanno(definition, 'element_type')) return node def _process_variable_assignment(self, source, targets): @@ -181,6 +204,34 @@ class TypeInfoResolver(transformer.Base): self._process_variable_assignment(node.value, node.targets) return node + def visit_Call(self, node): + if anno.hasanno(node.func, 'live_val'): + # Symbols targeted by the "set_type" marker function are assigned the data + # type that it specified. 
+ if (anno.getanno(node.func, 'live_val') is + self.context.type_annotation_func): + # Expecting the actual type to be the second argument. + if len(node.args) != 2: + raise ValueError('"%s" must have exactly two parameters' + % self.context.type_annotation_func) + if not anno.hasanno(node.args[0], anno.Basic.QN): + raise ValueError('the first argument of "%s" must by a symbol' + % self.context.type_annotation_func) + if not anno.hasanno(node.args[1], 'live_val'): + raise ValueError( + 'the second argument of "%s" must be statically resolvable' % + self.context.type_annotation_func) + target_symbol = anno.getanno(node.args[0], anno.Basic.QN) + element_type = anno.getanno(node.args[1], 'live_val') + # Find the definition of this symbol and annotate it with the given + # data type. That in turn will cause future uses of the symbol + # to receive the same type annotation. + definition = self.scope.getval(target_symbol) + anno.setanno(node, 'element_type', element_type) + anno.setanno(definition, 'element_type', element_type) + # TODO(mdan): Should we update references between definition and here? 
+ return self.generic_visit(node) + def resolve(node, context): return TypeInfoResolver(context).visit(node) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py index a3e78202c8..0d9d5a85f0 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/type_info_test.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.py2tf import utils from tensorflow.contrib.py2tf.pyct import anno from tensorflow.contrib.py2tf.pyct import context from tensorflow.contrib.py2tf.pyct import parser @@ -56,7 +57,10 @@ class ScopeTest(test.TestCase): class TypeInfoResolverTest(test.TestCase): - def _parse_and_analyze(self, test_fn, namespace, arg_types=None): + def _parse_and_analyze(self, + test_fn, + namespace, + arg_types=None): node, source = parser.parse_entity(test_fn) ctx = context.EntityContext( namer=None, @@ -66,7 +70,8 @@ class TypeInfoResolverTest(test.TestCase): arg_values=None, arg_types=arg_types, owner_type=None, - recursive=True) + recursive=True, + type_annotation_func=utils.set_element_type) node = qual_names.resolve(node) node = activity.resolve(node, ctx) node = live_values.resolve(node, ctx, {}) @@ -175,6 +180,22 @@ class TypeInfoResolverTest(test.TestCase): method_call = node.body[0].body[1].value.func self.assertFalse(anno.hasanno(method_call, 'live_val')) + def test_type_annotation(self): + + class Foo(object): + pass + + def test_fn(): + f = [] + f = utils.set_element_type(f, Foo) + return f + + node = self._parse_and_analyze(test_fn, {'Foo': Foo, 'utils': utils}) + f_def = node.body[0].body[0].value + self.assertEqual(anno.getanno(f_def, 'element_type'), Foo) + f_ref = node.body[0].body[1].value + self.assertEqual(anno.getanno(f_ref, 'element_type'), Foo) + if __name__ == '__main__': test.main() diff 
--git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index 63261d5043..c6a894b508 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -28,6 +28,7 @@ py_library( "tensor_list.py", "testing.py", "type_check.py", + "type_hints.py", ], srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index 313e5c97cc..997c815887 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -27,3 +27,4 @@ from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func from tensorflow.contrib.py2tf.utils.testing import fake_tf from tensorflow.contrib.py2tf.utils.type_check import is_tensor +from tensorflow.contrib.py2tf.utils.type_hints import set_element_type diff --git a/tensorflow/contrib/py2tf/utils/type_hints.py b/tensorflow/contrib/py2tf/utils/type_hints.py new file mode 100644 index 0000000000..aeb9e54561 --- /dev/null +++ b/tensorflow/contrib/py2tf/utils/type_hints.py @@ -0,0 +1,41 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""No-op utilities that provide static type hints. 
+ +These are used when the data type is not known at creation, for instance in the +case of empty lists. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +def set_element_type(entity, dtype, shape=None): + """Indicates that the entity is expected to hold items of specified type. + + This function is a no-op. Its presence merely marks the data type of its + argument. The staged TensorFlow ops will reflect and assert this data type. + + Args: + entity: A Tensor or TensorArray. + dtype: TensorFlow dtype value to assert for entity. + shape: Optional shape to assert for entity. + Returns: + The value of entity, unchanged. + """ + del dtype + del shape + return entity -- GitLab From 7a7de6f18f0e8f13e69f1df9b9c9cc3b65051ef2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Tue, 6 Mar 2018 20:12:55 -0800 Subject: [PATCH 509/884] Made sure all the nodes in the body of an inlined function run in the same frame PiperOrigin-RevId: 188121852 --- tensorflow/core/grappler/optimizers/function_optimizer.cc | 8 +++++++- .../core/grappler/optimizers/function_optimizer_test.cc | 4 ++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index 4b830bcc6e..d8a237c297 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -78,10 +78,16 @@ Status InlineFunction(const NodeDef& node, const FunctionDef& func, func_body_node.add_input( strings::StrCat(func_inputs->name(), ":", input_id)); } else { - // Update the input names. + // Update the input names if any. 
for (string& input : *func_body_node.mutable_input()) { input = AddPrefixToNodeName(input, node.name()); } + // If the node has no input, hook it up to the func_inputs node to + // ensure it runs in the same frame as the other nodes of the function + // body. + if (func_body_node.input_size() == 0) { + *func_body_node.add_input() = AsControlDependency(func_inputs->name()); + } } // Add the node name as a prefix to avoid collisions after inlining diff --git a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc index 8db9b7f77a..bafcdf4923 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer_test.cc @@ -63,6 +63,8 @@ TEST_F(FunctionOptimizerTest, SimpleFunction) { count++; EXPECT_EQ("Const", node.op()); EXPECT_EQ(device, node.device()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^y/inlined_inputs", node.input(0)); } else if (node.name() == "y/scale") { count++; EXPECT_EQ("Cast", node.op()); @@ -153,6 +155,8 @@ TEST_F(FunctionOptimizerTest, FixedTypeFunction) { } else if (node.name() == "y/two") { count++; EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^y/inlined_inputs", node.input(0)); EXPECT_EQ(device, node.device()); } else if (node.name() == "y/y") { count++; -- GitLab From 99e29f79576a8a1fc4c32beae4c44f7af5ee53a7 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Tue, 6 Mar 2018 20:28:00 -0800 Subject: [PATCH 510/884] [TF:XLA] Bump open source llvm revision to r326687 PiperOrigin-RevId: 188122825 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 1af246f9dc..8350993cc8 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - 
"https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/193aea3782308c66a7a12f1c37520a1b4ff1dbd8.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/193aea3782308c66a7a12f1c37520a1b4ff1dbd8.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/fce2d38e3979d1b01238c6b7df1b2c56da8569f1.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/fce2d38e3979d1b01238c6b7df1b2c56da8569f1.tar.gz", ], - sha256 = "2eda56deafb8da85bc23aa52fa1fb8c39da6a58c865e5216d0a0787bd09a09ed", - strip_prefix = "llvm-193aea3782308c66a7a12f1c37520a1b4ff1dbd8", + sha256 = "9931112227f09b8533911174fa03f563e822d3e02d73df506fa97caa7a31363a", + strip_prefix = "llvm-fce2d38e3979d1b01238c6b7df1b2c56da8569f1", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 17a0b492b1548830b87a048b931522b59bd7466a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 20:38:27 -0800 Subject: [PATCH 511/884] Makes GLSTMCell accept input of any compatible dimension. Currently, GLSTMCell requires that the input dimension is the same as the output dimension. After this change, the input can be any compatible dimension---i.e., anything divisible by the number of groups. The input size is still assumed to be the output size in the case where the innermost dimension of the input is not statically-defined. 
PiperOrigin-RevId: 188123536 --- .../rnn/python/kernel_tests/rnn_cell_test.py | 107 ++++++++++++------ tensorflow/contrib/rnn/python/ops/rnn_cell.py | 34 +++++- 2 files changed, 99 insertions(+), 42 deletions(-) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index eef1ae25e9..7de55a0bb3 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -1031,57 +1031,92 @@ class RNNCellTest(test.TestCase): num_units = 4 number_of_groups = 1 - with self.test_session() as sess: - with variable_scope.variable_scope( - "root1", initializer=init_ops.constant_initializer(0.5)): - x = array_ops.ones([batch_size, num_units]) - # When number_of_groups = 1, G-LSTM is equivalent to regular LSTM - gcell = contrib_rnn_cell.GLSTMCell( - num_units=num_units, number_of_groups=number_of_groups) - cell = rnn_cell.LSTMCell(num_units=num_units) - self.assertTrue(isinstance(gcell.state_size, tuple)) - zero_state = gcell.zero_state( - batch_size=batch_size, dtype=dtypes.float32) - gh, gs = gcell(x, zero_state) - h, g = cell(x, zero_state) + # Try with input dimension equal to num_units or not. 
+ for num_inputs in [num_units, num_units + number_of_groups]: + with self.test_session() as sess: + with variable_scope.variable_scope( + "root1_%d" % num_inputs, + initializer=init_ops.constant_initializer(0.5)): + x = array_ops.ones([batch_size, num_inputs]) + # When number_of_groups = 1, G-LSTM is equivalent to regular LSTM + gcell = contrib_rnn_cell.GLSTMCell( + num_units=num_units, number_of_groups=number_of_groups) + cell = rnn_cell.LSTMCell(num_units=num_units) + self.assertTrue(isinstance(gcell.state_size, tuple)) + zero_state = gcell.zero_state( + batch_size=batch_size, dtype=dtypes.float32) + gh, gs = gcell(x, zero_state) + h, g = cell(x, zero_state) - sess.run([variables.global_variables_initializer()]) - glstm_result = sess.run([gh, gs]) - lstm_result = sess.run([h, g]) + sess.run([variables.global_variables_initializer()]) + glstm_result = sess.run([gh, gs]) + lstm_result = sess.run([h, g]) - self.assertAllClose(glstm_result[0], lstm_result[0], 1e-5) - self.assertAllClose(glstm_result[1], lstm_result[1], 1e-5) + self.assertAllClose(glstm_result[0], lstm_result[0], 1e-5) + self.assertAllClose(glstm_result[1], lstm_result[1], 1e-5) # Test that G-LSTM subgroup act like corresponding sub-LSTMs batch_size = 2 num_units = 4 number_of_groups = 2 - with self.test_session() as sess: + # Try with num_inputs equal to or not equal to num_units. 
+ for num_inputs in [num_units, num_units + number_of_groups]: + with self.test_session() as sess: + with variable_scope.variable_scope( + "root2_%d" % num_inputs, + initializer=init_ops.constant_initializer(0.5)): + # input for G-LSTM with 2 groups + glstm_input = array_ops.ones([batch_size, num_inputs]) + gcell = contrib_rnn_cell.GLSTMCell( + num_units=num_units, number_of_groups=number_of_groups) + gcell_zero_state = gcell.zero_state( + batch_size=batch_size, dtype=dtypes.float32) + gh, gs = gcell(glstm_input, gcell_zero_state) + + # input for LSTM cell simulating single G-LSTM group + lstm_input = array_ops.ones( + [batch_size, num_inputs / number_of_groups]) + # note division by number_of_groups. This cell one simulates G-LSTM + # group + cell = rnn_cell.LSTMCell(num_units=int(num_units / number_of_groups)) + cell_zero_state = cell.zero_state( + batch_size=batch_size, dtype=dtypes.float32) + h, g = cell(lstm_input, cell_zero_state) + + sess.run([variables.global_variables_initializer()]) + [gh_res, h_res] = sess.run([gh, h]) + self.assertAllClose(gh_res[:, 0:int(num_units / number_of_groups)], + h_res, 1e-5) + self.assertAllClose(gh_res[:, int(num_units / number_of_groups):], + h_res, 1e-5) + + def testGLSTMCellFailure(self): + batch_size = 2 + num_units = 4 + number_of_groups = 2 + with self.test_session(): with variable_scope.variable_scope( - "root2", initializer=init_ops.constant_initializer(0.5)): - # input for G-LSTM with 2 groups - glstm_input = array_ops.ones([batch_size, num_units]) + "glstm_failure", initializer=init_ops.constant_initializer(0.5)): gcell = contrib_rnn_cell.GLSTMCell( num_units=num_units, number_of_groups=number_of_groups) gcell_zero_state = gcell.zero_state( batch_size=batch_size, dtype=dtypes.float32) - gh, gs = gcell(glstm_input, gcell_zero_state) - # input for LSTM cell simulating single G-LSTM group - lstm_input = array_ops.ones([batch_size, num_units / number_of_groups]) - # note division by number_of_groups. 
This cell one simulates G-LSTM group - cell = rnn_cell.LSTMCell(num_units=int(num_units / number_of_groups)) - cell_zero_state = cell.zero_state( - batch_size=batch_size, dtype=dtypes.float32) - h, g = cell(lstm_input, cell_zero_state) + # Try an input with statically-unknown innermost dimension. + glstm_input = array_ops.placeholder( + dtypes.float32, shape=[batch_size, None]) + with self.assertRaisesRegexp(ValueError, + "input size must be statically known"): + gcell(glstm_input, gcell_zero_state) - sess.run([variables.global_variables_initializer()]) - [gh_res, h_res] = sess.run([gh, h]) - self.assertAllClose(gh_res[:, 0:int(num_units / number_of_groups)], - h_res, 1e-5) - self.assertAllClose(gh_res[:, int(num_units / number_of_groups):], - h_res, 1e-5) + # Try an input whose innermost dimension isn't divisible into groups. + glstm_input = array_ops.placeholder( + dtypes.float32, shape=[batch_size, 3]) + with self.assertRaisesRegexp( + ValueError, + r"input size \(3\) must be divisible by number_of_groups \(2\)"): + gcell(glstm_input, gcell_zero_state) class LayerNormBasicLSTMCellTest(test.TestCase): diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index a6c2d9cdbb..6bea8d4a21 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2225,6 +2225,13 @@ class GLSTMCell(rnn_cell_impl.RNNCell): O. Kuchaiev and B. Ginsburg "Factorization Tricks for LSTM Networks", ICLR 2017 workshop. + + In brief, a G-LSTM cell consists of one LSTM sub-cell per group, where each + sub-cell operates on an evenly-sized sub-vector of the input and produces an + evenly-sized sub-vector of the output. For example, a G-LSTM cell with 128 + units and 4 groups consists of 4 LSTM sub-cells with 32 units each. If that + G-LSTM cell is fed a 200-dim input, then each sub-cell receives a 50-dim part + of the input and produces a 32-dim part of the output. 
""" def __init__(self, @@ -2320,9 +2327,12 @@ class GLSTMCell(rnn_cell_impl.RNNCell): """Run one step of G-LSTM. Args: - inputs: input Tensor, 2D, [batch x num_units]. - state: this must be a tuple of state Tensors, both `2-D`, - with column sizes `c_state` and `m_state`. + inputs: input Tensor, 2D, [batch x num_inputs]. num_inputs must be + statically-known and evenly divisible into groups. The innermost + vectors of the inputs are split into evenly-sized sub-vectors and fed + into the per-group LSTM sub-cells. + state: this must be a tuple of state Tensors, both `2-D`, with column + sizes `c_state` and `m_state`. Returns: A tuple containing: @@ -2337,11 +2347,24 @@ class GLSTMCell(rnn_cell_impl.RNNCell): Raises: ValueError: If input size cannot be inferred from inputs via - static shape inference. + static shape inference, or if the input shape is incompatible + with the number of groups. """ (c_prev, m_prev) = state self._batch_size = inputs.shape[0].value or array_ops.shape(inputs)[0] + + # The input size must be statically known and divisible by the number of + # groups; validate it and calculate the input group size. 
+ input_size = inputs.shape[1].value + if input_size is None: + raise ValueError("input size must be statically known") + if input_size % self._number_of_groups != 0: + raise ValueError( + "input size (%d) must be divisible by number_of_groups (%d)" % + (input_size, self._number_of_groups)) + input_group_size = int(input_size / self._number_of_groups) + dtype = inputs.dtype scope = vs.get_variable_scope() with vs.variable_scope(scope, initializer=self._initializer): @@ -2354,8 +2377,7 @@ class GLSTMCell(rnn_cell_impl.RNNCell): with vs.variable_scope("group%d" % group_id): x_g_id = array_ops.concat( [ - self._get_input_for_group(inputs, group_id, - self._group_shape[0]), + self._get_input_for_group(inputs, group_id, input_group_size), self._get_input_for_group(m_prev, group_id, self._group_shape[0]) ], -- GitLab From d74c8ae1b89dd426837eddd4bb8b0881e3d60e82 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Tue, 6 Mar 2018 21:46:08 -0800 Subject: [PATCH 512/884] Minor fixes to tutorials/index.md and programmers_guide/index.md PiperOrigin-RevId: 188128441 --- tensorflow/docs_src/programmers_guide/index.md | 4 ++++ tensorflow/docs_src/tutorials/index.md | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/docs_src/programmers_guide/index.md b/tensorflow/docs_src/programmers_guide/index.md index 7a5e90081d..e8c2fa6990 100644 --- a/tensorflow/docs_src/programmers_guide/index.md +++ b/tensorflow/docs_src/programmers_guide/index.md @@ -30,8 +30,12 @@ works. The units are as follows: can still be helpful. * @{$programmers_guide/saved_model}, which explains how to save and restore variables and models. + +## Accelerators + * @{$using_gpu} explains how TensorFlow assigns operations to devices and how you can change the arrangement manually. + * @{$using_tpu} explains how to modify `Estimator` programs to run on a TPU. 
## ML Concepts diff --git a/tensorflow/docs_src/tutorials/index.md b/tensorflow/docs_src/tutorials/index.md index 8c697e48e5..af01d3eaa1 100644 --- a/tensorflow/docs_src/tutorials/index.md +++ b/tensorflow/docs_src/tutorials/index.md @@ -10,7 +10,7 @@ these tutorials. These tutorials cover different aspects of image recognition: - * @{$layers}, which introduces convolutional neural networks (CNNs) and + * @{$layers$MNIST}, which introduces convolutional neural networks (CNNs) and demonstrates how to build a CNN in TensorFlow. * @{$image_recognition}, which introduces the field of image recognition and uses a pre-trained model (Inception) for recognizing images. -- GitLab From e8779672c3f7430acda9f4f8304cfa59675a27df Mon Sep 17 00:00:00 2001 From: Shivani Agrawal Date: Tue, 6 Mar 2018 22:53:43 -0800 Subject: [PATCH 513/884] Typo correction, no method `set_stats_aggregator_op(..)` to associate `StatsAggregator` with `iterator`. PiperOrigin-RevId: 188132675 --- tensorflow/contrib/data/python/ops/stats_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/data/python/ops/stats_ops.py b/tensorflow/contrib/data/python/ops/stats_ops.py index 9cd1701c39..b5cf0fcfe9 100644 --- a/tensorflow/contrib/data/python/ops/stats_ops.py +++ b/tensorflow/contrib/data/python/ops/stats_ops.py @@ -47,7 +47,7 @@ class StatsAggregator(object): dataset = ... iterator = dataset.make_one_shot_iterator() stats_aggregator = stats_ops.StatsAggregator() - set_op = stats_op.set_stats_aggregator_op(iterator, stats_aggregator) + set_op = stats_aggregator.subscribe(iterator) with tf.Session() as sess: # Running `set_op` will associate `iterator` with `stats_aggregator`. -- GitLab From 079cb9ae0af7659323e05dc86372d0fc94cb8658 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Tue, 6 Mar 2018 23:42:36 -0800 Subject: [PATCH 514/884] Build definition cleanup. 
PiperOrigin-RevId: 188135683 --- tensorflow/contrib/lite/build_def.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/build_def.bzl b/tensorflow/contrib/lite/build_def.bzl index 19829e4991..2813d1c347 100644 --- a/tensorflow/contrib/lite/build_def.bzl +++ b/tensorflow/contrib/lite/build_def.bzl @@ -104,7 +104,7 @@ def tflite_jni_binary(name, """Builds a jni binary for TFLite.""" linkopts = linkopts + [ "-Wl,--version-script", # Export only jni functions & classes. - linkscript, + "$(location {})".format(linkscript), ] native.cc_binary( name=name, -- GitLab From c0824a4eeaffa7e30119fef21a5b689c972e6657 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 7 Mar 2018 03:37:07 -0800 Subject: [PATCH 515/884] [XLA:GPU] Rewrite elemental emission of bitcasts My first attempt at this only handled bitcasts that implement a reshape operation, now transposes or mixed bitcasts are handled as well. There is probably some optimization potential to reduce the amount of address arithmetic emitted to IR for a follow-up. This is already tested fairly well with the existing test suite, there are failing tests with layout_assignment before fusion without this change. 
PiperOrigin-RevId: 188155082 --- .../xla/service/elemental_ir_emitter.cc | 7 +++ .../compiler/xla/service/llvm_ir/ir_array.cc | 61 ++++++++++++++----- .../compiler/xla/service/llvm_ir/ir_array.h | 5 ++ 3 files changed, 59 insertions(+), 14 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 31c0f2233c..111c29593e 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1723,6 +1723,13 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( return ir_builder_->CreateLoad(ret_value_addr); }; case HloOpcode::kBitcast: + CHECK_EQ(ShapeUtil::ElementsIn(hlo->shape()), + ShapeUtil::ElementsIn(hlo->operand(0)->shape())); + return [this, hlo, &operand_to_generator](const IrArray::Index& index) { + const HloInstruction* operand = hlo->operand(0); + return operand_to_generator.at(operand)(index.SourceIndexOfBitcast( + hlo->shape(), operand->shape(), ir_builder_)); + }; case HloOpcode::kReshape: CHECK_EQ(ShapeUtil::ElementsIn(hlo->shape()), ShapeUtil::ElementsIn(hlo->operand(0)->shape())); diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 9aa0ce507b..4221a52fbe 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -29,18 +29,13 @@ limitations under the License. 
namespace xla { namespace llvm_ir { -IrArray::Index::Index(llvm::Value* linear, const Shape& shape, - llvm::IRBuilder<>* ir_builder) - : multidim_(ShapeUtil::Rank(shape)), - linear_(linear), - layout_(shape.layout()), - dims_(shape.dimensions().begin(), shape.dimensions().end()) { - CHECK(LayoutUtil::HasLayout(shape)) - << "Shape " << ShapeUtil::HumanStringWithLayout(shape) - << " should have a layout."; +static void Delinearize(std::vector* multidim, + llvm::Value* linear, const Shape& shape, + llvm::IRBuilder<>* ir_builder) { int64 divisor = 1; - for (int64 i = 0; i < layout_.minor_to_major_size(); ++i) { - int64 dimension = layout_.minor_to_major(i); + const Layout& layout = shape.layout(); + for (int64 i = 0; i < layout.minor_to_major_size(); ++i) { + int64 dimension = layout.minor_to_major(i); int64 size_of_current_dimension = shape.dimensions(dimension); // If i is not the last dimension, compute @@ -54,16 +49,28 @@ IrArray::Index::Index(llvm::Value* linear, const Shape& shape, // memory lives in one big allocation, so cuda-memcheck can't detect // out-of-bounds accesses. 
auto* quot = ir_builder->CreateUDiv(linear, ir_builder->getInt64(divisor)); - if (i < layout_.minor_to_major_size() - 1) { - multidim_[dimension] = ir_builder->CreateURem( + if (i < layout.minor_to_major_size() - 1) { + (*multidim)[dimension] = ir_builder->CreateURem( quot, ir_builder->getInt64(size_of_current_dimension)); } else { - multidim_[dimension] = quot; + (*multidim)[dimension] = quot; } divisor *= size_of_current_dimension; } } +IrArray::Index::Index(llvm::Value* linear, const Shape& shape, + llvm::IRBuilder<>* ir_builder) + : multidim_(ShapeUtil::Rank(shape)), + linear_(linear), + layout_(shape.layout()), + dims_(shape.dimensions().begin(), shape.dimensions().end()) { + CHECK(LayoutUtil::HasLayout(shape)) + << "Shape " << ShapeUtil::HumanStringWithLayout(shape) + << " should have a layout."; + Delinearize(&multidim_, linear, shape, ir_builder); +} + IrArray::Index::Index(tensorflow::gtl::ArraySlice multidim, llvm::Value* linear, const Shape& shape) : multidim_(multidim.begin(), multidim.end()), @@ -203,6 +210,32 @@ IrArray::Index IrArray::Index::SourceIndexOfTranspose( return Index(operand_multidim_index); } +IrArray::Index IrArray::Index::SourceIndexOfBitcast( + const Shape& shape, const Shape& operand_shape, + llvm::IRBuilder<>* builder) const { + CHECK(LayoutUtil::HasLayout(shape) && LayoutUtil::HasLayout(operand_shape)); + + // First linearize the index coming from the output of the bitcast. We want + // the physical index of the element in the buffer. This is like Linearize, + // but takes the layout into account. + int64 scale = 1; + llvm::Value* linear_index = builder->getInt64(0); + for (auto dimension : LayoutUtil::MinorToMajor(shape)) { + linear_index = builder->CreateAdd( + linear_index, + builder->CreateMul(multidim_[dimension], builder->getInt64(scale), "", + /*HasNUW=*/true, /*HasNSW=*/true), + "", /*HasNUW=*/true, /*HasNSW=*/true); + scale *= shape.dimensions(dimension); + } + + // Now delinearize it for the input of the bitcast. 
+ std::vector multi_index(operand_shape.dimensions_size()); + Delinearize(&multi_index, linear_index, operand_shape, builder); + + return Index(multi_index, linear_index, operand_shape); +} + llvm::Value* IrArray::Index::Linearize( tensorflow::gtl::ArraySlice dimensions, llvm::IRBuilder<>* builder) const { diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h index 387d462912..b942717512 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h @@ -134,6 +134,11 @@ class IrArray { tensorflow::gtl::ArraySlice dimension_mapping, llvm::IRBuilder<>* builder) const; + // Given that "this" is the target index of a bitcast from `operand_shape` + // to `shape` with the given dimension mapping, returns the source index. + Index SourceIndexOfBitcast(const Shape& shape, const Shape& operand_shape, + llvm::IRBuilder<>* builder) const; + // Linearizes the index into the given shape, i.e. reshapes it to rank-1 and // returns the index into the sole dimension 0 of the new shape. llvm::Value* Linearize(tensorflow::gtl::ArraySlice dimensions, -- GitLab From 4f0aa15e9635c33ca37f3aa714b10f4ca3199e7f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 03:44:48 -0800 Subject: [PATCH 516/884] Fix ShapeUtil::CompatibleIgnoringElementType for scalar vs tuple comparison Previously if the lhs was a scalar and the rhs was a tuple of arbitrary shape it reported them as compatible which is clearly wrong. 
PiperOrigin-RevId: 188155575 --- .../compiler/xla/service/shape_inference.cc | 3 ++- .../compiler/xla/service/user_computation.cc | 13 ++++++++----- tensorflow/compiler/xla/shape_util.cc | 15 +++++++++++++-- tensorflow/compiler/xla/shape_util.h | 1 + tensorflow/compiler/xla/shape_util_test.cc | 12 ++++++++++++ 5 files changed, 36 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index c54cb3b48d..915baecc56 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -2394,7 +2394,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( "Select's pred operand must have PRED element type; got %s.", ShapeUtil::HumanString(pred).c_str()); } - if (ShapeUtil::SameDimensions(pred, on_true) || ShapeUtil::Rank(pred) == 0) { + if (ShapeUtil::CompatibleIgnoringElementType(pred, on_true) || + ShapeUtil::Rank(pred) == 0) { // By this stage we know that pred's element type is PRED. Therefore, this // check restricts pred to be a PRED scalar, or a PRED array with the same // dimensions as on_true and on_false. 
diff --git a/tensorflow/compiler/xla/service/user_computation.cc b/tensorflow/compiler/xla/service/user_computation.cc index 06735e9442..0dca30a804 100644 --- a/tensorflow/compiler/xla/service/user_computation.cc +++ b/tensorflow/compiler/xla/service/user_computation.cc @@ -3315,20 +3315,23 @@ void ComputationLowerer::Visit( HloInstruction* rhs = lookup_instruction(ternary_op_request.rhs()); HloInstruction* ehs = lookup_instruction(ternary_op_request.ehs()); auto hlo_opcode = TernaryOperationToHloOpcode(ternary_op_request.triop()); - - if (debug_options_.xla_eliminate_hlo_implicit_broadcast()) { - if (!ShapeUtil::SameDimensions(request.output_shape(), lhs->shape())) { + if (debug_options_.xla_eliminate_hlo_implicit_broadcast() && + !ShapeUtil::IsTuple(request.output_shape())) { + if (!ShapeUtil::IsTuple(lhs->shape()) && + !ShapeUtil::SameDimensions(request.output_shape(), lhs->shape())) { // lhs side is being implicitly broadcast. Change to explicit. lhs = ImplicitBroadcastToExplicitBroadcast(lhs, request.output_shape()); } - if (!ShapeUtil::SameDimensions(request.output_shape(), rhs->shape())) { + if (!ShapeUtil::IsTuple(rhs->shape()) && + !ShapeUtil::SameDimensions(request.output_shape(), rhs->shape())) { rhs = ImplicitBroadcastToExplicitBroadcast(rhs, request.output_shape()); } - if (!ShapeUtil::SameDimensions(request.output_shape(), ehs->shape())) { + if (!ShapeUtil::IsTuple(ehs->shape()) && + !ShapeUtil::SameDimensions(request.output_shape(), ehs->shape())) { ehs = ImplicitBroadcastToExplicitBroadcast(ehs, request.output_shape()); } diff --git a/tensorflow/compiler/xla/shape_util.cc b/tensorflow/compiler/xla/shape_util.cc index 9810e818f6..4f604e6f7c 100644 --- a/tensorflow/compiler/xla/shape_util.cc +++ b/tensorflow/compiler/xla/shape_util.cc @@ -609,6 +609,8 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { /* static */ bool ShapeUtil::SameDimensions(const Shape& lhs, const Shape& rhs) { + CHECK(ShapeUtil::IsArray(lhs)); + 
CHECK(ShapeUtil::IsArray(rhs)); return ContainersEqual(lhs.dimensions(), rhs.dimensions()); } @@ -617,7 +619,10 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { return rhs.element_type() == TUPLE && ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), Compatible); } - return SameDimensions(lhs, rhs) && SameElementType(lhs, rhs); + if (lhs.element_type() == OPAQUE) { + return rhs.element_type() == OPAQUE; + } + return SameElementType(lhs, rhs) && SameDimensions(lhs, rhs); } /* static */ bool ShapeUtil::CompatibleIgnoringElementType(const Shape& lhs, @@ -627,7 +632,10 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), CompatibleIgnoringElementType); } - return SameDimensions(lhs, rhs); + if (lhs.element_type() == OPAQUE) { + return rhs.element_type() == OPAQUE; + } + return ShapeUtil::IsArray(rhs) && SameDimensions(lhs, rhs); } /* static */ bool ShapeUtil::CompatibleIgnoringFpPrecision(const Shape& lhs, @@ -637,6 +645,9 @@ StatusOr ParseShapeStringInternal(tensorflow::StringPiece* s) { ContainersEqual(lhs.tuple_shapes(), rhs.tuple_shapes(), CompatibleIgnoringFpPrecision); } + if (lhs.element_type() == OPAQUE) { + return rhs.element_type() == OPAQUE; + } if (SameElementTypeIgnoringFpPrecision(lhs, rhs)) { return CompatibleIgnoringElementType(lhs, rhs); } diff --git a/tensorflow/compiler/xla/shape_util.h b/tensorflow/compiler/xla/shape_util.h index 92b365e072..3e130a02e2 100644 --- a/tensorflow/compiler/xla/shape_util.h +++ b/tensorflow/compiler/xla/shape_util.h @@ -209,6 +209,7 @@ class ShapeUtil { // Returns whether the LHS and RHS shapes have the same dimensions; note: does // not check element type. + // Precondition: IsArray(lhs) && IsArray(rhs) static bool SameDimensions(const Shape& lhs, const Shape& rhs); // Returns whether the lhs and rhs shapes have the same element type. 
diff --git a/tensorflow/compiler/xla/shape_util_test.cc b/tensorflow/compiler/xla/shape_util_test.cc index a357415698..424cfe37ea 100644 --- a/tensorflow/compiler/xla/shape_util_test.cc +++ b/tensorflow/compiler/xla/shape_util_test.cc @@ -238,6 +238,18 @@ TEST(ShapeUtilTest, IncompatibleTuplesWithDifferentDimensions) { EXPECT_FALSE(ShapeUtil::Compatible(tuple1, tuple2)); } +TEST(ShapeUtilTest, IncompatibleScalarVsTuple) { + Shape shape1 = ShapeUtil::MakeShape(F32, {}); + Shape shape2 = ShapeUtil::MakeTupleShape( + {ShapeUtil::MakeShape(F32, {3, 2}), ShapeUtil::MakeShape(U32, {})}); + EXPECT_FALSE(ShapeUtil::Compatible(shape1, shape2)); + EXPECT_FALSE(ShapeUtil::Compatible(shape2, shape1)); + EXPECT_FALSE(ShapeUtil::CompatibleIgnoringElementType(shape1, shape2)); + EXPECT_FALSE(ShapeUtil::CompatibleIgnoringElementType(shape2, shape1)); + EXPECT_FALSE(ShapeUtil::CompatibleIgnoringFpPrecision(shape1, shape2)); + EXPECT_FALSE(ShapeUtil::CompatibleIgnoringFpPrecision(shape2, shape1)); +} + TEST(ShapeUtilTest, CompareShapesWithPaddedDimensionsMismatch) { Shape shape1 = ShapeUtil::MakeShape(F32, {20, 30}); shape1.mutable_layout()->add_padded_dimensions(10); -- GitLab From 358fd36d0f2c23b725bf952d7c919e7d704a45ec Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 7 Mar 2018 04:22:40 -0800 Subject: [PATCH 517/884] [XLA:GPU] Move layout_assignment before fusion This will allow code simplification and opens up new optimization. Currently we don't emit layouts inside of fusion and tracing layouts through fusion is very hard. Changing the pipeline sidesteps this issue. This is mostly perf-neutral. 
PiperOrigin-RevId: 188158481 --- .../compiler/xla/service/gpu/gpu_compiler.cc | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc index 9e37acdf31..b41eb72d83 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc @@ -243,6 +243,22 @@ tensorflow::Status OptimizeHloModule(HloModule* hlo_module, TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); } + { + HloPassPipeline pipeline("layout_assignment"); + pipeline.AddPass( + hlo_module->mutable_entry_computation_layout()); + + // The LayoutAssignment pass may leave behind kCopy instructions which are + // duplicate or NOPs, so remove them with algebraic simplification and CSE. + pipeline.AddPass>( + /*is_layout_sensitive=*/true, + /*valid_bitcast_callback=*/[](const Shape&, const Shape&) { + return true; + }); + pipeline.AddPass(/*is_layout_sensitive=*/true); + TF_RETURN_IF_ERROR(pipeline.Run(hlo_module).status()); + } + { HloPassFix fusion("fusion"); fusion.AddInvariantChecker(); @@ -279,15 +295,6 @@ tensorflow::Status PrepareHloModuleForIrEmitting(HloModule* hlo_module) { HloPassPipeline pipeline("GPU-ir-emit-prepare"); pipeline.AddInvariantChecker(); - pipeline.AddPass( - hlo_module->mutable_entry_computation_layout()); - - // The LayoutAssignment pass may leave behind kCopy instructions which are - // duplicate or NOPs, so remove them with algebraic simplification and CSE. 
- pipeline.AddPass>( - /*is_layout_sensitive=*/true, - [](const Shape&, const Shape&) { return true; }); - pipeline.AddPass(/*is_layout_sensitive=*/true); // Copy insertion should be performed immediately before IR emission to avoid // inserting unnecessary copies (later pass adds an instruction which // materializes the value) or missing a necessary copy (later pass removes an -- GitLab From b2fcd7d80af4b7be7501135e043ef89ac9e65cb4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 7 Mar 2018 06:28:00 -0800 Subject: [PATCH 518/884] [XLA:GPU] Fuse broadcasts into reduction fusions We didn't do this because reconstructing a layout was hard. With layout_assignment before fusion this becomes much easier. Remove the limitations. PiperOrigin-RevId: 188167436 --- .../xla/service/gpu/instruction_fusion.cc | 11 ----- .../service/gpu/instruction_fusion_test.cc | 30 ++++++++++++++ .../xla/service/gpu/ir_emitter_unnested.cc | 40 ------------------- 3 files changed, 30 insertions(+), 51 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index 870d241856..84504d29e0 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -71,17 +71,6 @@ bool GpuInstructionFusion::ShouldFuse(HloInstruction* consumer, return false; } - // We may need to know original operand layout to emit input fusion, and so - // far, we merely use the layout of an operand of the fusion node, which means - // we must fuse only elementwise operations. This restriction should be lifted - // later if we need to fuse other operations, e.g. transpose, for performance. 
- if ((IsReductionToVector(*consumer) || - (HloOpcode::kFusion == consumer->opcode() && - HloInstruction::FusionKind::kInput == consumer->fusion_kind())) && - !producer->IsElementwise()) { - return false; - } - // Cost condition: not fuse (simple, expensive producers) and (consumers who // reuse operand elements). if (producer->opcode() != HloOpcode::kFusion && diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index 373e5a5587..c81dbb7bf3 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -164,6 +164,36 @@ TEST_F(InstructionFusionTest, GetTupleElementFused) { EXPECT_EQ(HloOpcode::kGetTupleElement, fused_root->operand(1)->opcode()); } +// Tests that broadcasts fused into a fusion with a reduce root. +TEST_F(InstructionFusionTest, BroadcastIntoReduce) { + auto module = tools::Parse(R"( + HloModule test_module + + add { + lhs = f32[] parameter(0) + rhs = f32[] parameter(1) + ROOT add = f32[] add(lhs, rhs) + } + + ENTRY BroadcastIntoReduce { + constant = f32[] constant(1) + broadcast = f32[16,16,16,16]{3,2,1,0} broadcast(constant), dimensions={} + constant.1 = f32[] constant(0) + ROOT reduce = f32[] reduce(broadcast, constant.1), dimensions={0,1,2,3}, + to_apply=add + })") + .ValueOrDie(); + + EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); + + HloInstruction* root = module->entry_computation()->root_instruction(); + EXPECT_THAT(root, op::Fusion()); + EXPECT_THAT(root->fused_expression_root(), + op::Reduce(op::Broadcast(op::Parameter()), op::Parameter())); +} + TEST_F(InstructionFusionTest, BitcastIntoAdd) { auto module = tools::Parse(R"( HloModule test_module diff --git a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc index 065b3a0e31..4cfb613ae9 100644 --- 
a/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc +++ b/tensorflow/compiler/xla/service/gpu/ir_emitter_unnested.cc @@ -517,46 +517,6 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) { TF_RETURN_IF_ERROR(root->Accept(&fused_emitter)); Shape input_shape = root->operand(0)->shape(); - // EmitReductionToVector requires the input shape to have a layout, but - // fused instructions don't have one. So we determine its layout from - // the fusion's operands. The choice of the layout only affects - // performance but not correctness. - auto choose_input_layout = []( - tensorflow::gtl::ArraySlice operands, - Shape* input_shape) -> Status { - // Prefer the layout of an operand whose shape is compatible with - // input_shape. - for (const HloInstruction* operand : operands) { - if (ShapeUtil::Compatible(*input_shape, operand->shape())) { - return LayoutUtil::CopyLayoutBetweenShapes(operand->shape(), - input_shape); - } - } - // If no operand has a compatible shape, prefer an operand that has - // the same rank at least. - for (const HloInstruction* operand : operands) { - // Skip tuple-shaped operands; calling ShapeUtil::Rank on a - // tuple-shaped Shape is illegal. Perhaps more correct would be to - // recurse into them, but TODO(kramerb): Remove this code after - // assigning layouts to fusion nodes. - if (ShapeUtil::IsTuple(operand->shape())) { - continue; - } - if (ShapeUtil::Rank(*input_shape) == - ShapeUtil::Rank(operand->shape())) { - // Do not use CopyLayoutBetweenShapes because input_shape and - // operand->shape() may be incompatible. - *input_shape->mutable_layout() = operand->shape().layout(); - return Status::OK(); - } - } - // When all the above fails, which is rare, set the default layout. 
- LayoutUtil::SetToDefaultLayout(input_shape); - return Status::OK(); - }; - TF_RETURN_IF_ERROR( - choose_input_layout(fusion->operands(), &input_shape)); - return EmitReductionToVector( root, input_shape, fused_emitter.GetGenerator(root->operand(0)), fused_emitter.GetGenerator(root->operand(1)), root->dimensions(), -- GitLab From 9c4145bd6656e4f2dd06dfc7170ad2d149a88dda Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 06:30:08 -0800 Subject: [PATCH 519/884] Update the code to play more nicely with Python3. PiperOrigin-RevId: 188167618 --- .../contrib/py2tf/converters/control_flow.py | 8 ++++++++ tensorflow/contrib/py2tf/impl/api.py | 2 +- tensorflow/contrib/py2tf/pyct/compiler.py | 5 ++++- tensorflow/contrib/py2tf/pyct/compiler_test.py | 17 +++++++++++++++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/py2tf/converters/control_flow.py b/tensorflow/contrib/py2tf/converters/control_flow.py index d53e3e4fd6..762c26f0c7 100644 --- a/tensorflow/contrib/py2tf/converters/control_flow.py +++ b/tensorflow/contrib/py2tf/converters/control_flow.py @@ -171,6 +171,14 @@ class ControlFlowTransformer(transformer.Base): all_referenced = body_scope.referenced state = list(body_closure) + if not state: + # TODO(mdan): Implement this properly. + # To complete this statement, we need to check whether any variable + # created inside the body scope is used before being modified outside the + # scope. This should be done during activity analysis, and in general + # should cover the case where variables may not be initialized. 
+ raise ValueError('cannot convert while loop: no outputs') + state_ssf = [ self.context.namer.new_symbol(s.ssf(), all_referenced) for s in state ] diff --git a/tensorflow/contrib/py2tf/impl/api.py b/tensorflow/contrib/py2tf/impl/api.py index 48100aac32..883b304089 100644 --- a/tensorflow/contrib/py2tf/impl/api.py +++ b/tensorflow/contrib/py2tf/impl/api.py @@ -234,7 +234,7 @@ def to_graph(e, module = gast.Module([]) for import_line in config.COMPILED_IMPORT_STATEMENTS: - module.body.append(parser.parse_str(import_line)) + module.body.extend(parser.parse_str(import_line).body) for dep in conversion_map.dependency_cache.values(): module.body.append(dep) compiled_node, compiled_src = compiler.ast_to_object(module) diff --git a/tensorflow/contrib/py2tf/pyct/compiler.py b/tensorflow/contrib/py2tf/pyct/compiler.py index 51cf6930e8..507dbc7ed3 100644 --- a/tensorflow/contrib/py2tf/pyct/compiler.py +++ b/tensorflow/contrib/py2tf/pyct/compiler.py @@ -39,7 +39,10 @@ def ast_to_source(node, indentation): astor.string_repr.pretty_string) generator.visit(node) generator.result.append('\n') - return astor.source_repr.pretty_source(generator.result).lstrip() + # In some versions of Python, literals may appear as actual values. This + # ensures everything is string. 
+ code = map(str, generator.result) + return astor.source_repr.pretty_source(code).lstrip() def ast_to_object( diff --git a/tensorflow/contrib/py2tf/pyct/compiler_test.py b/tensorflow/contrib/py2tf/pyct/compiler_test.py index c1f84238ef..243f4c8153 100644 --- a/tensorflow/contrib/py2tf/pyct/compiler_test.py +++ b/tensorflow/contrib/py2tf/pyct/compiler_test.py @@ -23,11 +23,28 @@ import textwrap import gast from tensorflow.contrib.py2tf.pyct import compiler +from tensorflow.contrib.py2tf.pyct import parser from tensorflow.python.platform import test +from tensorflow.python.util import tf_inspect class CompilerTest(test.TestCase): + def test_parser_compile_idempotent(self): + + def test_fn(x): + a = True + b = '' + if a: + b = x + 1 + return b + + self.assertEqual( + textwrap.dedent(tf_inspect.getsource(test_fn)), + tf_inspect.getsource( + compiler.ast_to_object( + parser.parse_entity(test_fn)[0].body[0])[0].test_fn)) + def test_ast_to_source(self): node = gast.If( test=gast.Num(1), -- GitLab From fbc2b857e45c4fe8fcd2ce016b3bb63ea9b9f924 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Wed, 7 Mar 2018 08:33:46 -0800 Subject: [PATCH 520/884] Make sure the string returned is a string in Python 3 because of different string handling processes. 
PiperOrigin-RevId: 188180206 --- .../cluster_resolver/python/training/tpu_cluster_resolver.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 83d26a17a8..91874f9b5c 100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -130,10 +130,11 @@ class TPUClusterResolver(ClusterResolver): should_resolve = self._shouldResolve() if not project and should_resolve: - project = self._requestComputeMetadata('project/project-id') + project = compat.as_str( + self._requestComputeMetadata('project/project-id')) if not zone and should_resolve: - zone_path = self._requestComputeMetadata('instance/zone') + zone_path = compat.as_str(self._requestComputeMetadata('instance/zone')) zone = zone_path.split('/')[-1] self._project = project -- GitLab From 2b211b681ac6264c61372d10c496e234bf2eda9b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 08:52:39 -0800 Subject: [PATCH 521/884] Add support for the "DEQUANTIZE" op. This covers only ops that are generated by TOCO in order to handle UINT8 input to floating-point models. 
PiperOrigin-RevId: 188182372 --- tensorflow/contrib/lite/builtin_ops.h | 1 + tensorflow/contrib/lite/kernels/BUILD | 14 ++ tensorflow/contrib/lite/kernels/dequantize.cc | 77 +++++++++++ .../contrib/lite/kernels/dequantize_test.cc | 65 ++++++++++ tensorflow/contrib/lite/kernels/register.cc | 2 + tensorflow/contrib/lite/model.cc | 1 + tensorflow/contrib/lite/nnapi_delegate.cc | 1 + tensorflow/contrib/lite/schema/schema.fbs | 6 +- .../contrib/lite/schema/schema_generated.h | 121 +++++++++++++++++- 9 files changed, 282 insertions(+), 6 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/dequantize.cc create mode 100644 tensorflow/contrib/lite/kernels/dequantize_test.cc diff --git a/tensorflow/contrib/lite/builtin_ops.h b/tensorflow/contrib/lite/builtin_ops.h index 7e08500980..2218ea8eac 100644 --- a/tensorflow/contrib/lite/builtin_ops.h +++ b/tensorflow/contrib/lite/builtin_ops.h @@ -32,6 +32,7 @@ typedef enum { kTfLiteBuiltinConcatenation = 2, kTfLiteBuiltinConv2d = 3, kTfLiteBuiltinDepthwiseConv2d = 4, + kTfLiteBuiltinDequantize = 6, kTfLiteBuiltinEmbeddingLookup = 7, kTfLiteBuiltinFullyConnected = 9, kTfLiteBuiltinHashtableLookup = 10, diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index a6be410dc8..8e9d427770 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -121,6 +121,7 @@ cc_library( "concatenation.cc", "conv.cc", "depthwise_conv.cc", + "dequantize.cc", "div.cc", "embedding_lookup.cc", "embedding_lookup_sparse.cc", @@ -295,6 +296,19 @@ tf_cc_test( ], ) +tf_cc_test( + name = "dequantize_test", + size = "small", + srcs = ["dequantize_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_absl//absl/memory", + "@com_google_googletest//:gtest", + ], +) + tf_cc_test( name = "basic_rnn_test", size = "small", diff --git a/tensorflow/contrib/lite/kernels/dequantize.cc 
b/tensorflow/contrib/lite/kernels/dequantize.cc new file mode 100644 index 0000000000..e685f2465f --- /dev/null +++ b/tensorflow/contrib/lite/kernels/dequantize.cc @@ -0,0 +1,77 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include +#include + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace builtin { +namespace dequantize { + +struct OpContext { + OpContext(TfLiteContext* context, TfLiteNode* node) { + input = GetInput(context, node, 0); + output = GetOutput(context, node, 0); + } + TfLiteTensor* input; + TfLiteTensor* output; +}; + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + OpContext op_context(context, node); + + TF_LITE_ENSURE(context, op_context.input->type == kTfLiteUInt8); + + op_context.output->type = kTfLiteFloat32; + return context->ResizeTensor(context, op_context.output, + TfLiteIntArrayCopy(op_context.input->dims)); +} + +TfLiteStatus 
Eval(TfLiteContext* context, TfLiteNode* node) { + OpContext op_context(context, node); + + auto zero_point = op_context.input->params.zero_point; + auto scale = op_context.input->params.scale; + + optimized_ops::Dequantize(GetTensorData(op_context.input), + GetTensorDims(op_context.input), zero_point, scale, + GetTensorData(op_context.output), + GetTensorDims(op_context.output)); + return kTfLiteOk; +} + +} // namespace dequantize + +TfLiteRegistration* Register_DEQUANTIZE_OPT() { + static TfLiteRegistration r = {nullptr, nullptr, dequantize::Prepare, + dequantize::Eval}; + return &r; +} + +TfLiteRegistration* Register_DEQUANTIZE() { return Register_DEQUANTIZE_OPT(); } + +} // namespace builtin +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/dequantize_test.cc b/tensorflow/contrib/lite/kernels/dequantize_test.cc new file mode 100644 index 0000000000..fcd7420617 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/dequantize_test.cc @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace { + +using ::testing::ElementsAreArray; + +class DequantizeOpModel : public SingleOpModel { + public: + DequantizeOpModel(std::initializer_list shape, float min, float max) { + input_ = AddInput({TensorType_UINT8, shape, min, max}); + output_ = AddOutput({TensorType_FLOAT32, shape}); + SetBuiltinOp(BuiltinOperator_DEQUANTIZE, BuiltinOptions_DequantizeOptions, + CreateDequantizeOptions(builder_).Union()); + + BuildInterpreter({GetShape(input_)}); + } + + void SetInput(std::initializer_list data) { + PopulateTensor(input_, data); + } + + std::vector GetOutput() { return ExtractVector(output_); } + + private: + int input_; + int output_; +}; + +TEST(SplitOpTest, FourDimensional) { + DequantizeOpModel m({2, 5}, -63.5, 64); + + m.SetInput({0, 1, 2, 3, 4, 251, 252, 253, 254, 255}); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {-63.5, -63, -62.5, -62, -61.5, 62, 62.5, 63, 63.5, 64}))); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 06b7ce4a97..9537b79a9a 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -66,6 +66,7 @@ TfLiteRegistration* Register_EXP(); TfLiteRegistration* Register_TOPK_V2(); TfLiteRegistration* Register_LOG_SOFTMAX(); TfLiteRegistration* Register_CAST(); +TfLiteRegistration* Register_DEQUANTIZE(); BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_RELU, Register_RELU()); @@ -121,6 
+122,7 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_TOPK_V2, Register_TOPK_V2()); AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); AddBuiltin(BuiltinOperator_CAST, Register_CAST()); + AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); } TfLiteRegistration* BuiltinOpResolver::FindOp( diff --git a/tensorflow/contrib/lite/model.cc b/tensorflow/contrib/lite/model.cc index 141d04afd7..8c456e70da 100644 --- a/tensorflow/contrib/lite/model.cc +++ b/tensorflow/contrib/lite/model.cc @@ -288,6 +288,7 @@ void* ParseOpData(const Operator* op, BuiltinOperator op_type, case BuiltinOperator_TOPK_V2: case BuiltinOperator_LOG_SOFTMAX: case BuiltinOperator_CAST: + case BuiltinOperator_DEQUANTIZE: break; case BuiltinOperator_LSH_PROJECTION: { TfLiteLSHProjectionParams* params = diff --git a/tensorflow/contrib/lite/nnapi_delegate.cc b/tensorflow/contrib/lite/nnapi_delegate.cc index 80036d8033..9d00d965d3 100644 --- a/tensorflow/contrib/lite/nnapi_delegate.cc +++ b/tensorflow/contrib/lite/nnapi_delegate.cc @@ -346,6 +346,7 @@ void AddOpsAndParams(tflite::Interpreter* interpreter, case tflite::BuiltinOperator_STRIDED_SLICE: case tflite::BuiltinOperator_EXP: case tflite::BuiltinOperator_LOG_SOFTMAX: + case tflite::BuiltinOperator_DEQUANTIZE: case tflite::BuiltinOperator_DELEGATE: case tflite::BuiltinOperator_CAST: FATAL("Op code %d is currently not delegated to NNAPI", builtin); diff --git a/tensorflow/contrib/lite/schema/schema.fbs b/tensorflow/contrib/lite/schema/schema.fbs index 5f617a7e12..04387fed33 100644 --- a/tensorflow/contrib/lite/schema/schema.fbs +++ b/tensorflow/contrib/lite/schema/schema.fbs @@ -75,7 +75,7 @@ enum BuiltinOperator : byte { CONV_2D = 3, DEPTHWISE_CONV_2D = 4, // DEPTH_TO_SPACE = 5, - // DEQUANTIZE = 6, + DEQUANTIZE = 6, EMBEDDING_LOOKUP = 7, // FLOOR = 8, FULLY_CONNECTED = 9, @@ -171,6 +171,7 @@ union BuiltinOptions { SplitOptions, LogSoftmaxOptions, CastOptions, + DequantizeOptions, } enum Padding : byte 
{ SAME, VALID } @@ -379,6 +380,9 @@ table LogSoftmaxOptions { table CastOptions { } +table DequantizeOptions { +} + // An OperatorCode can be an enum value (BuiltinOperator) if the operator is a // builtin, or a string if the operator is custom. table OperatorCode { diff --git a/tensorflow/contrib/lite/schema/schema_generated.h b/tensorflow/contrib/lite/schema/schema_generated.h index fcacc9816a..b922de2081 100755 --- a/tensorflow/contrib/lite/schema/schema_generated.h +++ b/tensorflow/contrib/lite/schema/schema_generated.h @@ -142,6 +142,9 @@ struct LogSoftmaxOptionsT; struct CastOptions; struct CastOptionsT; +struct DequantizeOptions; +struct DequantizeOptionsT; + struct OperatorCode; struct OperatorCodeT; @@ -204,6 +207,7 @@ enum BuiltinOperator { BuiltinOperator_CONCATENATION = 2, BuiltinOperator_CONV_2D = 3, BuiltinOperator_DEPTHWISE_CONV_2D = 4, + BuiltinOperator_DEQUANTIZE = 6, BuiltinOperator_EMBEDDING_LOOKUP = 7, BuiltinOperator_FULLY_CONNECTED = 9, BuiltinOperator_HASHTABLE_LOOKUP = 10, @@ -254,13 +258,14 @@ enum BuiltinOperator { BuiltinOperator_MAX = BuiltinOperator_CAST }; -inline BuiltinOperator (&EnumValuesBuiltinOperator())[51] { +inline BuiltinOperator (&EnumValuesBuiltinOperator())[52] { static BuiltinOperator values[] = { BuiltinOperator_ADD, BuiltinOperator_AVERAGE_POOL_2D, BuiltinOperator_CONCATENATION, BuiltinOperator_CONV_2D, BuiltinOperator_DEPTHWISE_CONV_2D, + BuiltinOperator_DEQUANTIZE, BuiltinOperator_EMBEDDING_LOOKUP, BuiltinOperator_FULLY_CONNECTED, BuiltinOperator_HASHTABLE_LOOKUP, @@ -319,7 +324,7 @@ inline const char **EnumNamesBuiltinOperator() { "CONV_2D", "DEPTHWISE_CONV_2D", "", - "", + "DEQUANTIZE", "EMBEDDING_LOOKUP", "", "FULLY_CONNECTED", @@ -416,11 +421,12 @@ enum BuiltinOptions { BuiltinOptions_SplitOptions = 35, BuiltinOptions_LogSoftmaxOptions = 36, BuiltinOptions_CastOptions = 37, + BuiltinOptions_DequantizeOptions = 38, BuiltinOptions_MIN = BuiltinOptions_NONE, - BuiltinOptions_MAX = BuiltinOptions_CastOptions + 
BuiltinOptions_MAX = BuiltinOptions_DequantizeOptions }; -inline BuiltinOptions (&EnumValuesBuiltinOptions())[38] { +inline BuiltinOptions (&EnumValuesBuiltinOptions())[39] { static BuiltinOptions values[] = { BuiltinOptions_NONE, BuiltinOptions_Conv2DOptions, @@ -459,7 +465,8 @@ inline BuiltinOptions (&EnumValuesBuiltinOptions())[38] { BuiltinOptions_TopKV2Options, BuiltinOptions_SplitOptions, BuiltinOptions_LogSoftmaxOptions, - BuiltinOptions_CastOptions + BuiltinOptions_CastOptions, + BuiltinOptions_DequantizeOptions }; return values; } @@ -504,6 +511,7 @@ inline const char **EnumNamesBuiltinOptions() { "SplitOptions", "LogSoftmaxOptions", "CastOptions", + "DequantizeOptions", nullptr }; return names; @@ -666,6 +674,10 @@ template<> struct BuiltinOptionsTraits { static const BuiltinOptions enum_value = BuiltinOptions_CastOptions; }; +template<> struct BuiltinOptionsTraits { + static const BuiltinOptions enum_value = BuiltinOptions_DequantizeOptions; +}; + struct BuiltinOptionsUnion { BuiltinOptions type; void *value; @@ -993,6 +1005,14 @@ struct BuiltinOptionsUnion { return type == BuiltinOptions_CastOptions ? reinterpret_cast(value) : nullptr; } + DequantizeOptionsT *AsDequantizeOptions() { + return type == BuiltinOptions_DequantizeOptions ? + reinterpret_cast(value) : nullptr; + } + const DequantizeOptionsT *AsDequantizeOptions() const { + return type == BuiltinOptions_DequantizeOptions ? 
+ reinterpret_cast(value) : nullptr; + } }; bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type); @@ -3696,6 +3716,46 @@ inline flatbuffers::Offset CreateCastOptions( flatbuffers::Offset CreateCastOptions(flatbuffers::FlatBufferBuilder &_fbb, const CastOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +struct DequantizeOptionsT : public flatbuffers::NativeTable { + typedef DequantizeOptions TableType; + DequantizeOptionsT() { + } +}; + +struct DequantizeOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { + typedef DequantizeOptionsT NativeTableType; + bool Verify(flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + verifier.EndTable(); + } + DequantizeOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const; + void UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const; + static flatbuffers::Offset Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); +}; + +struct DequantizeOptionsBuilder { + flatbuffers::FlatBufferBuilder &fbb_; + flatbuffers::uoffset_t start_; + explicit DequantizeOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + DequantizeOptionsBuilder &operator=(const DequantizeOptionsBuilder &); + flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = flatbuffers::Offset(end); + return o; + } +}; + +inline flatbuffers::Offset CreateDequantizeOptions( + flatbuffers::FlatBufferBuilder &_fbb) { + DequantizeOptionsBuilder builder_(_fbb); + return builder_.Finish(); +} + +flatbuffers::Offset CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr); + struct OperatorCodeT : public flatbuffers::NativeTable { typedef OperatorCode 
TableType; BuiltinOperator builtin_code; @@ -3924,6 +3984,9 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table { const CastOptions *builtin_options_as_CastOptions() const { return builtin_options_type() == BuiltinOptions_CastOptions ? static_cast(builtin_options()) : nullptr; } + const DequantizeOptions *builtin_options_as_DequantizeOptions() const { + return builtin_options_type() == BuiltinOptions_DequantizeOptions ? static_cast(builtin_options()) : nullptr; + } const flatbuffers::Vector *custom_options() const { return GetPointer *>(VT_CUSTOM_OPTIONS); } @@ -4098,6 +4161,10 @@ template<> inline const CastOptions *Operator::builtin_options_as() return builtin_options_as_CastOptions(); } +template<> inline const DequantizeOptions *Operator::builtin_options_as() const { + return builtin_options_as_DequantizeOptions(); +} + struct OperatorBuilder { flatbuffers::FlatBufferBuilder &fbb_; flatbuffers::uoffset_t start_; @@ -5603,6 +5670,29 @@ inline flatbuffers::Offset CreateCastOptions(flatbuffers::FlatBuffe _fbb); } +inline DequantizeOptionsT *DequantizeOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const { + auto _o = new DequantizeOptionsT(); + UnPackTo(_o, _resolver); + return _o; +} + +inline void DequantizeOptions::UnPackTo(DequantizeOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const { + (void)_o; + (void)_resolver; +} + +inline flatbuffers::Offset DequantizeOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) { + return CreateDequantizeOptions(_fbb, _o, _rehasher); +} + +inline flatbuffers::Offset CreateDequantizeOptions(flatbuffers::FlatBufferBuilder &_fbb, const DequantizeOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) { + (void)_rehasher; + (void)_o; + struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const DequantizeOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { 
&_fbb, _o, _rehasher}; (void)_va; + return tflite::CreateDequantizeOptions( + _fbb); +} + inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const { auto _o = new OperatorCodeT(); UnPackTo(_o, _resolver); @@ -5931,6 +6021,10 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob auto ptr = reinterpret_cast(obj); return verifier.VerifyTable(ptr); } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } default: return false; } } @@ -6097,6 +6191,10 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c auto ptr = reinterpret_cast(obj); return ptr->UnPack(resolver); } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(obj); + return ptr->UnPack(resolver); + } default: return nullptr; } } @@ -6251,6 +6349,10 @@ inline flatbuffers::Offset BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff auto ptr = reinterpret_cast(value); return CreateCastOptions(_fbb, ptr, _rehasher).Union(); } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(value); + return CreateDequantizeOptions(_fbb, ptr, _rehasher).Union(); + } default: return 0; } } @@ -6405,6 +6507,10 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL value = new CastOptionsT(*reinterpret_cast(u.value)); break; } + case BuiltinOptions_DequantizeOptions: { + value = new DequantizeOptionsT(*reinterpret_cast(u.value)); + break; + } default: break; } @@ -6597,6 +6703,11 @@ inline void BuiltinOptionsUnion::Reset() { delete ptr; break; } + case BuiltinOptions_DequantizeOptions: { + auto ptr = reinterpret_cast(value); + delete ptr; + break; + } default: break; } value = nullptr; -- GitLab From 1a56de30593ae08a1f0e01021ff217a19bf41bfa Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 7 Mar 2018 09:08:27 -0800 Subject: [PATCH 522/884] Add a template helper that generates expressions from single-statement nodes. PiperOrigin-RevId: 188184507 --- tensorflow/contrib/py2tf/pyct/templates.py | 14 ++++++++++++ .../contrib/py2tf/pyct/templates_test.py | 22 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/tensorflow/contrib/py2tf/pyct/templates.py b/tensorflow/contrib/py2tf/pyct/templates.py index 7021e2ba93..cdd71dc56d 100644 --- a/tensorflow/contrib/py2tf/pyct/templates.py +++ b/tensorflow/contrib/py2tf/pyct/templates.py @@ -165,3 +165,17 @@ def replace(template, **replacements): if isinstance(results, list): return [qual_names.resolve(r) for r in results] return qual_names.resolve(results) + + +def replace_as_expression(template, **replacements): + """Variant of replace that generates expressions, instead of code blocks.""" + replacement = replace(template, **replacements) + if len(replacement) != 1: + raise ValueError( + 'single expression expected; for more general templates use replace') + node = replacement[0] + if not isinstance(node, gast.Expr): + raise ValueError( + 'the template is expected to generate an expression node; instead ' + 'found %s' % node) + return node.value diff --git a/tensorflow/contrib/py2tf/pyct/templates_test.py b/tensorflow/contrib/py2tf/pyct/templates_test.py index 0d1c1c5d9e..d7835b80a7 100644 --- a/tensorflow/contrib/py2tf/pyct/templates_test.py +++ b/tensorflow/contrib/py2tf/pyct/templates_test.py @@ -96,6 +96,28 @@ class TemplatesTest(test.TestCase): with self.assertRaises(ValueError): templates.replace(template, foo=1) + def replace_as_expression(self): + template = """ + foo(a) + """ + + node = templates.replace(template, foo='bar', a='baz') + self.assertTrue(node is gast.Call) + self.assertEqual(node.func.id, 'bar') + self.assertEqual(node.func.args[0].id, 'baz') + + def replace_as_expression_restrictions(self): + template = """ + foo(a) + bar(b) + """ + with 
self.assertRaises(ValueError): + templates.replace_as_expression(template) + with self.assertRaises(ValueError): + templates.replace('') + with self.assertRaises(ValueError): + templates.replace('a = b') + if __name__ == '__main__': test.main() -- GitLab From d2d185e35b8d6cb2471528a429d094a6cb91006d Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 7 Mar 2018 09:16:32 -0800 Subject: [PATCH 523/884] [tf.data] Expose `tf.contrib.data.SqlDataset`. PiperOrigin-RevId: 188185438 --- tensorflow/contrib/data/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 1777727de8..1311119e79 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -23,6 +23,7 @@ removing existing functionality. See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@Counter +@@SqlDataset @@batch_and_drop_remainder @@bucket_by_sequence_length -- GitLab From ea974c64578d6d181b402c6c9bf05e7d6bf68961 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 7 Mar 2018 09:31:48 -0800 Subject: [PATCH 524/884] Fix tf.train.Saver's max_to_keep when executing eagerly. It was keeping everything, since the list of things to delete was reset in build() and build() was called every save. 
PiperOrigin-RevId: 188187349 --- tensorflow/python/training/saver.py | 14 +++-- tensorflow/python/training/saver_test.py | 76 +++++++++++++++++++++++- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index 6c80562968..df3ccce63e 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -1299,6 +1299,11 @@ class Saver(object): self._write_version = write_version self._pad_step_number = pad_step_number self._filename = filename + self._last_checkpoints = [] + self._checkpoints_to_be_deleted = [] + if context.in_eager_mode(): + self._next_checkpoint_time = ( + time.time() + self._keep_checkpoint_every_n_hours * 3600) if not defer_build and context.in_graph_mode(): self.build() if self.saver_def: @@ -1359,11 +1364,10 @@ class Saver(object): self.saver_def.restore_op_name, self._name) self._check_saver_def() - # Updates next checkpoint time. - self._next_checkpoint_time = ( - time.time() + self.saver_def.keep_checkpoint_every_n_hours * 3600) - self._last_checkpoints = [] - self._checkpoints_to_be_deleted = [] + if context.in_graph_mode(): # Set in __init__ when executing eagerly. + # Updates next checkpoint time. 
+ self._next_checkpoint_time = ( + time.time() + self.saver_def.keep_checkpoint_every_n_hours * 3600) def _check_saver_def(self): if not isinstance(self.saver_def, saver_pb2.SaverDef): diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 4fd3b58da1..1021ccae5f 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -1059,6 +1059,77 @@ class MaxToKeepTest(test.TestCase): self.assertEqual(checkpoint_state.all_model_checkpoint_paths, all_model_checkpoint_paths) + def testMaxToKeepEager(self): + with context.eager_mode(): + save_dir = self._get_test_dir("max_to_keep_non_sharded") + + v = variable_scope.variable(10.0, name="v") + save = saver_module.Saver({"v": v}, max_to_keep=2) + self.evaluate(variables.global_variables_initializer()) + if context.in_graph_mode(): + self.assertEqual([], save.last_checkpoints) + + s1 = save.save(None, os.path.join(save_dir, "s1")) + self.assertEqual([s1], save.last_checkpoints) + self.assertTrue(saver_module.checkpoint_exists(s1)) + self.assertCheckpointState( + model_checkpoint_path=s1, + all_model_checkpoint_paths=[s1], + save_dir=save_dir) + + s2 = save.save(None, os.path.join(save_dir, "s2")) + self.assertEqual([s1, s2], save.last_checkpoints) + self.assertTrue(saver_module.checkpoint_exists(s1)) + self.assertTrue(saver_module.checkpoint_exists(s2)) + self.assertCheckpointState( + model_checkpoint_path=s2, + all_model_checkpoint_paths=[s1, s2], + save_dir=save_dir) + + s3 = save.save(None, os.path.join(save_dir, "s3")) + self.assertEqual([s2, s3], save.last_checkpoints) + self.assertFalse(saver_module.checkpoint_exists(s1)) + self.assertTrue(saver_module.checkpoint_exists(s2)) + self.assertTrue(saver_module.checkpoint_exists(s3)) + self.assertCheckpointState( + model_checkpoint_path=s3, + all_model_checkpoint_paths=[s2, s3], + save_dir=save_dir) + + # Create a second helper, identical to the first. 
+ save2 = saver_module.Saver({"v": v}, max_to_keep=2) + save2.set_last_checkpoints(save.last_checkpoints) + + # Exercise the first helper. + + # Adding s2 again (old s2 is removed first, then new s2 appended) + s2 = save.save(None, os.path.join(save_dir, "s2")) + self.assertEqual([s3, s2], save.last_checkpoints) + self.assertFalse(saver_module.checkpoint_exists(s1)) + self.assertTrue(saver_module.checkpoint_exists(s3)) + self.assertTrue(saver_module.checkpoint_exists(s2)) + self.assertCheckpointState( + model_checkpoint_path=s2, + all_model_checkpoint_paths=[s3, s2], + save_dir=save_dir) + + # Adding s1 (s3 should now be deleted as oldest in list) + s1 = save.save(None, os.path.join(save_dir, "s1")) + self.assertEqual([s2, s1], save.last_checkpoints) + self.assertFalse(saver_module.checkpoint_exists(s3)) + self.assertTrue(saver_module.checkpoint_exists(s2)) + self.assertCheckpointState( + model_checkpoint_path=s1, + all_model_checkpoint_paths=[s2, s1], + save_dir=save_dir) + + s2 = save2.save(None, os.path.join(save_dir, "s2")) + self.assertEqual([s3, s2], save2.last_checkpoints) + # Created by the first helper. + self.assertTrue(saver_module.checkpoint_exists(s1)) + # Deleted by the first helper. + self.assertFalse(saver_module.checkpoint_exists(s3)) + def testNonSharded(self): save_dir = self._get_test_dir("max_to_keep_non_sharded") @@ -1321,15 +1392,16 @@ class KeepCheckpointEveryNHoursTest(test.TestCase): gfile.MakeDirs(test_dir) return test_dir + @test_util.run_in_graph_and_eager_modes() @test.mock.patch.object(saver_module, "time") def testNonSharded(self, mock_time): save_dir = self._get_test_dir("keep_checkpoint_every_n_hours") with self.test_session() as sess: - v = variables.Variable([10.0], name="v") + v = variable_scope.variable([10.0], name="v") # Run the initializer NOW to avoid the 0.5s overhead of the first Run() # call, which throws the test timing off in fastbuild mode. 
- variables.global_variables_initializer().run() + self.evaluate(variables.global_variables_initializer()) # Create a saver that will keep the last 2 checkpoints plus one every 0.7 # seconds. start_time = time.time() -- GitLab From 8c5d50852f29f04aae10675c50113b5bb8fb2507 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 7 Mar 2018 09:34:44 -0800 Subject: [PATCH 525/884] Add instrumentation interfaces to the GCS file system. PiperOrigin-RevId: 188187793 --- tensorflow/core/platform/cloud/BUILD | 2 +- .../core/platform/cloud/curl_http_request.cc | 20 ++ .../core/platform/cloud/curl_http_request.h | 5 + .../platform/cloud/curl_http_request_test.cc | 203 ++++++++++++++++++ .../core/platform/cloud/gcs_dns_cache_test.cc | 2 +- .../core/platform/cloud/gcs_file_system.cc | 19 ++ .../core/platform/cloud/gcs_file_system.h | 50 ++++- .../platform/cloud/gcs_file_system_test.cc | 69 ++++++ tensorflow/core/platform/cloud/gcs_throttle.h | 4 +- tensorflow/core/platform/cloud/http_request.h | 43 ++++ 10 files changed, 412 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/platform/cloud/BUILD b/tensorflow/core/platform/cloud/BUILD index 0a17a419d3..21636641e7 100644 --- a/tensorflow/core/platform/cloud/BUILD +++ b/tensorflow/core/platform/cloud/BUILD @@ -49,7 +49,7 @@ cc_library( srcs = ["ram_file_block_cache.cc"], hdrs = ["ram_file_block_cache.h"], copts = tf_copts(), - visibility = ["//tensorflow:__subpackages__"], + visibility = ["//visibility:public"], deps = [ ":file_block_cache", "//tensorflow/core:lib", diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 35bdcba737..20d9285a70 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -228,10 +228,17 @@ void CurlHttpRequest::AddAuthBearerHeader(const string& auth_token) { } } +void CurlHttpRequest::SetRequestStats(RequestStats* stats) { + CheckNotSent(); + CHECK(stats_ == 
nullptr) << "SetRequestStats already called"; + stats_ = stats; +} + void CurlHttpRequest::SetDeleteRequest() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; + method_ = RequestMethod::kDelete; TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE"), "Setting delete request"); @@ -242,6 +249,7 @@ Status CurlHttpRequest::SetPutFromFile(const string& body_filepath, CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; + method_ = RequestMethod::kPut; if (put_body_) { fclose(put_body_); } @@ -271,6 +279,7 @@ void CurlHttpRequest::SetPutEmptyBody() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; + method_ = RequestMethod::kPut; TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting put request"); curl_headers_ = @@ -289,6 +298,7 @@ void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; + method_ = RequestMethod::kPost; curl_headers_ = libcurl_->curl_slist_append( curl_headers_, strings::StrCat("Content-Length: ", size).c_str()); TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( @@ -309,6 +319,7 @@ void CurlHttpRequest::SetPostEmptyBody() { CheckNotSent(); CheckMethodNotSet(); is_method_set_ = true; + method_ = RequestMethod::kPost; TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), "Setting POST request"); @@ -507,6 +518,10 @@ Status CurlHttpRequest::Send() { libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer), "Setting error buffer"); + if (stats_ != nullptr) { + stats_->RecordRequest(this, uri_, method_); + } + const CURLcode curl_result = libcurl_->curl_easy_perform(curl_); TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( curl_result, "Performing request. 
Detailed error: ", error_buffer); @@ -599,6 +614,11 @@ Status CurlHttpRequest::Send() { if (!result.ok()) { response_buffer_->clear(); } + + if (stats_ != nullptr) { + stats_->RecordResponse(this, uri_, method_, result); + } + return result; } diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h index c9f60cb5fc..2a9be81f28 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.h +++ b/tensorflow/core/platform/cloud/curl_http_request.h @@ -75,6 +75,8 @@ class CurlHttpRequest : public HttpRequest { /// Sets the 'Authorization' header to the value of 'Bearer ' + auth_token. void AddAuthBearerHeader(const string& auth_token) override; + void SetRequestStats(RequestStats* stats) override; + /// Makes the request a DELETE request. void SetDeleteRequest() override; @@ -186,6 +188,8 @@ class CurlHttpRequest : public HttpRequest { curl_slist* curl_headers_ = nullptr; curl_slist* resolve_list_ = nullptr; + RequestStats* stats_ = nullptr; + std::vector default_response_buffer_; std::unordered_map response_headers_; @@ -213,6 +217,7 @@ class CurlHttpRequest : public HttpRequest { // Store the URI to help disambiguate requests when errors occur. string uri_; + RequestMethod method_ = RequestMethod::kGet; // Limit the size of a http response that is copied into an error message. 
const size_t response_to_error_limit_ = 500; diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 4cded9b81b..0f0ccba050 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -634,5 +634,208 @@ TEST(CurlHttpRequestTest, ProgressIsStuck) { status.error_message()); } +class TestStats : public HttpRequest::RequestStats { + public: + ~TestStats() override = default; + + void RecordRequest(const HttpRequest* request, const string& uri, + HttpRequest::RequestMethod method) override { + has_recorded_request_ = true; + record_request_request_ = request; + record_request_uri_ = uri; + record_request_method_ = method; + } + + void RecordResponse(const HttpRequest* request, const string& uri, + HttpRequest::RequestMethod method, + const Status& result) override { + has_recorded_response_ = true; + record_response_request_ = request; + record_response_uri_ = uri; + record_response_method_ = method; + record_response_result_ = result; + } + + const HttpRequest* record_request_request_ = nullptr; + string record_request_uri_ = "http://www.testuri.com"; + HttpRequest::RequestMethod record_request_method_ = + HttpRequest::RequestMethod::kGet; + + const HttpRequest* record_response_request_ = nullptr; + string record_response_uri_ = "http://www.testuri.com"; + HttpRequest::RequestMethod record_response_method_ = + HttpRequest::RequestMethod::kGet; + Status record_response_result_; + + bool has_recorded_request_ = false; + bool has_recorded_response_ = false; +}; + +class StatsTestFakeLibCurl : public FakeLibCurl { + public: + StatsTestFakeLibCurl(TestStats* stats, const string& response_content, + uint64 response_code) + : FakeLibCurl(response_content, response_code), stats_(stats) {} + CURLcode curl_easy_perform(CURL* curl) override { + CHECK(!performed_request_); + performed_request_ = true; + 
stats_had_recorded_request_ = stats_->has_recorded_request_; + stats_had_recorded_response_ = stats_->has_recorded_response_; + return FakeLibCurl::curl_easy_perform(curl); + }; + + TestStats* stats_; + bool performed_request_ = false; + bool stats_had_recorded_request_; + bool stats_had_recorded_response_; +}; + +TEST(CurlHttpRequestTest, StatsGetSuccessful) { + TestStats stats; + StatsTestFakeLibCurl libcurl(&stats, "get response", 200); + CurlHttpRequest http_request(&libcurl); + + std::vector scratch; + scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end()); + scratch.reserve(100); + + http_request.SetRequestStats(&stats); + + http_request.SetUri("http://www.testuri.com"); + http_request.AddAuthBearerHeader("fake-bearer"); + http_request.SetRange(100, 199); + http_request.SetResultBuffer(&scratch); + TF_EXPECT_OK(http_request.Send()); + + EXPECT_EQ("get response", string(scratch.begin(), scratch.end())); + + // Check interaction with stats. + ASSERT_TRUE(stats.has_recorded_request_); + EXPECT_EQ(&http_request, stats.record_request_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_request_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kGet, stats.record_request_method_); + + ASSERT_TRUE(stats.has_recorded_response_); + EXPECT_EQ(&http_request, stats.record_response_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_response_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kGet, stats.record_response_method_); + TF_EXPECT_OK(stats.record_response_result_); + + // Check interaction with libcurl. 
+ EXPECT_TRUE(libcurl.performed_request_); + EXPECT_TRUE(libcurl.stats_had_recorded_request_); + EXPECT_FALSE(libcurl.stats_had_recorded_response_); +} + +TEST(CurlHttpRequestTest, StatsGetNotFound) { + TestStats stats; + StatsTestFakeLibCurl libcurl(&stats, "get other response", 404); + CurlHttpRequest http_request(&libcurl); + + std::vector scratch; + scratch.insert(scratch.begin(), kTestContent.begin(), kTestContent.end()); + scratch.reserve(100); + + http_request.SetRequestStats(&stats); + + http_request.SetUri("http://www.testuri.com"); + http_request.AddAuthBearerHeader("fake-bearer"); + http_request.SetRange(100, 199); + http_request.SetResultBuffer(&scratch); + Status s = http_request.Send(); + + // Check interaction with stats. + ASSERT_TRUE(stats.has_recorded_request_); + EXPECT_EQ(&http_request, stats.record_request_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_request_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kGet, stats.record_request_method_); + + ASSERT_TRUE(stats.has_recorded_response_); + EXPECT_EQ(&http_request, stats.record_response_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_response_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kGet, stats.record_response_method_); + EXPECT_TRUE(errors::IsNotFound(stats.record_response_result_)); + EXPECT_EQ(s, stats.record_response_result_); + + // Check interaction with libcurl. + EXPECT_TRUE(libcurl.performed_request_); + EXPECT_TRUE(libcurl.stats_had_recorded_request_); + EXPECT_FALSE(libcurl.stats_had_recorded_response_); +} + +TEST(CurlHttpRequestTest, StatsPost) { + TestStats stats; + + FakeLibCurl libcurl("", 200); + CurlHttpRequest http_request(&libcurl); + + http_request.SetRequestStats(&stats); + + string content = "post body content"; + + http_request.SetUri("http://www.testuri.com"); + http_request.SetPostFromBuffer(content.c_str(), content.size()); + TF_EXPECT_OK(http_request.Send()); + + // Check interaction with stats. 
+ ASSERT_TRUE(stats.has_recorded_request_); + EXPECT_EQ(&http_request, stats.record_request_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_request_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kPost, stats.record_request_method_); + + ASSERT_TRUE(stats.has_recorded_response_); + EXPECT_EQ(&http_request, stats.record_response_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_response_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kPost, stats.record_response_method_); + TF_EXPECT_OK(stats.record_response_result_); +} + +TEST(CurlHttpRequestTest, StatsDelete) { + TestStats stats; + + FakeLibCurl libcurl("", 200); + CurlHttpRequest http_request(&libcurl); + http_request.SetRequestStats(&stats); + http_request.SetUri("http://www.testuri.com"); + http_request.SetDeleteRequest(); + TF_EXPECT_OK(http_request.Send()); + + // Check interaction with stats. + ASSERT_TRUE(stats.has_recorded_request_); + EXPECT_EQ(&http_request, stats.record_request_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_request_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kDelete, stats.record_request_method_); + + ASSERT_TRUE(stats.has_recorded_response_); + EXPECT_EQ(&http_request, stats.record_response_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_response_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kDelete, stats.record_response_method_); + TF_EXPECT_OK(stats.record_response_result_); +} + +TEST(CurlHttpRequestTest, StatsPut) { + TestStats stats; + + FakeLibCurl libcurl("", 200); + CurlHttpRequest http_request(&libcurl); + http_request.SetRequestStats(&stats); + http_request.SetUri("http://www.testuri.com"); + http_request.AddAuthBearerHeader("fake-bearer"); + http_request.SetPutEmptyBody(); + TF_EXPECT_OK(http_request.Send()); + + // Check interaction with stats. 
+ ASSERT_TRUE(stats.has_recorded_request_); + EXPECT_EQ(&http_request, stats.record_request_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_request_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kPut, stats.record_request_method_); + + ASSERT_TRUE(stats.has_recorded_response_); + EXPECT_EQ(&http_request, stats.record_response_request_); + EXPECT_EQ("http://www.testuri.com", stats.record_response_uri_); + EXPECT_EQ(HttpRequest::RequestMethod::kPut, stats.record_response_method_); + TF_EXPECT_OK(stats.record_response_result_); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc index 8be452ff44..237ce6b5e5 100644 --- a/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc +++ b/tensorflow/core/platform/cloud/gcs_dns_cache_test.cc @@ -36,7 +36,7 @@ class TestHttpRequest : public HttpRequest { } void AddAuthBearerHeader(const string& auth_token) override {} - + void SetRequestStats(HttpRequest::RequestStats* stats) override {} void SetDeleteRequest() override {} Status SetPutFromFile(const string& body_filepath, size_t offset) override { diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 84b65cec4f..1691826483 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -813,6 +813,10 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, request->SetResultBufferDirect(buffer, n); request->SetTimeouts(timeouts_.connect, timeouts_.idle, timeouts_.read); + if (stats_ != nullptr) { + stats_->RecordBlockLoadRequest(filename, offset); + } + TF_RETURN_WITH_CONTEXT_IF_ERROR(request->Send(), " when reading gs://", bucket, "/", object); @@ -821,6 +825,10 @@ Status GcsFileSystem::LoadBufferFromGCS(const string& filename, size_t offset, VLOG(1) << "Successful read of gs://" << bucket << "/" << 
object << " @ " << offset << " of size: " << bytes_read; + if (stats_ != nullptr) { + stats_->RecordBlockRetrieved(filename, offset, bytes_read); + } + throttle_.RecordResponse(bytes_read); if (bytes_read < block_size()) { @@ -1455,6 +1463,13 @@ void GcsFileSystem::FlushCaches() { matching_paths_cache_->Clear(); } +void GcsFileSystem::SetStats(GcsStatsInterface* stats) { + CHECK(stats_ == nullptr) << "SetStats() has already been called."; + CHECK(stats != nullptr); + stats_ = stats; + stats_->Init(this, &throttle_, file_block_cache_.get()); +} + // Creates an HttpRequest and sets several parameters that are common to all // requests. All code (in GcsFileSystem) that creates an HttpRequest should // go through this method, rather than directly using http_request_factory_. @@ -1474,6 +1489,10 @@ Status GcsFileSystem::CreateHttpRequest(std::unique_ptr* request) { additional_header_->second); } + if (stats_ != nullptr) { + new_request->SetRequestStats(stats_->HttpStats()); + } + if (!throttle_.AdmitRequest()) { return errors::Unavailable("Request throttled"); } diff --git a/tensorflow/core/platform/cloud/gcs_file_system.h b/tensorflow/core/platform/cloud/gcs_file_system.h index e8edde8a44..703c8d5778 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.h +++ b/tensorflow/core/platform/cloud/gcs_file_system.h @@ -32,6 +32,39 @@ limitations under the License. namespace tensorflow { +class GcsFileSystem; + +/// GcsStatsInterface allows for instrumentation of the GCS file system. +/// +/// GcsStatsInterface and its subclasses must be safe to use from multiple +/// threads concurrently. +/// +/// WARNING! This is an experimental interface that may change or go away at any +/// time. +class GcsStatsInterface { + public: + /// Init is called by the GcsFileSystem immediately after being registered. 
+ virtual void Init(GcsFileSystem* fs, GcsThrottle* throttle, + const FileBlockCache* block_cache) = 0; + + /// RecordBlockLoadRequest is called to record a block load request is about + /// to be made. + virtual void RecordBlockLoadRequest(const string& file, size_t offset) = 0; + + /// RecordBlockRetrieved is called once a block within the file has been + /// retrieved. + virtual void RecordBlockRetrieved(const string& file, size_t offset, + size_t bytes_transferred) = 0; + + /// HttpStats is called to optionally provide a RequestStats listener + /// to be annotated on every HTTP request made to the GCS API. + /// + /// HttpStats() may return nullptr. + virtual HttpRequest::RequestStats* HttpStats() = 0; + + virtual ~GcsStatsInterface() = default; +}; + /// Google Cloud Storage implementation of a file system. /// /// The clients should use RetryingGcsFileSystem defined below, @@ -90,6 +123,9 @@ class GcsFileSystem : public FileSystem { void FlushCaches() override; + /// Set an object to collect runtime statistics from the GcsFilesystem. + void SetStats(GcsStatsInterface* stats); + /// These accessors are mainly for testing purposes, to verify that the /// environment variables that control these parameters are handled correctly. size_t block_size() const { return file_block_cache_->block_size(); } @@ -205,6 +241,8 @@ class GcsFileSystem : public FileSystem { TimeoutConfig timeouts_; + GcsStatsInterface* stats_ = nullptr; // Not owned. + /// The initial delay for exponential backoffs when retrying failed calls. const int64 initial_retry_delay_usec_ = 1000000L; @@ -217,8 +255,16 @@ class GcsFileSystem : public FileSystem { /// Google Cloud Storage implementation of a file system with retry on failures. 
class RetryingGcsFileSystem : public RetryingFileSystem { public: - RetryingGcsFileSystem() - : RetryingFileSystem(std::unique_ptr(new GcsFileSystem)) {} + RetryingGcsFileSystem() : RetryingGcsFileSystem(new GcsFileSystem) {} + + void SetStats(GcsStatsInterface* stats) { underlying_->SetStats(stats); } + + private: + explicit RetryingGcsFileSystem(GcsFileSystem* fs) + : RetryingFileSystem(std::unique_ptr(fs)), underlying_(fs) {} + + // TODO(b/74259157): Refactor RetryingFileSystem to avoid holding this ptr. + GcsFileSystem* underlying_; }; } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system_test.cc b/tensorflow/core/platform/cloud/gcs_file_system_test.cc index cd9fd3adea..8516421614 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system_test.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system_test.cc @@ -2621,5 +2621,74 @@ TEST(GcsFileSystemTest, CreateHttpRequest) { TF_EXPECT_OK(request->Send()); } +TEST(GcsFileSystemTest, NewRandomAccessFile_StatsRecording) { + class TestGcsStats : public GcsStatsInterface { + public: + void Init(GcsFileSystem* fs, GcsThrottle* throttle, + const FileBlockCache* block_cache) override { + CHECK(fs_ == nullptr); + CHECK(throttle_ == nullptr); + CHECK(block_cache_ == nullptr); + + fs_ = fs; + throttle_ = throttle; + block_cache_ = block_cache; + } + + void RecordBlockLoadRequest(const string& file, size_t offset) override { + block_load_request_file_ = file; + } + + void RecordBlockRetrieved(const string& file, size_t offset, + size_t bytes_transferred) override { + block_retrieved_file_ = file; + block_retrieved_bytes_transferred_ = bytes_transferred; + } + + HttpRequest::RequestStats* HttpStats() override { return nullptr; } + + GcsFileSystem* fs_ = nullptr; + GcsThrottle* throttle_ = nullptr; + const FileBlockCache* block_cache_ = nullptr; + + string block_load_request_file_; + string block_retrieved_file_; + size_t block_retrieved_bytes_transferred_ = 0; + }; + + std::vector 
requests({new FakeHttpRequest( + "Uri: https://storage.googleapis.com/bucket/random_access.txt\n" + "Auth Token: fake_token\n" + "Range: 0-5\n" + "Timeouts: 5 1 20\n", + "012345")}); + GcsFileSystem fs(std::unique_ptr(new FakeAuthProvider), + std::unique_ptr( + new FakeHttpRequestFactory(&requests)), + 0 /* block size */, 0 /* max bytes */, 0 /* max staleness */, + 0 /* stat cache max age */, 0 /* stat cache max entries */, + 0 /* matching paths cache max age */, + 0 /* matching paths cache max entries */, + 0 /* initial retry delay */, kTestTimeoutConfig, + nullptr /* gcs additional header */); + + TestGcsStats stats; + fs.SetStats(&stats); + EXPECT_EQ(stats.fs_, &fs); + + std::unique_ptr file; + TF_EXPECT_OK(fs.NewRandomAccessFile("gs://bucket/random_access.txt", &file)); + + char scratch[6]; + StringPiece result; + + TF_EXPECT_OK(file->Read(0, sizeof(scratch), &result, scratch)); + EXPECT_EQ("012345", result); + + EXPECT_EQ("gs://bucket/random_access.txt", stats.block_load_request_file_); + EXPECT_EQ("gs://bucket/random_access.txt", stats.block_retrieved_file_); + EXPECT_EQ(6, stats.block_retrieved_bytes_transferred_); +} + } // namespace } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_throttle.h b/tensorflow/core/platform/cloud/gcs_throttle.h index 6d5eed7338..97a858e3fe 100644 --- a/tensorflow/core/platform/cloud/gcs_throttle.h +++ b/tensorflow/core/platform/cloud/gcs_throttle.h @@ -118,7 +118,9 @@ class GcsThrottle { /** * is_enabled determines if the throttle is enabled. * - * If !is_enabled(), AdmitRequest() will always return true. + * If !is_enabled(), AdmitRequest() will always return true. To enable the + * throttle, call SetConfig passing in a configuration that has enabled set to + * true. 
*/ bool is_enabled() LOCKS_EXCLUDED(mu_) { mutex_lock l(mu_); diff --git a/tensorflow/core/platform/cloud/http_request.h b/tensorflow/core/platform/cloud/http_request.h index df8a5b86a0..2343bca608 100644 --- a/tensorflow/core/platform/cloud/http_request.h +++ b/tensorflow/core/platform/cloud/http_request.h @@ -47,6 +47,46 @@ class HttpRequest { virtual HttpRequest* Create() = 0; }; + /// RequestMethod is used to capture what type of HTTP request is made and + /// is used in conjunction with RequestStats for instrumentation and + /// monitoring of HTTP requests and their responses. + enum class RequestMethod : char { + kGet, + kPost, + kPut, + kDelete, + }; + + /// RequestMethodName converts a RequestMethod to the canonical method string. + inline static const char* RequestMethodName(RequestMethod m) { + switch (m) { + case RequestMethod::kGet: + return "GET"; + case RequestMethod::kPost: + return "POST"; + case RequestMethod::kPut: + return "PUT"; + case RequestMethod::kDelete: + return "DELETE"; + default: + return "???"; + } + } + + /// RequestStats is a class that can be used to instrument an Http Request. + class RequestStats { + public: + virtual ~RequestStats() = default; + + /// RecordRequest is called right before a request is sent on the wire. + virtual void RecordRequest(const HttpRequest* request, const string& uri, + RequestMethod method) = 0; + + /// RecordResponse is called after the response has been received. + virtual void RecordResponse(const HttpRequest* request, const string& uri, + RequestMethod method, const Status& result) = 0; + }; + HttpRequest() {} virtual ~HttpRequest() {} @@ -73,6 +113,9 @@ class HttpRequest { /// Sets the 'Authorization' header to the value of 'Bearer ' + auth_token. virtual void AddAuthBearerHeader(const string& auth_token) = 0; + /// Sets the RequestStats object to use to record the request and response. + virtual void SetRequestStats(RequestStats* stats) = 0; + /// Makes the request a DELETE request. 
virtual void SetDeleteRequest() = 0; -- GitLab From b9f06e07c417f9d96cb59a4898328a98d0df37b2 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 7 Mar 2018 09:50:46 -0800 Subject: [PATCH 526/884] Add missing equality assertion between the shape of the 2 inputs to the tile op. PiperOrigin-RevId: 188190067 --- tensorflow/core/ops/array_ops.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index 267ce88440..eeb458a287 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -1547,6 +1547,9 @@ REGISTER_OP("Tile") TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &multiples)); if (c->RankKnown(input)) { TF_RETURN_IF_ERROR(c->WithRank(multiples, c->Rank(input), &multiples)); + ShapeHandle dummy; + TF_RETURN_IF_ERROR( + c->Merge(c->input(1), c->Vector(c->Rank(input)), &dummy)); } if (!c->RankKnown(multiples)) { -- GitLab From c6806ae8fcefa6deb701ff06a50a060348bcee90 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 7 Mar 2018 09:51:14 -0800 Subject: [PATCH 527/884] Switch the eager GAN MNIST example to object-based checkpointing - Removes variable_scopes, since they're no longer necessary (duplicate variable names are OK) - Switches up the counters a bit (global_step -> step_counter, checkpoint the epoch counter) PiperOrigin-RevId: 188190128 --- .../eager/python/examples/gan/mnist.py | 67 +++++++++---------- .../eager/python/examples/gan/mnist_test.py | 8 ++- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/tensorflow/contrib/eager/python/examples/gan/mnist.py b/tensorflow/contrib/eager/python/examples/gan/mnist.py index 5f51d52622..2b7e199fad 100644 --- a/tensorflow/contrib/eager/python/examples/gan/mnist.py +++ b/tensorflow/contrib/eager/python/examples/gan/mnist.py @@ -195,7 +195,8 @@ def generator_loss(discriminator_gen_outputs): def train_one_epoch(generator, discriminator, generator_optimizer, - discriminator_optimizer, dataset, 
log_interval, noise_dim): + discriminator_optimizer, dataset, step_counter, + log_interval, noise_dim): """Trains `generator` and `discriminator` models on `dataset`. Args: @@ -204,7 +205,8 @@ def train_one_epoch(generator, discriminator, generator_optimizer, generator_optimizer: Optimizer to use for generator. discriminator_optimizer: Optimizer to use for discriminator. dataset: Dataset of images to train on. - log_interval: How many global steps to wait between logging and collecting + step_counter: An integer variable, used to write summaries regularly. + log_interval: How many steps to wait between logging and collecting summaries. noise_dim: Dimension of noise vector to use. """ @@ -213,9 +215,10 @@ def train_one_epoch(generator, discriminator, generator_optimizer, total_discriminator_loss = 0.0 for (batch_index, images) in enumerate(tfe.Iterator(dataset)): with tf.device('/cpu:0'): - tf.assign_add(tf.train.get_global_step(), 1) + tf.assign_add(step_counter, 1) - with tf.contrib.summary.record_summaries_every_n_global_steps(log_interval): + with tf.contrib.summary.record_summaries_every_n_global_steps( + log_interval, global_step=step_counter): current_batch_size = images.shape[0] noise = tf.random_uniform( shape=[current_batch_size, noise_dim], @@ -243,12 +246,10 @@ def train_one_epoch(generator, discriminator, generator_optimizer, discriminator_grad = g.gradient(discriminator_loss_val, discriminator.variables) - with tf.variable_scope('generator'): - generator_optimizer.apply_gradients( - zip(generator_grad, generator.variables)) - with tf.variable_scope('discriminator'): - discriminator_optimizer.apply_gradients( - zip(discriminator_grad, discriminator.variables)) + generator_optimizer.apply_gradients( + zip(generator_grad, generator.variables)) + discriminator_optimizer.apply_gradients( + zip(discriminator_grad, discriminator.variables)) if log_interval and batch_index > 0 and batch_index % log_interval == 0: print('Batch #%d\tAverage Generator Loss: 
%.6f\t' @@ -269,13 +270,14 @@ def main(_): tf.data.Dataset.from_tensor_slices(data.train.images).shuffle(60000) .batch(FLAGS.batch_size)) - # Create the models and optimizers - generator = Generator(data_format) - discriminator = Discriminator(data_format) - with tf.variable_scope('generator'): - generator_optimizer = tf.train.AdamOptimizer(FLAGS.lr) - with tf.variable_scope('discriminator'): - discriminator_optimizer = tf.train.AdamOptimizer(FLAGS.lr) + # Create the models and optimizers. + model_objects = { + 'generator': Generator(data_format), + 'discriminator': Discriminator(data_format), + 'generator_optimizer': tf.train.AdamOptimizer(FLAGS.lr), + 'discriminator_optimizer': tf.train.AdamOptimizer(FLAGS.lr), + 'step_counter': tf.train.get_or_create_global_step(), + } # Prepare summary writer and checkpoint info summary_writer = tf.contrib.summary.create_summary_file_writer( @@ -284,25 +286,22 @@ def main(_): latest_cpkt = tf.train.latest_checkpoint(FLAGS.checkpoint_dir) if latest_cpkt: print('Using latest checkpoint at ' + latest_cpkt) + checkpoint = tfe.Checkpoint(**model_objects) + # Restore variables on creation if a checkpoint exists. 
+ checkpoint.restore(latest_cpkt) with tf.device(device): - for epoch in range(1, 101): - with tfe.restore_variables_on_create(latest_cpkt): - global_step = tf.train.get_or_create_global_step() - start = time.time() - with summary_writer.as_default(): - train_one_epoch(generator, discriminator, generator_optimizer, - discriminator_optimizer, dataset, FLAGS.log_interval, - FLAGS.noise) - end = time.time() - print('\nTrain time for epoch #%d (global step %d): %f' % - (epoch, global_step.numpy(), end - start)) - - all_variables = ( - generator.variables + discriminator.variables + - generator_optimizer.variables() + - discriminator_optimizer.variables() + [global_step]) - tfe.Saver(all_variables).save(checkpoint_prefix, global_step=global_step) + for _ in range(100): + start = time.time() + with summary_writer.as_default(): + train_one_epoch(dataset=dataset, log_interval=FLAGS.log_interval, + noise_dim=FLAGS.noise, **model_objects) + end = time.time() + checkpoint.save(checkpoint_prefix) + print('\nTrain time for epoch #%d (step %d): %f' % + (checkpoint.save_counter.numpy(), + checkpoint.step_counter.numpy(), + end - start)) if __name__ == '__main__': diff --git a/tensorflow/contrib/eager/python/examples/gan/mnist_test.py b/tensorflow/contrib/eager/python/examples/gan/mnist_test.py index 4a3ca8d82b..bd35e50c1f 100644 --- a/tensorflow/contrib/eager/python/examples/gan/mnist_test.py +++ b/tensorflow/contrib/eager/python/examples/gan/mnist_test.py @@ -62,7 +62,7 @@ class MnistEagerGanBenchmark(tf.test.Benchmark): for _ in range(measure_batches)] measure_dataset = tf.data.Dataset.from_tensor_slices(measure_images) - tf.train.get_or_create_global_step() + step_counter = tf.train.get_or_create_global_step() with tf.device(device()): # Create the models and optimizers generator = mnist.Generator(data_format()) @@ -78,13 +78,15 @@ class MnistEagerGanBenchmark(tf.test.Benchmark): # warm up mnist.train_one_epoch(generator, discriminator, generator_optimizer, 
discriminator_optimizer, - burn_dataset, log_interval=SUMMARY_INTERVAL, + burn_dataset, step_counter, + log_interval=SUMMARY_INTERVAL, noise_dim=NOISE_DIM) # measure start = time.time() mnist.train_one_epoch(generator, discriminator, generator_optimizer, discriminator_optimizer, - measure_dataset, log_interval=SUMMARY_INTERVAL, + measure_dataset, step_counter, + log_interval=SUMMARY_INTERVAL, noise_dim=NOISE_DIM) self._report('train', start, measure_batches, batch_size) -- GitLab From be0fa12386c019ffcc65bba5005f3a9e4ad4348c Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 7 Mar 2018 09:53:52 -0800 Subject: [PATCH 528/884] [tf.data] Improve docstring for `tf.data.Dataset.padded_batch()`. PiperOrigin-RevId: 188190458 --- tensorflow/python/data/ops/dataset_ops.py | 30 +++++++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 7c5aa4c767..6539e91c13 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -774,11 +774,31 @@ class Dataset(object): def padded_batch(self, batch_size, padded_shapes, padding_values=None): """Combines consecutive elements of this dataset into padded batches. - Like `Dataset.dense_to_sparse_batch()`, this method combines - multiple consecutive elements of this dataset, which might have - different shapes, into a single element. The tensors in the - resulting element have an additional outer dimension, and are - padded to the respective shape in `padded_shapes`. + This transformation combines multiple consecutive elements of the input + dataset into a single element. Like @{tf.data.Dataset.batch}, the tensors + in the resulting element have an additional outer dimension, which will be + `batch_size` for all but the last element, and `N % batch_size` for the + last element (where `N` is the number of elements in this dataset). 
Unlike + @{tf.data.Dataset.batch}, the elements may have different shapes for some + of their components, and this transformation will pad each component to + the respective shape in `padded_shapes`. The `padded_shapes` argument + determines the resulting shape for each dimension of each component in an + output element: + + * If the dimension is a constant (e.g. `tf.Dimension(37)`), the component + will be padded out to that length in that dimension. + * If the dimension is unknown (e.g. `tf.Dimension(None)`), the component + will be padded out to the maximum length of all elements in that + dimension. + + NOTE: If the number of elements (`N`) in this dataset is not an exact + multiple of `batch_size`, the final batch will contain smaller tensors with + shape `N % batch_size` in the batch dimension. If your program depends on + the batches having the same shape, consider using the + @{tf.contrib.data.padded_batch_and_drop_remainder} transformation instead. + + See also @{tf.contrib.data.dense_to_sparse_batch}, which combines elements + that may have different shapes into a @{tf.SparseTensor}. Args: batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of -- GitLab From add71a1f1b60c0ed6bae73ef794c600e4d7c1f2d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 09:57:48 -0800 Subject: [PATCH 529/884] boosted_trees: fix the comments about gain by removing a confusing dash.
PiperOrigin-RevId: 188191012 --- .../boosted_trees/lib/learner/common/stats/node-stats.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h index cd925f6b65..794ba2bcb0 100644 --- a/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h +++ b/tensorflow/contrib/boosted_trees/lib/learner/common/stats/node-stats.h @@ -137,7 +137,7 @@ struct NodeStats { Eigen::MatrixXf hessian = TensorToEigenMatrix(grad_stats.second.t, grad_dim, grad_dim); // I is an identity matrix. - // The gain in general form is -g^T (H+l2 I)^-1 g. + // The gain in general form is g^T (H+l2 I)^-1 g. // The node weights are -(H+l2 I)^-1 g. Eigen::MatrixXf identity; identity.setIdentity(grad_dim, grad_dim); @@ -240,7 +240,7 @@ struct NodeStats { // given regularized Hessian and gradient vector g. void CalculateWeightAndGain(const Eigen::MatrixXf& hessian_and_reg, const Eigen::VectorXf& g) { - // The gain in general form is -g^T (Hessian_and_regularization)^-1 g. + // The gain in general form is g^T (Hessian_and_regularization)^-1 g. // The node weights are -(Hessian_and_regularization)^-1 g. Eigen::VectorXf weight; // If we want to calculate x = K^-1 v, instead of explicitly calculating -- GitLab From f249d55f701ed175ba32e89ae6ba29273e69e987 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Wed, 7 Mar 2018 09:58:22 -0800 Subject: [PATCH 530/884] Migrate Halton Sequence sampler into tensorflow_probability. 
PiperOrigin-RevId: 188191091 --- tensorflow/contrib/bayesflow/BUILD | 20 - tensorflow/contrib/bayesflow/__init__.py | 2 - .../kernel_tests/halton_sequence_test.py | 198 ---------- .../bayesflow/python/ops/halton_sequence.py | 33 -- .../python/ops/halton_sequence_impl.py | 361 ------------------ 5 files changed, 614 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/halton_sequence.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 2a32ea6952..8b5c6cec61 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -145,26 +145,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "halton_sequence_test", - size = "medium", - srcs = ["python/kernel_tests/halton_sequence_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/python:array_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_ops", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - ], - tags = ["no_mac"], # b/73192243 -) - cuda_py_test( name = "hmc_test", size = "large", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index 156a2ef8cf..32f2df4b88 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -22,7 +22,6 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.bayesflow.python.ops import custom_grad -from tensorflow.contrib.bayesflow.python.ops import halton_sequence from tensorflow.contrib.bayesflow.python.ops import hmc from 
tensorflow.contrib.bayesflow.python.ops import layers from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings @@ -36,7 +35,6 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ 'custom_grad', 'entropy', - 'halton_sequence', 'hmc', 'layers', 'metropolis_hastings', diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py deleted file mode 100644 index 6b42bca6f9..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/halton_sequence_test.py +++ /dev/null @@ -1,198 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for halton_sequence.py.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import halton_sequence as halton -from tensorflow.contrib.bayesflow.python.ops import monte_carlo_impl as monte_carlo_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.platform import test - - -mc = monte_carlo_lib - - -class HaltonSequenceTest(test.TestCase): - - def test_known_values_small_bases(self): - with self.test_session(): - # The first five elements of the non-randomized Halton sequence - # with base 2 and 3. - expected = np.array(((1. / 2, 1. / 3), - (1. / 4, 2. / 3), - (3. / 4, 1. / 9), - (1. / 8, 4. / 9), - (5. / 8, 7. 
/ 9)), dtype=np.float32) - sample = halton.sample(2, num_results=5, randomized=False) - self.assertAllClose(expected, sample.eval(), rtol=1e-6) - - def test_sequence_indices(self): - """Tests access of sequence elements by index.""" - with self.test_session(): - dim = 5 - indices = math_ops.range(10, dtype=dtypes.int32) - sample_direct = halton.sample(dim, num_results=10, randomized=False) - sample_from_indices = halton.sample(dim, sequence_indices=indices, - randomized=False) - self.assertAllClose(sample_direct.eval(), sample_from_indices.eval(), - rtol=1e-6) - - def test_dtypes_works_correctly(self): - """Tests that all supported dtypes work without error.""" - with self.test_session(): - dim = 3 - sample_float32 = halton.sample(dim, num_results=10, dtype=dtypes.float32, - seed=11) - sample_float64 = halton.sample(dim, num_results=10, dtype=dtypes.float64, - seed=21) - self.assertEqual(sample_float32.eval().dtype, np.float32) - self.assertEqual(sample_float64.eval().dtype, np.float64) - - def test_normal_integral_mean_and_var_correctly_estimated(self): - n = int(1000) - # This test is almost identical to the similarly named test in - # monte_carlo_test.py. The only difference is that we use the Halton - # samples instead of the random samples to evaluate the expectations. - # MC with pseudo random numbers converges at the rate of 1/ Sqrt(N) - # (N=number of samples). For QMC in low dimensions, the expected convergence - # rate is ~ 1/N. Hence we should only need 1e3 samples as compared to the - # 1e6 samples used in the pseudo-random monte carlo. 
- with self.test_session(): - mu_p = array_ops.constant([-1.0, 1.0], dtype=dtypes.float64) - mu_q = array_ops.constant([0.0, 0.0], dtype=dtypes.float64) - sigma_p = array_ops.constant([0.5, 0.5], dtype=dtypes.float64) - sigma_q = array_ops.constant([1.0, 1.0], dtype=dtypes.float64) - p = normal_lib.Normal(loc=mu_p, scale=sigma_p) - q = normal_lib.Normal(loc=mu_q, scale=sigma_q) - - cdf_sample = halton.sample(2, num_results=n, dtype=dtypes.float64, - seed=1729) - q_sample = q.quantile(cdf_sample) - - # Compute E_p[X]. - e_x = mc.expectation_importance_sampler( - f=lambda x: x, log_p=p.log_prob, sampling_dist_q=q, z=q_sample, - seed=42) - - # Compute E_p[X^2]. - e_x2 = mc.expectation_importance_sampler( - f=math_ops.square, log_p=p.log_prob, sampling_dist_q=q, z=q_sample, - seed=1412) - - stddev = math_ops.sqrt(e_x2 - math_ops.square(e_x)) - # Keep the tolerance levels the same as in monte_carlo_test.py. - self.assertEqual(p.batch_shape, e_x.get_shape()) - self.assertAllClose(p.mean().eval(), e_x.eval(), rtol=0.01) - self.assertAllClose(p.stddev().eval(), stddev.eval(), rtol=0.02) - - def test_docstring_example(self): - # Produce the first 1000 members of the Halton sequence in 3 dimensions. - num_results = 1000 - dim = 3 - with self.test_session(): - sample = halton.sample(dim, num_results=num_results, randomized=False) - - # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional - # hypercube. - powers = math_ops.range(1.0, limit=dim + 1) - integral = math_ops.reduce_mean( - math_ops.reduce_prod(sample ** powers, axis=-1)) - true_value = 1.0 / math_ops.reduce_prod(powers + 1.0) - - # Produces a relative absolute error of 1.7%. - self.assertAllClose(integral.eval(), true_value.eval(), rtol=0.02) - - # Now skip the first 1000 samples and recompute the integral with the next - # thousand samples. The sequence_indices argument can be used to do this. 
- - sequence_indices = math_ops.range(start=1000, limit=1000 + num_results, - dtype=dtypes.int32) - sample_leaped = halton.sample(dim, sequence_indices=sequence_indices, - randomized=False) - - integral_leaped = math_ops.reduce_mean( - math_ops.reduce_prod(sample_leaped ** powers, axis=-1)) - self.assertAllClose(integral_leaped.eval(), true_value.eval(), rtol=0.05) - - def test_randomized_qmc_basic(self): - """Tests the randomization of the Halton sequences.""" - # This test is identical to the example given in Owen (2017), Figure 5. - - dim = 20 - num_results = 2000 - replica = 5 - - with self.test_session(): - sample = halton.sample(dim, num_results=num_results, seed=121117) - f = math_ops.reduce_mean(math_ops.reduce_sum(sample, axis=1) ** 2) - values = [f.eval() for _ in range(replica)] - self.assertAllClose(np.mean(values), 101.6667, atol=np.std(values) * 2) - - def test_partial_sum_func_qmc(self): - """Tests the QMC evaluation of (x_j + x_{j+1} ...+x_{n})^2. - - A good test of QMC is provided by the function: - - f(x_1,..x_n, x_{n+1}, ..., x_{n+m}) = (x_{n+1} + ... x_{n+m} - m / 2)^2 - - with the coordinates taking values in the unit interval. The mean and - variance of this function (with the uniform distribution over the - unit-hypercube) is exactly calculable: - - = m / 12, Var(f) = m (5m - 3) / 360 - - The purpose of the "shift" (if n > 0) in the coordinate dependence of the - function is to provide a test for Halton sequence which exhibit more - dependence in the higher axes. - - This test confirms that the mean squared error of RQMC estimation falls - as O(N^(2-e)) for any e>0. - """ - - n, m = 10, 10 - dim = n + m - num_results_lo, num_results_hi = 1000, 10000 - replica = 20 - true_mean = m / 12. 
- - def func_estimate(x): - return math_ops.reduce_mean( - (math_ops.reduce_sum(x[:, -m:], axis=-1) - m / 2.0) ** 2) - - with self.test_session(): - sample_lo = halton.sample(dim, num_results=num_results_lo, seed=1925) - sample_hi = halton.sample(dim, num_results=num_results_hi, seed=898128) - f_lo, f_hi = func_estimate(sample_lo), func_estimate(sample_hi) - - estimates = np.array([(f_lo.eval(), f_hi.eval()) for _ in range(replica)]) - var_lo, var_hi = np.mean((estimates - true_mean) ** 2, axis=0) - - # Expect that the variance scales as N^2 so var_hi / var_lo ~ k / 10^2 - # with k a fudge factor accounting for the residual N dependence - # of the QMC error and the sampling error. - log_rel_err = np.log(100 * var_hi / var_lo) - self.assertAllClose(log_rel_err, 0.0, atol=1.2) - - -if __name__ == '__main__': - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/halton_sequence.py b/tensorflow/contrib/bayesflow/python/ops/halton_sequence.py deleted file mode 100644 index 49d747d538..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/halton_sequence.py +++ /dev/null @@ -1,33 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Support for low discrepancy Halton sequences. 
- -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.contrib.bayesflow.python.ops.halton_sequence_impl import * -# pylint: enable=wildcard-import -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - 'sample', -] - -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py b/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py deleted file mode 100644 index 35962109bc..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/halton_sequence_impl.py +++ /dev/null @@ -1,361 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Quasi Monte Carlo support: Halton sequence. - -@@sample -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops - -__all__ = [ - 'sample', -] - - -# The maximum dimension we support. 
This is limited by the number of primes -# in the _PRIMES array. -_MAX_DIMENSION = 1000 - - -def sample(dim, - num_results=None, - sequence_indices=None, - dtype=None, - randomized=True, - seed=None, - name=None): - r"""Returns a sample from the `dim` dimensional Halton sequence. - - Warning: The sequence elements take values only between 0 and 1. Care must be - taken to appropriately transform the domain of a function if it differs from - the unit cube before evaluating integrals using Halton samples. It is also - important to remember that quasi-random numbers without randomization are not - a replacement for pseudo-random numbers in every context. Quasi random numbers - are completely deterministic and typically have significant negative - autocorrelation unless randomization is used. - - Computes the members of the low discrepancy Halton sequence in dimension - `dim`. The `dim`-dimensional sequence takes values in the unit hypercube in - `dim` dimensions. Currently, only dimensions up to 1000 are supported. The - prime base for the k-th axes is the k-th prime starting from 2. For example, - if `dim` = 3, then the bases will be [2, 3, 5] respectively and the first - element of the non-randomized sequence will be: [0.5, 0.333, 0.2]. For a more - complete description of the Halton sequences see: - https://en.wikipedia.org/wiki/Halton_sequence. For low discrepancy sequences - and their applications see: - https://en.wikipedia.org/wiki/Low-discrepancy_sequence. - - If `randomized` is true, this function produces a scrambled version of the - Halton sequence introduced by Owen in arXiv:1706.02808. For the advantages of - randomization of low discrepancy sequences see: - https://en.wikipedia.org/wiki/Quasi-Monte_Carlo_method#Randomization_of_quasi-Monte_Carlo - - The number of samples produced is controlled by the `num_results` and - `sequence_indices` parameters. The user must supply either `num_results` or - `sequence_indices` but not both. 
- The former is the number of samples to produce starting from the first - element. If `sequence_indices` is given instead, the specified elements of - the sequence are generated. For example, sequence_indices=tf.range(10) is - equivalent to specifying n=10. - - Example Use: - - ```python - bf = tf.contrib.bayesflow - - # Produce the first 1000 members of the Halton sequence in 3 dimensions. - num_results = 1000 - dim = 3 - sample = bf.halton_sequence.sample(dim, num_results=num_results, seed=127) - - # Evaluate the integral of x_1 * x_2^2 * x_3^3 over the three dimensional - # hypercube. - powers = tf.range(1.0, limit=dim + 1) - integral = tf.reduce_mean(tf.reduce_prod(sample ** powers, axis=-1)) - true_value = 1.0 / tf.reduce_prod(powers + 1.0) - with tf.Session() as session: - values = session.run((integral, true_value)) - - # Produces a relative absolute error of 1.7%. - print ("Estimated: %f, True Value: %f" % values) - - # Now skip the first 1000 samples and recompute the integral with the next - # thousand samples. The sequence_indices argument can be used to do this. - - - sequence_indices = tf.range(start=1000, limit=1000 + num_results, - dtype=tf.int32) - sample_leaped = halton.sample(dim, sequence_indices=sequence_indices, - seed=111217) - - integral_leaped = tf.reduce_mean(tf.reduce_prod(sample_leaped ** powers, - axis=-1)) - with tf.Session() as session: - values = session.run((integral_leaped, true_value)) - # Now produces a relative absolute error of 0.05%. - print ("Leaped Estimated: %f, True Value: %f" % values) - ``` - - Args: - dim: Positive Python `int` representing each sample's `event_size.` Must - not be greater than 1000. - num_results: (Optional) positive Python `int`. The number of samples to - generate. Either this parameter or sequence_indices must be specified but - not both. If this parameter is None, then the behaviour is determined by - the `sequence_indices`. - sequence_indices: (Optional) `Tensor` of dtype int32 and rank 1. 
The - elements of the sequence to compute specified by their position in the - sequence. The entries index into the Halton sequence starting with 0 and - hence, must be whole numbers. For example, sequence_indices=[0, 5, 6] will - produce the first, sixth and seventh elements of the sequence. If this - parameter is None, then the `num_results` parameter must be specified - which gives the number of desired samples starting from the first sample. - dtype: (Optional) The dtype of the sample. One of `float32` or `float64`. - Default is `float32`. - randomized: (Optional) bool indicating whether to produce a randomized - Halton sequence. If True, applies the randomization described in - Owen (2017) [arXiv:1706.02808]. - seed: (Optional) Python integer to seed the random number generator. Only - used if `randomized` is True. If not supplied and `randomized` is True, - no seed is set. - name: (Optional) Python `str` describing ops managed by this function. If - not supplied the name of this function is used. - - Returns: - halton_elements: Elements of the Halton sequence. `Tensor` of supplied dtype - and `shape` `[num_results, dim]` if `num_results` was specified or shape - `[s, dim]` where s is the size of `sequence_indices` if `sequence_indices` - were specified. - - Raises: - ValueError: if both `sequence_indices` and `num_results` were specified or - if dimension `dim` is less than 1 or greater than 1000. - """ - if dim < 1 or dim > _MAX_DIMENSION: - raise ValueError( - 'Dimension must be between 1 and {}. 
Supplied {}'.format(_MAX_DIMENSION, - dim)) - if (num_results is None) == (sequence_indices is None): - raise ValueError('Either `num_results` or `sequence_indices` must be' - ' specified but not both.') - - dtype = dtype or dtypes.float32 - if not dtype.is_floating: - raise ValueError('dtype must be of `float`-type') - - with ops.name_scope(name, 'sample', values=[sequence_indices]): - # Here and in the following, the shape layout is as follows: - # [sample dimension, event dimension, coefficient dimension]. - # The coefficient dimension is an intermediate axes which will hold the - # weights of the starting integer when expressed in the (prime) base for - # an event dimension. - indices = _get_indices(num_results, sequence_indices, dtype) - radixes = array_ops.constant(_PRIMES[0:dim], dtype=dtype, shape=[dim, 1]) - - max_sizes_by_axes = _base_expansion_size(math_ops.reduce_max(indices), - radixes) - - max_size = math_ops.reduce_max(max_sizes_by_axes) - - # The powers of the radixes that we will need. Note that there is a bit - # of an excess here. Suppose we need the place value coefficients of 7 - # in base 2 and 3. For 2, we will have 3 digits but we only need 2 digits - # for base 3. However, we can only create rectangular tensors so we - # store both expansions in a [2, 3] tensor. This leads to the problem that - # we might end up attempting to raise large numbers to large powers. For - # example, base 2 expansion of 1024 has 10 digits. If we were in 10 - # dimensions, then the 10th prime (29) we will end up computing 29^10 even - # though we don't need it. We avoid this by setting the exponents for each - # axes to 0 beyond the maximum value needed for that dimension. - exponents_by_axes = array_ops.tile([math_ops.range(max_size)], [dim, 1]) - - # The mask is true for those coefficients that are irrelevant. 
- weight_mask = exponents_by_axes >= max_sizes_by_axes - capped_exponents = array_ops.where( - weight_mask, array_ops.zeros_like(exponents_by_axes), exponents_by_axes) - weights = radixes ** capped_exponents - # The following computes the base b expansion of the indices. Suppose, - # x = a0 + a1*b + a2*b^2 + ... Then, performing a floor div of x with - # the vector (1, b, b^2, b^3, ...) will produce - # (a0 + s1 * b, a1 + s2 * b, ...) where s_i are coefficients we don't care - # about. Noting that all a_i < b by definition of place value expansion, - # we see that taking the elements mod b of the above vector produces the - # place value expansion coefficients. - coeffs = math_ops.floor_div(indices, weights) - coeffs *= 1 - math_ops.cast(weight_mask, dtype) - coeffs %= radixes - if not randomized: - coeffs /= radixes - return math_ops.reduce_sum(coeffs / weights, axis=-1) - coeffs = _randomize(coeffs, radixes, seed=seed) - # Remove the contribution from randomizing the trailing zero for the - # axes where max_size_by_axes < max_size. This will be accounted - # for separately below (using zero_correction). - coeffs *= 1 - math_ops.cast(weight_mask, dtype) - coeffs /= radixes - base_values = math_ops.reduce_sum(coeffs / weights, axis=-1) - - # The randomization used in Owen (2017) does not leave 0 invariant. While - # we have accounted for the randomization of the first `max_size_by_axes` - # coefficients, we still need to correct for the trailing zeros. Luckily, - # this is equivalent to adding a uniform random value scaled so the first - # `max_size_by_axes` coefficients are zero. The following statements perform - # this correction. 
- zero_correction = random_ops.random_uniform([dim, 1], seed=seed, - dtype=dtype) - zero_correction /= (radixes ** max_sizes_by_axes) - return base_values + array_ops.reshape(zero_correction, [-1]) - - -def _randomize(coeffs, radixes, seed=None): - """Applies the Owen randomization to the coefficients.""" - given_dtype = coeffs.dtype - coeffs = math_ops.to_int32(coeffs) - num_coeffs = array_ops.shape(coeffs)[-1] - radixes = array_ops.reshape(math_ops.to_int32(radixes), [-1]) - perms = _get_permutations(num_coeffs, radixes, seed=seed) - perms = array_ops.reshape(perms, [-1]) - radix_sum = math_ops.reduce_sum(radixes) - radix_offsets = array_ops.reshape(math_ops.cumsum(radixes, exclusive=True), - [-1, 1]) - offsets = radix_offsets + math_ops.range(num_coeffs) * radix_sum - permuted_coeffs = array_ops.gather(perms, coeffs + offsets) - return math_ops.cast(permuted_coeffs, dtype=given_dtype) - - -def _get_permutations(num_results, dims, seed=None): - """Uniform iid sample from the space of permutations. - - Draws a sample of size `num_results` from the group of permutations of degrees - specified by the `dims` tensor. These are packed together into one tensor - such that each row is one sample from each of the dimensions in `dims`. For - example, if dims = [2,3] and num_results = 2, the result is a tensor of shape - [2, 2 + 3] and the first row of the result might look like: - [1, 0, 2, 0, 1]. The first two elements are a permutation over 2 elements - while the next three are a permutation over 3 elements. - - Args: - num_results: A positive scalar `Tensor` of integral type. The number of - draws from the discrete uniform distribution over the permutation groups. - dims: A 1D `Tensor` of the same dtype as `num_results`. The degree of the - permutation groups from which to sample. - seed: (Optional) Python integer to seed the random number generator. - - Returns: - permutations: A `Tensor` of shape `[num_results, sum(dims)]` and the same - dtype as `dims`. 
- """ - sample_range = math_ops.range(num_results) - def generate_one(d): - fn = lambda _: random_ops.random_shuffle(math_ops.range(d), seed=seed) - return functional_ops.map_fn(fn, sample_range) - return array_ops.concat([generate_one(d) for d in array_ops.unstack(dims)], - axis=-1) - - -def _get_indices(n, sequence_indices, dtype, name=None): - """Generates starting points for the Halton sequence procedure. - - The k'th element of the sequence is generated starting from a positive integer - which must be distinct for each `k`. It is conventional to choose the starting - point as `k` itself (or `k+1` if k is zero based). This function generates - the starting integers for the required elements and reshapes the result for - later use. - - Args: - n: Positive `int`. The number of samples to generate. If this - parameter is supplied, then `sequence_indices` should be None. - sequence_indices: `Tensor` of dtype int32 and rank 1. The entries - index into the Halton sequence starting with 0 and hence, must be whole - numbers. For example, sequence_indices=[0, 5, 6] will produce the first, - sixth and seventh elements of the sequence. If this parameter is not None - then `n` must be None. - dtype: The dtype of the sample. One of `float32` or `float64`. - Default is `float32`. - name: Python `str` name which describes ops created by this function. - - Returns: - indices: `Tensor` of dtype `dtype` and shape = `[n, 1, 1]`. - """ - with ops.name_scope(name, '_get_indices', [n, sequence_indices]): - if sequence_indices is None: - sequence_indices = math_ops.range(n, dtype=dtype) - else: - sequence_indices = math_ops.cast(sequence_indices, dtype) - - # Shift the indices so they are 1 based. - indices = sequence_indices + 1 - - # Reshape to make space for the event dimension and the place value - # coefficients. - return array_ops.reshape(indices, [-1, 1, 1]) - - -def _base_expansion_size(num, bases): - """Computes the number of terms in the place value expansion. 
- - Let num = a0 + a1 b + a2 b^2 + ... ak b^k be the place value expansion of - `num` in base b (ak <> 0). This function computes and returns `k+1` for each - base `b` specified in `bases`. - - This can be inferred from the base `b` logarithm of `num` as follows: - $$k = Floor(log_b (num)) + 1 = Floor( log(num) / log(b)) + 1$$ - - Args: - num: Scalar `Tensor` of dtype either `float32` or `float64`. The number to - compute the base expansion size of. - bases: `Tensor` of the same dtype as num. The bases to compute the size - against. - - Returns: - Tensor of same dtype and shape as `bases` containing the size of num when - written in that base. - """ - return math_ops.floor(math_ops.log(num) / math_ops.log(bases)) + 1 - - -def _primes_less_than(n): - # Based on - # https://stackoverflow.com/questions/2068372/fastest-way-to-list-all-primes-below-n-in-python/3035188#3035188 - """Returns sorted array of primes such that `2 <= prime < n`.""" - small_primes = np.array((2, 3, 5)) - if n <= 6: - return small_primes[small_primes < n] - sieve = np.ones(n // 3 + (n % 6 == 2), dtype=np.bool) - sieve[0] = False - m = int(n ** 0.5) // 3 + 1 - for i in range(m): - if not sieve[i]: - continue - k = 3 * i + 1 | 1 - sieve[k ** 2 // 3::2 * k] = False - sieve[(k ** 2 + 4 * k - 2 * k * (i & 1)) // 3::2 * k] = False - return np.r_[2, 3, 3 * np.nonzero(sieve)[0] + 1 | 1] - -_PRIMES = _primes_less_than(7919+1) - - -assert len(_PRIMES) == _MAX_DIMENSION -- GitLab From 0c7b8bb3a6495d03a090a123eec373a46d8678cb Mon Sep 17 00:00:00 2001 From: Billy Lamberta Date: Wed, 7 Mar 2018 10:47:57 -0800 Subject: [PATCH 531/884] Docs: Add simple_save section to SavedModel APIs, and add to article intro. Rename headers to make consistent. 
PiperOrigin-RevId: 188199437 --- .../docs_src/programmers_guide/saved_model.md | 128 +++++++++--------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index f18d50b282..b5f63a8e3b 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -1,38 +1,33 @@ -# Saving and Restoring +# Save and Restore -This document explains how to save and restore -@{$variables$variables} and models. +The @{tf.train.Saver} class provides methods to save and restore models. The +@{tf.saved_model.simple_save} function is an easy way to build a +@{tf.saved_model$saved model} suitable for serving. +[Estimators](/programmers_guide/estimators) automatically save and restore +variables in the `model_dir`. -Important: TensorFlow model files are code. Be careful with untrusted code. -See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/SECURITY.md) -for details. - -## Saving and restoring variables - -A TensorFlow variable provides the best way to represent shared, persistent -state manipulated by your program. (See @{$variables$Variables} for details.) -This section explains how to save and restore variables. -Note that Estimators automatically saves and restores variables -(in the `model_dir`). +## Save and restore variables -The `tf.train.Saver` class provides methods for saving and restoring models. -The `tf.train.Saver` constructor adds `save` and `restore` ops to the graph -for all, or a specified list, of the variables in the graph. The `Saver` -object provides methods to run these ops, specifying paths for the checkpoint -files to write to or read from. +TensorFlow @{$variables} are the best way to represent shared, persistent state +manipulated by your program. 
The `tf.train.Saver` constructor adds `save` and +`restore` ops to the graph for all, or a specified list, of the variables in the +graph. The `Saver` object provides methods to run these ops, specifying paths +for the checkpoint files to write to or read from. -The saver will restore all variables already defined in your model. If you're +`Saver` restores all variables already defined in your model. If you're loading a model without knowing how to build its graph (for example, if you're writing a generic program to load models), then read the [Overview of saving and restoring models](#models) section later in this document. -TensorFlow saves variables in binary **checkpoint files** that, -roughly speaking, map variable names to tensor values. - +TensorFlow saves variables in binary *checkpoint files* that map variable +names to tensor values. +Caution: TensorFlow model files are code. Be careful with untrusted code. +See [Using TensorFlow Securely](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md) +for details. -### Saving variables +### Save variables Create a `Saver` with `tf.train.Saver()` to manage all variables in the model. For example, the following snippet demonstrates how to call the @@ -64,9 +59,7 @@ with tf.Session() as sess: print("Model saved in path: %s" % save_path) ``` - - -### Restoring variables +### Restore variables The `tf.train.Saver` object not only saves variables to checkpoint files, it also restores variables. Note that when you restore variables you do not have @@ -95,14 +88,11 @@ with tf.Session() as sess: print("v2 : %s" % v2.eval()) ``` -Notes: - -* There is not a physical file called "/tmp/model.ckpt". It is the **prefix** - of filenames created for the checkpoint. Users only interact with the - prefix instead of physical checkpoint files. +Note: There is not a physical file called `/tmp/model.ckpt`. It is the *prefix* of +filenames created for the checkpoint. 
Users only interact with the prefix +instead of physical checkpoint files. - -### Choosing which variables to save and restore +### Choose variables to save and restore If you do not pass any arguments to `tf.train.Saver()`, the saver handles all variables in the graph. Each variable is saved under the name that was passed @@ -201,29 +191,42 @@ chkp.print_tensors_in_checkpoint_file("/tmp/model.ckpt", tensor_name='v2', all_t -## Overview of saving and restoring models +## Save and restore models + +Use `SavedModel` to save and load your model—variables, the graph, and the +graph's metadata. This is a language-neutral, recoverable, hermetic +serialization format that enables higher-level systems and tools to produce, +consume, and transform TensorFlow models. TensorFlow provides several ways to +interact with `SavedModel`, including the @{tf.saved_model} APIs, +@{tf.estimator.Estimator}, and a command-line interface. + -When you want to save and load variables, the graph, and the -graph's metadata--basically, when you want to save or restore -your model--we recommend using SavedModel. -**SavedModel** is a language-neutral, recoverable, hermetic -serialization format. SavedModel enables higher-level systems -and tools to produce, consume, and transform TensorFlow models. -TensorFlow provides several mechanisms for interacting with -SavedModel, including tf.saved_model APIs, Estimator APIs and a CLI. +## Build and load a SavedModel +### Simple save -## APIs to build and load a SavedModel +The easiest way to create a `SavedModel` is to use the @{tf.saved_model.simple_save} +function: -This section focuses on the APIs for building and loading a SavedModel, -particularly when using lower-level TensorFlow APIs. 
+```python +simple_save(session, + export_dir, + inputs={"x": x, "y": y}, + outputs={"z": z}) +``` +This configures the `SavedModel` so it can be loaded by +[TensorFlow serving](/serving/serving_basic) and supports the +[Predict API](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/predict.proto). +To access the classify, regress, or multi-inference APIs, use the manual +`SavedModel` builder APIs or an @{tf.estimator.Estimator}. -### Building a SavedModel +### Manually build a SavedModel -We provide a Python implementation of the SavedModel -@{tf.saved_model.builder$builder}. -The `SavedModelBuilder` class provides functionality to +If your use case isn't covered by @{tf.saved_model.simple_save}, use the manual +@{tf.saved_model.builder$builder APIs} to create a `SavedModel`. + +The @{tf.saved_model.builder.SavedModelBuilder} class provides functionality to save multiple `MetaGraphDef`s. A **MetaGraph** is a dataflow graph, plus its associated variables, assets, and signatures. A **`MetaGraphDef`** is the protocol buffer representation of a MetaGraph. A **signature** is @@ -264,7 +267,7 @@ builder.save() ``` -### Loading a SavedModel in Python +### Load a SavedModel in Python The Python version of the SavedModel @{tf.saved_model.loader$loader} @@ -288,7 +291,7 @@ with tf.Session(graph=tf.Graph()) as sess: ``` -### Loading a SavedModel in C++ +### Load a SavedModel in C++ The C++ version of the SavedModel [loader](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/cc/saved_model/loader.h) @@ -306,7 +309,7 @@ LoadSavedModel(session_options, run_options, export_dir, {kSavedModelTagTrain}, &bundle); ``` -### Loading and Serving a SavedModel in TensorFlow Serving +### Load and serve a SavedModel in TensorFlow serving You can easily load and serve a SavedModel with the TensorFlow Serving Model Server binary. See [instructions](https://www.tensorflow.org/serving/setup#installing_using_apt-get) @@ -374,7 +377,7 @@ SavedModel format. 
This section explains how to: * Serve the model from a local server and request predictions. -### Preparing serving inputs +### Prepare serving inputs During training, an @{$premade_estimators#input_fn$`input_fn()`} ingests data and prepares it for use by the model. At serving time, similarly, a @@ -448,7 +451,7 @@ to expect and how to map them to your model's expected inputs. By contrast, the *output* portion of the signature is determined by the model. -### Performing the export +### Perform the export To export your trained Estimator, call @{tf.estimator.Estimator.export_savedmodel} with the export base path and @@ -471,7 +474,7 @@ Session. > Note: It is your responsibility to garbage-collect old exports. > Otherwise, successive exports will accumulate under `export_dir_base`. -### Specifying the outputs of a custom model +### Specify the outputs of a custom model When writing a custom `model_fn`, you must populate the `export_outputs` element of the @{tf.estimator.EstimatorSpec} return value. This is a dict of @@ -503,7 +506,7 @@ indicating which `SignatureDef` will be served when an inference request does not specify one. -### Serving the exported model locally +### Serve the exported model locally For local deployment, you can serve your model using [TensorFlow Serving](https://github.com/tensorflow/serving), an open-source project that loads a @@ -522,7 +525,7 @@ bazel-bin/tensorflow_serving/model_servers/tensorflow_model_server --port=9000 - Now you have a server listening for inference requests via gRPC on port 9000! -### Requesting predictions from a local server +### Request predictions from a local server The server responds to gRPC requests according to the [PredictionService](https://github.com/tensorflow/serving/blob/master/tensorflow_serving/apis/prediction_service.proto#L15) @@ -615,7 +618,7 @@ passing in sample inputs in various formats (for example, Python expressions) and then fetching the output. 
-### Installing the SavedModel CLI +### Install the SavedModel CLI Broadly speaking, you can install TensorFlow in either of the following two ways: @@ -842,7 +845,7 @@ For example: `=[{"age":[22,24],"education":["BS","MS"]}]` ``` -#### Save Output +#### Save output By default, the SavedModel CLI writes output to stdout. If a directory is passed to `--outdir` option, the outputs will be saved as npy files named after @@ -851,7 +854,7 @@ output tensor keys under the given directory. Use `--overwrite` to overwrite existing output files. -#### TensorFlow Debugger (tfdbg) Integration +#### TensorFlow debugger (tfdbg) integration If `--tf_debug` option is set, the SavedModel CLI will use the TensorFlow Debugger (tfdbg) to watch the intermediate Tensors and runtime @@ -958,6 +961,3 @@ of checkpoints and assets: Each graph is associated with a specific set of tags, which enables identification during a load or restore operation. - - - -- GitLab From 6cba251133bbbb0303934b03d062174bc8b25000 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Wed, 7 Mar 2018 10:58:44 -0800 Subject: [PATCH 532/884] Properly parse input strings in the dependency optimizer PiperOrigin-RevId: 188201284 --- .../optimizers/dependency_optimizer.cc | 27 +++++++++------ .../optimizers/dependency_optimizer_test.cc | 33 +++++++++++++++++++ 2 files changed, 50 insertions(+), 10 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index b47cba5ff7..bb4b916f46 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -346,16 +346,23 @@ void DependencyOptimizer::OptimizeNode(int node_idx, CHECK(!IsControlInput(input_to_forward)); for (int j = 0; j < consumer->input_size(); ++j) { const string& old_input = consumer->input(j); - if (old_input == node_name) { - new_input = input_to_forward; - node_map_->UpdateInput(consumer->name(), 
old_input, new_input); - consumer->set_input(j, new_input); - found_input = true; - } else if (old_input == AsControlDependency(NodeName(node_name))) { - new_input = AsControlDependency(NodeName(input_to_forward)); - node_map_->UpdateInput(consumer->name(), old_input, new_input); - consumer->set_input(j, new_input); - found_input = true; + int old_input_pos; + string old_input_node_name = + ParseNodeName(old_input, &old_input_pos); + if (old_input_node_name == node_name) { + if (old_input_pos >= 0) { + // Regular input + new_input = input_to_forward; + node_map_->UpdateInput(consumer->name(), old_input, new_input); + consumer->set_input(j, new_input); + found_input = true; + } else { + // Control dependency + new_input = AsControlDependency(NodeName(input_to_forward)); + node_map_->UpdateInput(consumer->name(), old_input, new_input); + consumer->set_input(j, new_input); + found_input = true; + } } } CHECK(found_input); diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc index 33d6b992d2..08659cbf6f 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc @@ -515,6 +515,39 @@ TEST_F(DependencyOptimizerTest, ChangeToNoop_Identity) { } } +TEST_F(DependencyOptimizerTest, IdentityInputs) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output b = ops::Placeholder(scope.WithOpName("b"), DT_BOOL); + Output x = ops::RandomUniform(scope.WithOpName("x"), {1, 2}, DT_FLOAT); + auto s = ops::Switch(scope.WithOpName("s"), x, b); + + // Identity nodes to be removed. 
+ auto id_f = ops::Identity(scope.WithOpName("id_f"), s.output_false); + auto id_t = ops::Identity(scope.WithOpName("id_t"), s.output_true); + + // Output + Output out1 = ops::Identity(scope.WithOpName("out1"), id_f); + Output out2 = ops::Identity(scope.WithOpName("out2"), id_t); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + item.fetch = {"out1", "out2"}; + + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(6, output.node_size()); + EXPECT_EQ("out1", output.node(4).name()); + EXPECT_EQ(1, output.node(4).input_size()); + EXPECT_EQ("s", output.node(4).input(0)); + + EXPECT_EQ("out2", output.node(5).name()); + EXPECT_EQ(1, output.node(5).input_size()); + EXPECT_EQ("s:1", output.node(5).input(0)); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From c905620906f306bfe222118276ffff199deb0367 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 11:04:21 -0800 Subject: [PATCH 533/884] Optimizations to DepthwiseConv using 3x3 filters. 
PiperOrigin-RevId: 188202344 --- .../contrib/lite/kernels/internal/BUILD | 1 + .../internal/optimized/depthwiseconv_uint8.h | 17 + .../depthwiseconv_uint8_3x3_filter.h | 653 ++++++++++++++++++ 3 files changed, 671 insertions(+) create mode 100644 tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index d5dd2cbf14..c7290c2aaa 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -149,6 +149,7 @@ cc_library( "common.h", "optimized/depthwiseconv_float.h", "optimized/depthwiseconv_uint8.h", + "optimized/depthwiseconv_uint8_3x3_filter.h", "optimized/optimized_ops.h", ], copts = tflite_copts(), diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h index dbc4f0d6fd..08674a6c59 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8.h @@ -18,6 +18,7 @@ limitations under the License. #include "fixedpoint/fixedpoint.h" #include "public/gemmlowp.h" #include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h" #include "tensorflow/contrib/lite/kernels/internal/types.h" namespace tflite { @@ -1692,6 +1693,22 @@ inline void DepthwiseConv(const uint8* input_data, const Dims<4>& input_dims, const int output_width = ArraySize(output_dims, 1); TFLITE_DCHECK(output_depth == input_depth * depth_multiplier); +#ifdef __aarch64__ + // Call kernel optimized for depthwise convolutions using 3x3 filters, + // stride = 1, no padding, depth_multiplier = 1 and depth a multiple of 16. 
+ if (filter_width == 3 && filter_height == 3 && depth_multiplier == 1 && + stride_width == 1 && stride_height == 1 && pad_width == 0 && + pad_height == 0 && (input_depth % 16) == 0) { + DepthwiseConv3by3FilterDepth16( + input_data, input_dims, input_offset, filter_data, filter_dims, + filter_offset, bias_data, bias_dims, stride_width, stride_height, + pad_width, pad_height, depth_multiplier, output_offset, + output_multiplier, output_shift, output_activation_min, + output_activation_max, output_data, output_dims); + return; + } +#endif + static const int kAccBufferMaxSize = 2048; int32 acc_buffer[kAccBufferMaxSize]; TFLITE_DCHECK_GE(kAccBufferMaxSize, output_depth); diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h new file mode 100644 index 0000000000..e0335b2c74 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h @@ -0,0 +1,653 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_3X3_FILTER_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_3X3_FILTER_H_ + +#include "fixedpoint/fixedpoint.h" +#include "public/gemmlowp.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" + +namespace tflite { +namespace optimized_ops { + +#ifdef __aarch64__ + +inline void preload_l1_keep(const uint8* ptr) { +#ifdef GEMMLOWP_ARM_64 + asm volatile("prfm pldl1keep, [%[ptr]]\n" ::[ptr] "r"(ptr) :); +#else + gemmlowp::Prefetch(ptr); +#endif +} + +// Implementation of quantized DepthwiseConv for 3x3 filters. + +// Below are helper structs to remove the use of arrays. +// There is an llvm bug that causes significant slowdown when using arrays for +// NEON intrinsics vector data types. +// See: https://bugs.llvm.org/show_bug.cgi?id=34945 + +struct Int32x16 { + int32x4_t v0, v1, v2, v3; +}; + +struct Int16x16 { + int16x8_t low, high; +}; + +struct Int16x16x3 { + Int16x16 v0, v1, v2; +}; + +struct Filter3x3x16 { + Int16x16x3 r0, r1, r2; +}; + +// Loads 3x3 filter of depth 16 and adds filter offsets. 
+inline Filter3x3x16 LoadFilterDepth16(const uint8* filter_ptr, + int32 filter_offset, int output_depth) { + Filter3x3x16 filter; + + uint8x8_t temp_u8_0, temp_u8_1, temp_u8_2, temp_u8_3, temp_u8_4, temp_u8_5, + temp_u8_6, temp_u8_7, temp_u8_8, temp_u8_9, temp_u8_10, temp_u8_11, + temp_u8_12, temp_u8_13, temp_u8_14, temp_u8_15, temp_u8_16, temp_u8_17; + int16x8_t filter_offset_vec = vdupq_n_s16(filter_offset); + + temp_u8_0 = vld1_u8(filter_ptr + 0 * output_depth); + temp_u8_1 = vld1_u8(filter_ptr + 0 * output_depth + 8); + temp_u8_2 = vld1_u8(filter_ptr + 1 * output_depth); + temp_u8_3 = vld1_u8(filter_ptr + 1 * output_depth + 8); + temp_u8_4 = vld1_u8(filter_ptr + 2 * output_depth); + temp_u8_5 = vld1_u8(filter_ptr + 2 * output_depth + 8); + + temp_u8_6 = vld1_u8(filter_ptr + 3 * output_depth); + temp_u8_7 = vld1_u8(filter_ptr + 3 * output_depth + 8); + temp_u8_8 = vld1_u8(filter_ptr + 4 * output_depth); + temp_u8_9 = vld1_u8(filter_ptr + 4 * output_depth + 8); + temp_u8_10 = vld1_u8(filter_ptr + 5 * output_depth); + temp_u8_11 = vld1_u8(filter_ptr + 5 * output_depth + 8); + + temp_u8_12 = vld1_u8(filter_ptr + 6 * output_depth); + temp_u8_13 = vld1_u8(filter_ptr + 6 * output_depth + 8); + temp_u8_14 = vld1_u8(filter_ptr + 7 * output_depth); + temp_u8_15 = vld1_u8(filter_ptr + 7 * output_depth + 8); + temp_u8_16 = vld1_u8(filter_ptr + 8 * output_depth); + temp_u8_17 = vld1_u8(filter_ptr + 8 * output_depth + 8); + + filter.r0.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_0)); + filter.r0.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_1)); + filter.r0.v1.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_2)); + filter.r0.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_3)); + filter.r0.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_4)); + filter.r0.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_5)); + + filter.r1.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_6)); + filter.r1.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_7)); + filter.r1.v1.low = 
vreinterpretq_s16_u16(vmovl_u8(temp_u8_8)); + filter.r1.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_9)); + filter.r1.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_10)); + filter.r1.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_11)); + + filter.r2.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_12)); + filter.r2.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_13)); + filter.r2.v1.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_14)); + filter.r2.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_15)); + filter.r2.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_u8_16)); + filter.r2.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_u8_17)); + + filter.r0.v0.low = vaddq_s16(filter.r0.v0.low, filter_offset_vec); + filter.r0.v0.high = vaddq_s16(filter.r0.v0.high, filter_offset_vec); + filter.r0.v1.low = vaddq_s16(filter.r0.v1.low, filter_offset_vec); + filter.r0.v1.high = vaddq_s16(filter.r0.v1.high, filter_offset_vec); + filter.r0.v2.low = vaddq_s16(filter.r0.v2.low, filter_offset_vec); + filter.r0.v2.high = vaddq_s16(filter.r0.v2.high, filter_offset_vec); + + filter.r1.v0.low = vaddq_s16(filter.r1.v0.low, filter_offset_vec); + filter.r1.v0.high = vaddq_s16(filter.r1.v0.high, filter_offset_vec); + filter.r1.v1.low = vaddq_s16(filter.r1.v1.low, filter_offset_vec); + filter.r1.v1.high = vaddq_s16(filter.r1.v1.high, filter_offset_vec); + filter.r1.v2.low = vaddq_s16(filter.r1.v2.low, filter_offset_vec); + filter.r1.v2.high = vaddq_s16(filter.r1.v2.high, filter_offset_vec); + + filter.r2.v0.low = vaddq_s16(filter.r2.v0.low, filter_offset_vec); + filter.r2.v0.high = vaddq_s16(filter.r2.v0.high, filter_offset_vec); + filter.r2.v1.low = vaddq_s16(filter.r2.v1.low, filter_offset_vec); + filter.r2.v1.high = vaddq_s16(filter.r2.v1.high, filter_offset_vec); + filter.r2.v2.low = vaddq_s16(filter.r2.v2.low, filter_offset_vec); + filter.r2.v2.high = vaddq_s16(filter.r2.v2.high, filter_offset_vec); + + return filter; +} + +// Loads 3 input cells of depth 16 and adds input offsets. 
+inline Int16x16x3 LoadInputRowDepth16(const uint8* ptr, int input_depth, + int32 input_offset, + Int16x16x3 input_row) { + uint8x8_t temp_0, temp_1; + int16x8_t offset_vec = vdupq_n_s16(input_offset); + + temp_0 = vld1_u8(ptr + 0 * input_depth); + temp_1 = vld1_u8(ptr + 0 * input_depth + 8); + input_row.v0.low = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_row.v0.high = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_row.v0.low = vaddq_s16(input_row.v0.low, offset_vec); + input_row.v0.high = vaddq_s16(input_row.v0.high, offset_vec); + + temp_0 = vld1_u8(ptr + 1 * input_depth); + temp_1 = vld1_u8(ptr + 1 * input_depth + 8); + input_row.v1.low = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_row.v1.high = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_row.v1.low = vaddq_s16(input_row.v1.low, offset_vec); + input_row.v1.high = vaddq_s16(input_row.v1.high, offset_vec); + + temp_0 = vld1_u8(ptr + 2 * input_depth); + temp_1 = vld1_u8(ptr + 2 * input_depth + 8); + input_row.v2.low = vreinterpretq_s16_u16(vmovl_u8(temp_0)); + input_row.v2.high = vreinterpretq_s16_u16(vmovl_u8(temp_1)); + input_row.v2.low = vaddq_s16(input_row.v2.low, offset_vec); + input_row.v2.high = vaddq_s16(input_row.v2.high, offset_vec); + + return input_row; +} + +// Performs multiply accumulate on 3 inputs of depth 16. 
+inline Int32x16 MultiplyAccumulateRowDepth16(Int32x16 output, + const Int16x16x3& filter_row, + const Int16x16x3& input_row) { + output.v0 = vmlal_s16(output.v0, vget_low_s16(filter_row.v0.low), + vget_low_s16(input_row.v0.low)); + output.v1 = vmlal_s16(output.v1, vget_high_s16(filter_row.v0.low), + vget_high_s16(input_row.v0.low)); + output.v2 = vmlal_s16(output.v2, vget_low_s16(filter_row.v0.high), + vget_low_s16(input_row.v0.high)); + output.v3 = vmlal_s16(output.v3, vget_high_s16(filter_row.v0.high), + vget_high_s16(input_row.v0.high)); + + output.v0 = vmlal_s16(output.v0, vget_low_s16(filter_row.v1.low), + vget_low_s16(input_row.v1.low)); + output.v1 = vmlal_s16(output.v1, vget_high_s16(filter_row.v1.low), + vget_high_s16(input_row.v1.low)); + output.v2 = vmlal_s16(output.v2, vget_low_s16(filter_row.v1.high), + vget_low_s16(input_row.v1.high)); + output.v3 = vmlal_s16(output.v3, vget_high_s16(filter_row.v1.high), + vget_high_s16(input_row.v1.high)); + + output.v0 = vmlal_s16(output.v0, vget_low_s16(filter_row.v2.low), + vget_low_s16(input_row.v2.low)); + output.v1 = vmlal_s16(output.v1, vget_high_s16(filter_row.v2.low), + vget_high_s16(input_row.v2.low)); + output.v2 = vmlal_s16(output.v2, vget_low_s16(filter_row.v2.high), + vget_low_s16(input_row.v2.high)); + output.v3 = vmlal_s16(output.v3, vget_high_s16(filter_row.v2.high), + vget_high_s16(input_row.v2.high)); + + return output; +} + +// Applies activation, offset and downquantize on a set of accumulator +// registers of depth 16. Stores results to output. +inline void DownquantizeAndStoreDepth16(Int32x16 acc, int32 output_multiplier, + int output_shift, + int32x4_t output_offset_vec, + int32x4_t output_activation_min_vec, + int32x4_t output_activation_max_vec, + uint8* output_ptr) { + // Fixed-point multiplication. 
+ acc.v0 = vqrdmulhq_n_s32(acc.v0, output_multiplier); + acc.v1 = vqrdmulhq_n_s32(acc.v1, output_multiplier); + acc.v2 = vqrdmulhq_n_s32(acc.v2, output_multiplier); + acc.v3 = vqrdmulhq_n_s32(acc.v3, output_multiplier); + + using gemmlowp::RoundingDivideByPOT; + acc.v0 = RoundingDivideByPOT(acc.v0, output_shift); + acc.v1 = RoundingDivideByPOT(acc.v1, output_shift); + acc.v2 = RoundingDivideByPOT(acc.v2, output_shift); + acc.v3 = RoundingDivideByPOT(acc.v3, output_shift); + + // Add the output offset. + acc.v0 = vaddq_s32(acc.v0, output_offset_vec); + acc.v1 = vaddq_s32(acc.v1, output_offset_vec); + acc.v2 = vaddq_s32(acc.v2, output_offset_vec); + acc.v3 = vaddq_s32(acc.v3, output_offset_vec); + + // Apply the activation function. + acc.v0 = vmaxq_s32(acc.v0, output_activation_min_vec); + acc.v1 = vmaxq_s32(acc.v1, output_activation_min_vec); + acc.v2 = vmaxq_s32(acc.v2, output_activation_min_vec); + acc.v3 = vmaxq_s32(acc.v3, output_activation_min_vec); + + acc.v0 = vminq_s32(acc.v0, output_activation_max_vec); + acc.v1 = vminq_s32(acc.v1, output_activation_max_vec); + acc.v2 = vminq_s32(acc.v2, output_activation_max_vec); + acc.v3 = vminq_s32(acc.v3, output_activation_max_vec); + + // Saturating cast to uint8 and store to destination. + int16x4_t acc_tlla_s16 = vqmovn_s32(acc.v0); + int16x4_t acc_tllb_s16 = vqmovn_s32(acc.v1); + int16x4_t acc_tlha_s16 = vqmovn_s32(acc.v2); + int16x4_t acc_tlhb_s16 = vqmovn_s32(acc.v3); + + int16x8_t res_s16_0 = vcombine_s16(acc_tlla_s16, acc_tllb_s16); + int16x8_t res_s16_1 = vcombine_s16(acc_tlha_s16, acc_tlhb_s16); + uint8x8_t res_u8_0 = vqmovun_s16(res_s16_0); + uint8x8_t res_u8_1 = vqmovun_s16(res_s16_1); + vst1q_u8(output_ptr, vcombine_u8(res_u8_0, res_u8_1)); +} + +// A kernel that is optimized on the number of output cells in the x and y +// direction, and the stride. Assumes 3x3 filters of 16 depth. 
// NOTE(review): the template parameter list of this primary template is
// missing in this copy of the file (the text between `template` and `struct`
// appears to have been stripped). The specializations below are written as
// <1, 2, 1>, <1, 2, 2> and <1, 1> -- restore the parameter list from the
// upstream header before compiling.
template
struct ConvKernel3x3FilterDepth16 {};

// Computes two vertically adjacent output cells (16 channels each) whose 3x3
// input windows start one input row apart, so four consecutive input rows are
// read and the two middle rows are shared between both outputs.
//
// `input_row_width` is the element distance between consecutive input rows,
// `bias_ptr` must point to 16 readable int32 values, and the second output is
// written `output_depth * output_width` elements after `output_ptr`.
template <>
struct ConvKernel3x3FilterDepth16<1, 2, 1> {
  static void Run(const Filter3x3x16& filter, const uint8* input_ptr,
                  int input_depth, int32 input_offset, int input_row_width,
                  const int32* bias_ptr, int32 output_offset,
                  int32 output_multiplier, int output_shift,
                  int32 output_activation_min, int32 output_activation_max,
                  uint8* output_ptr, int output_depth, int output_width) {
    // 16 depth accumulators for the 2 outputs.
    Int32x16 acc0, acc1;

    // Accumulators for top filter.
    acc0.v0 = vld1q_s32(bias_ptr);
    acc0.v1 = vld1q_s32(bias_ptr + 4);
    acc0.v2 = vld1q_s32(bias_ptr + 8);
    acc0.v3 = vld1q_s32(bias_ptr + 12);
    // Accumulators for bottom filter.
    acc1.v0 = vld1q_s32(bias_ptr);
    acc1.v1 = vld1q_s32(bias_ptr + 4);
    acc1.v2 = vld1q_s32(bias_ptr + 8);
    acc1.v3 = vld1q_s32(bias_ptr + 12);

    // Main multiply accumulate work.
    {
      // Load inputs for one filter row at a time.
      // NOTE(review): `input` is passed by value to LoadInputRowDepth16 while
      // still uninitialized; the callee overwrites every field before
      // returning it.
      Int16x16x3 input;

      // Do first row of top filter.
      input = LoadInputRowDepth16(input_ptr, input_depth, input_offset, input);
      acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r0, input);

      // Do second row of top filter.
      input = LoadInputRowDepth16(input_ptr + input_row_width, input_depth,
                                  input_offset, input);
      acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r1, input);

      // The inputs to second row of the top filter are also the inputs to the
      // first row of the bottom filter.
      acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r0, input);

      // Do third row of top filter.
      input = LoadInputRowDepth16(input_ptr + 2 * input_row_width, input_depth,
                                  input_offset, input);
      acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r2, input);

      // The inputs to third row of the top filter are also the inputs to the
      // second row of the bottom filter.
      acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r1, input);

      // Do third row of bottom filter.
      input = LoadInputRowDepth16(input_ptr + 3 * input_row_width, input_depth,
                                  input_offset, input);
      acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r2, input);
    }

    // Apply activation, downquantize and store.
    int32x4_t output_offset_vec = vdupq_n_s32(output_offset);
    int32x4_t output_activation_min_vec = vdupq_n_s32(output_activation_min);
    int32x4_t output_activation_max_vec = vdupq_n_s32(output_activation_max);

    DownquantizeAndStoreDepth16(acc0, output_multiplier, output_shift,
                                output_offset_vec, output_activation_min_vec,
                                output_activation_max_vec, output_ptr);

    // The bottom output lives exactly one output row below the top one.
    DownquantizeAndStoreDepth16(acc1, output_multiplier, output_shift,
                                output_offset_vec, output_activation_min_vec,
                                output_activation_max_vec,
                                output_ptr + output_depth * output_width);
  }
};

// Same as the <1, 2, 1> kernel, but the two 3x3 input windows start two input
// rows apart: five consecutive input rows are read and only the middle one
// (row 2) is shared between the top and bottom output.
template <>
struct ConvKernel3x3FilterDepth16<1, 2, 2> {
  static void Run(const Filter3x3x16& filter, const uint8* input_ptr,
                  int input_depth, int32 input_offset, int input_row_width,
                  const int32* bias_ptr, int32 output_offset,
                  int32 output_multiplier, int output_shift,
                  int32 output_activation_min, int32 output_activation_max,
                  uint8* output_ptr, int output_depth, int output_width) {
    // 16 depth accumulators for the 2 outputs.
    Int32x16 acc0, acc1;

    // Accumulators for top filter.
    acc0.v0 = vld1q_s32(bias_ptr);
    acc0.v1 = vld1q_s32(bias_ptr + 4);
    acc0.v2 = vld1q_s32(bias_ptr + 8);
    acc0.v3 = vld1q_s32(bias_ptr + 12);
    // Accumulators for bottom filter.
    acc1.v0 = vld1q_s32(bias_ptr);
    acc1.v1 = vld1q_s32(bias_ptr + 4);
    acc1.v2 = vld1q_s32(bias_ptr + 8);
    acc1.v3 = vld1q_s32(bias_ptr + 12);

    // Main multiply accumulate work.
    {
      // Load inputs for one filter row at a time.
      Int16x16x3 input;

      // Do first row of top filter.
      input = LoadInputRowDepth16(input_ptr, input_depth, input_offset, input);
      acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r0, input);

      // Do second row of top filter.
      input = LoadInputRowDepth16(input_ptr + input_row_width, input_depth,
                                  input_offset, input);
      acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r1, input);

      // Do third row of top filter.
      input = LoadInputRowDepth16(input_ptr + 2 * input_row_width, input_depth,
                                  input_offset, input);
      acc0 = MultiplyAccumulateRowDepth16(acc0, filter.r2, input);

      // The inputs to third row of the top filter are also the inputs
      // to first row of the bottom filter.
      acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r0, input);

      // Do second row of bottom filter.
      input = LoadInputRowDepth16(input_ptr + 3 * input_row_width, input_depth,
                                  input_offset, input);
      acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r1, input);

      // Do third row of bottom filter.
      input = LoadInputRowDepth16(input_ptr + 4 * input_row_width, input_depth,
                                  input_offset, input);
      acc1 = MultiplyAccumulateRowDepth16(acc1, filter.r2, input);
    }

    // Apply activation, downquantize and store.
    int32x4_t output_offset_vec = vdupq_n_s32(output_offset);
    int32x4_t output_activation_min_vec = vdupq_n_s32(output_activation_min);
    int32x4_t output_activation_max_vec = vdupq_n_s32(output_activation_max);

    DownquantizeAndStoreDepth16(acc0, output_multiplier, output_shift,
                                output_offset_vec, output_activation_min_vec,
                                output_activation_max_vec, output_ptr);

    DownquantizeAndStoreDepth16(acc1, output_multiplier, output_shift,
                                output_offset_vec, output_activation_min_vec,
                                output_activation_max_vec,
                                output_ptr + output_depth * output_width);
  }
};

// Computes a single output cell (16 channels) from three consecutive input
// rows. `output_depth` and `output_width` are accepted for signature parity
// with the two-output kernels but are not read here.
template <>
struct ConvKernel3x3FilterDepth16<1, 1> {
  static void Run(const Filter3x3x16& filter, const uint8* input_ptr,
                  int input_depth, int32 input_offset, int input_row_width,
                  const int32* bias_ptr, int32 output_offset,
                  int32 output_multiplier, int output_shift,
                  int32 output_activation_min, int32 output_activation_max,
                  uint8* output_ptr, int output_depth, int output_width) {
    // Accumulators start from the 16 per-channel bias values.
    Int32x16 acc;
    acc.v0 = vld1q_s32(bias_ptr);
    acc.v1 = vld1q_s32(bias_ptr + 4);
    acc.v2 = vld1q_s32(bias_ptr + 8);
    acc.v3 = vld1q_s32(bias_ptr + 12);

    // Main multiply accumulate work.
    {
      // Load inputs for one filter row at a time.
      Int16x16x3 input;

      // Do first row.
      input = LoadInputRowDepth16(input_ptr, input_depth, input_offset, input);
      acc = MultiplyAccumulateRowDepth16(acc, filter.r0, input);

      // Do second row.
      input = LoadInputRowDepth16(input_ptr + input_row_width, input_depth,
                                  input_offset, input);
      acc = MultiplyAccumulateRowDepth16(acc, filter.r1, input);

      // Do third row.
      input = LoadInputRowDepth16(input_ptr + 2 * input_row_width, input_depth,
                                  input_offset, input);
      acc = MultiplyAccumulateRowDepth16(acc, filter.r2, input);
    }

    // Apply activation, downquantize and store.
    int32x4_t output_offset_vec = vdupq_n_s32(output_offset);
    int32x4_t output_activation_min_vec = vdupq_n_s32(output_activation_min);
    int32x4_t output_activation_max_vec = vdupq_n_s32(output_activation_max);

    DownquantizeAndStoreDepth16(acc, output_multiplier, output_shift,
                                output_offset_vec, output_activation_min_vec,
                                output_activation_max_vec, output_ptr);
  }
};

// Quantized depthwise convolution driver for 3x3 filters that processes the
// channels in groups of 16.
//
// For every batch and every 16-channel group it loads the filter once, then
// walks the output two rows at a time with the <1, 2, 1> kernel and finishes
// any remaining bottom row with the <1, 1> kernel.
//
// Channels beyond the last full group of 16 are not processed by this
// function (the depth loop stops at `output_depth - 16`), and `bias_dims` is
// not read.
inline void DepthwiseConv3by3FilterDepth16(
    const uint8* input_data, const Dims<4>& input_dims, int32 input_offset,
    const uint8* filter_data, const Dims<4>& filter_dims, int32 filter_offset,
    const int32* bias_data, const Dims<4>& bias_dims, int stride_width,
    int stride_height, int pad_width, int pad_height, int depth_multiplier,
    int32 output_offset, int32 output_multiplier, int output_shift,
    int32 output_activation_min, int32 output_activation_max,
    uint8* output_data, const Dims<4>& output_dims) {
  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
  const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0);
  const int input_height = ArraySize(input_dims, 2);
  const int input_width = ArraySize(input_dims, 1);
  const int input_depth = ArraySize(input_dims, 0);
  const int filter_height = ArraySize(filter_dims, 2);
  const int filter_width = ArraySize(filter_dims, 1);
  const int output_height = ArraySize(output_dims, 2);
  const int output_width = ArraySize(output_dims, 1);

  // Algorithm assumes below constraints. It is optimized for depth multiplier
  // of 1, 3x3 filter, no padding, strides 1 and 2.
  // NOTE(review): the checks below only accept stride 1 in both directions,
  // and the main loop always dispatches the <1, 2, 1> kernel; the <1, 2, 2>
  // kernel is not referenced in this function. Either the comment above or
  // the checks/dispatch are out of date -- confirm which is intended.
  TFLITE_DCHECK(output_depth == input_depth * depth_multiplier);
  TFLITE_DCHECK(depth_multiplier == 1);
  TFLITE_DCHECK(filter_height == 3);
  TFLITE_DCHECK(filter_width == 3);
  TFLITE_DCHECK(pad_height == 0);
  TFLITE_DCHECK(pad_width == 0);
  TFLITE_DCHECK(stride_width == 1);
  TFLITE_DCHECK(stride_height == 1);

  // The number of outputs to process in the main loop.
  const int num_x_outputs = 1;
  const int num_y_outputs = 2;

  // Element strides. `input_row_width` is computed from output_depth, which
  // equals input_depth under the depth_multiplier == 1 constraint above.
  const int input_row_width = output_depth * (input_width + 2 * pad_width);
  const int input_batch_size =
      input_row_width * (input_height + 2 * pad_height);
  const int output_batch_size = output_depth * output_width * output_height;
  const int input_ptr_x_increment = input_depth * stride_width;

  // Calculate extents of non-boundary loop.
  // First output column whose window starts at a non-negative input column.
  int out_x_start = 0;
  for (; out_x_start < input_width; out_x_start++) {
    int in_x = (out_x_start * stride_width) - pad_width;
    if (in_x >= 0) {
      break;
    }
  }
  // One past the last output column whose window still fits in the input.
  // Stays at -1 if no column fits.
  int out_x_end = output_width - 1;
  for (; out_x_end >= 0; out_x_end--) {
    int in_x = (out_x_end * stride_width) - pad_width;
    int in_x_end = in_x + filter_width + (num_x_outputs - 1) * stride_width;
    if (in_x_end <= input_width) {
      out_x_end++;
      break;
    }
  }
  int out_y_start = 0;
  for (; out_y_start < input_height; out_y_start++) {
    int in_y = (out_y_start * stride_height) - pad_height;
    if (in_y >= 0) {
      break;
    }
  }
  // One past the last output row from which a full pair of output rows
  // (num_y_outputs == 2) still fits in the input.
  int out_y_end = output_height - 1;
  for (; out_y_end >= 0; out_y_end--) {
    int in_y = (out_y_end * stride_height) - pad_height;
    int in_y_end = in_y + filter_height + (num_y_outputs - 1) * stride_height;
    if (in_y_end <= input_height) {
      out_y_end++;
      break;
    }
  }

  // Offsets for preloading inputs.
  // i(3 * r + c) addresses input row r (0..3), column c (0..2) relative to
  // the current window origin.
  const int i0 = 0;
  const int i1 = input_depth;
  const int i2 = 2 * input_depth;
  const int i3 = input_row_width;
  const int i4 = input_row_width + input_depth;
  const int i5 = input_row_width + 2 * input_depth;
  const int i6 = 2 * input_row_width;
  const int i7 = 2 * input_row_width + input_depth;
  const int i8 = 2 * input_row_width + 2 * input_depth;
  const int i9 = 3 * input_row_width;
  const int i10 = 3 * input_row_width + input_depth;
  const int i11 = 3 * input_row_width + 2 * input_depth;

  for (int b = 0; b < batches; ++b) {
    // Bias and filter are shared across batches, so restart at their
    // beginning for every batch.
    const int32* bias_ptr = bias_data;
    const uint8* filter_ptr = filter_data;

    const int in_batch_offset = b * input_batch_size;
    const int out_batch_offset = b * output_batch_size;

    int depth = 0;
    for (; depth <= output_depth - 16; depth += 16) {
      // The filter for this 16-channel group is loaded once and reused for
      // every output position.
      Filter3x3x16 filter =
          LoadFilterDepth16(filter_ptr, filter_offset, output_depth);

      // Handle 1x2 outputs.
      int out_y = out_y_start;
      for (; out_y < out_y_end; out_y += num_y_outputs) {
        int out_x = out_x_start;

        int in_y_offset =
            stride_height * input_row_width * (out_y + pad_height);
        int in_x_offset = stride_width * input_depth * (out_x + pad_width);

        const uint8* input_ptr =
            input_data + depth + in_x_offset + in_y_offset + in_batch_offset;

        uint8* output_ptr = output_data + depth + (out_x * output_depth) +
                            (output_depth * output_width * out_y) +
                            out_batch_offset;

        // Preload inputs. If input depth is large, preload every value of the
        // input for this depth range. Otherwise, preload only the first values
        // of each row.
        if (input_depth >= 32) {
          preload_l1_keep(input_ptr + i0);
          preload_l1_keep(input_ptr + i1);
          preload_l1_keep(input_ptr + i2);
          preload_l1_keep(input_ptr + i3);
          preload_l1_keep(input_ptr + i4);
          preload_l1_keep(input_ptr + i5);
          preload_l1_keep(input_ptr + i6);
          preload_l1_keep(input_ptr + i7);
          preload_l1_keep(input_ptr + i8);
          preload_l1_keep(input_ptr + i9);
          preload_l1_keep(input_ptr + i10);
          preload_l1_keep(input_ptr + i11);
        } else {
          preload_l1_keep(input_ptr + i0);
          preload_l1_keep(input_ptr + i3);
          preload_l1_keep(input_ptr + i6);
          preload_l1_keep(input_ptr + i9);
        }

        for (; out_x < out_x_end; out_x += num_x_outputs) {
          ConvKernel3x3FilterDepth16<1, 2, 1>::Run(
              filter, input_ptr, input_depth, input_offset, input_row_width,
              bias_ptr, output_offset, output_multiplier, output_shift,
              output_activation_min, output_activation_max, output_ptr,
              output_depth, output_width);

          input_ptr += input_ptr_x_increment * num_x_outputs;
          output_ptr += output_depth * num_x_outputs;

          // Preload the next inputs depending on stride.
          // With stride 1 only the right-most column of the next window is
          // new; with stride 2 the two right-most columns are.
          if (stride_width == 1) {
            preload_l1_keep(input_ptr + i2);
            preload_l1_keep(input_ptr + i5);
            preload_l1_keep(input_ptr + i8);
            preload_l1_keep(input_ptr + i11);
          } else if (stride_width == 2) {
            preload_l1_keep(input_ptr + i1);
            preload_l1_keep(input_ptr + i2);
            preload_l1_keep(input_ptr + i4);
            preload_l1_keep(input_ptr + i5);
            preload_l1_keep(input_ptr + i7);
            preload_l1_keep(input_ptr + i8);
            preload_l1_keep(input_ptr + i10);
            preload_l1_keep(input_ptr + i11);
          }
        }

        // Handle the rest of the right side.
        for (; out_x < output_width; out_x++) {
          // This code path can only be reached if we're handling >1 x outputs
          // at a time or support padding.
        }
      }

      // Handle the rest of the bottom side.
      // Any output row not covered by the two-row main loop is produced one
      // cell at a time with the single-output kernel.
      for (; out_y < output_height; out_y++) {
        int out_x = out_x_start;

        int in_y_offset =
            stride_height * input_row_width * (out_y + pad_height);
        int in_x_offset = stride_width * input_depth * (out_x + pad_width);

        const uint8* input_ptr =
            input_data + depth + in_x_offset + in_y_offset + in_batch_offset;

        uint8* output_ptr = output_data + depth + (out_x * output_depth) +
                            (output_depth * output_width * out_y) +
                            out_batch_offset;

        for (; out_x < output_width; out_x++) {
          ConvKernel3x3FilterDepth16<1, 1>::Run(
              filter, input_ptr, input_depth, input_offset, input_row_width,
              bias_ptr, output_offset, output_multiplier, output_shift,
              output_activation_min, output_activation_max, output_ptr,
              output_depth, output_width);

          input_ptr += input_ptr_x_increment;
          output_ptr += output_depth;
        }
      }
      // Advance to the next group of 16 channels.
      filter_ptr += 16;
      bias_ptr += 16;
    }
  }
}

#endif  // __aarch64__

}  // namespace optimized_ops
}  // namespace tflite

#endif  // TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_OPTIMIZED_DEPTHWISECONV_UINT8_3X3_FILTER_H_
-- GitLab From 040571b4fd6a24d1cfaf4d7f954841d7f33d2b44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yan=20Facai=20=28=E9=A2=9C=E5=8F=91=E6=89=8D=29?= Date: Thu, 8 Mar 2018 03:11:06 +0800 Subject: [PATCH 534/884]
add rolling window batch operation for tf.data.Dataset (#16123) * ENH: add slide_dataset_op * TST: add test case * DOC: add docment * CLN: implement sliding_window_batch * CLN: hiddent SlideDataset * CLN: remove Dataset.slide * DOC: 2017 -> 2018 * CLN: use push_back * DOC: drop the final smaller block * CLN: rename slide_size -> window_size * CLN: rename slide_step -> stride * DOC: no default for stride at c++ side * DOC: revise comments * BLD: expose sliding_window_batch API * CLN: code style * DOC: revise documents * CLN: move to IteratorContext * TST: remove contrib.dataset_ops * DOC: move desp to api def * CLN: fix python 2 indent * DOC: used by core.apply method --- tensorflow/contrib/data/__init__.py | 4 + .../contrib/data/python/kernel_tests/BUILD | 17 ++ .../kernel_tests/slide_dataset_op_test.py | 242 +++++++++++++++++ tensorflow/contrib/data/python/ops/BUILD | 1 + tensorflow/contrib/data/python/ops/sliding.py | 102 +++++++ .../base_api/api_def_SlideDataset.pbtxt | 18 ++ tensorflow/core/kernels/data/BUILD | 14 + .../core/kernels/data/slide_dataset_op.cc | 252 ++++++++++++++++++ tensorflow/core/ops/dataset_ops.cc | 12 +- 9 files changed, 661 insertions(+), 1 deletion(-) create mode 100644 tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py create mode 100644 tensorflow/contrib/data/python/ops/sliding.py create mode 100644 tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt create mode 100644 tensorflow/core/kernels/data/slide_dataset_op.cc diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 1777727de8..ab6489ab4c 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -38,6 +38,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. 
@@rejection_resample @@scan @@shuffle_and_repeat +@@sliding_window_batch @@sloppy_interleave @@unbatch @@ -69,6 +70,9 @@ from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample from tensorflow.contrib.data.python.ops.scan_ops import scan from tensorflow.contrib.data.python.ops.shuffle_ops import shuffle_and_repeat +from tensorflow.contrib.data.python.ops.sliding import sliding_window_batch +from tensorflow.python.data.ops.iterator_ops import Iterator +from tensorflow.python.ops.parsing_ops import parse_single_example_v2 as parse_single_example # pylint: enable=unused-import from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 22bcf90dd4..a157acc020 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -495,6 +495,23 @@ py_test( ], ) +tf_py_test( + name = "slide_dataset_op_test", + size = "small", + srcs = ["slide_dataset_op_test.py"], + additional_deps = [ + "//tensorflow/contrib/data/python/ops:dataset_ops", + "//tensorflow/contrib/data/python/ops:transformation_ops", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:dtypes", + "//tensorflow/python:errors", + "//tensorflow/python:math_ops", + "//tensorflow/python:sparse_tensor", + "//third_party/py/numpy", + ], +) + filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py new file mode 100644 index 0000000000..33c48e20be --- /dev/null +++ b/tensorflow/contrib/data/python/kernel_tests/slide_dataset_op_test.py @@ -0,0 +1,242 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for the experimental input pipeline ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from tensorflow.contrib.data.python.ops import sliding
from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import errors
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test


class SlideDatasetTest(test.TestCase):
  """Tests for the `sliding.sliding_window_batch` dataset transformation."""

  def testSlideDataset(self):
    """Slides windows over a squared, repeated tensor-slice dataset.

    Covers window/stride pairs that do and do not divide the input evenly,
    inputs shorter than one window, an empty input, and window_size/stride
    feeds that are expected to fail when the iterator is initialized.
    """
    components = (np.arange(7),
                  np.array([[1, 2, 3]]) * np.arange(7)[:, np.newaxis],
                  np.array(37.0) * np.arange(7))

    count = array_ops.placeholder(dtypes.int64, shape=[])
    window_size = array_ops.placeholder(dtypes.int64, shape=[])
    stride = array_ops.placeholder(dtypes.int64, shape=[])

    def _map_fn(x, y, z):
      return math_ops.square(x), math_ops.square(y), math_ops.square(z)

    # The pipeline is TensorSliceDataset -> MapDataset(square_3) ->
    # RepeatDataset(count) -> _SlideDataset(window_size, stride).
    iterator = (dataset_ops.Dataset.from_tensor_slices(components)
                .map(_map_fn)
                .repeat(count)
                .apply(sliding.sliding_window_batch(window_size, stride))
                .make_initializable_iterator())
    init_op = iterator.initializer
    get_next = iterator.get_next()

    # The window dimension is prepended and statically unknown.
    self.assertEqual([[None] + list(c.shape[1:]) for c in components],
                     [t.shape.as_list() for t in get_next])

    with self.test_session() as sess:
      # Slide over a finite input, where the window_size divides the
      # total number of elements.
      sess.run(init_op, feed_dict={count: 20, window_size: 14, stride: 7})
      # Number of full windows over 20 * 7 elements; this is the same
      # formula as the output size of an unpadded convolution.
      num_batches = (20 * 7 - 14) // 7 + 1
      for i in range(num_batches):
        result = sess.run(get_next)
        for component, result_component in zip(components, result):
          for j in range(14):
            # The source repeats with period 7, hence the modulo.
            self.assertAllEqual(component[(i*7 + j) % 7]**2,
                                result_component[j])
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

      # Slide over a finite input, where the window_size does not
      # divide the total number of elements.
      sess.run(init_op, feed_dict={count: 20, window_size: 17, stride: 9})

      num_batches = (20 * 7 - 17) // 9 + 1
      for i in range(num_batches):
        result = sess.run(get_next)
        for component, result_component in zip(components, result):
          for j in range(17):
            self.assertAllEqual(component[(i*9 + j) % 7]**2,
                                result_component[j])
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

      # Slide over a finite input, which is less than window_size,
      # should fail straight away.
      sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 4})
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

      sess.run(init_op, feed_dict={count: 1, window_size: 10, stride: 8})
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

      # Slide over an empty input should fail straight away.
      sess.run(init_op, feed_dict={count: 0, window_size: 8, stride: 4})
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

      # Empty window_size should be an initialization time error.
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(init_op, feed_dict={count: 14, window_size: 0, stride: 0})

      # Invalid stride should be an initialization time error.
      # The cases below are stride == 0, stride == window_size and
      # stride > window_size.
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 0})
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 3})
      with self.assertRaises(errors.InvalidArgumentError):
        sess.run(init_op, feed_dict={count: 14, window_size: 3, stride: 5})

  def assertSparseValuesEqual(self, a, b):
    """Asserts that two sparse values have equal indices, values and shape."""
    self.assertAllEqual(a.indices, b.indices)
    self.assertAllEqual(a.values, b.values)
    self.assertAllEqual(a.dense_shape, b.dense_shape)

  def testSlideSparse(self):
    """Slides a window of 5 with stride 3 over sparse elements of shape [1]."""

    def _sparse(i):
      return sparse_tensor.SparseTensorValue(
          indices=[[0]], values=(i * [1]), dense_shape=[1])

    iterator = dataset_ops.Dataset.range(10).map(_sparse).apply(
        sliding.sliding_window_batch(5, 3)).make_initializable_iterator()
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.test_session() as sess:
      sess.run(init_op)
      num_batches = (10 - 5) // 3 + 1
      for i in range(num_batches):
        actual = sess.run(get_next)
        # Window i holds elements 3 * i .. 3 * i + 4, one per row.
        expected = sparse_tensor.SparseTensorValue(
            indices=[[0, 0], [1, 0], [2, 0], [3, 0], [4, 0]],
            values=[i * 3, i * 3 + 1, i * 3 + 2, i * 3 + 3, i * 3 + 4],
            dense_shape=[5, 1])
        self.assertTrue(sparse_tensor.is_sparse(actual))
        self.assertSparseValuesEqual(actual, expected)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

  def testSlideSparseWithDifferentDenseShapes(self):
    """Slides over sparse elements whose dense shape grows with the index."""

    def _sparse(i):
      return sparse_tensor.SparseTensorValue(
          indices=array_ops.expand_dims(
              math_ops.range(i, dtype=dtypes.int64), 1),
          values=array_ops.fill([math_ops.to_int32(i)], i),
          dense_shape=[i])

    iterator = dataset_ops.Dataset.range(10).map(_sparse).apply(
        sliding.sliding_window_batch(5, 3)).make_initializable_iterator()
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.test_session() as sess:
      sess.run(init_op)
      num_batches = (10 - 5) // 3 + 1
      for i in range(num_batches):
        actual = sess.run(get_next)
        expected_indices = []
        expected_values = []
        # Row j of window i is element i * 3 + j, which holds that many
        # entries, all equal to its own index.
        for j in range(5):
          for k in range(i * 3 + j):
            expected_indices.append([j, k])
            expected_values.append(i * 3 + j)
        # The expected second dimension is the size of the largest element
        # in the window (the last one, i * 3 + 4).
        expected = sparse_tensor.SparseTensorValue(
            indices=expected_indices,
            values=expected_values,
            dense_shape=[5, i * 3 + 5 - 1])
        self.assertTrue(sparse_tensor.is_sparse(actual))
        self.assertSparseValuesEqual(actual, expected)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

  def testNestedSlideSparse(self):
    """Applies two sliding windows in sequence to sparse elements."""

    def _sparse(i):
      return sparse_tensor.SparseTensorValue(
          indices=[[0]], values=(i * [1]), dense_shape=[1])

    # Inner windows: size 4, stride 2. Outer windows: size 3, stride 1 over
    # the inner windows.
    iterator = (dataset_ops.Dataset.range(10)
                .map(_sparse)
                .apply(sliding.sliding_window_batch(4, 2))
                .apply(sliding.sliding_window_batch(3, 1))
                .make_initializable_iterator())
    init_op = iterator.initializer
    get_next = iterator.get_next()

    with self.test_session() as sess:
      sess.run(init_op)
      # Slide: 1st batch.
      actual = sess.run(get_next)
      expected = sparse_tensor.SparseTensorValue(
          indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0],
                   [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0],
                   [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]],
          values=[0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7],
          dense_shape=[3, 4, 1])
      self.assertTrue(sparse_tensor.is_sparse(actual))
      self.assertSparseValuesEqual(actual, expected)
      # Slide: 2nd batch.
      actual = sess.run(get_next)
      expected = sparse_tensor.SparseTensorValue(
          indices=[[0, 0, 0], [0, 1, 0], [0, 2, 0], [0, 3, 0],
                   [1, 0, 0], [1, 1, 0], [1, 2, 0], [1, 3, 0],
                   [2, 0, 0], [2, 1, 0], [2, 2, 0], [2, 3, 0]],
          values=[2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9],
          dense_shape=[3, 4, 1])
      self.assertTrue(sparse_tensor.is_sparse(actual))
      self.assertSparseValuesEqual(actual, expected)
      with self.assertRaises(errors.OutOfRangeError):
        sess.run(get_next)

  def testSlideShapeError(self):
    """Checks the error raised when a window mixes element shapes."""

    def generator():
      yield [1.0, 2.0, 3.0]
      yield [4.0, 5.0, 6.0]
      yield [7.0, 8.0, 9.0, 10.0]

    iterator = (dataset_ops.Dataset.from_generator(generator, dtypes.float32,
                                                   output_shapes=[None])
                .apply(sliding.sliding_window_batch(3, 1))
                .make_initializable_iterator())
    next_element = iterator.get_next()

    with self.test_session() as sess:
      sess.run(iterator.initializer)
      with self.assertRaisesRegexp(
          errors.InvalidArgumentError,
          r"Cannot batch tensors with different shapes in component 0. "
          r"First element had shape \[3\] and element 2 had shape \[4\]."):
        sess.run(next_element)


if __name__ == "__main__":
  test.main()
diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 16fe31675f..1c26296d62 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -104,6 +104,7 @@ py_library( "interleave_ops.py", "resampling.py", "scan_ops.py", + "sliding.py", "stats_ops.py", "threadpool.py", "unique.py", diff --git a/tensorflow/contrib/data/python/ops/sliding.py b/tensorflow/contrib/data/python/ops/sliding.py new file mode 100644 index 0000000000..19cc3cb89f --- /dev/null +++ b/tensorflow/contrib/data/python/ops/sliding.py @@ -0,0 +1,102 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Sliding dataset transformations.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.util import nest +from tensorflow.python.data.util import sparse +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import gen_dataset_ops + + +class _SlideDataset(dataset_ops.Dataset): + """A `Dataset` that passes a sliding window over its input.""" + + def __init__(self, input_dataset, window_size, stride=1): + """See `sliding_window_batch` for details.""" + super(_SlideDataset, self).__init__() + self._input_dataset = input_dataset + self._window_size = ops.convert_to_tensor( + window_size, dtype=dtypes.int64, name="window_size") + self._stride = ops.convert_to_tensor( + stride, dtype=dtypes.int64, name="stride") + + def _as_variant_tensor(self): + return gen_dataset_ops.slide_dataset( + self._input_dataset._as_variant_tensor(), # pylint: disable=protected-access + window_size=self._window_size, + stride=self._stride, + output_shapes=nest.flatten( + sparse.as_dense_shapes(self.output_shapes, self.output_classes)), + output_types=nest.flatten( + sparse.as_dense_types(self.output_types, self.output_classes))) + + @property + def output_classes(self): + return self._input_dataset.output_classes + + @property + def 
output_shapes(self): + input_shapes = self._input_dataset.output_shapes + return nest.pack_sequence_as(input_shapes, [ + tensor_shape.vector(None).concatenate(s) + for s in nest.flatten(self._input_dataset.output_shapes) + ]) + + @property + def output_types(self): + return self._input_dataset.output_types + + +def sliding_window_batch(window_size, stride=1): + """A sliding window with size of `window_size` and step of `stride`. + + This transformation passes a sliding window over this dataset. The + window size is `window_size` and step size is `stride`. If the remaining + elements cannot fill up the sliding window, this transformation will + drop the final smaller window. For example: + + ```python + # NOTE: The following examples use `{ ... }` to represent the + # contents of a dataset. + a = { [1], [2], [3], [4], [5], [6] } + + a.apply(tf.contrib.data.sliding_window_batch(window_size=3, stride=2)) == + { + [[1], [2], [3]], + [[3], [4], [5]], + } + ``` + + Args: + window_size: A `tf.int64` scalar `tf.Tensor`, representing the number of + elements in the sliding window. + stride: (Optional.) A `tf.int64` scalar `tf.Tensor`, representing the + number of elements the sliding window moves forward in one iteration. + The default is `1`. It must be in `[1, window_size)`. + + Returns: + A `Dataset` transformation function, which can be passed to + @{tf.data.Dataset.apply}. 
+ """ + def _apply_fn(dataset): + return _SlideDataset(dataset, window_size, stride) + + return _apply_fn diff --git a/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt b/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt new file mode 100644 index 0000000000..9fabe7863e --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_SlideDataset.pbtxt @@ -0,0 +1,18 @@ +op { + graph_op_name: "SlideDataset" + in_arg { + name: "window_size" + description: <(ctx, "window_size", &window_size)); + OP_REQUIRES_OK(ctx, + ParseScalarArgument(ctx, "stride", &stride)); + OP_REQUIRES( + ctx, window_size > 0, + errors::InvalidArgument("Window size must be greater than zero.")); + OP_REQUIRES( + ctx, stride > 0 && stride < window_size, + errors::InvalidArgument("Stride must be in [1, window_size).")); + + *output = new Dataset(ctx, window_size, stride, input); + } + + private: + class Dataset : public GraphDatasetBase { + public: + Dataset(OpKernelContext* ctx, int64 window_size, int64 stride, const DatasetBase* input) + : GraphDatasetBase(ctx), window_size_(window_size), stride_(stride), input_(input) { + input_->Ref(); + + const auto& input_shapes = input_->output_shapes(); + output_shapes_.reserve(input_shapes.size()); + for (const auto& input_shape : input_shapes) { + output_shapes_.emplace_back( + PartialTensorShape({-1}).Concatenate(input_shape)); + } + } + + ~Dataset() override { input_->Unref(); } + + std::unique_ptr MakeIterator( + const string& prefix) const override { + return std::unique_ptr(new Iterator( + Iterator::Params{this, strings::StrCat(prefix, "::Slide")})); + } + + const DataTypeVector& output_dtypes() const override { + return input_->output_dtypes(); + } + + const std::vector& output_shapes() const override { + return output_shapes_; + } + + string DebugString() override { + return strings::StrCat("SlideDatasetOp(", window_size_, ", ", stride_, ")::Dataset"); + } + + protected: + Status AsGraphDefInternal(OpKernelContext* ctx, 
DatasetGraphDefBuilder* b, + Node** output) const override { + Node* input_graph_node = nullptr; + TF_RETURN_IF_ERROR(b->AddParentDataset(ctx, input_, &input_graph_node)); + Node* window_size = nullptr; + Node* stride = nullptr; + TF_RETURN_IF_ERROR(b->AddScalar(window_size_, &window_size)); + TF_RETURN_IF_ERROR(b->AddScalar(stride_, &stride)); + TF_RETURN_IF_ERROR( + b->AddDataset(this, {input_graph_node, window_size, stride}, output)); + return Status::OK(); + } + + private: + + class Iterator : public DatasetIterator { + public: + explicit Iterator(const Params& params) + : DatasetIterator(params), + input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} + + Status GetNextInternal(IteratorContext* ctx, + std::vector* out_tensors, + bool* end_of_sequence) override { + const int64 window_size = dataset()->window_size_; + const int64 stride = dataset()->stride_; + std::vector> batch_elements; + { + mutex_lock l(mu_); + if (!input_impl_) { + *end_of_sequence = true; + return Status::OK(); + } + batch_elements.reserve(window_size); + const bool first_call = cache_.empty(); + if (first_call) { + cache_.reserve(window_size); + } else { + // Reuse cache in the previous iteration. + cache_.swap(batch_elements); + } + // Fill up with new elements. + *end_of_sequence = false; + for (size_t i = batch_elements.size(); i < window_size && !*end_of_sequence; + ++i) { + std::vector batch_element_tuple; + TF_RETURN_IF_ERROR(input_impl_->GetNext(ctx, &batch_element_tuple, + end_of_sequence)); + if (!*end_of_sequence) { + batch_elements.push_back(std::move(batch_element_tuple)); + } else { + input_impl_.reset(); + } + } + // Drop the final smaller blocks. + if (batch_elements.size() < window_size) { + DCHECK(*end_of_sequence); + return Status::OK(); + } + // Cache the data used for the next iteration. + for (size_t i = stride; i < window_size; ++i) { + cache_.emplace_back(batch_elements[i]); + } + } + + // Construct output tensors. 
+ // The code below is copied from batch_dataset_op.cc. + const size_t num_tuple_components = batch_elements[0].size(); + const int64 num_batch_elements = batch_elements.size(); + for (size_t component_index = 0; component_index < num_tuple_components; + ++component_index) { + const Tensor& first_element = batch_elements[0][component_index]; + TensorShape batch_component_shape({num_batch_elements}); + batch_component_shape.AppendShape(first_element.shape()); + Tensor batch_component(cpu_allocator(), first_element.dtype(), + batch_component_shape); + // Build the output tuple component by copying one slice + // from each input element in the batch. + for (size_t i = 0; i < num_batch_elements; ++i) { + if (batch_elements[i][component_index].shape() != + first_element.shape()) { + return errors::InvalidArgument( + "Cannot batch tensors with different shapes in component ", + component_index, ". First element had shape ", + first_element.shape().DebugString(), " and element ", i, + " had shape ", + batch_elements[i][component_index].shape().DebugString(), + "."); + } + TF_RETURN_IF_ERROR(batch_util::CopyElementToSlice( + std::move(batch_elements[i][component_index]), &batch_component, + i)); + } + out_tensors->emplace_back(std::move(batch_component)); + } + *end_of_sequence = false; + return Status::OK(); + } + + protected: + Status SaveInternal(IteratorStateWriter* writer) override { + mutex_lock l(mu_); + if (!input_impl_) { + TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("input_impl_empty"), "")); + } else { + TF_RETURN_IF_ERROR(SaveParent(writer, input_impl_)); + } + // Save cache. Every key is wrapped in full_name(), like + // "input_impl_empty" above, so that the cache entries of two slide + // iterators in the same pipeline do not share checkpoint keys. 
+ TF_RETURN_IF_ERROR( + writer->WriteScalar(full_name("cache_size"), cache_.size())); + for (int64 i = 0; i < cache_.size(); i++) { + TF_RETURN_IF_ERROR(writer->WriteScalar( + full_name(strings::StrCat("cache[", i, "]_size")), cache_[i].size())); + for (int64 j = 0; j < cache_[i].size(); j++) { + TF_RETURN_IF_ERROR(writer->WriteTensor( + full_name(strings::StrCat("cache[", i, "][", j, "]")), cache_[i][j])); + } + } + return Status::OK(); + } + + Status RestoreInternal(IteratorContext* ctx, + IteratorStateReader* reader) override { + mutex_lock l(mu_); + if (!reader->Contains(full_name("input_impl_empty"))) { + TF_RETURN_IF_ERROR(RestoreParent(ctx, reader, input_impl_)); + } else { + input_impl_.reset(); + } + // Restore cache. The keys must match the full_name()-wrapped keys + // written by SaveInternal. + int64 cache_size; + TF_RETURN_IF_ERROR( + reader->ReadScalar(full_name("cache_size"), &cache_size)); + cache_.resize(cache_size); + for (int64 i = 0; i < cache_size; i++) { + int64 vector_size; + TF_RETURN_IF_ERROR(reader->ReadScalar( + full_name(strings::StrCat("cache[", i, "]_size")), &vector_size)); + cache_[i].resize(vector_size); + for (int64 j = 0; j < vector_size; j++) { + TF_RETURN_IF_ERROR(reader->ReadTensor( + full_name(strings::StrCat("cache[", i, "][", j, "]")), &cache_[i][j])); + } + } + return Status::OK(); + } + + private: + mutex mu_; + std::vector> cache_ GUARDED_BY(mu_); + std::unique_ptr input_impl_ GUARDED_BY(mu_); + }; + + const int64 window_size_; + const int64 stride_; + const DatasetBase* const input_; + std::vector output_shapes_; + }; +}; + +REGISTER_KERNEL_BUILDER(Name("SlideDataset").Device(DEVICE_CPU), + SlideDatasetOp); + +} // namespace + +} // namespace tensorflow diff --git a/tensorflow/core/ops/dataset_ops.cc b/tensorflow/core/ops/dataset_ops.cc index bdbbf6d7c3..9a4b616e5d 100644 --- a/tensorflow/core/ops/dataset_ops.cc +++ b/tensorflow/core/ops/dataset_ops.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. 
Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -265,6 +265,16 @@ REGISTER_OP("BatchDataset") .Attr("output_shapes: list(shape) >= 1") .SetShapeFn(shape_inference::ScalarShape); +// TODO(mrry): move SlideDataset to contrib in the future. +REGISTER_OP("SlideDataset") + .Input("input_dataset: variant") + .Input("window_size: int64") + .Input("stride: int64") + .Output("handle: variant") + .Attr("output_types: list(type) >= 1") + .Attr("output_shapes: list(shape) >= 1") + .SetShapeFn(shape_inference::ScalarShape); + REGISTER_OP("PaddedBatchDataset") .Input("input_dataset: variant") .Input("batch_size: int64") -- GitLab From 36c91bba08963ed4f7363b5e3d6f5ac9f6e9004d Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Wed, 7 Mar 2018 11:12:11 -0800 Subject: [PATCH 535/884] Move `tf.contrib.bayesflow.layers` to `tfp.layers`. PiperOrigin-RevId: 188203941 --- tensorflow/contrib/bayesflow/BUILD | 48 - tensorflow/contrib/bayesflow/__init__.py | 2 - .../kernel_tests/docstring_util_test.py | 87 - .../layers_conv_variational_test.py | 521 ---- .../layers_dense_variational_test.py | 443 --- .../bayesflow/python/ops/docstring_util.py | 88 - .../contrib/bayesflow/python/ops/layers.py | 67 - .../python/ops/layers_conv_variational.py | 2486 ----------------- .../python/ops/layers_dense_variational.py | 955 ------- .../bayesflow/python/ops/layers_util.py | 191 -- 10 files changed, 4888 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/docstring_util.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/layers.py delete mode 100644 
tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/layers_util.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 8b5c6cec61..e1b34d6deb 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -76,54 +76,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "docstring_util_test", - size = "small", - srcs = ["python/kernel_tests/docstring_util_test.py"], - additional_deps = [ - ":bayesflow_py", - "//tensorflow/python:client_testlib", - ], -) - -cuda_py_test( - name = "layers_conv_variational_test", - size = "small", - srcs = ["python/kernel_tests/layers_conv_variational_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/python/ops/distributions", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:gradients", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:nn_ops", - ], -) - -cuda_py_test( - name = "layers_dense_variational_test", - size = "small", - srcs = ["python/kernel_tests/layers_dense_variational_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/python/ops/distributions", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:gradients", - "//tensorflow/python:linalg_ops", - "//tensorflow/python:math_ops", - "//tensorflow/python:nn_ops", - ], -) - cuda_py_test( name = "monte_carlo_test", size = "small", diff --git a/tensorflow/contrib/bayesflow/__init__.py 
b/tensorflow/contrib/bayesflow/__init__.py index 32f2df4b88..bff8ac2cf6 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -23,7 +23,6 @@ from __future__ import print_function # pylint: disable=unused-import,line-too-long from tensorflow.contrib.bayesflow.python.ops import custom_grad from tensorflow.contrib.bayesflow.python.ops import hmc -from tensorflow.contrib.bayesflow.python.ops import layers from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo from tensorflow.contrib.bayesflow.python.ops import optimizers @@ -36,7 +35,6 @@ _allowed_symbols = [ 'custom_grad', 'entropy', 'hmc', - 'layers', 'metropolis_hastings', 'monte_carlo', 'optimizers', diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py deleted file mode 100644 index 8ed500b19d..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/docstring_util_test.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for docstring utilities.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.bayesflow.python.ops import docstring_util -from tensorflow.python.platform import test - - -class DocstringUtil(test.TestCase): - - def _testFunction(self): - doc_args = """x: Input to return as output. - y: Baz.""" - @docstring_util.expand_docstring(args=doc_args) - def foo(x): - # pylint: disable=g-doc-args - """Hello world. - - Args: - @{args} - - Returns: - x. - """ - # pylint: enable=g-doc-args - return x - - true_docstring = """Hello world. - - Args: - x: Input to return as output. - y: Baz. - - Returns: - x. - """ - self.assertEqual(foo.__doc__, true_docstring) - - def _testClassInit(self): - doc_args = """x: Input to return as output. - y: Baz.""" - - class Foo(object): - - @docstring_util.expand_docstring(args=doc_args) - def __init__(self, x, y): - # pylint: disable=g-doc-args - """Hello world. - - Args: - @{args} - - Bar. - """ - # pylint: enable=g-doc-args - pass - - true_docstring = """Hello world. - - Args: - x: Input to return as output. - y: Baz. - - Bar. - """ - self.assertEqual(Foo.__doc__, true_docstring) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py deleted file mode 100644 index 750afb6654..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_conv_variational_test.py +++ /dev/null @@ -1,521 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Tests for convolutional Bayesian layers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import layers_conv_variational as prob_layers_lib -from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util -from tensorflow.contrib.distributions.python.ops import independent as independent_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.ops.distributions import util as distribution_util -from tensorflow.python.platform import test - - -class Counter(object): - """Helper class to manage incrementing a counting `int`.""" - - def __init__(self): - self._value = -1 - - @property - def value(self): - return self._value - - def __call__(self): - self._value += 1 - return self._value - - -class MockDistribution(independent_lib.Independent): - """Monitors layer calls to the underlying distribution.""" - - def __init__(self, result_sample, result_log_prob, loc=None, scale=None): - self.result_sample = result_sample - 
self.result_log_prob = result_log_prob - self.result_loc = loc - self.result_scale = scale - self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0) - if loc is not None and scale is not None: - self.result_distribution = normal_lib.Normal(loc=self.result_loc, - scale=self.result_scale) - self.called_log_prob = Counter() - self.called_sample = Counter() - self.called_loc = Counter() - self.called_scale = Counter() - - def log_prob(self, *args, **kwargs): - self.called_log_prob() - return self.result_log_prob - - def sample(self, *args, **kwargs): - self.called_sample() - return self.result_sample - - @property - def distribution(self): # for dummy check on Independent(Normal) - return self.result_distribution - - @property - def loc(self): - self.called_loc() - return self.result_loc - - @property - def scale(self): - self.called_scale() - return self.result_scale - - -class MockKLDivergence(object): - """Monitors layer calls to the divergence implementation.""" - - def __init__(self, result): - self.result = result - self.args = [] - self.called = Counter() - - def __call__(self, *args, **kwargs): - self.called() - self.args.append(args) - return self.result - - -class ConvVariational(test.TestCase): - - def _testKLPenaltyKernel(self, layer_class): - with self.test_session(): - layer = layer_class(filters=2, kernel_size=3) - if layer_class in (prob_layers_lib.Conv1DReparameterization, - prob_layers_lib.Conv1DFlipout): - inputs = random_ops.random_uniform([2, 3, 1], seed=1) - elif layer_class in (prob_layers_lib.Conv2DReparameterization, - prob_layers_lib.Conv2DFlipout): - inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1) - elif layer_class in (prob_layers_lib.Conv3DReparameterization, - prob_layers_lib.Conv3DFlipout): - inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1) - - # No keys. 
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 0) - self.assertListEqual(layer.losses, losses) - - _ = layer(inputs) - - # Yes keys. - losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 1) - self.assertListEqual(layer.losses, losses) - - def _testKLPenaltyBoth(self, layer_class): - def _make_normal(dtype, *args): # pylint: disable=unused-argument - return normal_lib.Normal( - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)) - with self.test_session(): - layer = layer_class( - filters=2, - kernel_size=3, - bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(), - bias_prior_fn=_make_normal) - if layer_class in (prob_layers_lib.Conv1DReparameterization, - prob_layers_lib.Conv1DFlipout): - inputs = random_ops.random_uniform([2, 3, 1], seed=1) - elif layer_class in (prob_layers_lib.Conv2DReparameterization, - prob_layers_lib.Conv2DFlipout): - inputs = random_ops.random_uniform([2, 3, 3, 1], seed=1) - elif layer_class in (prob_layers_lib.Conv3DReparameterization, - prob_layers_lib.Conv3DFlipout): - inputs = random_ops.random_uniform([2, 3, 3, 3, 1], seed=1) - - # No keys. - losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 0) - self.assertListEqual(layer.losses, losses) - - _ = layer(inputs) - - # Yes keys. 
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 2) - self.assertListEqual(layer.losses, losses) - - def _testConvSetUp(self, layer_class, batch_size, depth=None, - height=None, width=None, channels=None, filters=None, - **kwargs): - seed = Counter() - if layer_class in (prob_layers_lib.Conv1DReparameterization, - prob_layers_lib.Conv1DFlipout): - inputs = random_ops.random_uniform( - [batch_size, width, channels], seed=seed()) - kernel_size = (2,) - elif layer_class in (prob_layers_lib.Conv2DReparameterization, - prob_layers_lib.Conv2DFlipout): - inputs = random_ops.random_uniform( - [batch_size, height, width, channels], seed=seed()) - kernel_size = (2, 2) - elif layer_class in (prob_layers_lib.Conv3DReparameterization, - prob_layers_lib.Conv3DFlipout): - inputs = random_ops.random_uniform( - [batch_size, depth, height, width, channels], seed=seed()) - kernel_size = (2, 2, 2) - - kernel_shape = kernel_size + (channels, filters) - kernel_posterior = MockDistribution( - loc=random_ops.random_uniform(kernel_shape, seed=seed()), - scale=random_ops.random_uniform(kernel_shape, seed=seed()), - result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()), - result_sample=random_ops.random_uniform(kernel_shape, seed=seed())) - kernel_prior = MockDistribution( - result_log_prob=random_ops.random_uniform(kernel_shape, seed=seed()), - result_sample=random_ops.random_uniform(kernel_shape, seed=seed())) - kernel_divergence = MockKLDivergence( - result=random_ops.random_uniform(kernel_shape, seed=seed())) - - bias_size = (filters,) - bias_posterior = MockDistribution( - result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), - result_sample=random_ops.random_uniform(bias_size, seed=seed())) - bias_prior = MockDistribution( - result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), - result_sample=random_ops.random_uniform(bias_size, seed=seed())) - bias_divergence = MockKLDivergence( - 
result=random_ops.random_uniform(bias_size, seed=seed())) - - layer = layer_class( - filters=filters, - kernel_size=kernel_size, - padding="SAME", - kernel_posterior_fn=lambda *args: kernel_posterior, - kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), - kernel_prior_fn=lambda *args: kernel_prior, - kernel_divergence_fn=kernel_divergence, - bias_posterior_fn=lambda *args: bias_posterior, - bias_posterior_tensor_fn=lambda d: d.sample(seed=43), - bias_prior_fn=lambda *args: bias_prior, - bias_divergence_fn=bias_divergence, - **kwargs) - - outputs = layer(inputs) - - kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - return (kernel_posterior, kernel_prior, kernel_divergence, - bias_posterior, bias_prior, bias_divergence, - layer, inputs, outputs, kl_penalty, kernel_shape) - - def _testConvReparameterization(self, layer_class): - batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5 - with self.test_session() as sess: - (kernel_posterior, kernel_prior, kernel_divergence, - bias_posterior, bias_prior, bias_divergence, layer, inputs, - outputs, kl_penalty, kernel_shape) = self._testConvSetUp( - layer_class, batch_size, - depth=depth, height=height, width=width, channels=channels, - filters=filters) - - convolution_op = nn_ops.Convolution( - tensor_shape.TensorShape(inputs.shape), - filter_shape=tensor_shape.TensorShape(kernel_shape), - padding="SAME") - expected_outputs = convolution_op(inputs, kernel_posterior.result_sample) - expected_outputs = nn.bias_add(expected_outputs, - bias_posterior.result_sample, - data_format="NHWC") - - [ - expected_outputs_, actual_outputs_, - expected_kernel_, actual_kernel_, - expected_kernel_divergence_, actual_kernel_divergence_, - expected_bias_, actual_bias_, - expected_bias_divergence_, actual_bias_divergence_, - ] = sess.run([ - expected_outputs, outputs, - kernel_posterior.result_sample, layer.kernel_posterior_tensor, - kernel_divergence.result, kl_penalty[0], - 
bias_posterior.result_sample, layer.bias_posterior_tensor, - bias_divergence.result, kl_penalty[1], - ]) - - self.assertAllClose( - expected_kernel_, actual_kernel_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_bias_, actual_bias_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_outputs_, actual_outputs_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_kernel_divergence_, actual_kernel_divergence_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_bias_divergence_, actual_bias_divergence_, - rtol=1e-6, atol=0.) - - self.assertAllEqual( - [[kernel_posterior.distribution, - kernel_prior.distribution, - kernel_posterior.result_sample]], - kernel_divergence.args) - - self.assertAllEqual( - [[bias_posterior.distribution, - bias_prior.distribution, - bias_posterior.result_sample]], - bias_divergence.args) - - def _testConvFlipout(self, layer_class): - batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5 - with self.test_session() as sess: - (kernel_posterior, kernel_prior, kernel_divergence, - bias_posterior, bias_prior, bias_divergence, layer, inputs, - outputs, kl_penalty, kernel_shape) = self._testConvSetUp( - layer_class, batch_size, - depth=depth, height=height, width=width, channels=channels, - filters=filters, seed=44) - - convolution_op = nn_ops.Convolution( - tensor_shape.TensorShape(inputs.shape), - filter_shape=tensor_shape.TensorShape(kernel_shape), - padding="SAME") - - expected_kernel_posterior_affine = normal_lib.Normal( - loc=array_ops.zeros_like(kernel_posterior.result_loc), - scale=kernel_posterior.result_scale) - expected_kernel_posterior_affine_tensor = ( - expected_kernel_posterior_affine.sample(seed=42)) - - expected_outputs = convolution_op( - inputs, kernel_posterior.distribution.loc) - - input_shape = array_ops.shape(inputs) - output_shape = array_ops.shape(expected_outputs) - batch_shape = array_ops.expand_dims(input_shape[0], 0) - channels = input_shape[-1] - rank = len(inputs.get_shape()) - 2 
- - sign_input = random_ops.random_uniform( - array_ops.concat([batch_shape, - array_ops.expand_dims(channels, 0)], 0), - minval=0, - maxval=2, - dtype=dtypes.int32, - seed=layer.seed) - sign_input = math_ops.cast(2 * sign_input - 1, inputs.dtype) - sign_output = random_ops.random_uniform( - array_ops.concat([batch_shape, - array_ops.expand_dims(filters, 0)], 0), - minval=0, - maxval=2, - dtype=dtypes.int32, - seed=distribution_util.gen_new_seed( - layer.seed, salt="conv_flipout")) - sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype) - for _ in range(rank): - sign_input = array_ops.expand_dims(sign_input, 1) # 2D ex: (B, 1, 1, C) - sign_output = array_ops.expand_dims(sign_output, 1) - - sign_input = array_ops.tile( # tile for element-wise op broadcasting - sign_input, - [1] + [input_shape[i + 1] for i in range(rank)] + [1]) - sign_output = array_ops.tile( - sign_output, - [1] + [output_shape[i + 1] for i in range(rank)] + [1]) - - perturbed_inputs = convolution_op( - inputs * sign_input, expected_kernel_posterior_affine_tensor) - perturbed_inputs *= sign_output - - expected_outputs += perturbed_inputs - expected_outputs = nn.bias_add(expected_outputs, - bias_posterior.result_sample, - data_format="NHWC") - - [ - expected_outputs_, actual_outputs_, - expected_kernel_divergence_, actual_kernel_divergence_, - expected_bias_, actual_bias_, - expected_bias_divergence_, actual_bias_divergence_, - ] = sess.run([ - expected_outputs, outputs, - kernel_divergence.result, kl_penalty[0], - bias_posterior.result_sample, layer.bias_posterior_tensor, - bias_divergence.result, kl_penalty[1], - ]) - - self.assertAllClose( - expected_bias_, actual_bias_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_outputs_, actual_outputs_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_kernel_divergence_, actual_kernel_divergence_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_bias_divergence_, actual_bias_divergence_, - rtol=1e-6, atol=0.) 
- - self.assertAllEqual( - [[kernel_posterior.distribution, kernel_prior.distribution, None]], - kernel_divergence.args) - - self.assertAllEqual( - [[bias_posterior.distribution, - bias_prior.distribution, - bias_posterior.result_sample]], - bias_divergence.args) - - def _testRandomConvFlipout(self, layer_class): - batch_size, depth, height, width, channels, filters = 2, 4, 4, 4, 3, 5 - with self.test_session() as sess: - seed = Counter() - if layer_class in (prob_layers_lib.Conv1DReparameterization, - prob_layers_lib.Conv1DFlipout): - inputs = random_ops.random_uniform( - [batch_size, width, channels], seed=seed()) - kernel_size = (2,) - elif layer_class in (prob_layers_lib.Conv2DReparameterization, - prob_layers_lib.Conv2DFlipout): - inputs = random_ops.random_uniform( - [batch_size, height, width, channels], seed=seed()) - kernel_size = (2, 2) - elif layer_class in (prob_layers_lib.Conv3DReparameterization, - prob_layers_lib.Conv3DFlipout): - inputs = random_ops.random_uniform( - [batch_size, depth, height, width, channels], seed=seed()) - kernel_size = (2, 2, 2) - - kernel_shape = kernel_size + (channels, filters) - bias_size = (filters,) - - kernel_posterior = MockDistribution( - loc=random_ops.random_uniform( - kernel_shape, seed=seed()), - scale=random_ops.random_uniform( - kernel_shape, seed=seed()), - result_log_prob=random_ops.random_uniform( - kernel_shape, seed=seed()), - result_sample=random_ops.random_uniform( - kernel_shape, seed=seed())) - bias_posterior = MockDistribution( - loc=random_ops.random_uniform( - bias_size, seed=seed()), - scale=random_ops.random_uniform( - bias_size, seed=seed()), - result_log_prob=random_ops.random_uniform( - bias_size, seed=seed()), - result_sample=random_ops.random_uniform( - bias_size, seed=seed())) - layer_one = layer_class( - filters=filters, - kernel_size=kernel_size, - padding="SAME", - kernel_posterior_fn=lambda *args: kernel_posterior, - kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), - 
bias_posterior_fn=lambda *args: bias_posterior, - bias_posterior_tensor_fn=lambda d: d.sample(seed=43), - seed=44) - layer_two = layer_class( - filters=filters, - kernel_size=kernel_size, - padding="SAME", - kernel_posterior_fn=lambda *args: kernel_posterior, - kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), - bias_posterior_fn=lambda *args: bias_posterior, - bias_posterior_tensor_fn=lambda d: d.sample(seed=43), - seed=45) - - outputs_one = layer_one(inputs) - outputs_two = layer_two(inputs) - - outputs_one_, outputs_two_ = sess.run([ - outputs_one, outputs_two]) - - self.assertLess(np.sum(np.isclose(outputs_one_, outputs_two_)), - np.prod(outputs_one_.shape)) - - def testKLPenaltyKernelConv1DReparameterization(self): - self._testKLPenaltyKernel(prob_layers_lib.Conv1DReparameterization) - - def testKLPenaltyKernelConv2DReparameterization(self): - self._testKLPenaltyKernel(prob_layers_lib.Conv2DReparameterization) - - def testKLPenaltyKernelConv3DReparameterization(self): - self._testKLPenaltyKernel(prob_layers_lib.Conv3DReparameterization) - - def testKLPenaltyKernelConv1DFlipout(self): - self._testKLPenaltyKernel(prob_layers_lib.Conv1DFlipout) - - def testKLPenaltyKernelConv2DFlipout(self): - self._testKLPenaltyKernel(prob_layers_lib.Conv2DFlipout) - - def testKLPenaltyKernelConv3DFlipout(self): - self._testKLPenaltyKernel(prob_layers_lib.Conv3DFlipout) - - def testKLPenaltyBothConv1DReparameterization(self): - self._testKLPenaltyBoth(prob_layers_lib.Conv1DReparameterization) - - def testKLPenaltyBothConv2DReparameterization(self): - self._testKLPenaltyBoth(prob_layers_lib.Conv2DReparameterization) - - def testKLPenaltyBothConv3DReparameterization(self): - self._testKLPenaltyBoth(prob_layers_lib.Conv3DReparameterization) - - def testKLPenaltyBothConv1DFlipout(self): - self._testKLPenaltyBoth(prob_layers_lib.Conv1DFlipout) - - def testKLPenaltyBothConv2DFlipout(self): - self._testKLPenaltyBoth(prob_layers_lib.Conv2DFlipout) - - def 
testKLPenaltyBothConv3DFlipout(self): - self._testKLPenaltyBoth(prob_layers_lib.Conv3DFlipout) - - def testConv1DReparameterization(self): - self._testConvReparameterization(prob_layers_lib.Conv1DReparameterization) - - def testConv2DReparameterization(self): - self._testConvReparameterization(prob_layers_lib.Conv2DReparameterization) - - def testConv3DReparameterization(self): - self._testConvReparameterization(prob_layers_lib.Conv3DReparameterization) - - def testConv1DFlipout(self): - self._testConvFlipout(prob_layers_lib.Conv1DFlipout) - - def testConv2DFlipout(self): - self._testConvFlipout(prob_layers_lib.Conv2DFlipout) - - def testConv3DFlipout(self): - self._testConvFlipout(prob_layers_lib.Conv3DFlipout) - - def testRandomConv1DFlipout(self): - self._testRandomConvFlipout(prob_layers_lib.Conv1DFlipout) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py deleted file mode 100644 index 342f38ccec..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/layers_dense_variational_test.py +++ /dev/null @@ -1,443 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for dense Bayesian layers.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import layers_dense_variational as prob_layers_lib -from tensorflow.contrib.bayesflow.python.ops import layers_util as prob_layers_util -from tensorflow.contrib.distributions.python.ops import independent as independent_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.ops.distributions import util as distribution_util -from tensorflow.python.platform import test - - -class Counter(object): - """Helper class to manage incrementing a counting `int`.""" - - def __init__(self): - self._value = -1 - - @property - def value(self): - return self._value - - def __call__(self): - self._value += 1 - return self._value - - -class MockDistribution(independent_lib.Independent): - """Monitors layer calls to the underlying distribution.""" - - def __init__(self, result_sample, result_log_prob, loc=None, scale=None): - self.result_sample = result_sample - self.result_log_prob = result_log_prob - self.result_loc = loc - self.result_scale = scale - self.result_distribution = normal_lib.Normal(loc=0.0, scale=1.0) - if loc is not None and scale is not None: - self.result_distribution = normal_lib.Normal(loc=self.result_loc, - scale=self.result_scale) - self.called_log_prob = Counter() - self.called_sample = Counter() - self.called_loc = Counter() - self.called_scale = Counter() - - def log_prob(self, *args, **kwargs): - self.called_log_prob() - return self.result_log_prob - - def sample(self, *args, **kwargs): - 
self.called_sample() - return self.result_sample - - @property - def distribution(self): # for dummy check on Independent(Normal) - return self.result_distribution - - @property - def loc(self): - self.called_loc() - return self.result_loc - - @property - def scale(self): - self.called_scale() - return self.result_scale - - -class MockKLDivergence(object): - """Monitors layer calls to the divergence implementation.""" - - def __init__(self, result): - self.result = result - self.args = [] - self.called = Counter() - - def __call__(self, *args, **kwargs): - self.called() - self.args.append(args) - return self.result - - -class DenseVariational(test.TestCase): - - def _testKLPenaltyKernel(self, layer_class): - with self.test_session(): - layer = layer_class(units=2) - inputs = random_ops.random_uniform([2, 3], seed=1) - - # No keys. - losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 0) - self.assertListEqual(layer.losses, losses) - - _ = layer(inputs) - - # Yes keys. - losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 1) - self.assertListEqual(layer.losses, losses) - - def _testKLPenaltyBoth(self, layer_class): - def _make_normal(dtype, *args): # pylint: disable=unused-argument - return normal_lib.Normal( - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)) - with self.test_session(): - layer = layer_class( - units=2, - bias_posterior_fn=prob_layers_util.default_mean_field_normal_fn(), - bias_prior_fn=_make_normal) - inputs = random_ops.random_uniform([2, 3], seed=1) - - # No keys. - losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 0) - self.assertListEqual(layer.losses, losses) - - _ = layer(inputs) - - # Yes keys. 
- losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - self.assertEqual(len(losses), 2) - self.assertListEqual(layer.losses, losses) - - def _testDenseSetUp(self, layer_class, batch_size, in_size, out_size, - **kwargs): - seed = Counter() - inputs = random_ops.random_uniform([batch_size, in_size], seed=seed()) - - kernel_size = [in_size, out_size] - kernel_posterior = MockDistribution( - loc=random_ops.random_uniform(kernel_size, seed=seed()), - scale=random_ops.random_uniform(kernel_size, seed=seed()), - result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()), - result_sample=random_ops.random_uniform(kernel_size, seed=seed())) - kernel_prior = MockDistribution( - result_log_prob=random_ops.random_uniform(kernel_size, seed=seed()), - result_sample=random_ops.random_uniform(kernel_size, seed=seed())) - kernel_divergence = MockKLDivergence( - result=random_ops.random_uniform(kernel_size, seed=seed())) - - bias_size = [out_size] - bias_posterior = MockDistribution( - result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), - result_sample=random_ops.random_uniform(bias_size, seed=seed())) - bias_prior = MockDistribution( - result_log_prob=random_ops.random_uniform(bias_size, seed=seed()), - result_sample=random_ops.random_uniform(bias_size, seed=seed())) - bias_divergence = MockKLDivergence( - result=random_ops.random_uniform(bias_size, seed=seed())) - - layer = layer_class( - units=out_size, - kernel_posterior_fn=lambda *args: kernel_posterior, - kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), - kernel_prior_fn=lambda *args: kernel_prior, - kernel_divergence_fn=kernel_divergence, - bias_posterior_fn=lambda *args: bias_posterior, - bias_posterior_tensor_fn=lambda d: d.sample(seed=43), - bias_prior_fn=lambda *args: bias_prior, - bias_divergence_fn=bias_divergence, - **kwargs) - - outputs = layer(inputs) - - kl_penalty = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) - return (kernel_posterior, kernel_prior, 
kernel_divergence, - bias_posterior, bias_prior, bias_divergence, - layer, inputs, outputs, kl_penalty) - - def testKLPenaltyKernelReparameterization(self): - self._testKLPenaltyKernel(prob_layers_lib.DenseReparameterization) - - def testKLPenaltyKernelLocalReparameterization(self): - self._testKLPenaltyKernel(prob_layers_lib.DenseLocalReparameterization) - - def testKLPenaltyKernelFlipout(self): - self._testKLPenaltyKernel(prob_layers_lib.DenseFlipout) - - def testKLPenaltyBothReparameterization(self): - self._testKLPenaltyBoth(prob_layers_lib.DenseReparameterization) - - def testKLPenaltyBothLocalReparameterization(self): - self._testKLPenaltyBoth(prob_layers_lib.DenseLocalReparameterization) - - def testKLPenaltyBothFlipout(self): - self._testKLPenaltyBoth(prob_layers_lib.DenseFlipout) - - def testDenseReparameterization(self): - batch_size, in_size, out_size = 2, 3, 4 - with self.test_session() as sess: - (kernel_posterior, kernel_prior, kernel_divergence, - bias_posterior, bias_prior, bias_divergence, layer, inputs, - outputs, kl_penalty) = self._testDenseSetUp( - prob_layers_lib.DenseReparameterization, - batch_size, in_size, out_size) - - expected_outputs = ( - math_ops.matmul(inputs, kernel_posterior.result_sample) + - bias_posterior.result_sample) - - [ - expected_outputs_, actual_outputs_, - expected_kernel_, actual_kernel_, - expected_kernel_divergence_, actual_kernel_divergence_, - expected_bias_, actual_bias_, - expected_bias_divergence_, actual_bias_divergence_, - ] = sess.run([ - expected_outputs, outputs, - kernel_posterior.result_sample, layer.kernel_posterior_tensor, - kernel_divergence.result, kl_penalty[0], - bias_posterior.result_sample, layer.bias_posterior_tensor, - bias_divergence.result, kl_penalty[1], - ]) - - self.assertAllClose( - expected_kernel_, actual_kernel_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_bias_, actual_bias_, - rtol=1e-6, atol=0.) 
- self.assertAllClose( - expected_outputs_, actual_outputs_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_kernel_divergence_, actual_kernel_divergence_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_bias_divergence_, actual_bias_divergence_, - rtol=1e-6, atol=0.) - - self.assertAllEqual( - [[kernel_posterior.distribution, - kernel_prior.distribution, - kernel_posterior.result_sample]], - kernel_divergence.args) - - self.assertAllEqual( - [[bias_posterior.distribution, - bias_prior.distribution, - bias_posterior.result_sample]], - bias_divergence.args) - - def testDenseLocalReparameterization(self): - batch_size, in_size, out_size = 2, 3, 4 - with self.test_session() as sess: - (kernel_posterior, kernel_prior, kernel_divergence, - bias_posterior, bias_prior, bias_divergence, layer, inputs, - outputs, kl_penalty) = self._testDenseSetUp( - prob_layers_lib.DenseLocalReparameterization, - batch_size, in_size, out_size) - - expected_kernel_posterior_affine = normal_lib.Normal( - loc=math_ops.matmul(inputs, kernel_posterior.result_loc), - scale=math_ops.matmul( - inputs**2., kernel_posterior.result_scale**2)**0.5) - expected_kernel_posterior_affine_tensor = ( - expected_kernel_posterior_affine.sample(seed=42)) - expected_outputs = (expected_kernel_posterior_affine_tensor + - bias_posterior.result_sample) - - [ - expected_outputs_, actual_outputs_, - expected_kernel_divergence_, actual_kernel_divergence_, - expected_bias_, actual_bias_, - expected_bias_divergence_, actual_bias_divergence_, - ] = sess.run([ - expected_outputs, outputs, - kernel_divergence.result, kl_penalty[0], - bias_posterior.result_sample, layer.bias_posterior_tensor, - bias_divergence.result, kl_penalty[1], - ]) - - self.assertAllClose( - expected_bias_, actual_bias_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_outputs_, actual_outputs_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_kernel_divergence_, actual_kernel_divergence_, - rtol=1e-6, atol=0.) 
- self.assertAllClose( - expected_bias_divergence_, actual_bias_divergence_, - rtol=1e-6, atol=0.) - - self.assertAllEqual( - [[kernel_posterior.distribution, - kernel_prior.distribution, - None]], - kernel_divergence.args) - - self.assertAllEqual( - [[bias_posterior.distribution, - bias_prior.distribution, - bias_posterior.result_sample]], - bias_divergence.args) - - def testDenseFlipout(self): - batch_size, in_size, out_size = 2, 3, 4 - with self.test_session() as sess: - (kernel_posterior, kernel_prior, kernel_divergence, - bias_posterior, bias_prior, bias_divergence, layer, inputs, - outputs, kl_penalty) = self._testDenseSetUp( - prob_layers_lib.DenseFlipout, - batch_size, in_size, out_size, seed=44) - - expected_kernel_posterior_affine = normal_lib.Normal( - loc=array_ops.zeros_like(kernel_posterior.result_loc), - scale=kernel_posterior.result_scale) - expected_kernel_posterior_affine_tensor = ( - expected_kernel_posterior_affine.sample(seed=42)) - - sign_input = random_ops.random_uniform( - [batch_size, in_size], - minval=0, - maxval=2, - dtype=dtypes.int32, - seed=layer.seed) - sign_input = math_ops.cast(2 * sign_input - 1, inputs.dtype) - sign_output = random_ops.random_uniform( - [batch_size, out_size], - minval=0, - maxval=2, - dtype=dtypes.int32, - seed=distribution_util.gen_new_seed( - layer.seed, salt="dense_flipout")) - sign_output = math_ops.cast(2 * sign_output - 1, inputs.dtype) - perturbed_inputs = math_ops.matmul( - inputs * sign_input, expected_kernel_posterior_affine_tensor) - perturbed_inputs *= sign_output - - expected_outputs = math_ops.matmul(inputs, kernel_posterior.result_loc) - expected_outputs += perturbed_inputs - expected_outputs += bias_posterior.result_sample - - [ - expected_outputs_, actual_outputs_, - expected_kernel_divergence_, actual_kernel_divergence_, - expected_bias_, actual_bias_, - expected_bias_divergence_, actual_bias_divergence_, - ] = sess.run([ - expected_outputs, outputs, - kernel_divergence.result, kl_penalty[0], - 
bias_posterior.result_sample, layer.bias_posterior_tensor, - bias_divergence.result, kl_penalty[1], - ]) - - self.assertAllClose( - expected_bias_, actual_bias_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_outputs_, actual_outputs_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_kernel_divergence_, actual_kernel_divergence_, - rtol=1e-6, atol=0.) - self.assertAllClose( - expected_bias_divergence_, actual_bias_divergence_, - rtol=1e-6, atol=0.) - - self.assertAllEqual( - [[kernel_posterior.distribution, kernel_prior.distribution, None]], - kernel_divergence.args) - - self.assertAllEqual( - [[bias_posterior.distribution, - bias_prior.distribution, - bias_posterior.result_sample]], - bias_divergence.args) - - def testRandomDenseFlipout(self): - batch_size, in_size, out_size = 2, 3, 4 - with self.test_session() as sess: - seed = Counter() - inputs = random_ops.random_uniform([batch_size, in_size], seed=seed()) - - kernel_posterior = MockDistribution( - loc=random_ops.random_uniform( - [in_size, out_size], seed=seed()), - scale=random_ops.random_uniform( - [in_size, out_size], seed=seed()), - result_log_prob=random_ops.random_uniform( - [in_size, out_size], seed=seed()), - result_sample=random_ops.random_uniform( - [in_size, out_size], seed=seed())) - bias_posterior = MockDistribution( - loc=random_ops.random_uniform( - [out_size], seed=seed()), - scale=random_ops.random_uniform( - [out_size], seed=seed()), - result_log_prob=random_ops.random_uniform( - [out_size], seed=seed()), - result_sample=random_ops.random_uniform( - [out_size], seed=seed())) - layer_one = prob_layers_lib.DenseFlipout( - units=out_size, - kernel_posterior_fn=lambda *args: kernel_posterior, - kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), - bias_posterior_fn=lambda *args: bias_posterior, - bias_posterior_tensor_fn=lambda d: d.sample(seed=43), - seed=44) - layer_two = prob_layers_lib.DenseFlipout( - units=out_size, - kernel_posterior_fn=lambda *args: kernel_posterior, - 
kernel_posterior_tensor_fn=lambda d: d.sample(seed=42), - bias_posterior_fn=lambda *args: bias_posterior, - bias_posterior_tensor_fn=lambda d: d.sample(seed=43), - seed=45) - - outputs_one = layer_one(inputs) - outputs_two = layer_two(inputs) - - outputs_one_, outputs_two_ = sess.run([ - outputs_one, outputs_two]) - - self.assertLess(np.sum(np.isclose(outputs_one_, outputs_two_)), out_size) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/docstring_util.py b/tensorflow/contrib/bayesflow/python/ops/docstring_util.py deleted file mode 100644 index 081f2d5a8b..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/docstring_util.py +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities for programmable docstrings. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import re -import six - - -def expand_docstring(**kwargs): - """Decorator to programmatically expand the docstring. - - Args: - **kwargs: Keyword arguments to set. For each key-value pair `k` and `v`, - the key is found as `@{k}` in the docstring and replaced with `v`. - - Returns: - Decorated function. 
- """ - def _fn_wrapped(fn): - """Original function with modified `__doc__` attribute.""" - doc = _trim(fn.__doc__) - for k, v in six.iteritems(kwargs): - # Capture each @{k} reference to replace with v. - # We wrap the replacement in a function so no backslash escapes - # are processed. - pattern = r'@\{' + str(k) + r'\}' - doc = re.sub(pattern, lambda match: v, doc) # pylint: disable=cell-var-from-loop - fn.__doc__ = doc - return fn - return _fn_wrapped - - -def _trim(docstring): - """Trims docstring indentation. - - In general, multi-line docstrings carry their level of indentation when - defined under a function or class method. This function standardizes - indentation levels by removing them. Taken from PEP 257 docs. - - Args: - docstring: Python string to trim indentation. - - Returns: - Trimmed docstring. - """ - if not docstring: - return '' - # Convert tabs to spaces (following the normal Python rules) - # and split into a list of lines: - lines = docstring.expandtabs().splitlines() - # Determine minimum indentation (first line doesn't count): - indent = None - for line in lines[1:]: - stripped = line.lstrip() - if stripped: - if indent is None: - indent = len(line) - len(stripped) - else: - indent = min(indent, len(line) - len(stripped)) - # Remove indentation (first line is special): - trimmed = [lines[0].strip()] - if indent is not None: - for line in lines[1:]: - trimmed.append(line[indent:].rstrip()) - # Strip off trailing and leading blank lines: - while trimmed and not trimmed[-1]: - trimmed.pop() - while trimmed and not trimmed[0]: - trimmed.pop(0) - # Return a single string: - return '\n'.join(trimmed) diff --git a/tensorflow/contrib/bayesflow/python/ops/layers.py b/tensorflow/contrib/bayesflow/python/ops/layers.py deleted file mode 100644 index 610613dca5..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/layers.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Probabilistic neural layers. - -See @{tf.contrib.bayesflow.layers}. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.contrib.bayesflow.python.ops.layers_conv_variational import * -from tensorflow.contrib.bayesflow.python.ops.layers_dense_variational import * -from tensorflow.contrib.bayesflow.python.ops.layers_util import * -# pylint: enable=wildcard-import -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - 'Convolution1DReparameterization', - 'Convolution2DReparameterization', - 'Convolution3DReparameterization', - 'Convolution1DFlipout', - 'Convolution2DFlipout', - 'Convolution3DFlipout', - 'Conv1DReparameterization', - 'Conv2DReparameterization', - 'Conv3DReparameterization', - 'Conv1DFlipout', - 'Conv2DFlipout', - 'Conv3DFlipout', - 'convolution1d_reparameterization', - 'convolution2d_reparameterization', - 'convolution3d_reparameterization', - 'convolution1d_flipout', - 'convolution2d_flipout', - 'convolution3d_flipout', - 'conv1d_reparameterization', - 'conv2d_reparameterization', - 'conv3d_reparameterization', - 'conv1d_flipout', - 'conv2d_flipout', - 'conv3d_flipout', - 'DenseReparameterization', - 'DenseLocalReparameterization', - 
'DenseFlipout', - 'dense_reparameterization', - 'dense_local_reparameterization', - 'dense_flipout', - 'default_loc_scale_fn', - 'default_mean_field_normal_fn', -] - -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py b/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py deleted file mode 100644 index cb80718f71..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/layers_conv_variational.py +++ /dev/null @@ -1,2486 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Convolutional variational layer classes and their functional aliases. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.bayesflow.python.ops import docstring_util -from tensorflow.contrib.bayesflow.python.ops import layers_util -from tensorflow.contrib.distributions.python.ops import independent as independent_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.layers import base as layers_lib -from tensorflow.python.layers import utils -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import standard_ops -from tensorflow.python.ops.distributions import kullback_leibler as kl_lib -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.ops.distributions import util as distribution_util - -doc_args = """activation: Activation function. Set it to None to maintain a - linear activation. - activity_regularizer: Optional regularizer function for the output. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). - kernel_posterior_fn: Python `callable` which creates - `tf.distributions.Distribution` instance representing the surrogate - posterior of the `kernel` parameter. Default value: - `default_mean_field_normal_fn()`. - kernel_posterior_tensor_fn: Python `callable` which takes a - `tf.distributions.Distribution` instance and returns a representative - value. Default value: `lambda d: d.sample()`. - kernel_prior_fn: Python `callable` which creates `tf.distributions` - instance. See `default_mean_field_normal_fn` docstring for required - parameter signature. - Default value: `tf.distributions.Normal(loc=0., scale=1.)`. 
- kernel_divergence_fn: Python `callable` which takes the surrogate posterior - distribution, prior distribution and random variate sample(s) from the - surrogate posterior and computes or approximates the KL divergence. The - distributions are `tf.distributions.Distribution`-like instances and the - sample is a `Tensor`. - bias_posterior_fn: Python `callable` which creates - `tf.distributions.Distribution` instance representing the surrogate - posterior of the `bias` parameter. Default value: - `default_mean_field_normal_fn(is_singular=True)` (which creates an - instance of `tf.distributions.Deterministic`). - bias_posterior_tensor_fn: Python `callable` which takes a - `tf.distributions.Distribution` instance and returns a representative - value. Default value: `lambda d: d.sample()`. - bias_prior_fn: Python `callable` which creates `tf.distributions` instance. - See `default_mean_field_normal_fn` docstring for required parameter - signature. Default value: `None` (no prior, no variational inference) - bias_divergence_fn: Python `callable` which takes the surrogate posterior - distribution, prior distribution and random variate sample(s) from the - surrogate posterior and computes or approximates the KL divergence. The - distributions are `tf.distributions.Distribution`-like instances and the - sample is a `Tensor`. - name: A string, the name of the layer.""" - - -class _ConvVariational(layers_lib.Layer): - """Abstract nD convolution layer (private, used as implementation base). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. 
- - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - rank: Python integer, dimensionality of convolution. - filters: Python integer, dimensionality of the output space. - kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. - data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. 
- """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - rank, - filters, - kernel_size, - strides=1, - padding="valid", - data_format="channels_last", - dilation_rate=1, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - rank: An integer, the rank of the convolution, e.g. "2" for 2D - convolution. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - length of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, ..., - channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, ...)`. - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. 
- Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(_ConvVariational, self).__init__( - trainable=trainable, - name=name, - activity_regularizer=activity_regularizer, - **kwargs) - self.rank = rank - self.filters = filters - self.kernel_size = utils.normalize_tuple(kernel_size, rank, "kernel_size") - self.strides = utils.normalize_tuple(strides, rank, "strides") - self.padding = utils.normalize_padding(padding) - self.data_format = utils.normalize_data_format(data_format) - self.dilation_rate = utils.normalize_tuple( - dilation_rate, rank, "dilation_rate") - self.activation = activation - self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2) - self.kernel_posterior_fn = kernel_posterior_fn - self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn - self.kernel_prior_fn = kernel_prior_fn - self.kernel_divergence_fn = kernel_divergence_fn - self.bias_posterior_fn = bias_posterior_fn - self.bias_posterior_tensor_fn = bias_posterior_tensor_fn - self.bias_prior_fn = bias_prior_fn - self.bias_divergence_fn = bias_divergence_fn - - def build(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - if self.data_format == "channels_first": - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis].value is None: - raise ValueError("The channel dimension of the inputs " - "should be defined. Found `None`.") - input_dim = input_shape[channel_axis].value - kernel_shape = self.kernel_size + (input_dim, self.filters) - dtype = dtypes.as_dtype(self.dtype) - - # Must have a posterior kernel. 
- self.kernel_posterior = self.kernel_posterior_fn( - dtype, kernel_shape, "kernel_posterior", - self.trainable, self.add_variable) - - if self.kernel_prior_fn is None: - self.kernel_prior = None - else: - self.kernel_prior = self.kernel_prior_fn( - dtype, kernel_shape, "kernel_prior", - self.trainable, self.add_variable) - self._built_kernel_divergence = False - - if self.bias_posterior_fn is None: - self.bias_posterior = None - else: - self.bias_posterior = self.bias_posterior_fn( - dtype, (self.filters,), "bias_posterior", - self.trainable, self.add_variable) - - if self.bias_prior_fn is None: - self.bias_prior = None - else: - self.bias_prior = self.bias_prior_fn( - dtype, (self.filters,), "bias_prior", - self.trainable, self.add_variable) - self._built_bias_divergence = False - - self.input_spec = layers_lib.InputSpec(ndim=self.rank + 2, - axes={channel_axis: input_dim}) - self._convolution_op = nn_ops.Convolution( - input_shape, - filter_shape=tensor_shape.TensorShape(kernel_shape), - dilation_rate=self.dilation_rate, - strides=self.strides, - padding=self.padding.upper(), - data_format=utils.convert_data_format(self.data_format, - self.rank + 2)) - - self.built = True - - def call(self, inputs): - inputs = ops.convert_to_tensor(inputs, dtype=self.dtype) - - outputs = self._apply_variational_kernel(inputs) - outputs = self._apply_variational_bias(outputs) - if self.activation is not None: - outputs = self.activation(outputs) - if not self._built_kernel_divergence: - kernel_posterior = self.kernel_posterior - kernel_prior = self.kernel_prior - if isinstance(self.kernel_posterior, independent_lib.Independent): - kernel_posterior = kernel_posterior.distribution - if isinstance(self.kernel_prior, independent_lib.Independent): - kernel_prior = kernel_prior.distribution - self._apply_divergence(self.kernel_divergence_fn, - kernel_posterior, - kernel_prior, - self.kernel_posterior_tensor, - name="divergence_kernel") - self._built_kernel_divergence = True - if not 
self._built_bias_divergence: - bias_posterior = self.bias_posterior - bias_prior = self.bias_prior - if isinstance(self.bias_posterior, independent_lib.Independent): - bias_posterior = bias_posterior.distribution - if isinstance(self.bias_prior, independent_lib.Independent): - bias_prior = bias_prior.distribution - self._apply_divergence(self.bias_divergence_fn, - bias_posterior, - bias_prior, - self.bias_posterior_tensor, - name="divergence_bias") - self._built_bias_divergence = True - return outputs - - def _apply_variational_bias(self, inputs): - if self.bias_posterior is None: - self.bias_posterior_tensor = None - return inputs - self.bias_posterior_tensor = self.bias_posterior_tensor_fn( - self.bias_posterior) - outputs = inputs - if self.data_format == "channels_first": - if self.rank == 1: - # nn.bias_add does not accept a 1D input tensor. - bias = array_ops.reshape(self.bias_posterior_tensor, - (1, self.filters, 1)) - outputs += bias - if self.rank == 2: - outputs = nn.bias_add(outputs, - self.bias_posterior_tensor, - data_format="NCHW") - if self.rank == 3: - # As of Mar 2017, direct addition is significantly slower than - # bias_add when computing gradients. To use bias_add, we collapse Z - # and Y into a single dimension to obtain a 4D input tensor. 
- outputs_shape = outputs.shape.as_list() - outputs_4d = array_ops.reshape(outputs, - [outputs_shape[0], outputs_shape[1], - outputs_shape[2] * outputs_shape[3], - outputs_shape[4]]) - outputs_4d = nn.bias_add(outputs_4d, - self.bias_posterior_tensor, - data_format="NCHW") - outputs = array_ops.reshape(outputs_4d, outputs_shape) - else: - outputs = nn.bias_add(outputs, - self.bias_posterior_tensor, - data_format="NHWC") - return outputs - - def _apply_divergence(self, divergence_fn, posterior, prior, - posterior_tensor, name): - if (divergence_fn is None or - posterior is None or - prior is None): - divergence = None - return - divergence = standard_ops.identity( - divergence_fn( - posterior, prior, posterior_tensor), - name=name) - self.add_loss(divergence) - - def _compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).as_list() - if self.data_format == "channels_last": - space = input_shape[1:-1] - new_space = [] - for i in range(len(space)): - new_dim = utils.conv_output_length( - space[i], - self.kernel_size[i], - padding=self.padding, - stride=self.strides[i], - dilation=self.dilation_rate[i]) - new_space.append(new_dim) - return tensor_shape.TensorShape([input_shape[0]] + new_space + - [self.filters]) - else: - space = input_shape[2:] - new_space = [] - for i in range(len(space)): - new_dim = utils.conv_output_length( - space[i], - self.kernel_size[i], - padding=self.padding, - stride=self.strides[i], - dilation=self.dilation_rate[i]) - new_space.append(new_dim) - return tensor_shape.TensorShape([input_shape[0], self.filters] + - new_space) - - -class _ConvReparameterization(_ConvVariational): - """Abstract nD convolution layer (private, used as implementation base). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. 
It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the reparameterization - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - rank: Python integer, dimensionality of convolution. - filters: Python integer, dimensionality of the output space. - kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. - data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. - International Conference on Learning Representations, 2014. 
- """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - rank, - filters, - kernel_size, - strides=1, - padding="valid", - data_format="channels_last", - dilation_rate=1, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - rank: An integer, the rank of the convolution, e.g. "2" for 2D - convolution. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - length of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, ..., - channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, ...)`. - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. 
- Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(_ConvReparameterization, self).__init__( - rank=rank, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, **kwargs) - - def _apply_variational_kernel(self, inputs): - self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn( - self.kernel_posterior) - self.kernel_posterior_affine = None - self.kernel_posterior_affine_tensor = None - outputs = self._convolution_op(inputs, self.kernel_posterior_tensor) - return outputs - - -class Conv1DReparameterization(_ConvReparameterization): - """1D convolution layer (e.g. temporal convolution). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the reparameterization - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. 
- - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - filters: Python integer, dimensionality of the output space. - kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. - data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 128, 1]) - net = tfp.layers.Conv1DReparameterization(64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu)(net) - net = tf.reshape(net, [-1, 128 * 64]) - logits = tfp.layers.DenseReparameterization(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. 
It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. - International Conference on Learning Representations, 2014. - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - filters, - kernel_size, - strides=1, - padding="valid", - data_format="channels_last", - dilation_rate=1, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, specifying the - length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. 
- `channels_last` corresponds to inputs with shape `(batch, length, - channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, length)`. - dilation_rate: An integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(Conv1DReparameterization, self).__init__( - rank=1, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, **kwargs) - - -@docstring_util.expand_docstring(args=doc_args) -def conv1d_reparameterization( - inputs, - filters, - kernel_size, - strides=1, - padding="valid", - data_format="channels_last", - dilation_rate=1, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - 
reuse=None): - # pylint: disable=g-doc-args - """Functional interface for 1D convolution layer (e.g. temporal convolution). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the reparameterization - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, specifying the - length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - dilation_rate: An integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. 
- Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - @{args} - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 128, 1]) - net = tfp.layers.conv1d_reparameterization(net, - filters=64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu) - net = tf.reshape(net, [-1, 128 * 64]) - logits = tfp.layers.dense_reparameterization(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. - International Conference on Learning Representations, 2014. 
- """ - # pylint: enable=g-doc-args - layer = Conv1DReparameterization( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -class Conv2DReparameterization(_ConvReparameterization): - """2D convolution layer (e.g. spatial convolution over images). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the reparameterization - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - filters: Python integer, dimensionality of the output space. - kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. 
- data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 32, 32, 3]) - net = tfp.layers.Conv2DReparameterization(64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu)(net) - net = tf.layers.MaxPooling2D(pool_size=2, - strides=2, - padding="SAME")(net) - net = tf.reshape(net, [-1, 8 * 8 * 64]) - logits = tfp.layers.DenseReparameterization(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. 
- International Conference on Learning Representations, 2014. - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - filters, - kernel_size, - strides=(1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, height, - width, channels)` while `channels_first` corresponds to inputs with - shape `(batch, channels, height, width)`. 
- dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(Conv2DReparameterization, self).__init__( - rank=2, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, **kwargs) - - -@docstring_util.expand_docstring(args=doc_args) -def conv2d_reparameterization( - inputs, - filters, - kernel_size, - strides=(1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Functional interface for the 2D 
convolution layer. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the reparameterization - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. 
- dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 32, 32, 3]) - net = tfp.layers.conv2d_reparameterization(net, - filters=64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu) - net = tf.layers.max_pooling2d(net, - pool_size=2, - strides=2, - padding="SAME") - net = tf.reshape(net, [-1, 8 * 8 * 64]) - logits = tfp.layers.dense_reparameterization(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. - International Conference on Learning Representations, 2014. 
- """ - # pylint: enable=g-doc-args - layer = Conv2DReparameterization( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -class Conv3DReparameterization(_ConvReparameterization): - """3D convolution layer (e.g. spatial convolution over volumes). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the reparameterization - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - filters: Python integer, dimensionality of the output space. - kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. 
- data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 256, 32, 32, 3]) - net = tfp.layers.Conv3DReparameterization(64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu)(net) - net = tf.layers.MaxPooling2D(pool_size=2, - strides=2, - padding="SAME")(net) - net = tf.reshape(net, [-1, 256 * 8 * 8 * 64]) - logits = tfp.layers.DenseReparameterization(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. 
- International Conference on Learning Representations, 2014. - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - filters, - kernel_size, - strides=(1, 1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, - height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. 
- `channels_last` corresponds to inputs with shape `(batch, depth, - height, width, channels)` while `channels_first` corresponds to inputs - with shape `(batch, channels, depth, height, width)`. - dilation_rate: An integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(Conv3DReparameterization, self).__init__( - rank=3, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, **kwargs) - - -@docstring_util.expand_docstring(args=doc_args) -def conv3d_reparameterization( - inputs, - filters, - kernel_size, - strides=(1, 1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: 
d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Functional interface for the 3D convolution layer. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the reparameterization - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, - height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
- `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - dilation_rate: An integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 256, 32, 32, 3]) - net = tfp.layers.conv3d_reparameterization(net, - filters=64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu) - net = tf.layers.max_pooling2d(net, - pool_size=2, - strides=2, - padding="SAME") - net = tf.reshape(net, [-1, 256 * 8 * 8 * 64]) - logits = tfp.layers.dense_reparameterization(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. 
Kingma, Max Welling. - International Conference on Learning Representations, 2014. - """ - # pylint: enable=g-doc-args - layer = Conv3DReparameterization( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -class _ConvFlipout(_ConvVariational): - """Abstract nD convolution layer (private, used as implementation base). - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the Flipout - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. Flipout uses - roughly twice as many floating point operations as the - reparameterization estimator but has the advantage of significantly - lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. 
- - Properties: - rank: Python integer, dimensionality of convolution. - filters: Python integer, dimensionality of the output space. - kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. - data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - seed: Python integer, used to create random seeds. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse. - International Conference on Learning Representations, 2018. 
- """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - rank, - filters, - kernel_size, - strides=1, - padding="valid", - data_format="channels_last", - dilation_rate=1, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - rank: An integer, the rank of the convolution, e.g. "2" for 2D - convolution. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - length of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, ..., - channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, ...)`. - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. 
- Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(_ConvFlipout, self).__init__( - rank=rank, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, **kwargs) - self.seed = seed - - def _apply_variational_kernel(self, inputs): - if (not isinstance(self.kernel_posterior, independent_lib.Independent) or - not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)): - raise TypeError( - "`{}` requires " - "`kernel_posterior_fn` produce an instance of " - "`tf.distributions.Independent(tf.distributions.Normal)` " - "(saw: \"{}\").".format( - type(self).__name__, self.kernel_posterior.name)) - self.kernel_posterior_affine = normal_lib.Normal( - loc=array_ops.zeros_like(self.kernel_posterior.distribution.loc), - scale=self.kernel_posterior.distribution.scale) - self.kernel_posterior_affine_tensor = ( - self.kernel_posterior_tensor_fn(self.kernel_posterior_affine)) - self.kernel_posterior_tensor = None - - outputs = self._convolution_op( - inputs, self.kernel_posterior.distribution.loc) - - input_shape = array_ops.shape(inputs) - output_shape = array_ops.shape(outputs) - batch_shape = array_ops.expand_dims(input_shape[0], 0) - channels = input_shape[-1] - - sign_input = layers_util.random_sign( - array_ops.concat([batch_shape, - array_ops.expand_dims(channels, 0)], 0), - dtype=inputs.dtype, - seed=self.seed) - 
sign_output = layers_util.random_sign( - array_ops.concat([batch_shape, - array_ops.expand_dims(self.filters, 0)], 0), - dtype=inputs.dtype, - seed=distribution_util.gen_new_seed( - self.seed, salt="conv_flipout")) - for _ in range(self.rank): - sign_input = array_ops.expand_dims(sign_input, 1) # 2D ex: (B, 1, 1, C) - sign_output = array_ops.expand_dims(sign_output, 1) - - sign_input = array_ops.tile( # tile for element-wise op broadcasting - sign_input, - [1] + [input_shape[i + 1] for i in range(self.rank)] + [1]) - sign_output = array_ops.tile( - sign_output, - [1] + [output_shape[i + 1] for i in range(self.rank)] + [1]) - - perturbed_inputs = self._convolution_op( - inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output - - outputs += perturbed_inputs - return outputs - - -class Conv1DFlipout(_ConvFlipout): - """1D convolution layer (e.g. temporal convolution) with Flipout. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the Flipout - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. Flipout uses - roughly twice as many floating point operations as the - reparameterization estimator but has the advantage of significantly - lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - filters: Python integer, dimensionality of the output space. 
- kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. - data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - seed: Python integer, used to create random seeds. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 128, 1]) - net = tfp.layers.Conv1DFlipout(64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu)(net) - net = tf.reshape(net, [-1, 128 * 64]) - logits = tfp.layers.DenseFlipout(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. 
- - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse. - International Conference on Learning Representations, 2018. - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - filters, - kernel_size, - strides=1, - padding="valid", - data_format="channels_last", - dilation_rate=1, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, specifying the - length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, length, - channels)` while `channels_first` corresponds to inputs with shape - `(batch, channels, length)`. 
- dilation_rate: An integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(Conv1DFlipout, self).__init__( - rank=1, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - seed=seed, - name=name, **kwargs) - - -@docstring_util.expand_docstring(args=doc_args) -def conv1d_flipout( - inputs, - filters, - kernel_size, - strides=1, - padding="valid", - data_format="channels_last", - dilation_rate=1, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Functional interface for 1D convolution layer (e.g. temporal convolution). 
- - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the Flipout - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. Flipout uses - roughly twice as many floating point operations as the - reparameterization estimator but has the advantage of significantly - lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, specifying the - length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, length, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, length)`. - dilation_rate: An integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. 
- Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - @{args} - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 128, 1]) - net = tfp.layers.conv1d_flipout(net, - filters=64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu) - net = tf.reshape(net, [-1, 128 * 64]) - logits = tfp.layers.dense_flipout(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse. - International Conference on Learning Representations, 2018. 
- """ - # pylint: enable=g-doc-args - layer = Conv1DFlipout( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - seed=seed, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -class Conv2DFlipout(_ConvFlipout): - """2D convolution layer (e.g. spatial convolution over images) with Flipout. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the Flipout - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. Flipout uses - roughly twice as many floating point operations as the - reparameterization estimator but has the advantage of significantly - lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - filters: Python integer, dimensionality of the output space. 
- kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. - data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - seed: Python integer, used to create random seeds. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 32, 32, 3]) - net = tfp.layers.Conv2DFlipout(64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu)(net) - net = tf.layers.MaxPooling2D(pool_size=2, - strides=2, - padding="SAME")(net) - net = tf.reshape(net, [-1, 8 * 8 * 64]) - logits = tfp.layers.DenseFlipout(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. 
It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse. - International Conference on Learning Representations, 2018. - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - filters, - kernel_size, - strides=(1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. 
- Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, height, - width, channels)` while `channels_first` corresponds to inputs with - shape `(batch, channels, height, width)`. - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(Conv2DFlipout, self).__init__( - rank=2, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - seed=seed, - name=name, **kwargs) - - -@docstring_util.expand_docstring(args=doc_args) -def conv2d_flipout( - inputs, - filters, - kernel_size, - strides=(1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - 
loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Functional interface for the 2D convolution layer. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the Flipout - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. Flipout uses - roughly twice as many floating point operations as the - reparameterization estimator but has the advantage of significantly - lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the height and width. 
- Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, height, width)`. - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. 
- - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 32, 32, 3]) - net = tfp.layers.conv2d_flipout(net, - filters=64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu) - net = tf.layers.max_pooling2d(net, - pool_size=2, - strides=2, - padding="SAME") - net = tf.reshape(net, [-1, 8 * 8 * 64]) - logits = tfp.layers.dense_flipout(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse. - International Conference on Learning Representations, 2018. 
- """ - # pylint: enable=g-doc-args - layer = Conv2DFlipout( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - seed=seed, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -class Conv3DFlipout(_ConvFlipout): - """3D convolution layer (e.g. spatial convolution over volumes) with Flipout. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the Flipout - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. Flipout uses - roughly twice as many floating point operations as the - reparameterization estimator but has the advantage of significantly - lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - filters: Python integer, dimensionality of the output space. 
- kernel_size: Size of the convolution window. - strides: Stride length of convolution. - padding: Python string describing padding approach. - data_format: Python string describing input data's dimensions. - dilation_rate: Dilation rate for an atrous convolution. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - seed: Python integer, used to create random seeds. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 256, 32, 32, 3]) - net = tfp.layers.Conv3DFlipout(64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu)(net) - net = tf.layers.MaxPooling2D(pool_size=2, - strides=2, - padding="SAME")(net) - net = tf.reshape(net, [-1, 256 * 8 * 8 * 64]) - logits = tfp.layers.DenseFlipout(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. 
It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse. - International Conference on Learning Representations, 2018. - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - filters, - kernel_size, - strides=(1, 1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, - height and width. - Can be a single integer to specify the same value for - all spatial dimensions. 
- Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or - `channels_first`. The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape `(batch, depth, - height, width, channels)` while `channels_first` corresponds to inputs - with shape `(batch, channels, depth, height, width)`. - dilation_rate: An integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - """ - # pylint: enable=g-doc-args - super(Conv3DFlipout, self).__init__( - rank=3, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - seed=seed, - name=name, **kwargs) - - -@docstring_util.expand_docstring(args=doc_args) -def conv3d_flipout( - inputs, - filters, - kernel_size, - strides=(1, 1, 1), - padding="valid", - data_format="channels_last", - dilation_rate=(1, 1, 1), - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - 
loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Functional interface for the 3D convolution layer. - - This layer creates a convolution kernel that is convolved - (actually cross-correlated) with the layer input to produce a tensor of - outputs. It may also include a bias addition and activation function - on the outputs. It assumes the `kernel` and/or `bias` are drawn from - distributions. - - By default, the layer implements a stochastic forward pass via - sampling from the kernel and bias posteriors, - ```none - outputs = f(inputs; kernel, bias), kernel, bias ~ posterior - ``` - where f denotes the layer's calculation. It uses the Flipout - estimator [1], which performs a Monte Carlo approximation of the - distribution integrating over the `kernel` and `bias`. Flipout uses - roughly twice as many floating point operations as the - reparameterization estimator but has the advantage of significantly - lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - filters: Integer, the dimensionality of the output space (i.e. the number - of filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. 
- strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along the depth, - height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - dilation_rate: An integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - @{args} - reuse: Boolean, whether to reuse the weights of a previous layer - by the same name. - - Returns: - Output tensor. - - Raises: - ValueError: if eager execution is enabled. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. 
- - ```python - tfp = tf.contrib.bayesflow - - net = tf.reshape(features, [-1, 256, 32, 32, 3]) - net = tfp.layers.conv3d_flipout(net, - filters=64, - kernel_size=5, - padding="SAME", - activation=tf.nn.relu) - net = tf.layers.max_pooling2d(net, - pool_size=2, - strides=2, - padding="SAME") - net = tf.reshape(net, [-1, 256 * 8 * 8 * 64]) - logits = tfp.layers.dense_flipout(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Yeming Wen, Paul Vicol, Jimmy Ba, Dustin Tran, Roger Grosse. - International Conference on Learning Representations, 2018. 
- """ - # pylint: enable=g-doc-args - layer = Conv3DFlipout( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - seed=seed, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -# Aliases - -Convolution1DReparameterization = Conv1DReparameterization -Convolution2DReparameterization = Conv2DReparameterization -Convolution3DReparameterization = Conv3DReparameterization -convolution1d_reparameterization = conv1d_reparameterization -convolution2d_reparameterization = conv2d_reparameterization -convolution3d_reparameterization = conv3d_reparameterization -Convolution1DFlipout = Conv1DFlipout -Convolution2DFlipout = Conv2DFlipout -Convolution3DFlipout = Conv3DFlipout -convolution1d_flipout = conv1d_flipout -convolution2d_flipout = conv2d_flipout -convolution3d_flipout = conv3d_flipout diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py b/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py deleted file mode 100644 index 1f1d8fda2a..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/layers_dense_variational.py +++ /dev/null @@ -1,955 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Dense Bayesian layer using KL-divergence based variational inference. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.contrib.bayesflow.python.ops import docstring_util -from tensorflow.contrib.bayesflow.python.ops import layers_util -from tensorflow.contrib.distributions.python.ops import independent as independent_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.framework import tensor_shape -from tensorflow.python.layers import base as layers_lib -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import nn -from tensorflow.python.ops import standard_ops -from tensorflow.python.ops.distributions import kullback_leibler as kl_lib -from tensorflow.python.ops.distributions import normal as normal_lib -from tensorflow.python.ops.distributions import util as distribution_util - - -doc_args = """units: Integer or Long, dimensionality of the output space. - activation: Activation function (`callable`). Set it to None to maintain a - linear activation. - activity_regularizer: Regularizer function for the output. - trainable: Boolean, if `True` also add variables to the graph collection - `GraphKeys.TRAINABLE_VARIABLES` (see `tf.Variable`). 
- kernel_posterior_fn: Python `callable` which creates - `tf.distributions.Distribution` instance representing the surrogate - posterior of the `kernel` parameter. Default value: - `default_mean_field_normal_fn()`. - kernel_posterior_tensor_fn: Python `callable` which takes a - `tf.distributions.Distribution` instance and returns a representative - value. Default value: `lambda d: d.sample()`. - kernel_prior_fn: Python `callable` which creates `tf.distributions` - instance. See `default_mean_field_normal_fn` docstring for required - parameter signature. - Default value: `tf.distributions.Normal(loc=0., scale=1.)`. - kernel_divergence_fn: Python `callable` which takes the surrogate posterior - distribution, prior distribution and random variate sample(s) from the - surrogate posterior and computes or approximates the KL divergence. The - distributions are `tf.distributions.Distribution`-like instances and the - sample is a `Tensor`. - bias_posterior_fn: Python `callable` which creates - `tf.distributions.Distribution` instance representing the surrogate - posterior of the `bias` parameter. Default value: - `default_mean_field_normal_fn(is_singular=True)` (which creates an - instance of `tf.distributions.Deterministic`). - bias_posterior_tensor_fn: Python `callable` which takes a - `tf.distributions.Distribution` instance and returns a representative - value. Default value: `lambda d: d.sample()`. - bias_prior_fn: Python `callable` which creates `tf.distributions` instance. - See `default_mean_field_normal_fn` docstring for required parameter - signature. Default value: `None` (no prior, no variational inference) - bias_divergence_fn: Python `callable` which takes the surrogate posterior - distribution, prior distribution and random variate sample(s) from the - surrogate posterior and computes or approximates the KL divergence. The - distributions are `tf.distributions.Distribution`-like instances and the - sample is a `Tensor`. 
- seed: Python scalar `int` which initializes the random number - generator. Default value: `None` (i.e., use global seed). - name: Python `str`, the name of the layer. Layers with the same name will - share `tf.Variable`s, but to avoid mistakes we require `reuse=True` in - such cases. - reuse: Python `bool`, whether to reuse the `tf.Variable`s of a previous - layer by the same name.""" - - -class _DenseVariational(layers_lib.Layer): - """Abstract densely-connected class (private, used as implementation base). - - This layer implements the Bayesian variational inference analogue to - a dense layer by assuming the `kernel` and/or the `bias` are drawn - from distributions. By default, the layer implements a stochastic - forward pass via sampling from the kernel and bias posteriors, - - ```none - kernel, bias ~ posterior - outputs = activation(matmul(inputs, kernel) + bias) - ``` - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - units: Python integer, dimensionality of the output space. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. 
- """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - units, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - @{args} - """ - # pylint: enable=g-doc-args - super(_DenseVariational, self).__init__( - trainable=trainable, - name=name, - activity_regularizer=activity_regularizer, - **kwargs) - self.units = units - self.activation = activation - self.input_spec = layers_lib.InputSpec(min_ndim=2) - self.kernel_posterior_fn = kernel_posterior_fn - self.kernel_posterior_tensor_fn = kernel_posterior_tensor_fn - self.kernel_prior_fn = kernel_prior_fn - self.kernel_divergence_fn = kernel_divergence_fn - self.bias_posterior_fn = bias_posterior_fn - self.bias_posterior_tensor_fn = bias_posterior_tensor_fn - self.bias_prior_fn = bias_prior_fn - self.bias_divergence_fn = bias_divergence_fn - - def build(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape) - in_size = input_shape.with_rank_at_least(2)[-1].value - if in_size is None: - raise ValueError("The last dimension of the inputs to `Dense` " - "should be defined. Found `None`.") - self._input_spec = layers_lib.InputSpec(min_ndim=2, axes={-1: in_size}) - dtype = dtypes.as_dtype(self.dtype) - - # Must have a posterior kernel. 
- self.kernel_posterior = self.kernel_posterior_fn( - dtype, [in_size, self.units], "kernel_posterior", - self.trainable, self.add_variable) - - if self.kernel_prior_fn is None: - self.kernel_prior = None - else: - self.kernel_prior = self.kernel_prior_fn( - dtype, [in_size, self.units], "kernel_prior", - self.trainable, self.add_variable) - self._built_kernel_divergence = False - - if self.bias_posterior_fn is None: - self.bias_posterior = None - else: - self.bias_posterior = self.bias_posterior_fn( - dtype, [self.units], "bias_posterior", - self.trainable, self.add_variable) - - if self.bias_prior_fn is None: - self.bias_prior = None - else: - self.bias_prior = self.bias_prior_fn( - dtype, [self.units], "bias_prior", - self.trainable, self.add_variable) - self._built_bias_divergence = False - - self.built = True - - def call(self, inputs): - inputs = ops.convert_to_tensor(inputs, dtype=self.dtype) - - outputs = self._apply_variational_kernel(inputs) - outputs = self._apply_variational_bias(outputs) - if self.activation is not None: - outputs = self.activation(outputs) # pylint: disable=not-callable - if not self._built_kernel_divergence: - kernel_posterior = self.kernel_posterior - kernel_prior = self.kernel_prior - if isinstance(self.kernel_posterior, independent_lib.Independent): - kernel_posterior = kernel_posterior.distribution - if isinstance(self.kernel_prior, independent_lib.Independent): - kernel_prior = kernel_prior.distribution - self._apply_divergence(self.kernel_divergence_fn, - kernel_posterior, - kernel_prior, - self.kernel_posterior_tensor, - name="divergence_kernel") - self._built_kernel_divergence = True - if not self._built_bias_divergence: - bias_posterior = self.bias_posterior - bias_prior = self.bias_prior - if isinstance(self.bias_posterior, independent_lib.Independent): - bias_posterior = bias_posterior.distribution - if isinstance(self.bias_prior, independent_lib.Independent): - bias_prior = bias_prior.distribution - 
self._apply_divergence(self.bias_divergence_fn, - bias_posterior, - bias_prior, - self.bias_posterior_tensor, - name="divergence_bias") - self._built_bias_divergence = True - return outputs - - def _apply_variational_bias(self, inputs): - if self.bias_posterior is None: - self.bias_posterior_tensor = None - return inputs - self.bias_posterior_tensor = self.bias_posterior_tensor_fn( - self.bias_posterior) - return nn.bias_add(inputs, self.bias_posterior_tensor) - - def _apply_divergence(self, divergence_fn, posterior, prior, - posterior_tensor, name): - if (divergence_fn is None or - posterior is None or - prior is None): - divergence = None - return - divergence = standard_ops.identity( - divergence_fn( - posterior, prior, posterior_tensor), - name=name) - self.add_loss(divergence) - - def _matmul(self, inputs, kernel): - if inputs.shape.ndims <= 2: - return standard_ops.matmul(inputs, kernel) - # To handle broadcasting, we must use `tensordot`. - return standard_ops.tensordot(inputs, kernel, axes=[[-1], [0]]) - - def _compute_output_shape(self, input_shape): - input_shape = tensor_shape.TensorShape(input_shape).with_rank_at_least(2) - if input_shape[-1].value is None: - raise ValueError( - "The innermost dimension of input_shape must be defined, " - "but saw: {}".format(input_shape)) - return input_shape[:-1].concatenate(self.units) - - -class DenseReparameterization(_DenseVariational): - """Densely-connected layer class with reparameterization estimator. - - This layer implements the Bayesian variational inference analogue to - a dense layer by assuming the `kernel` and/or the `bias` are drawn - from distributions. 
By default, the layer implements a stochastic - forward pass via sampling from the kernel and bias posteriors, - - ```none - kernel, bias ~ posterior - outputs = activation(matmul(inputs, kernel) + bias) - ``` - - It uses the reparameterization estimator [1], which performs a Monte Carlo - approximation of the distribution integrating over the `kernel` and - `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - units: Python integer, dimensionality of the output space. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tfp.layers.DenseReparameterization( - 512, activation=tf.nn.relu)(features) - logits = tfp.layers.DenseReparameterization(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. 
It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. - International Conference on Learning Representations, 2014. - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - units, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn( - is_singular=True), - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. 
- - Args: - @{args} - """ - # pylint: enable=g-doc-args - super(DenseReparameterization, self).__init__( - units=units, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - **kwargs) - - def _apply_variational_kernel(self, inputs): - self.kernel_posterior_tensor = self.kernel_posterior_tensor_fn( - self.kernel_posterior) - self.kernel_posterior_affine = None - self.kernel_posterior_affine_tensor = None - return self._matmul(inputs, self.kernel_posterior_tensor) - - -@docstring_util.expand_docstring(args=doc_args) -def dense_reparameterization( - inputs, - units, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn(is_singular=True), # pylint: disable=line-too-long - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Densely-connected layer with reparameterization estimator. - - This layer implements the Bayesian variational inference analogue to - a dense layer by assuming the `kernel` and/or the `bias` are drawn - from distributions. 
By default, the layer implements a stochastic - forward pass via sampling from the kernel and bias posteriors, - - ```none - kernel, bias ~ posterior - outputs = activation(matmul(inputs, kernel) + bias) - ``` - - It uses the reparameterization estimator [1], which performs a Monte Carlo - approximation of the distribution integrating over the `kernel` and - `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - @{args} - - Returns: - output: `Tensor` representing a the affine transformed input under a random - draw from the surrogate posterior distribution. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tfp.layers.dense_reparameterization( - features, 512, activation=tf.nn.relu) - logits = tfp.layers.dense_reparameterization(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Auto-Encoding Variational Bayes." - Diederik P. Kingma, Max Welling. - International Conference on Learning Representations, 2014. 
- """ - # pylint: enable=g-doc-args - layer = DenseReparameterization( - units, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -class DenseLocalReparameterization(_DenseVariational): - """Densely-connected layer class with local reparameterization estimator. - - This layer implements the Bayesian variational inference analogue to - a dense layer by assuming the `kernel` and/or the `bias` are drawn - from distributions. By default, the layer implements a stochastic - forward pass via sampling from the kernel and bias posteriors, - - ```none - kernel, bias ~ posterior - outputs = activation(matmul(inputs, kernel) + bias) - ``` - - It uses the local reparameterization estimator [1], which performs a - Monte Carlo approximation of the distribution on the hidden units - induced by the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - units: Python integer, dimensionality of the output space. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. - bias_posterior_fn: `callable` returning posterior. 
- bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tfp.layers.DenseLocalReparameterization( - 512, activation=tf.nn.relu)(features) - logits = tfp.layers.DenseLocalReparameterization(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses local reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Variational Dropout and the Local Reparameterization Trick." - Diederik P. Kingma, Tim Salimans, Max Welling. - Neural Information Processing Systems, 2015. 
- """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - units, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn( - is_singular=True), - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - @{args} - """ - # pylint: enable=g-doc-args - super(DenseLocalReparameterization, self).__init__( - units=units, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - **kwargs) - - def _apply_variational_kernel(self, inputs): - if (not isinstance(self.kernel_posterior, independent_lib.Independent) or - not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)): - raise TypeError( - "`DenseLocalReparameterization` requires " - "`kernel_posterior_fn` produce an instance of " - "`tf.distributions.Independent(tf.distributions.Normal)` " - "(saw: \"{}\").".format(self.kernel_posterior.name)) - self.kernel_posterior_affine = normal_lib.Normal( - loc=self._matmul(inputs, self.kernel_posterior.distribution.loc), - scale=standard_ops.sqrt(self._matmul( - 
standard_ops.square(inputs), - standard_ops.square(self.kernel_posterior.distribution.scale)))) - self.kernel_posterior_affine_tensor = ( - self.kernel_posterior_tensor_fn(self.kernel_posterior_affine)) - self.kernel_posterior_tensor = None - return self.kernel_posterior_affine_tensor - - -@docstring_util.expand_docstring(args=doc_args) -def dense_local_reparameterization( - inputs, - units, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn( - is_singular=True), - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Densely-connected layer with local reparameterization estimator. - - This layer implements the Bayesian variational inference analogue to - a dense layer by assuming the `kernel` and/or the `bias` are drawn - from distributions. By default, the layer implements a stochastic - forward pass via sampling from the kernel and bias posteriors, - - ```none - kernel, bias ~ posterior - outputs = activation(matmul(inputs, kernel) + bias) - ``` - - It uses the local reparameterization estimator [1], which performs a - Monte Carlo approximation of the distribution on the hidden units - induced by the `kernel` and `bias`. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. 
- @{args} - - Returns: - output: `Tensor` representing a the affine transformed input under a random - draw from the surrogate posterior distribution. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tfp.layers.dense_local_reparameterization( - features, 512, activation=tf.nn.relu) - logits = tfp.layers.dense_local_reparameterization(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses local reparameterization gradients to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Variational Dropout and the Local Reparameterization Trick." - Diederik P. Kingma, Tim Salimans, Max Welling. - Neural Information Processing Systems, 2015. 
- """ - # pylint: enable=g-doc-args - layer = DenseLocalReparameterization( - units, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) - - -class DenseFlipout(_DenseVariational): - """Densely-connected layer class with Flipout estimator. - - This layer implements the Bayesian variational inference analogue to - a dense layer by assuming the `kernel` and/or the `bias` are drawn - from distributions. By default, the layer implements a stochastic - forward pass via sampling from the kernel and bias posteriors, - - ```none - kernel, bias ~ posterior - outputs = activation(matmul(inputs, kernel) + bias) - ``` - - It uses the Flipout estimator [1], which performs a Monte Carlo - approximation of the distribution integrating over the `kernel` and - `bias`. Flipout uses roughly twice as many floating point operations - as the reparameterization estimator but has the advantage of - significantly lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Properties: - units: Python integer, dimensionality of the output space. - activation: Activation function (`callable`). - activity_regularizer: Regularizer function for the output. - kernel_posterior_fn: `callable` returning posterior. - kernel_posterior_tensor_fn: `callable` operating on posterior. - kernel_prior_fn: `callable` returning prior. - kernel_divergence_fn: `callable` returning divergence. 
- bias_posterior_fn: `callable` returning posterior. - bias_posterior_tensor_fn: `callable` operating on posterior. - bias_prior_fn: `callable` returning prior. - bias_divergence_fn: `callable` returning divergence. - seed: Python integer, used to create random seeds. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tfp.layers.DenseFlipout( - 512, activation=tf.nn.relu)(features) - logits = tfp.layers.DenseFlipout(10)(net) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Anonymous. OpenReview, 2017. 
- https://openreview.net/forum?id=rJnpifWAb - """ - - @docstring_util.expand_docstring(args=doc_args) - def __init__( - self, - units, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn( - is_singular=True), - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - **kwargs): - # pylint: disable=g-doc-args - """Construct layer. - - Args: - @{args} - """ - # pylint: enable=g-doc-args - super(DenseFlipout, self).__init__( - units=units, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - name=name, - **kwargs) - self.seed = seed - - def _apply_variational_kernel(self, inputs): - if (not isinstance(self.kernel_posterior, independent_lib.Independent) or - not isinstance(self.kernel_posterior.distribution, normal_lib.Normal)): - raise TypeError( - "`DenseFlipout` requires " - "`kernel_posterior_fn` produce an instance of " - "`tf.distributions.Independent(tf.distributions.Normal)` " - "(saw: \"{}\").".format(self.kernel_posterior.name)) - self.kernel_posterior_affine = normal_lib.Normal( - loc=array_ops.zeros_like(self.kernel_posterior.distribution.loc), - 
scale=self.kernel_posterior.distribution.scale) - self.kernel_posterior_affine_tensor = ( - self.kernel_posterior_tensor_fn(self.kernel_posterior_affine)) - self.kernel_posterior_tensor = None - - input_shape = array_ops.shape(inputs) - batch_shape = input_shape[:-1] - - sign_input = layers_util.random_sign( - input_shape, - dtype=inputs.dtype, - seed=self.seed) - sign_output = layers_util.random_sign( - array_ops.concat([batch_shape, - array_ops.expand_dims(self.units, 0)], 0), - dtype=inputs.dtype, - seed=distribution_util.gen_new_seed( - self.seed, salt="dense_flipout")) - perturbed_inputs = self._matmul( - inputs * sign_input, self.kernel_posterior_affine_tensor) * sign_output - - outputs = self._matmul(inputs, self.kernel_posterior.distribution.loc) - outputs += perturbed_inputs - return outputs - - -@docstring_util.expand_docstring(args=doc_args) -def dense_flipout( - inputs, - units, - activation=None, - activity_regularizer=None, - trainable=True, - kernel_posterior_fn=layers_util.default_mean_field_normal_fn(), - kernel_posterior_tensor_fn=lambda d: d.sample(), - kernel_prior_fn=lambda dtype, *args: normal_lib.Normal( # pylint: disable=g-long-lambda - loc=dtype.as_numpy_dtype(0.), scale=dtype.as_numpy_dtype(1.)), - kernel_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - bias_posterior_fn=layers_util.default_mean_field_normal_fn( - is_singular=True), - bias_posterior_tensor_fn=lambda d: d.sample(), - bias_prior_fn=None, - bias_divergence_fn=lambda q, p, ignore: kl_lib.kl_divergence(q, p), - seed=None, - name=None, - reuse=None): - # pylint: disable=g-doc-args - """Densely-connected layer with Flipout estimator. - - This layer implements the Bayesian variational inference analogue to - a dense layer by assuming the `kernel` and/or the `bias` are drawn - from distributions. 
By default, the layer implements a stochastic - forward pass via sampling from the kernel and bias posteriors, - - ```none - kernel, bias ~ posterior - outputs = activation(matmul(inputs, kernel) + bias) - ``` - - It uses the Flipout estimator [1], which performs a Monte Carlo - approximation of the distribution integrating over the `kernel` and - `bias`. Flipout uses roughly twice as many floating point operations - as the reparameterization estimator but has the advantage of - significantly lower variance. - - The arguments permit separate specification of the surrogate posterior - (`q(W|x)`), prior (`p(W)`), and divergence for both the `kernel` and `bias` - distributions. - - Args: - inputs: Tensor input. - @{args} - - Returns: - output: `Tensor` representing a the affine transformed input under a random - draw from the surrogate posterior distribution. - - #### Examples - - We illustrate a Bayesian neural network with [variational inference]( - https://en.wikipedia.org/wiki/Variational_Bayesian_methods), - assuming a dataset of `features` and `labels`. - - ```python - tfp = tf.contrib.bayesflow - - net = tfp.layers.dense_flipout( - features, 512, activation=tf.nn.relu) - logits = tfp.layers.dense_flipout(net, 10) - neg_log_likelihood = tf.nn.softmax_cross_entropy_with_logits( - labels=labels, logits=logits) - kl = sum(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) - loss = neg_log_likelihood + kl - train_op = tf.train.AdamOptimizer().minimize(loss) - ``` - - It uses the Flipout gradient estimator to minimize the - Kullback-Leibler divergence up to a constant, also known as the - negative Evidence Lower Bound. It consists of the sum of two terms: - the expected negative log-likelihood, which we approximate via - Monte Carlo; and the KL divergence, which is added via regularizer - terms which are arguments to the layer. - - [1]: "Flipout: Efficient Pseudo-Independent Weight Perturbations on - Mini-Batches." - Anonymous. OpenReview, 2017. 
- https://openreview.net/forum?id=rJnpifWAb - """ - # pylint: enable=g-doc-args - layer = DenseFlipout( - units, - activation=activation, - activity_regularizer=activity_regularizer, - trainable=trainable, - kernel_posterior_fn=kernel_posterior_fn, - kernel_posterior_tensor_fn=kernel_posterior_tensor_fn, - kernel_prior_fn=kernel_prior_fn, - kernel_divergence_fn=kernel_divergence_fn, - bias_posterior_fn=bias_posterior_fn, - bias_posterior_tensor_fn=bias_posterior_tensor_fn, - bias_prior_fn=bias_prior_fn, - bias_divergence_fn=bias_divergence_fn, - seed=seed, - name=name, - dtype=inputs.dtype.base_dtype, - _scope=name, - _reuse=reuse) - return layer.apply(inputs) diff --git a/tensorflow/contrib/bayesflow/python/ops/layers_util.py b/tensorflow/contrib/bayesflow/python/ops/layers_util.py deleted file mode 100644 index 8c1fb203f7..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/layers_util.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities for probabilistic layers. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.distributions.python.ops import deterministic as deterministic_lib -from tensorflow.contrib.distributions.python.ops import independent as independent_lib -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import nn_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops.distributions import normal as normal_lib - - -def default_loc_scale_fn( - is_singular=False, - loc_initializer=init_ops.random_normal_initializer(stddev=0.1), - untransformed_scale_initializer=init_ops.random_normal_initializer( - mean=-3., stddev=0.1), - loc_regularizer=None, - untransformed_scale_regularizer=None, - loc_constraint=None, - untransformed_scale_constraint=None): - """Makes closure which creates `loc`, `scale` params from `tf.get_variable`. - - This function produces a closure which produces `loc`, `scale` using - `tf.get_variable`. The closure accepts the following arguments: - - dtype: Type of parameter's event. - shape: Python `list`-like representing the parameter's event shape. - name: Python `str` name prepended to any created (or existing) - `tf.Variable`s. - trainable: Python `bool` indicating all created `tf.Variable`s should be - added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. - add_variable_fn: `tf.get_variable`-like `callable` used to create (or - access existing) `tf.Variable`s. - - Args: - is_singular: Python `bool` indicating if `scale is None`. Default: `False`. - loc_initializer: Initializer function for the `loc` parameters. - The default is `tf.random_normal_initializer(mean=0., stddev=0.1)`. - untransformed_scale_initializer: Initializer function for the `scale` - parameters. 
Default value: `tf.random_normal_initializer(mean=-3., - stddev=0.1)`. This implies the softplus transformed result has mean - approximately `0.05` and std. deviation approximately `0.005`. - loc_regularizer: Regularizer function for the `loc` parameters. - The default (`None`) is to use the `tf.get_variable` default. - untransformed_scale_regularizer: Regularizer function for the `scale` - parameters. The default (`None`) is to use the `tf.get_variable` default. - loc_constraint: An optional projection function to be applied to the - loc after being updated by an `Optimizer`. The function must take as input - the unprojected variable and must return the projected variable (which - must have the same shape). Constraints are not safe to use when doing - asynchronous distributed training. - The default (`None`) is to use the `tf.get_variable` default. - untransformed_scale_constraint: An optional projection function to be - applied to the `scale` parameters after being updated by an `Optimizer` - (e.g. used to implement norm constraints or value constraints). The - function must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are not - safe to use when doing asynchronous distributed training. The default - (`None`) is to use the `tf.get_variable` default. - - Returns: - default_loc_scale_fn: Python `callable` which instantiates `loc`, `scale` - parameters from args: `dtype, shape, name, trainable, add_variable_fn`. 
- """ - def _fn(dtype, shape, name, trainable, add_variable_fn): - """Creates `loc`, `scale` parameters.""" - loc = add_variable_fn( - name=name + "_loc", - shape=shape, - initializer=loc_initializer, - regularizer=loc_regularizer, - constraint=loc_constraint, - dtype=dtype, - trainable=trainable) - if is_singular: - return loc, None - untransformed_scale = add_variable_fn( - name=name + "_untransformed_scale", - shape=shape, - initializer=untransformed_scale_initializer, - regularizer=untransformed_scale_regularizer, - constraint=untransformed_scale_constraint, - dtype=dtype, - trainable=trainable) - scale = (np.finfo(dtype.as_numpy_dtype).eps + - nn_ops.softplus(untransformed_scale)) - return loc, scale - return _fn - - -def default_mean_field_normal_fn( - is_singular=False, - loc_initializer=None, - untransformed_scale_initializer=None, - loc_regularizer=None, - untransformed_scale_regularizer=None, - loc_constraint=None, - untransformed_scale_constraint=None): - """Creates a function to build Normal distributions with trainable params. - - This function produces a closure which produces `tf.distributions.Normal` - parameterized by a loc` and `scale` each created using `tf.get_variable`. The - produced closure accepts the following arguments: - - name: Python `str` name prepended to any created (or existing) - `tf.Variable`s. - shape: Python `list`-like representing the parameter's event shape. - dtype: Type of parameter's event. - trainable: Python `bool` indicating all created `tf.Variable`s should be - added to the graph collection `GraphKeys.TRAINABLE_VARIABLES`. - add_variable_fn: `tf.get_variable`-like `callable` used to create (or - access existing) `tf.Variable`s. - - Args: - is_singular: Python `bool` if `True`, forces the special case limit of - `scale->0`, i.e., a `Deterministic` distribution. - loc_initializer: Initializer function for the `loc` parameters. 
- If `None` (default), values are initialized using the default - initializer used by `tf.get_variable`. - untransformed_scale_initializer: Initializer function for the `scale` - parameters. If `None` (default), values are initialized using the default - initializer used by `tf.get_variable`. - loc_regularizer: Regularizer function for the `loc` parameters. - untransformed_scale_regularizer: Regularizer function for the `scale` - parameters. - loc_constraint: An optional projection function to be applied to the - loc after being updated by an `Optimizer`. The function must take as input - the unprojected variable and must return the projected variable (which - must have the same shape). Constraints are not safe to use when doing - asynchronous distributed training. - untransformed_scale_constraint: An optional projection function to be - applied to the `scale` parameters after being updated by an `Optimizer` - (e.g. used to implement norm constraints or value constraints). The - function must take as input the unprojected variable and must return the - projected variable (which must have the same shape). Constraints are not - safe to use when doing asynchronous distributed training. - - Returns: - make_normal_fn: Python `callable` which creates a `tf.distributions.Normal` - using from args: `dtype, shape, name, trainable, add_variable_fn`. 
- """ - loc_scale_fn_ = default_loc_scale_fn( - is_singular, - loc_initializer, - untransformed_scale_initializer, - loc_regularizer, - untransformed_scale_regularizer, - loc_constraint, - untransformed_scale_constraint) - def _fn(dtype, shape, name, trainable, add_variable_fn): - """Creates multivariate `Deterministic` or `Normal` distribution.""" - loc, scale = loc_scale_fn_(dtype, shape, name, trainable, add_variable_fn) - if scale is None: - dist = deterministic_lib.Deterministic(loc=loc) - else: - dist = normal_lib.Normal(loc=loc, scale=scale) - reinterpreted_batch_ndims = array_ops.shape(dist.batch_shape_tensor())[0] - return independent_lib.Independent( - dist, reinterpreted_batch_ndims=reinterpreted_batch_ndims) - return _fn - - -def random_sign(shape, dtype=dtypes.float32, seed=None): - """Draw values from {-1, 1} uniformly, i.e., Rademacher distribution.""" - random_bernoulli = random_ops.random_uniform(shape, minval=0, maxval=2, - dtype=dtypes.int32, - seed=seed) - return math_ops.cast(2 * random_bernoulli - 1, dtype) -- GitLab From 85d02dcef3b0f0900b3d363056be4e177d4d70ab Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Wed, 7 Mar 2018 11:27:12 -0800 Subject: [PATCH 536/884] Making sure that the proc FLR doesn't get deleted before lib_ (in FunctionBufferingResource). 
PiperOrigin-RevId: 188206611 --- .../contrib/data/kernels/prefetching_kernels.cc | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/data/kernels/prefetching_kernels.cc b/tensorflow/contrib/data/kernels/prefetching_kernels.cc index c0155e8d91..1baac3ea52 100644 --- a/tensorflow/contrib/data/kernels/prefetching_kernels.cc +++ b/tensorflow/contrib/data/kernels/prefetching_kernels.cc @@ -36,12 +36,14 @@ using FunctionBufferCallback = std::function; class FunctionBufferingResource : public ResourceBase { public: FunctionBufferingResource(FunctionLibraryRuntime* lib, + std::unique_ptr pflr, const NameAttrList& func, int64 buffer_size, const string& source_device, const string& target_device, const std::vector& func_args, int64 thread_pool_size) : lib_(lib), + pflr_(std::move(pflr)), func_(func), buffer_size_(buffer_size), source_device_(source_device), @@ -223,6 +225,7 @@ class FunctionBufferingResource : public ResourceBase { mutex mu_; FunctionLibraryRuntime* lib_; + std::unique_ptr pflr_; NameAttrList func_; const int64 buffer_size_; const string source_device_; @@ -242,7 +245,7 @@ class FunctionBufferingResource : public ResourceBase { class FunctionBufferResourceHandleOp : public OpKernel { public: explicit FunctionBufferResourceHandleOp(OpKernelConstruction* ctx) - : OpKernel(ctx), flib_def_(nullptr), pflr_(nullptr) { + : OpKernel(ctx), flib_def_(nullptr) { OP_REQUIRES_OK(ctx, ctx->GetAttr("f", &func_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("buffer_size", &buffer_size_)); OP_REQUIRES_OK(ctx, ctx->GetAttr("container", &container_)); @@ -283,18 +286,19 @@ class FunctionBufferResourceHandleOp : public OpKernel { if (!initialized_) { OP_REQUIRES_OK(ctx, cinfo_.Init(ctx->resource_manager(), def())); FunctionLibraryRuntime* clone_lib; - OP_REQUIRES_OK(ctx, lib->Clone(&flib_def_, &pflr_, &clone_lib)); + std::unique_ptr pflr; + OP_REQUIRES_OK(ctx, lib->Clone(&flib_def_, &pflr, &clone_lib)); // Create the resource. 
FunctionBufferingResource* buffer; OP_REQUIRES_OK( ctx, ctx->resource_manager()->LookupOrCreate( cinfo_.container(), cinfo_.name(), &buffer, - [clone_lib, &source_device, &target_device, func_args, + [clone_lib, &pflr, &source_device, &target_device, func_args, this](FunctionBufferingResource** ptr) { *ptr = new FunctionBufferingResource( - clone_lib, func_, buffer_size_, source_device, - target_device, func_args, thread_pool_size_); + clone_lib, std::move(pflr), func_, buffer_size_, + source_device, target_device, func_args, thread_pool_size_); return Status::OK(); })); OP_REQUIRES_OK(ctx, buffer->Instantiate()); @@ -311,7 +315,6 @@ class FunctionBufferResourceHandleOp : public OpKernel { ContainerInfo cinfo_ GUARDED_BY(mu_); bool initialized_ GUARDED_BY(mu_) = false; std::unique_ptr flib_def_; - std::unique_ptr pflr_; NameAttrList func_; int64 buffer_size_; string container_; -- GitLab From 19881403d77e12fdba9443d6d8b3b379cc3bb8b2 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 7 Mar 2018 11:44:18 -0800 Subject: [PATCH 537/884] add error message when importing contrib.tensorrt without libnvinfer --- tensorflow/contrib/tensorrt/python/__init__.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 0b2321b5fc..120904b8b6 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -19,7 +19,16 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long -from tensorflow.contrib.tensorrt.python.ops import trt_engine_op -from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph -from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph +try: + from tensorflow.contrib.tensorrt.python.ops import trt_engine_op + from tensorflow.contrib.tensorrt.python.trt_convert import 
calib_graph_to_infer_graph + from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph +except: + no_trt_message = ( + '**** Failed to initialize TensorRT. This is either because the TensorRT' + ' installation path is not in LD_LIBRARY_PATH, or because you do not have it' + ' installed. If not installed, please go to' + ' https://developer.nvidia.com/tensorrt to download and install' + ' TensorRT ****''') + print(no_trt_message) # pylint: enable=unused-import,line-too-long -- GitLab From 58fe7d26afa435560e7a0d8ca6fc8d670d2477da Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 11:53:21 -0800 Subject: [PATCH 538/884] Support for transpose convolution. Includes striding, and a reference implementation. PiperOrigin-RevId: 188210975 --- .../internal/optimized/optimized_ops.h | 73 +++++++++++ .../internal/reference/reference_ops.h | 61 +++++++++ .../contrib/lite/toco/export_tensorflow.cc | 36 ++++++ .../propagate_array_data_types.cc | 5 + .../propagate_fixed_sizes.cc | 116 +++++++++++++++++- .../contrib/lite/toco/import_tensorflow.cc | 86 +++++++++---- tensorflow/contrib/lite/toco/model.h | 16 ++- 7 files changed, 363 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index 3866f86d38..f1937228f6 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -768,6 +768,7 @@ inline void DilatedConv(const float* input_data, const Dims<4>& input_dims, float output_activation_max, float* output_data, const Dims<4>& output_dims, float* im2col_data, const Dims<4>& im2col_dims) { + gemmlowp::ScopedProfilingLabel label("DilatedConv"); // This is a copy of the reference Conv implementation. We do not currently // have an optimized path for dilation. (void)im2col_data; // only used in optimized code. 
@@ -4725,6 +4726,78 @@ void Transpose(const T* input, const Dims<4>& input_dims, T* output, } } +inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("TransposeConv"); + // THIS FUNCTION IS A COPY FROM reference_ops.h. + // To optimize, start by using the conv code with transposed weights for the + // case of stride_height = stride_width = 1. + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a "scatter" + // access pattern, where we loop through all the input elements, computing + // their influence on the output, rather than looping through the output + // elements in the typical "gather" access pattern of a conv. We therefore + // must initialize the output array to zero. 
+ for (int batch = 0; batch < batches; ++batch) { + for (int out_y = 0; out_y < output_height; ++out_y) { + for (int out_x = 0; out_x < output_width; ++out_x) { + for (int out_channel = 0; out_channel < output_depth; ++out_channel) { + output_data[Offset(output_dims, out_channel, out_x, out_y, batch)] = + 0.0f; + } + } + } + } + + // Loop through input elements one at a time. + for (int batch = 0; batch < batches; ++batch) { + for (int in_y = 0; in_y < input_height; ++in_y) { + for (int in_x = 0; in_x < input_width; ++in_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int out_channel = 0; out_channel < input_depth; + ++out_channel) { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) { + float input_value = input_data[Offset(input_dims, in_channel, + in_x, in_y, batch)]; + float filter_value = + filter_data[Offset(filter_dims, out_channel, filter_x, + filter_y, in_channel)]; + output_data[Offset(output_dims, out_channel, out_x, out_y, + batch)] += input_value * filter_value; + } + } + } + } + } + } + } + } +} + } // namespace optimized_ops } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 53de21697b..84f6cf6e4f 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -3084,6 +3084,67 @@ void 
Transpose(const T* input, const Dims<4>& input_dims, T* output, } } +inline void TransposeConv(const float* input_data, const Dims<4>& input_dims, + const float* filter_data, const Dims<4>& filter_dims, + int stride_width, int stride_height, int pad_width, + int pad_height, float* output_data, + const Dims<4>& output_dims) { + const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); + const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 0, output_dims, 0); + const int input_height = ArraySize(input_dims, 2); + const int input_width = ArraySize(input_dims, 1); + const int filter_height = ArraySize(filter_dims, 2); + const int filter_width = ArraySize(filter_dims, 1); + const int output_height = ArraySize(output_dims, 2); + const int output_width = ArraySize(output_dims, 1); + + // Although transpose convolution simplifies to convolution with transposed + // weights for strides of 1, non-unitary striding complicates matters. To + // keep this reference implementation as clear as possible, we use a "scatter" + // access pattern, where we loop through all the input elements, computing + // their influence on the output, rather than looping through the output + // elements in the typical "gather" access pattern of a conv. We therefore + // must initialize the output array to zero. + for (int i = 0; i < RequiredBufferSizeForDims(output_dims); i++) { + output_data[i] = 0.0f; + } + + // Loop through input elements one at a time. 
+ for (int batch = 0; batch < batches; ++batch) { + for (int in_y = 0; in_y < input_height; ++in_y) { + for (int in_x = 0; in_x < input_width; ++in_x) { + for (int in_channel = 0; in_channel < input_depth; ++in_channel) { + // Loop through the output elements it will influence + const int out_x_origin = (in_x * stride_width) - pad_width; + const int out_y_origin = (in_y * stride_height) - pad_height; + for (int filter_y = 0; filter_y < filter_height; ++filter_y) { + for (int filter_x = 0; filter_x < filter_width; ++filter_x) { + for (int out_channel = 0; out_channel < output_depth; + ++out_channel) { + // Compute output element location + const int out_x = out_x_origin + filter_x; + const int out_y = out_y_origin + filter_y; + // We cannot accumulate out of bounds + if ((out_x >= 0) && (out_x < output_width) && (out_y >= 0) && + (out_y < output_height)) { + float input_value = input_data[Offset(input_dims, in_channel, + in_x, in_y, batch)]; + float filter_value = + filter_data[Offset(filter_dims, out_channel, filter_x, + filter_y, in_channel)]; + output_data[Offset(output_dims, out_channel, out_x, out_y, + batch)] += input_value * filter_value; + } + } + } + } + } + } + } + } +} + } // namespace reference_ops } // namespace tflite diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 6900468ec6..695def7ba3 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -548,6 +548,38 @@ void ConvertDepthwiseConvOperator(const Model& model, } } +void ConvertTransposeConvOperator(const Model& model, + const TransposeConvOperator& src_op, + GraphDef* tensorflow_graph) { + auto* conv2d_op = tensorflow_graph->add_node(); + conv2d_op->set_op("Conv2DBackpropInput"); + conv2d_op->set_name(src_op.outputs[0]); + *conv2d_op->add_input() = src_op.inputs[0]; + *conv2d_op->add_input() = src_op.inputs[1]; + *conv2d_op->add_input() = src_op.inputs[2]; + 
(*conv2d_op->mutable_attr())["T"].set_type(DT_FLOAT); + const string& weights_array_name = WalkUpToConstantArray( + model, src_op.inputs[TransposeConvOperator::WEIGHTS]); + const auto& weights_array = model.GetArray(weights_array_name); + CHECK(weights_array.buffer->type == ArrayDataType::kFloat); + ConvertFloatTensorConst(model, weights_array_name, AxesOrder::kOHWI, + AxesOrder::kHWIO, tensorflow_graph); + auto& strides = (*conv2d_op->mutable_attr())["strides"]; + strides.mutable_list()->add_i(1); + strides.mutable_list()->add_i(src_op.stride_height); + strides.mutable_list()->add_i(src_op.stride_width); + strides.mutable_list()->add_i(1); + string padding; + if (src_op.padding.type == PaddingType::kSame) { + padding = "SAME"; + } else if (src_op.padding.type == PaddingType::kValid) { + padding = "VALID"; + } else { + LOG(FATAL) << "Bad padding (only SAME and VALID are supported)"; + } + (*conv2d_op->mutable_attr())["padding"].set_s(padding); +} + void ConvertDepthToSpaceOperator(const Model& model, const DepthToSpaceOperator& src_op, GraphDef* tensorflow_graph) { @@ -1859,6 +1891,10 @@ void ConvertOperator(const Model& model, const Operator& src_op, ConvertExpandDimsOperator(model, static_cast(src_op), tensorflow_graph); + } else if (src_op.type == OperatorType::kTransposeConv) { + ConvertTransposeConvOperator( + model, static_cast(src_op), + tensorflow_graph); } else { LOG(FATAL) << "Unhandled operator type " << OperatorTypeName(src_op.type); } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc index bde947f78d..778da39bf1 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_array_data_types.cc @@ -71,6 +71,11 @@ bool PropagateArrayDataTypes::Run(Model* model, std::size_t op_index) { CHECK_GE(op->inputs.size(), 2); const 
ArrayDataType data_type = model->GetArray(op->inputs[1]).data_type; SetDataTypeForAllOutputs(model, op, data_type); + } else if (op->type == OperatorType::kTransposeConv) { + // These operators produce an output with the same type as their 3rd input + CHECK_GE(op->inputs.size(), 3); + const ArrayDataType data_type = model->GetArray(op->inputs[2]).data_type; + SetDataTypeForAllOutputs(model, op, data_type); } else if (op->type == OperatorType::kCast) { // Data type of the Cast op is specified. CHECK_EQ(op->outputs.size(), 1); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc index fc26f997a6..375848a7d4 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_fixed_sizes.cc @@ -190,6 +190,116 @@ void ProcessConvOperator(Model* model, ConvOperator* op) { } } +void ProcessTransposeConvOperator(Model* model, TransposeConvOperator* op) { + // TransposeConv is unique in that it is specifically given the output shape + // as a 1D array on it's 1st input. Theoretically then, resolving the output + // shape is as easy as waiting for this input to be resolved. However, we also + // have to calculate the padding which requires the weights shape. So, we + // might as well calculate the output shape and ensure it matches the + // specified one + + // Check if we have already run. + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.has_shape()) { + return; + } + + // SPECIFIED OUTPUT SHAPE + // The below is the specified, or prescribed output shape, _given_ to the + // operator as an input. 
+ auto& specified_output_shape_array = + model->GetArray(op->inputs[TransposeConvOperator::OUTPUT_SHAPE]); + if (!specified_output_shape_array.has_shape() || + !specified_output_shape_array.buffer) { + // Yield until the specified output shape is resolved as a constant + return; + } + + CHECK(specified_output_shape_array.data_type == ArrayDataType::kInt32) + << "TransposeConv input_dims must be int32"; + + CHECK(specified_output_shape_array.shape().dimensions_count() == 1 && + specified_output_shape_array.shape().dims(0) == 4) + << "TransposeConv requires a 1D, 4 element array on it's 0th input " + "specifying the output shape. \"" + << op->inputs[TransposeConvOperator::OUTPUT_SHAPE] << "\" had shape " + << toco::ShapeToString(specified_output_shape_array.shape()); + + // COMPUTE PADDING + // We require the weights shape to calculate padding. + const auto& weights_array = + model->GetArray(op->inputs[TransposeConvOperator::WEIGHTS]); + if (!weights_array.has_shape()) { + // Yield until weights dims have been resolved. + return; + } + const auto& weights_shape = weights_array.shape(); + CHECK_EQ(weights_shape.dimensions_count(), 4) + << "TransposeConv weights must have 4 input dimensions. Input weights \"" + << op->inputs[TransposeConvOperator::WEIGHTS] << "\" had shape " + << toco::ShapeToString(weights_shape) << "."; + + CHECK(weights_shape.dims(0) == 1 && weights_shape.dims(3) == 1) + << "TransposeConv weights dimensions must begin and end with 1. 
Input " + "weights \"" + << op->inputs[TransposeConvOperator::WEIGHTS] << "\" had shape " + << toco::ShapeToString(weights_shape) << "."; + + // Compute padding + const int kheight = weights_shape.dims(1); + const int kwidth = weights_shape.dims(2); + op->padding.GetOrCreateFixedPadding(); + if (op->padding.type == PaddingType::kValid) { + op->padding.fixed->height = 0; + op->padding.fixed->width = 0; + } else if (op->padding.type == PaddingType::kSame) { + op->padding.fixed->height = (kheight - 1) / 2; + op->padding.fixed->width = (kwidth - 1) / 2; + } else { + LOG(FATAL) << "TransposeConv only supports SAME or VALID padding"; + } + + // VALIDATE OUTPUT SHAPE + // Compute the output shape from the input and weights shapes to verify it + // agrees with the specified output shape. + const auto& input_array = + model->GetArray(op->inputs[TransposeConvOperator::DATA_INPUT]); + if (!input_array.has_shape()) { + // Yield until input dims have been resolved. + return; + } + const auto& input_shape = input_array.shape(); + CHECK_EQ(input_shape.dimensions_count(), 4) + << "TransposeConv input shape must have 4 dimensions. 
Input \"" + << op->inputs[TransposeConvOperator::WEIGHTS] << "\" had shape " + << toco::ShapeToString(weights_shape) << "."; + + // Compute output shape + const int input_width = input_shape.dims(2); + const int input_height = input_shape.dims(1); + int output_height = op->stride_height * (input_height - 1); + int output_width = op->stride_width * (input_width - 1); + if (op->padding.type == PaddingType::kValid) { + output_height += kheight; + output_width += kwidth; + } else if (op->padding.type == PaddingType::kSame) { + output_height += 1; + output_width += 1; + } + + CHECK(specified_output_shape_array.GetBuffer().data == + std::vector({input_shape.dims(0), output_height, output_width, + weights_shape.dims(3)})) + << "Specified output shape: " << ShapeToString(output_array.shape()) + << ", does not agree with shape computed from input data and weights: [" + << input_shape.dims(0) << ", " << output_height << ", " << output_width + << ", " << weights_shape.dims(3) << "]."; + + // SUCCESS: Set the op's output shape according to the specified output shape. 
+ *(output_array.mutable_shape()->mutable_dims()) = + specified_output_shape_array.GetBuffer().data; +} + void ProcessDepthwiseConvOperator(Model* model, DepthwiseConvOperator* op) { if (!EnsureBiasVectorShape(model, op)) { return; @@ -1300,7 +1410,7 @@ void ProcessTransposeOperator(Model* model, TransposeOperator* op) { std::vector const& perm = perm_array.GetBuffer().data; CHECK_EQ(perm.size(), input_shape.dimensions_count()) - << "Transpose permutation input " << op->inputs[0] + << "Transpose permutation input " << op->inputs[1] << " must be same length as input dimensions"; std::vector* output_dims = output_array.mutable_shape()->mutable_dims(); for (int i = 0; i < perm.size(); i++) { @@ -1402,8 +1512,8 @@ bool PropagateFixedSizes::Run(Model* model, std::size_t op_index) { ProcessConvOperator(model, static_cast(op)); break; case OperatorType::kTransposeConv: - // Unimplemented, hopefully another graph transformation will drop it or - // rewrite it. + ProcessTransposeConvOperator(model, + static_cast(op)); break; case OperatorType::kDepthwiseConv: ProcessDepthwiseConvOperator(model, diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 41abca864d..50aeafdf8d 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -351,6 +351,18 @@ void CheckInputsCount(const NodeDef& node, << " input(s) other than control dependencies: " << node.DebugString(); } +template +string CreateConstArray(Model* model, string const& name, + std::vector > const& data) { + // Utility function to create a const 1D array, useful for input parameters. 
+ string array_name = toco::AvailableArrayName(*model, name); + auto& array = model->GetOrCreateArray(array_name); + array.data_type = T; + array.mutable_shape()->mutable_dims()->emplace_back(data.size()); + array.GetMutableBuffer().data = data; + return array_name; +} + void ConvertConstOperator(const NodeDef& node, const TensorFlowImportFlags& tf_import_flags, Model* model) { @@ -1436,12 +1448,8 @@ void ConvertFusedBatchNormOperator(const NodeDef& node, const string& moving_variance_input = node.input(4); // Create an array holding the epsilon value (typically, 0.001). - const string epsilon_array_name = node.name() + "_epsilon_array"; - auto& epsilon_array = model->GetOrCreateArray(epsilon_array_name); - epsilon_array.data_type = ArrayDataType::kFloat; - *epsilon_array.mutable_shape()->mutable_dims() = {1}; - epsilon_array.GetMutableBuffer().data.push_back( - GetFloatAttr(node, "epsilon")); + const string epsilon_array_name = CreateConstArray( + model, node.name() + "_epsilon_array", {GetFloatAttr(node, "epsilon")}); // Add epsilon to the moving variance. const string epsilon_add_op_name = node.name() + "_epsilon"; @@ -1569,16 +1577,56 @@ void ConvertTransposeConvOperator(const NodeDef& node, CHECK_EQ(node.op(), "Conv2DBackpropInput"); CheckInputsCount(node, tf_import_flags, 3); auto* op = new TransposeConvOperator; - op->inputs.push_back(node.input(2)); - op->inputs.push_back(node.input(1)); op->inputs.push_back(node.input(0)); + op->inputs.push_back(node.input(1)); + op->inputs.push_back(node.input(2)); op->outputs.push_back(node.name()); const auto& strides = GetListAttr(node, "strides"); - CHECK_EQ(strides.i_size(), 4); - CHECK_EQ(strides.i(0), 1); op->stride_height = strides.i(1); op->stride_width = strides.i(2); - CHECK_EQ(strides.i(3), 1); + CHECK_EQ(strides.i_size(), 4) + << "Can only import TransposeConv ops with 4D strides. 
TensorFlow op \"" + << node.name() << "\" has " << strides.i_size() << "D strides."; + CHECK((strides.i(0) == 1) && (strides.i(3) == 1)) + << "Can only import TransposeConv ops with striding along the height " + "(1st) or width (2nd) axis. TensorFlow op \"" + << node.name() << "\" had strides:[ " << strides.i(0) << ", " + << strides.i(1) << ", " << strides.i(2) << ", " << strides.i(3) << "]."; + op->stride_height = strides.i(1); + op->stride_width = strides.i(2); + if (HasAttr(node, "dilations")) { + const auto& dilations = GetListAttr(node, "dilations"); + CHECK_EQ(dilations.i_size(), 4) + << "Dilation unsupported in TransposeConv. TensorFlow op \"" + << node.name() << "\" had dilations"; + CHECK((dilations.i(0) == 1) && (dilations.i(1) == 1) && + (dilations.i(1) == 1) && (dilations.i(3) == 1)) + << "Dilation unsupported in TransposeConv. TensorFlow op \"" + << node.name() << "\" had dilations:[ " << dilations.i(0) << ", " + << dilations.i(1) << ", " << dilations.i(2) << ", " << dilations.i(3) + << "]."; + } + + const string& weights_name = node.input(TransposeConvOperator::WEIGHTS); + const string& transposed_weights_name = weights_name + "_transposed"; + // Check if a TransposeOperator was already created for these weights + // (can happen when multiple layers share the same weights). 
+ const Operator* existing_transpose = + GetOpWithOutput(*model, transposed_weights_name); + if (existing_transpose) { + CHECK(existing_transpose->type == OperatorType::kTranspose); + } else { + // Transpose weights from HWIO order to OHWI order, which is more efficient + // for computation + TransposeOperator* transpose = new TransposeOperator; + string perm_array = CreateConstArray( + model, node.name() + "_transpose_perm", {3, 0, 1, 2}); + transpose->inputs = {weights_name, perm_array}; + transpose->outputs = {transposed_weights_name}; + model->operators.emplace_back(transpose); + } + op->inputs[1] = transposed_weights_name; + auto const& padding = GetStringAttr(node, "padding"); if (padding == "SAME") { op->padding.type = PaddingType::kSame; @@ -1874,19 +1922,9 @@ void ConvertTopKV2Operator(const NodeDef& node, op->inputs.push_back(node.input(0)); // K can be encoded as attr (TopK) convert it to a const. if (HasAttr(node, "k")) { - // Convert attribute into const tensor. - const string array_name = node.name() + "k"; - auto& array = model->GetOrCreateArray(array_name); - array.data_type = ArrayDataType::kInt32; - // Size of array is always 1. 
- array.mutable_shape()->mutable_dims()->emplace_back(1); - - auto& output_int_data = - array.GetMutableBuffer().data; - output_int_data.resize(1); - output_int_data[0] = GetIntAttr(node, "k"); - op->inputs.push_back(array_name); - + string k_array = CreateConstArray( + model, node.name() + "k", {GetIntAttr(node, "k")}); + op->inputs.push_back(k_array); } else { CheckInputsCount(node, tf_import_flags, 2); op->inputs.push_back(node.input(1)); diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index ed0dedc003..cd3eb06602 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -846,19 +846,29 @@ struct SqueezeOperator : Operator { }; // Inputs: -// inputs[0]: required: the input activations array -// inputs[1]: required: the Conv weights -// channel. +// inputs[0]: required: the output shape +// inputs[1]: required: the weights +// inputs[2]: required: the input activations array +// NOTE: The input activations is NOT the first input. +// // // Outputs: // outputs[0]: required: the output activations array // // TensorFlow equivalent: Conv2DBackpropInput struct TransposeConvOperator : Operator { + enum Inputs { + OUTPUT_SHAPE = 0, + WEIGHTS = 1, + DATA_INPUT = 2, + }; + TransposeConvOperator() : Operator(OperatorType::kTransposeConv) {} Padding padding; int stride_width = 0; int stride_height = 0; + // Dilation is possible with transpose convolution, but Tensorflow does not + // currently support it, so we omit it. }; // Given a tensor input, this operation calculates element-wise exponential -- GitLab From 808b569e85df8d63590740f05bc14d964efc4801 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 12:01:42 -0800 Subject: [PATCH 539/884] Convert functions with multiple returns to use a single return. 
PiperOrigin-RevId: 188212324 --- tensorflow/contrib/py2tf/converters/BUILD | 12 + .../contrib/py2tf/converters/single_return.py | 317 ++++++++++++++++++ .../py2tf/converters/single_return_test.py | 189 +++++++++++ tensorflow/contrib/py2tf/impl/conversion.py | 5 + .../py2tf/pyct/static_analysis/activity.py | 9 + 5 files changed, 532 insertions(+) create mode 100644 tensorflow/contrib/py2tf/converters/single_return.py create mode 100644 tensorflow/contrib/py2tf/converters/single_return_test.py diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index 78f46bc05f..fa7718c93e 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -29,6 +29,7 @@ py_library( "logical_expressions.py", "name_scopes.py", "side_effect_guards.py", + "single_return.py", ], srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], @@ -179,3 +180,14 @@ py_test( "//tensorflow/python:client_testlib", ], ) + +py_test( + name = "single_return_test", + srcs = ["single_return_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":test_lib", + "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/python:client_testlib", + ], +) diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py new file mode 100644 index 0000000000..90bc22008f --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -0,0 +1,317 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Canonicalizes functions with multiple returns to use just one.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gast + +from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.py2tf.pyct import ast_util +from tensorflow.contrib.py2tf.pyct import templates +from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.contrib.py2tf.pyct.static_analysis.annos import NodeAnno + + +# TODO(mdan): Move this logic into transformer_base. +class BodyVisitor(transformer.Base): + """Walks breadth- or depth-first the list-of-nodes bodies of AST nodes.""" + + def __init__(self, context, depth_first=False): + self.depth_first = depth_first + self.changes_made = False + super(BodyVisitor, self).__init__(context) + + def visit_nodelist(self, nodelist): + for node in nodelist: + if isinstance(node, list): + node = self.visit_nodelist(node) + else: + node = self.generic_visit(node) + return nodelist + + def visit_If(self, node): + if self.depth_first: + node = self.generic_visit(node) + node.body = self.visit_nodelist(node.body) + node.orelse = self.visit_nodelist(node.orelse) + if not self.depth_first: + node = self.generic_visit(node) + return node + + def visit_For(self, node): + if self.depth_first: + node = self.generic_visit(node) + node.body = self.visit_nodelist(node.body) + node.orelse = self.visit_nodelist(node.orelse) + if not self.depth_first: + node = self.generic_visit(node) + return node + + def visit_While(self, node): + if self.depth_first: + node = self.generic_visit(node) + node.body = self.visit_nodelist(node.body) + node.orelse = self.visit_nodelist(node.orelse) + if not self.depth_first: + node = self.generic_visit(node) + return node + + def visit_Try(self, node): + if 
self.depth_first: + node = self.generic_visit(node) + node.body = self.visit_nodelist(node.body) + node.orelse = self.visit_nodelist(node.orelse) + node.finalbody = self.visit_nodelist(node.finalbody) + for i in range(len(node.handlers)): + node.handlers[i].body = self.visit_nodelist(node.handlers[i].body) + if not self.depth_first: + node = self.generic_visit(node) + return node + + def visit_With(self, node): + if self.depth_first: + node = self.generic_visit(node) + node.body = self.visit_nodelist(node.body) + if not self.depth_first: + node = self.generic_visit(node) + return node + + def visit_FunctionDef(self, node): + if self.depth_first: + node = self.generic_visit(node) + node.body = self.visit_nodelist(node.body) + self.generic_visit(node) + if not self.depth_first: + node = self.generic_visit(node) + return node + + +class FoldElse(BodyVisitor): + + def visit_nodelist(self, nodelist): + for i in range(len(nodelist)): + node = nodelist[i] + if isinstance(node, gast.If): + true_branch_returns = isinstance(node.body[-1], gast.Return) + false_branch_returns = len(node.orelse) and isinstance( + node.orelse[-1], gast.Return) + # If the last node in the if body is a return, + # then every line after this if statement effectively + # belongs in the else. + if true_branch_returns and not false_branch_returns: + for j in range(i + 1, len(nodelist)): + nodelist[i].orelse.append(ast_util.copy_clean(nodelist[j])) + if nodelist[i + 1:]: + self.changes_made = True + return nodelist[:i + 1] + elif not true_branch_returns and false_branch_returns: + for j in range(i + 1, len(nodelist)): + nodelist[i].body.append(ast_util.copy_clean(nodelist[j])) + if nodelist[i + 1:]: + self.changes_made = True + return nodelist[:i + 1] + elif true_branch_returns and false_branch_returns: + if nodelist[i + 1:]: + raise ValueError( + 'Unreachable code after conditional where both branches return.' 
+ ) + return nodelist + elif isinstance(node, gast.Return) and nodelist[i + 1:]: + raise ValueError( + 'Cannot have statements after a return in the same basic block') + return nodelist + + +def contains_return(node): + for n in gast.walk(node): + if isinstance(n, gast.Return): + return True + return False + + +class LiftReturn(transformer.Base): + """Move return statements out of If and With blocks.""" + + def __init__(self, context): + self.changes_made = False + self.common_return_name = None + super(LiftReturn, self).__init__(context) + + def visit_If(self, node): + # Depth-first traversal of if statements + node = self.generic_visit(node) + + # We check if both branches return, and if so, lift the return out of the + # conditional. We don't enforce that the true and false branches either + # both return or both do not, because FoldElse might move a return + # into a branch after this transform completes. FoldElse and LiftReturn + # are alternately run until the code reaches a fixed point. 
+ true_branch_returns = isinstance(node.body[-1], gast.Return) + false_branch_returns = len(node.orelse) and isinstance( + node.orelse[-1], gast.Return) + if true_branch_returns and false_branch_returns: + node.body[-1] = templates.replace( + 'a = b', a=self.common_return_name, b=node.body[-1].value)[0] + node.orelse[-1] = templates.replace( + 'a = b', a=self.common_return_name, b=node.orelse[-1].value)[0] + return_node = templates.replace('return a', a=self.common_return_name)[0] + self.changes_made = True + return [node, return_node] + else: + return node + + def visit_With(self, node): + # Depth-first traversal of syntax + node = self.generic_visit(node) + + # If the with statement returns, lift the return + if isinstance(node.body[-1], gast.Return): + node.body[-1] = templates.replace( + 'a = b', a=self.common_return_name, b=node.body[-1].value)[0] + return_node = templates.replace('return a', a=self.common_return_name)[0] + node = self.generic_visit(node) + self.changes_made = True + return [node, return_node] + else: + return node + + def visit_FunctionDef(self, node): + # Ensure we're doing depth-first traversal + last_return_name = self.common_return_name + body_scope = anno.getanno(node, NodeAnno.BODY_SCOPE) + referenced_names = body_scope.referenced + self.common_return_name = self.context.namer.new_symbol( + 'return_', referenced_names) + node = self.generic_visit(node) + self.common_return_name = last_return_name + return node + + +class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): + """Throws an error if code returns inside loops or try/except.""" + + # First, throw an error if we detect a return statement in a loop. + # TODO(alexbw): we need to learn to handle returns inside a loop, + # but don't currently have the TF constructs to do so (need something + # that looks vaguely like a goto). 
+ + def __init__(self): + self.cant_return = False + super(gast.NodeVisitor, self).__init__() + + def visit_While(self, node): + self.cant_return = True + self.generic_visit(node) + self.cant_return = False + + def visit_For(self, node): + self.cant_return = True + self.generic_visit(node) + self.cant_return = False + + def visit_Try(self, node): + self.cant_return = True + self.generic_visit(node) + self.cant_return = False + + def visit_Return(self, node): + if self.cant_return: + raise ValueError( + 'Pyflow currently does not support `return` statements in loops. ' + 'Try assigning to a variable in the while loop, and returning ' + 'outside of the loop') + + +class DetectReturnInConditional(gast.NodeVisitor): + """Assert that no return statements are present in conditionals.""" + + def __init__(self): + self.cant_return = False + super(DetectReturnInConditional, self).__init__() + + def visit_If(self, node): + self.cant_return = True + self.generic_visit(node) + self.cant_return = False + + def visit_Return(self, node): + if self.cant_return: + raise ValueError( + 'After transforms, a conditional contained a `return `statement, ' + 'which is not allowed. This is a bug, and should not happen.') + + +class DetectReturnInFunctionDef(gast.NodeVisitor): + + def visit_FunctionDef(self, node): + self.generic_visit(node) + if not contains_return(node): + raise ValueError( + 'Each function definition should contain at least one return.') + + +def transform(node, context): + """Ensure a function has only a single return. + + This transforms an AST node with multiple returns successively into containing + only a single return node. + There are a few restrictions on what we can handle: + - An AST being transformed must contain at least one return. + - No returns allowed in loops. We have to know the type of the return value, + and we currently don't have either a type inference system to discover it, + nor do we have a mechanism for late type binding in TensorFlow. 
+ - After all transformations are finished, a Return node is not allowed inside + control flow. If we were unable to move a return outside of control flow, + this is an error. + + Args: + node: an AST node to transform + context: a context object + + Returns: + new_node: an AST with a single return value + + Raises: + ValueError: if the AST is structured so that we can't perform the + transform. + """ + # Make sure that the function has at least one return statement + # TODO(alexbw): turning off this assertion for now -- + # we need to not require this in e.g. class constructors. + # DetectReturnInFunctionDef().visit(node) + + # Make sure there's no returns in unsupported locations (loops, try/except) + DetectReturnInUnsupportedControlFlow().visit(node) + + while True: + + # Try to lift all returns out of if statements and with blocks + lr = LiftReturn(context) + node = lr.visit(node) + changes_made = lr.changes_made + fe = FoldElse(context) + node = fe.visit(node) + changes_made = changes_made or fe.changes_made + + if not changes_made: + break + + # Make sure we've scrubbed all returns from conditionals + DetectReturnInConditional().visit(node) + + return node diff --git a/tensorflow/contrib/py2tf/converters/single_return_test.py b/tensorflow/contrib/py2tf/converters/single_return_test.py new file mode 100644 index 0000000000..2ea7a9d6d3 --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/single_return_test.py @@ -0,0 +1,189 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for single_return module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.py2tf.converters import single_return +from tensorflow.python.framework.ops import name_scope +from tensorflow.python.platform import test + + +class SingleReturnTest(converter_test_base.TestCase): + + def compiled_fn(self, test_fn, *args): + node = self.parse_and_analyze(test_fn, {}) + node = single_return.transform(node, self.ctx) + module = self.compiled(node, *args) + return module + + def test_noop(self): + # Noop + def test_fn(x): + return x + + with self.compiled_fn(test_fn) as result: + self.assertEqual(test_fn(2.0), result.test_fn(2.0)) + + def test_return_expression(self): + # ANF + def test_fn(x): + return x * x + + with self.compiled_fn(test_fn) as result: + x = 2 + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_merge(self): + # Simple merge + def test_fn(x): + if x > 0: + return x + else: + return x * x + + with self.compiled_fn(test_fn) as result: + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_orphan_branch(self): + + def test_fn(x): + if x > 0: + return x + + with self.assertRaises(ValueError): + self.compiled_fn(test_fn) + + def test_lift_body_into_false_branch(self): + + def test_fn(x): + if x > 0: + return x + return x * x + + with self.compiled_fn(test_fn) as result: + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_lift_body_into_true_branch(self): + + def test_fn(x): + if x < 0: + x *= x + else: + # TODO(alexbw): linter bug here that requires us suppress this warning. 
+ return x # pylint: disable=undefined-loop-variable + return x + + with self.compiled_fn(test_fn) as result: + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_nested_if(self): + + def test_fn(x): + if x > 0: + if x < 5: + return x + else: + return x * x + else: + return x * x * x + + with self.compiled_fn(test_fn) as result: + for x in [-2, 2, 5]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_context_manager(self): + + def test_fn(x): + + with name_scope(''): + return x * x + + with self.compiled_fn(test_fn) as result: + result.name_scope = name_scope + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_context_manager_in_conditional(self): + + def test_fn(x): + if x > 0: + with name_scope(''): + return x * x + else: + return x + + with self.compiled_fn(test_fn, name_scope) as result: + result.name_scope = name_scope + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def text_conditional_in_context_manager(self): + + def test_fn(x): + with name_scope(''): + if x > 0: + return x * x + else: + return x + + with self.compiled_fn(test_fn) as result: + result.name_scope = name_scope + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_no_return(self): + + def test_fn(x): + x *= x + + with self.compiled_fn(test_fn) as result: + self.assertEqual(test_fn(2), result.test_fn(2)) + + def test_nested_functiondefs(self): + + def test_fn(x): + + def inner_fn(y): + if y > 0: + return y * y + else: + return y + + return inner_fn(x) + + with self.compiled_fn(test_fn) as result: + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_loop(self): + + def test_fn(x): + for _ in range(10): + return x + return x + + with self.assertRaises(ValueError): + self.compiled_fn(test_fn) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py 
index 97ee4ca435..96e7b1a53e 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -32,6 +32,7 @@ from tensorflow.contrib.py2tf.converters import for_loops from tensorflow.contrib.py2tf.converters import logical_expressions from tensorflow.contrib.py2tf.converters import name_scopes from tensorflow.contrib.py2tf.converters import side_effect_guards +from tensorflow.contrib.py2tf.converters import single_return from tensorflow.contrib.py2tf.impl import config from tensorflow.contrib.py2tf.impl import naming from tensorflow.contrib.py2tf.pyct import context @@ -297,6 +298,7 @@ def node_to_graph(node, ctx, nocompile_decorators): # to re-run the analysis. node = _static_analysis_pass(node, ctx) + # Past this point, line numbers are no longer accurate so we ignore the # source. # TODO(mdan): Is it feasible to reconstruct intermediate source code? @@ -311,6 +313,9 @@ def node_to_graph(node, ctx, nocompile_decorators): node = continue_statements.transform(node, ctx) ctx.namespace['len'] = len + node = _static_analysis_pass(node, ctx) + node = single_return.transform(node, ctx) + node = _static_analysis_pass(node, ctx) node = for_loops.transform(node, ctx) # for_loops may insert new global references. 
diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py b/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py index 22925afe7c..87fc8c979c 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/activity.py @@ -268,6 +268,15 @@ class ActivityAnalizer(transformer.Base): self.scope = current_scope return node + def visit_With(self, node): + current_scope = self.scope + with_scope = Scope(current_scope, isolated=False) + self.scope = with_scope + self.generic_visit(node) + anno.setanno(node, NodeAnno.BODY_SCOPE, with_scope) + self.scope = current_scope + return node + def visit_If(self, node): self.visit(node.test) node = self._process_parallel_blocks(node, -- GitLab From 37cef895bfe06913477b87917cbee7284aefa7cd Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 7 Mar 2018 12:03:56 -0800 Subject: [PATCH 540/884] eager: Rename in_eager_mode to executing_eagerly and get rid of in_graph_mode. This is in preparation to introduce one public, stable symbol: tf.executing_eagerly() (i.e., part of moving APIs related to eager execution from "contrib" to a namespace where we provide API stability guarantees) PiperOrigin-RevId: 188212646 --- .../contrib/data/python/ops/threadpool.py | 2 +- .../eager/python/checkpointable_utils.py | 14 +- .../eager/python/checkpointable_utils_test.py | 32 ++--- tensorflow/contrib/eager/python/datasets.py | 2 +- tensorflow/contrib/eager/python/evaluator.py | 29 +++-- .../contrib/eager/python/metrics_impl.py | 23 ++-- tensorflow/contrib/eager/python/network.py | 8 +- tensorflow/contrib/eager/python/saver.py | 4 +- tensorflow/contrib/eager/python/tfe.py | 6 +- tensorflow/contrib/eager/python/tfe_test.py | 3 +- .../python/ops/critical_section_ops.py | 8 +- .../contrib/layers/python/layers/layers.py | 2 +- .../contrib/metrics/python/ops/metric_ops.py | 4 +- .../contrib/nccl/python/ops/nccl_ops.py | 2 +- .../opt/python/training/addsign_test.py | 6 +- 
.../opt/python/training/powersign_test.py | 6 +- .../rnn/python/kernel_tests/core_rnn_test.py | 16 +-- tensorflow/contrib/summary/summary_ops.py | 12 +- tensorflow/python/data/ops/dataset_ops.py | 4 +- .../python/data/util/random_seed_test.py | 2 +- tensorflow/python/eager/benchmarks_test.py | 2 +- tensorflow/python/eager/context.py | 28 ++-- tensorflow/python/eager/core_test.py | 9 +- tensorflow/python/eager/function.py | 42 +++--- tensorflow/python/eager/graph_callable.py | 2 +- .../python/eager/python_eager_op_gen.cc | 2 +- tensorflow/python/eager/pywrap_tfe_test.py | 2 +- tensorflow/python/estimator/estimator.py | 2 +- tensorflow/python/framework/constant_op.py | 2 +- tensorflow/python/framework/function.py | 6 +- tensorflow/python/framework/meta_graph.py | 4 +- tensorflow/python/framework/ops.py | 40 +++--- tensorflow/python/framework/ops_test.py | 25 ++-- tensorflow/python/framework/random_seed.py | 20 +-- .../python/framework/random_seed_test.py | 8 +- tensorflow/python/framework/tensor_util.py | 2 +- tensorflow/python/framework/test_util.py | 2 +- .../python/keras/_impl/keras/backend.py | 16 +-- .../keras/_impl/keras/engine/base_layer.py | 2 +- .../keras/_impl/keras/engine/input_layer.py | 2 +- .../keras/_impl/keras/engine/network.py | 20 +-- .../keras/_impl/keras/engine/topology_test.py | 29 +++-- .../keras/_impl/keras/engine/training.py | 44 +++---- .../_impl/keras/layers/convolutional_test.py | 22 ++-- .../python/keras/_impl/keras/layers/core.py | 2 +- .../keras/_impl/keras/layers/normalization.py | 2 +- .../keras/_impl/keras/layers/pooling_test.py | 2 +- .../keras/_impl/keras/layers/recurrent.py | 6 +- .../kernel_tests/atrous_convolution_test.py | 4 +- .../python/kernel_tests/check_ops_test.py | 37 +++--- .../python/kernel_tests/py_func_test.py | 4 +- .../resource_variable_ops_test.py | 35 ++--- tensorflow/python/kernel_tests/rnn_test.py | 78 +++++------ .../python/kernel_tests/slice_op_test.py | 2 +- .../python/kernel_tests/template_test.py | 6 +- 
.../kernel_tests/tensor_array_ops_test.py | 123 ++++++------------ .../kernel_tests/variable_scope_test.py | 28 ++-- tensorflow/python/layers/base.py | 65 ++++----- tensorflow/python/layers/base_test.py | 32 ++--- tensorflow/python/layers/convolutional.py | 4 +- tensorflow/python/layers/core.py | 4 +- tensorflow/python/layers/core_test.py | 12 +- tensorflow/python/layers/normalization.py | 16 ++- tensorflow/python/ops/array_grad.py | 8 +- tensorflow/python/ops/array_ops.py | 29 ++--- tensorflow/python/ops/check_ops.py | 41 +++--- tensorflow/python/ops/control_flow_ops.py | 14 +- tensorflow/python/ops/custom_gradient.py | 2 +- tensorflow/python/ops/data_flow_ops.py | 40 +++--- tensorflow/python/ops/functional_ops.py | 8 +- tensorflow/python/ops/gradients_impl.py | 9 +- tensorflow/python/ops/io_ops.py | 2 +- tensorflow/python/ops/lookup_ops.py | 8 +- tensorflow/python/ops/losses/losses_impl.py | 2 +- tensorflow/python/ops/math_grad.py | 8 +- tensorflow/python/ops/math_ops.py | 10 +- tensorflow/python/ops/math_ops_test.py | 4 +- tensorflow/python/ops/metrics_impl.py | 60 ++++----- tensorflow/python/ops/nn_grad.py | 2 +- tensorflow/python/ops/nn_ops.py | 9 +- tensorflow/python/ops/numerics.py | 2 +- .../python/ops/resource_variable_ops.py | 41 +++--- tensorflow/python/ops/rnn.py | 10 +- tensorflow/python/ops/rnn_cell_impl.py | 15 ++- tensorflow/python/ops/script_ops.py | 2 +- tensorflow/python/ops/state_ops.py | 2 +- tensorflow/python/ops/template.py | 6 +- tensorflow/python/ops/tensor_array_ops.py | 10 +- tensorflow/python/ops/variable_scope.py | 38 +++--- tensorflow/python/ops/variables.py | 29 +++-- tensorflow/python/profiler/model_analyzer.py | 4 +- tensorflow/python/profiler/tfprof_logger.py | 4 +- tensorflow/python/summary/summary.py | 4 +- tensorflow/python/summary/writer/writer.py | 2 +- tensorflow/python/training/adam.py | 6 +- tensorflow/python/training/adam_test.py | 4 +- tensorflow/python/training/checkpointable.py | 8 +- 
.../python/training/gradient_descent.py | 2 +- tensorflow/python/training/input.py | 10 +- .../training/learning_rate_decay_test.py | 2 +- tensorflow/python/training/momentum_test.py | 16 +-- tensorflow/python/training/optimizer.py | 33 ++--- .../python/training/queue_runner_impl.py | 4 +- tensorflow/python/training/saver.py | 96 +++++++------- tensorflow/python/training/saver_test.py | 28 ++-- .../python/training/saver_test_utils.py | 8 +- tensorflow/python/training/slot_creator.py | 10 +- tensorflow/python/training/supervisor.py | 4 +- tensorflow/python/training/training_util.py | 4 +- tensorflow/python/util/tf_should_use.py | 2 +- 110 files changed, 789 insertions(+), 853 deletions(-) diff --git a/tensorflow/contrib/data/python/ops/threadpool.py b/tensorflow/contrib/data/python/ops/threadpool.py index 3f85aa84cd..56f67e1766 100644 --- a/tensorflow/contrib/data/python/ops/threadpool.py +++ b/tensorflow/contrib/data/python/ops/threadpool.py @@ -44,7 +44,7 @@ class PrivateThreadPool(object): def __init__(self, num_threads, display_name=None): """Creates a `PrivateThreadPool` with the given number of threads.""" - if context.in_eager_mode(): + if context.executing_eagerly(): shared_name = _generate_shared_name("privatethreadpool") self._resource = gen_dataset_ops.thread_pool_handle( num_threads=num_threads, diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index cd742991af..1fa150f3c6 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -395,7 +395,7 @@ class CheckpointLoadStatus(_LoadStatus): def run_restore_ops(self, session=None): """Run operations to restore objects in the dependency graph.""" - if context.in_eager_mode(): + if context.executing_eagerly(): return # Run eagerly if session is None: session = ops.get_default_session() @@ -459,7 +459,7 @@ class InitializationOnlyStatus(_LoadStatus): session: 
The session to run initialization ops in. If `None`, uses the default session. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return # run eagerly if session is None: session = ops.get_default_session() @@ -491,7 +491,7 @@ class NameBasedSaverStatus(_LoadStatus): date=None, instructions=_DEPRECATED_RESTORE_INSTRUCTIONS) def run_restore_ops(self, session=None): """Load the name-based training checkpoint using a new `tf.train.Saver`.""" - if session is None and context.in_graph_mode(): + if session is None and not context.executing_eagerly(): session = ops.get_default_session() saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access sess=session, save_path=self._save_path) @@ -548,7 +548,7 @@ class CheckpointableSaver(object): # Allow passing in a weak reference to avoid reference cycles when # `Checkpointable` objects save themselves. self._root_checkpointable_ref = root_checkpointable - if context.in_graph_mode(): + if not context.executing_eagerly(): with ops.device("/cpu:0"): self._file_prefix_placeholder = constant_op.constant("model") else: @@ -597,7 +597,7 @@ class CheckpointableSaver(object): """ named_variables, graph_proto = _serialize_object_graph( self._root_checkpointable) - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() if in_graph_mode: if session is None: session = ops.get_default_session() @@ -714,7 +714,7 @@ class CheckpointableSaver(object): """ if save_path is None: return InitializationOnlyStatus(self._root_checkpointable) - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() if in_graph_mode: if session is None: session = ops.get_default_session() @@ -850,7 +850,7 @@ class Checkpoint(core_checkpointable.Checkpointable): def save(self, file_prefix, session=None): """Save a checkpoint. 
Wraps `tfe.CheckpointableSaver.save`.""" - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() if in_graph_mode: if session is None: session = ops.get_default_session() diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index 9ec89edce8..fd9fc098b3 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -108,14 +108,14 @@ class InterfaceTests(test.TestCase): [0., 0.]], self.evaluate(bare_initializer)) self.assertEqual("a_variable:0", obj.a_variable.name) self.assertEqual("duplicate:0", other_duplicate.name) - if context.in_graph_mode(): - # The .name attribute may be globally influenced, but the checkpoint name - # won't be (tested below). - self.assertEqual("duplicate_1:0", duplicate.name) - else: + if context.executing_eagerly(): # When executing eagerly, there's no uniquification of variable names. The # checkpoint name will be the same. self.assertEqual("duplicate:0", duplicate.name) + else: + # The .name attribute may be globally influenced, but the checkpoint name + # won't be (tested below). 
+ self.assertEqual("duplicate_1:0", duplicate.name) named_variables, _ = checkpointable_utils._serialize_object_graph(obj) expected_checkpoint_names = ( "a_variable/.ATTRIBUTES/VARIABLE_VALUE", @@ -165,7 +165,7 @@ class CheckpointingTests(test.TestCase): optimizer_step = training_util.get_or_create_global_step() root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, model=model, optimizer_step=optimizer_step) - if context.in_eager_mode(): + if context.executing_eagerly(): optimizer.minimize( lambda: model(input_value), global_step=optimizer_step) @@ -268,7 +268,7 @@ class CheckpointingTests(test.TestCase): root_checkpointable = checkpointable_utils.Checkpoint( optimizer=optimizer, model=model) input_value = constant_op.constant([[3.]]) - if context.in_eager_mode(): + if context.executing_eagerly(): optimizer.minimize( lambda: model(input_value)) else: @@ -293,7 +293,7 @@ class CheckpointingTests(test.TestCase): self.assertAllEqual([42.], self.evaluate(model._named_dense.variables[1])) self.assertAllEqual(1, self.evaluate(root_checkpointable.save_counter)) self.assertAllEqual([1.5], self.evaluate(m_bias_slot)) - if context.in_graph_mode(): + if not context.executing_eagerly(): return # Restore-on-create is only supported when executing eagerly on_create_model = MyModel() on_create_optimizer = adam.AdamOptimizer(0.001) @@ -400,7 +400,7 @@ class CheckpointingTests(test.TestCase): optimizer.minimize, functools.partial(model, input_value), global_step=root.global_step) - if context.in_graph_mode(): + if not context.executing_eagerly(): train_fn = functools.partial(self.evaluate, train_fn()) status.initialize_or_restore() for _ in range(num_training_steps): @@ -524,7 +524,9 @@ class CheckpointingTests(test.TestCase): root.var = checkpointable_utils.add_variable( root, name="var", initializer=0.) 
optimizer = adam.AdamOptimizer(0.1) - if context.in_graph_mode(): + if context.executing_eagerly(): + optimizer.minimize(root.var.read_value) + else: train_op = optimizer.minimize(root.var) # Note that `optimizer` has not been added as a dependency of # `root`. Create a one-off grouping so that slot variables for `root.var` @@ -532,8 +534,6 @@ class CheckpointingTests(test.TestCase): self.evaluate(checkpointable_utils.gather_initializers( checkpointable_utils.Checkpoint(root=root, optimizer=optimizer))) self.evaluate(train_op) - else: - optimizer.minimize(root.var.read_value) self.evaluate(state_ops.assign(root.var, 12.)) no_slots_path = checkpointable_utils.CheckpointableSaver(root).save( os.path.join(checkpoint_directory, "no_slots")) @@ -561,7 +561,7 @@ class CheckpointingTests(test.TestCase): with self.assertRaisesRegexp(AssertionError, "beta1_power"): slot_status.assert_consumed() self.assertEqual(12., self.evaluate(new_root.var)) - if context.in_eager_mode(): + if context.executing_eagerly(): # Slot variables are only created with restoring initializers when # executing eagerly. self.assertEqual(14., self.evaluate( @@ -569,7 +569,9 @@ class CheckpointingTests(test.TestCase): else: self.assertIs(new_root.optimizer.get_slot(name="m", var=new_root.var), None) - if context.in_graph_mode(): + if context.executing_eagerly(): + new_root.optimizer.minimize(new_root.var.read_value) + else: train_op = new_root.optimizer.minimize(new_root.var) # The slot variable now exists; restore() didn't create it, but we should # now have a restore op for it. 
@@ -577,8 +579,6 @@ class CheckpointingTests(test.TestCase): self.assertEqual(14., self.evaluate( new_root.optimizer.get_slot(name="m", var=new_root.var))) self.evaluate(train_op) - else: - new_root.optimizer.minimize(new_root.var.read_value) slot_status.assert_consumed() @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index 36b7d6d009..30a7642dd3 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -68,7 +68,7 @@ class Iterator(object): RuntimeError: When invoked without eager execution enabled. """ - if not context.in_eager_mode(): + if not context.executing_eagerly(): raise RuntimeError( "{} objects can only be used when eager execution is enabled, use " "tf.data.Dataset.make_initializable_iterator or " diff --git a/tensorflow/contrib/eager/python/evaluator.py b/tensorflow/contrib/eager/python/evaluator.py index 68e7b5421f..37c8f0d47a 100644 --- a/tensorflow/contrib/eager/python/evaluator.py +++ b/tensorflow/contrib/eager/python/evaluator.py @@ -57,7 +57,7 @@ class Evaluator(object): self._model = model self._metrics = {} self._evaluators = {} - if context.in_graph_mode(): + if not context.executing_eagerly(): self.call = function.defun(self.call) # ---- API for users ---- @@ -90,7 +90,7 @@ class Evaluator(object): Only for graph execution. 
@end_compatibility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Evaluator.init_variables() not needed when " "eager execution is enabled.") return control_flow_ops.group([m.init_variables() for _, m in self.metrics]) @@ -113,7 +113,8 @@ class Evaluator(object): with summary_ops.create_file_writer( summary_logdir).as_default(), summary_ops.always_record_summaries(): return self._all_metric_results() - if context.in_eager_mode(): + + if context.executing_eagerly(): return f() else: return function.defun(f)() @@ -158,16 +159,16 @@ class Evaluator(object): @end_compatibility """ summary_logdir = kwargs.pop("summary_logdir", None) - if context.in_graph_mode(): - call_op = self.__call__(dataset.make_one_shot_iterator().get_next(), - *args, **kwargs) - init_op = self.init_variables() - results_op = self.all_metric_results(summary_logdir) - return (init_op, call_op, results_op) - # Eager case - for example in datasets.Iterator(dataset): - self.__call__(example, *args, **kwargs) - return self.all_metric_results(summary_logdir) + if context.executing_eagerly(): + for example in datasets.Iterator(dataset): + self.__call__(example, *args, **kwargs) + return self.all_metric_results(summary_logdir) + # Graph construction + call_op = self.__call__(dataset.make_one_shot_iterator().get_next(), *args, + **kwargs) + init_op = self.init_variables() + results_op = self.all_metric_results(summary_logdir) + return (init_op, call_op, results_op) @staticmethod def run_evaluation(init_op, call_op, results_op, sess=None): @@ -192,7 +193,7 @@ class Evaluator(object): Only for graph execution. 
@end_compatibility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Evaluator.run_evaluation() not supported when " "eager execution is enabled.") sess = sess or ops.get_default_session() diff --git a/tensorflow/contrib/eager/python/metrics_impl.py b/tensorflow/contrib/eager/python/metrics_impl.py index a34c4f758a..1490c2ccac 100644 --- a/tensorflow/contrib/eager/python/metrics_impl.py +++ b/tensorflow/contrib/eager/python/metrics_impl.py @@ -109,13 +109,13 @@ class Metric(checkpointable.CheckpointableBase): pos = scope.name.rfind(scope_name) self._name = name + scope.name[pos + len(scope_name):] self._scope = scope - if context.in_graph_mode(): + if context.executing_eagerly(): + self._construction_scope = context.eager_mode + else: # We make self.call() into a graph callable here, so that we can # return a single op that performs all of the variable updates. self._construction_scope = ops.get_default_graph().as_default self.call = function.defun(self.call) - else: - self._construction_scope = context.eager_mode # ---- API for users ---- def __call__(self, *args, **kwargs): @@ -156,10 +156,11 @@ class Metric(checkpointable.CheckpointableBase): initialization. Under eager execution, the variables are reset to their initial values as a side effect and this function returns None. 
""" - if context.in_graph_mode(): + if context.executing_eagerly(): + for v in self._vars: + v.assign(self._initial_values[v]) + else: return control_flow_ops.group([v.initializer for v in self._vars]) - for v in self._vars: - v.assign(self._initial_values[v]) # ---- To be implemented by descendants --- def build(self, *args, **kwargs): @@ -201,10 +202,10 @@ class Metric(checkpointable.CheckpointableBase): def value(self): """In graph mode returns the result Tensor while in eager the callable.""" - if context.in_graph_mode(): - return self.result() - else: + if context.executing_eagerly(): return self.result + else: + return self.result() # We can support two different strategies of for doing data-parallel # distributed metric computations: @@ -246,7 +247,7 @@ class Metric(checkpointable.CheckpointableBase): """***Only for use by descendants of Metric***.""" if self._built: raise RuntimeError("Can't call add_variable() except in build().") - if context.in_eager_mode(): + if context.executing_eagerly(): collections = None else: if self._use_global_variables: @@ -270,7 +271,7 @@ class Metric(checkpointable.CheckpointableBase): # Checkpointable. overwrite=True) self._vars.append(v) - if context.in_eager_mode(): + if context.executing_eagerly(): self._initial_values[v] = v.value() return v diff --git a/tensorflow/contrib/eager/python/network.py b/tensorflow/contrib/eager/python/network.py index e3c13cbd2e..4c937716e8 100644 --- a/tensorflow/contrib/eager/python/network.py +++ b/tensorflow/contrib/eager/python/network.py @@ -639,7 +639,7 @@ def _make_custom_getter_for_deferred_restorations(): # Mark as already restored from this checkpoint. delayed_restoration.checkpointed_variables_to_restore[ checkpoint_name] = None - if context.in_graph_mode(): + if not context.executing_eagerly(): delayed_restoration.session.run(variable.initializer) if found_value: # Error checking should run even if we've already restored a value. 
@@ -772,7 +772,7 @@ def save_network_checkpoint( variable_map[mapped_name]._shared_name, variable._shared_name, network.scope_name)) - if context.in_eager_mode(): + if context.executing_eagerly(): sess = None else: sess = ops.get_default_session() @@ -853,7 +853,7 @@ def _restore_existing_variables(network, save_path, map_func, user_map_func): network_name=network.name, network_scope_name=network.scope_name)) if existing_variables_by_checkpoint_name: - if context.in_eager_mode(): + if context.executing_eagerly(): sess = None else: sess = ops.get_default_session() @@ -880,7 +880,7 @@ def _set_restore_on_create(network, save_path, map_func, user_map_func, # _DeferredRestoration objects once a Network has been built (so that # restoring in a loop does not take increasing amounts of memory). if checkpointed_variables_to_restore: - if context.in_eager_mode(): + if context.executing_eagerly(): sess = None else: sess = ops.get_default_session() diff --git a/tensorflow/contrib/eager/python/saver.py b/tensorflow/contrib/eager/python/saver.py index 62421849c7..fdaca90fd1 100644 --- a/tensorflow/contrib/eager/python/saver.py +++ b/tensorflow/contrib/eager/python/saver.py @@ -73,7 +73,7 @@ def restore_variables_on_create(save_path, map_func=None): NotFoundError: If the variable is not found in checkpoint. ValueError: If not used in eager mode or map_func is not callable. """ - if context.in_graph_mode(): + if not context.executing_eagerly(): raise ValueError( "Currently, restore_variables_on_create can only be used with " "eager execution enabled.") @@ -131,7 +131,7 @@ class Saver(object): Raises: RuntimeError: if invoked when eager execution has not been enabled. """ - if context.in_graph_mode(): + if not context.executing_eagerly(): raise RuntimeError("tfe.Saver can only be used when eager " "execution is enabled. 
Use tf.train.Saver when " "building graphs.") diff --git a/tensorflow/contrib/eager/python/tfe.py b/tensorflow/contrib/eager/python/tfe.py index 5bddd26a0a..5aabc9aae8 100644 --- a/tensorflow/contrib/eager/python/tfe.py +++ b/tensorflow/contrib/eager/python/tfe.py @@ -60,8 +60,8 @@ To use, at program startup, call `tfe.enable_eager_execution()`. @@Checkpointable @@CheckpointableSaver +@@executing_eagerly @@in_eager_mode -@@in_graph_mode @@run_test_in_graph_and_eager_modes @@ -93,8 +93,7 @@ from tensorflow.python.eager import function from tensorflow.python.eager.context import DEVICE_PLACEMENT_EXPLICIT from tensorflow.python.eager.context import DEVICE_PLACEMENT_WARN from tensorflow.python.eager.context import DEVICE_PLACEMENT_SILENT -from tensorflow.python.eager.context import in_eager_mode -from tensorflow.python.eager.context import in_graph_mode +from tensorflow.python.eager.context import executing_eagerly from tensorflow.python.eager.context import list_devices from tensorflow.python.eager.context import num_gpus from tensorflow.python.eager.execution_callbacks import add_execution_callback @@ -122,5 +121,6 @@ implicit_value_and_gradients = backprop.implicit_val_and_grad gradients_function = backprop.gradients_function value_and_gradients_function = backprop.val_and_grad_function GradientTape = backprop.GradientTape # pylint: disable=invalid-name +in_eager_mode = executing_eagerly remove_undocumented(__name__) diff --git a/tensorflow/contrib/eager/python/tfe_test.py b/tensorflow/contrib/eager/python/tfe_test.py index b6659c2a17..e80ccbb74d 100644 --- a/tensorflow/contrib/eager/python/tfe_test.py +++ b/tensorflow/contrib/eager/python/tfe_test.py @@ -47,7 +47,8 @@ class TFETest(test_util.TensorFlowTestCase): def testVariableError(self): with self.assertRaisesRegexp( - RuntimeError, r'Variable not supported in Eager mode'): + RuntimeError, + r'Variable not supported when eager execution is enabled'): variables.Variable(initial_value=1.0) def testGradients(self): 
diff --git a/tensorflow/contrib/framework/python/ops/critical_section_ops.py b/tensorflow/contrib/framework/python/ops/critical_section_ops.py index ab603cc18e..cc19372acf 100644 --- a/tensorflow/contrib/framework/python/ops/critical_section_ops.py +++ b/tensorflow/contrib/framework/python/ops/critical_section_ops.py @@ -154,7 +154,7 @@ class CriticalSection(object): self._handle = gen_resource_variable_ops.mutex_v2( shared_name=shared_name, container=container, name=name) - if context.in_graph_mode(): + if not context.executing_eagerly(): ops.add_to_collections(CRITICAL_SECTIONS, self) @property @@ -221,7 +221,7 @@ class CriticalSection(object): "This is illegal and would cause deadlocks. " "CriticalSection: %s." % self._handle) - if context.in_graph_mode(): + if not context.executing_eagerly(): # Collections and op introspection does not work in eager # mode. This is generally ok; since eager mode (as of # writing) executes sequentially anyway. @@ -250,7 +250,7 @@ class CriticalSection(object): return x.identity() elif isinstance(x, ops.Operation): return control_flow_ops.group(x) - elif context.in_eager_mode() and x is None: + elif context.executing_eagerly() and x is None: return None else: return array_ops.identity(x) @@ -274,7 +274,7 @@ class CriticalSection(object): with ops.control_dependencies([ensure_lock_exists]): outputs = nest.map_structure(identity, r) - if context.in_graph_mode(): + if not context.executing_eagerly(): signature = _ExecutionSignature( op=lock.op, handle=self._handle, diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index b2ea75c7e1..559c0c63da 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -2746,7 +2746,7 @@ def softmax(logits, scope=None): logits_2d = array_ops.reshape(logits, [-1, num_logits]) predictions = nn.softmax(logits_2d) predictions = array_ops.reshape(predictions, 
array_ops.shape(logits)) - if context.in_graph_mode(): + if not context.executing_eagerly(): predictions.set_shape(logits.get_shape()) return predictions diff --git a/tensorflow/contrib/metrics/python/ops/metric_ops.py b/tensorflow/contrib/metrics/python/ops/metric_ops.py index 31e274c5fd..0fee584f8e 100644 --- a/tensorflow/contrib/metrics/python/ops/metric_ops.py +++ b/tensorflow/contrib/metrics/python/ops/metric_ops.py @@ -1263,7 +1263,7 @@ def _compute_placement_auc(labels, predictions, weights, alpha, weights_for_true = ordered_weights * float_labels_for_true weights_for_false = ordered_weights * float_labels_for_false - # For each set of weights with the same segmented indices, we add up the + # For each set of weights with the same segmented indices, we add up the # weight values. Note that for each label, we deliberately rely on weights # for the opposite label. weight_totals_for_true = math_ops.segment_sum(weights_for_false, @@ -3646,7 +3646,7 @@ def cohen_kappa(labels, `updates_collections` are not a list or tuple. RuntimeError: If eager execution is enabled. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.contrib.metrics.cohen_kappa is not supported' 'when eager execution is enabled.') if num_classes < 2: diff --git a/tensorflow/contrib/nccl/python/ops/nccl_ops.py b/tensorflow/contrib/nccl/python/ops/nccl_ops.py index 8dc038b9ac..794372a1f4 100644 --- a/tensorflow/contrib/nccl/python/ops/nccl_ops.py +++ b/tensorflow/contrib/nccl/python/ops/nccl_ops.py @@ -267,5 +267,5 @@ def _check_device(tensor, expected=None): def _check_graph_mode(): - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError('Nccl ops are not supported in eager mode') diff --git a/tensorflow/contrib/opt/python/training/addsign_test.py b/tensorflow/contrib/opt/python/training/addsign_test.py index bd19ee3e7a..08d45ed73f 100644 --- a/tensorflow/contrib/opt/python/training/addsign_test.py +++ b/tensorflow/contrib/opt/python/training/addsign_test.py @@ -97,7 +97,7 @@ class AddSignTest(test.TestCase): global_step=global_step) neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), global_step=global_step) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) @@ -108,13 +108,13 @@ class AddSignTest(test.TestCase): # last 3 steps with negative gradient (sign(gm) should be -1) for t in range(1, 8): if t < 5: - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(update) elif t > 1: opt.apply_gradients(zip([grads0, grads1], [var0, var1]), global_step=global_step) else: - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(neg_update) elif t > 1: opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), diff --git a/tensorflow/contrib/opt/python/training/powersign_test.py b/tensorflow/contrib/opt/python/training/powersign_test.py index ff7b1a72d4..5214082dd6 
100644 --- a/tensorflow/contrib/opt/python/training/powersign_test.py +++ b/tensorflow/contrib/opt/python/training/powersign_test.py @@ -99,7 +99,7 @@ class PowerSignTest(test.TestCase): neg_update = opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), global_step=global_step) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) @@ -110,13 +110,13 @@ class PowerSignTest(test.TestCase): # last 3 steps with negative gradient (sign(gm) should be -1) for t in range(1, 8): if t < 5: - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(update) elif t > 1: opt.apply_gradients(zip([grads0, grads1], [var0, var1]), global_step=global_step) else: - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(neg_update) elif t > 1: opt.apply_gradients(zip([-grads0, -grads1], [var0, var1]), diff --git a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py index 57521c6a9b..de5df91292 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/core_rnn_test.py @@ -869,7 +869,7 @@ class LSTMTest(test.TestCase): num_proj = 4 max_length = 8 sequence_length = [4, 6] - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() with self.test_session(graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) @@ -934,8 +934,7 @@ class LSTMTest(test.TestCase): if in_graph_mode: self.assertAllEqual(outputs_static, outputs_dynamic) else: - self.assertAllEqual( - array_ops.stack(outputs_static).numpy(), outputs_dynamic.numpy()) + self.assertAllEqual(array_ops.stack(outputs_static), outputs_dynamic) self.assertAllEqual(np.hstack(state_static), 
np.hstack(state_dynamic)) @test_util.run_in_graph_and_eager_modes() @@ -946,7 +945,7 @@ class LSTMTest(test.TestCase): num_proj = 4 max_length = 8 sequence_length = [4, 6] - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() with self.test_session(graph=ops_lib.Graph()) as sess: initializer = init_ops.random_uniform_initializer( -0.01, 0.01, seed=self._seed) @@ -1022,10 +1021,9 @@ class LSTMTest(test.TestCase): if in_graph_mode: self.assertAllEqual(outputs_static, outputs_dynamic) else: - self.assertAllEqual( - array_ops.stack(outputs_static).numpy(), outputs_dynamic.numpy()) - state_static = [s.numpy() for s in nest.flatten(state_static)] - state_dynamic = [s.numpy() for s in nest.flatten(state_dynamic)] + self.assertAllEqual(array_ops.stack(outputs_static), outputs_dynamic) + state_static = nest.flatten(state_static) + state_dynamic = nest.flatten(state_dynamic) self.assertAllEqual(np.hstack(state_static), np.hstack(state_dynamic)) def _testDynamicEquivalentToStaticRNN(self, use_sequence_length): @@ -1043,7 +1041,7 @@ class LSTMTest(test.TestCase): else: sequence_length = None - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() # TODO(b/68017812): Eager ignores operation seeds, so we need to create a # single cell and reuse it across the static and dynamic RNNs. 
Remove this diff --git a/tensorflow/contrib/summary/summary_ops.py b/tensorflow/contrib/summary/summary_ops.py index b6249fc92f..a61ce04ca2 100644 --- a/tensorflow/contrib/summary/summary_ops.py +++ b/tensorflow/contrib/summary/summary_ops.py @@ -110,7 +110,7 @@ class SummaryWriter(object): def __init__(self, resource): self._resource = resource - if context.in_eager_mode() and self._resource is not None: + if context.executing_eagerly() and self._resource is not None: self._resource_deleter = resource_variable_ops.EagerResourceDeleter( handle=self._resource, handle_device="cpu:0") @@ -158,7 +158,7 @@ def initialize( @{tf.contrib.summary.SummaryWriter}. ValueError: If session wasn't passed and no default session. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return if context.context().summary_writer_resource is None: raise RuntimeError("No default tf.contrib.summary.SummaryWriter found") @@ -269,7 +269,7 @@ def _make_summary_writer(name, factory, **kwargs): resource = gen_summary_ops.summary_writer(shared_name=name) # TODO(apassos): Consider doing this instead. # node = factory(resource, **kwargs) - # if not context.in_eager_mode(): + # if not context.executing_eagerly(): # ops.get_default_session().run(node) ops.add_to_collection(_SUMMARY_WRITER_INIT_COLLECTION_NAME, factory(resource, **kwargs)) @@ -295,7 +295,7 @@ def all_summary_ops(): Returns: The summary ops. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return None return ops.get_collection(ops.GraphKeys._SUMMARY_COLLECTION) # pylint: disable=protected-access @@ -309,7 +309,7 @@ def summary_writer_initializer_op(): Raises: RuntimeError: If in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "tf.contrib.summary.summary_writer_initializer_op is only " "supported in graph mode.") @@ -477,7 +477,7 @@ def graph(param, step=None, name=None): Raises: TypeError: If `param` isn't already a @{tf.Tensor} in graph mode. 
""" - if not context.in_eager_mode() and not isinstance(param, ops.Tensor): + if not context.executing_eagerly() and not isinstance(param, ops.Tensor): raise TypeError("graph() needs a tf.Tensor (e.g. tf.placeholder) in graph " "mode, but was: %s" % type(param)) writer = context.context().summary_writer_resource diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index 6539e91c13..e0d63b5ebc 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -91,7 +91,7 @@ class Dataset(object): Raises: RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "dataset.make_initializable_iterator is not supported when eager " "execution is enabled.") @@ -123,7 +123,7 @@ class Dataset(object): Raises: RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "dataset.make_one_shot_iterator is not supported when eager " "execution is enabled.") diff --git a/tensorflow/python/data/util/random_seed_test.py b/tensorflow/python/data/util/random_seed_test.py index c3a2dc0537..33227e82af 100644 --- a/tensorflow/python/data/util/random_seed_test.py +++ b/tensorflow/python/data/util/random_seed_test.py @@ -65,7 +65,7 @@ class RandomSeedTest(test.TestCase): self.assertEqual((g_seed, op_seed), toutput, msg=msg) random_seed.set_random_seed(None) - if context.in_graph_mode(): + if not context.executing_eagerly(): random_seed.set_random_seed(1) tinput = (1, None) toutput = (1, ops.get_default_graph()._last_id) # pylint: disable=protected-access diff --git a/tensorflow/python/eager/benchmarks_test.py b/tensorflow/python/eager/benchmarks_test.py index 551d5647dd..4255677a68 100644 --- a/tensorflow/python/eager/benchmarks_test.py +++ b/tensorflow/python/eager/benchmarks_test.py @@ -55,7 +55,7 @@ def c_tfe_py_fastpath_execute(a, transpose_b=False, 
name=None): ctx = context.context() - assert not ctx.in_graph_mode( + assert ctx.in_eager_mode( ), "The prototype doesn't contain C code for graph construction" try: return pywrap_tensorflow.TFE_Py_FastPathExecute( diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index fb27ab65fa..5d13aada63 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -260,12 +260,8 @@ class Context(object): if mode == EAGER_MODE: context_stack.pop() - def in_graph_mode(self): - """Returns True if current thread is in GRAPH mode.""" - return self._eager_context.mode == GRAPH_MODE - - def in_eager_mode(self): - """Returns True if current thread is in EAGER mode.""" + def executing_eagerly(self): + """Returns True if current thread has eager execution enabled.""" return self._eager_context.mode == EAGER_MODE def scalar_cache(self): @@ -522,23 +518,23 @@ def internal_operation_seed(): return context()._internal_operation_seed() # pylint: disable=protected-access -def in_graph_mode(): - """Returns True if current thread is in GRAPH mode for default context.""" - return context().in_graph_mode() +def executing_eagerly(): + """Returns True if the current thread has eager execution enabled.""" + return context().executing_eagerly() def in_eager_mode(): - """Returns True if current thread is in EAGER mode for default context.""" - return context().in_eager_mode() + """Use executing_eagerly() instead.
This function will be removed.""" + return executing_eagerly() def graph_mode(): - """Context-manager to enable GRAPH mode for current thread.""" + """Context-manager to disable eager execution for the current thread.""" return context()._mode(GRAPH_MODE) # pylint: disable=protected-access def eager_mode(): - """Context-manager to enable EAGER mode for current thread.""" + """Context-manager to enable eager execution for the current thread.""" return context()._mode(EAGER_MODE) # pylint: disable=protected-access @@ -631,4 +627,8 @@ def export_run_metadata(): # (for example, enable_eager_execution in python/framework/ops.py), # but they do all import this file. Note that IS_IN_GRAPH_MODE and # in_graph_mode are both parameterless functions. -is_in_graph_mode.IS_IN_GRAPH_MODE = in_graph_mode +def _tmp_in_graph_mode(): + return not executing_eagerly() + + +is_in_graph_mode.IS_IN_GRAPH_MODE = _tmp_in_graph_mode diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index f8f1011e4e..d504ca0b05 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -57,8 +57,7 @@ class TFETest(test_util.TensorFlowTestCase): def testContext(self): ctx = context.Context() - self.assertFalse(ctx.in_graph_mode()) - self.assertTrue(ctx.in_eager_mode()) + self.assertTrue(ctx.executing_eagerly()) self.assertEqual('', ctx.scope_name) ctx.scope_name = 'foo' @@ -150,9 +149,9 @@ class TFETest(test_util.TensorFlowTestCase): def get_context_values(ctx): return [ - ctx.in_graph_mode(), - ctx.in_eager_mode(), ctx.scope_name, ctx.summary_writer_resource, - ctx.device_name, ctx.num_gpus() + ctx.executing_eagerly(), ctx.scope_name, ctx.summary_writer_resource, + ctx.device_name, + ctx.num_gpus() ] def get_values(ctx, values): diff --git a/tensorflow/python/eager/function.py b/tensorflow/python/eager/function.py index 655eaf3a1e..343012e552 100644 --- a/tensorflow/python/eager/function.py +++ b/tensorflow/python/eager/function.py 
@@ -112,7 +112,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False): """ del as_ref # Unused. - if context.in_eager_mode(): + if context.executing_eagerly(): return value default_graph = ops.get_default_graph() @@ -295,7 +295,7 @@ class _EagerDefinedFunction(object): proto_data = pywrap_tensorflow.TF_GetBuffer(buffer_) function_def = function_pb2.FunctionDef() function_def.ParseFromString(compat.as_bytes(proto_data)) - if context.in_eager_mode(): + if context.executing_eagerly(): _register(fn) self.definition = function_def self.name = function_def.signature.name @@ -438,7 +438,14 @@ class GraphModeFunction(object): all_args = args + self._extra_inputs signature = self._forward_fdef.signature ctx = context.context() - if ctx.in_graph_mode(): + if ctx.executing_eagerly(): + outputs = execute.execute( + str(signature.name), + num_outputs=len(signature.output_arg), + inputs=all_args, + attrs=None, + ctx=ctx) + else: g = ops.get_default_graph() g._add_function(self._forward_fdef) # pylint: disable=protected-access op = g.create_op( @@ -453,13 +460,6 @@ class GraphModeFunction(object): outputs, (ops.Tensor, type(None))) else list(outputs) for i, s in enumerate(self._output_shapes): outputs[i].set_shape(s) - else: - outputs = execute.execute( - str(signature.name), - num_outputs=len(signature.output_arg), - inputs=all_args, - attrs=None, - ctx=ctx) real_outputs = outputs[:len(self._returns)] side_outputs = outputs[len(self._returns):] @@ -530,7 +530,14 @@ class GraphModeFunction(object): return self._backprop_call(tensor_inputs) ctx = context.context() - if ctx.in_graph_mode(): + if ctx.executing_eagerly(): + result = execute.execute( + str(self._func_name), + num_outputs=self._num_outputs, + inputs=tensor_inputs + self._extra_inputs, + attrs=None, + ctx=ctx) + else: g = ops.get_default_graph() self.add_to_graph(g) signature = self._function_def.definition.signature @@ -547,13 +554,6 @@ class GraphModeFunction(object): return op for i, s in 
enumerate(self._output_shapes): result[i].set_shape(s) - else: - result = execute.execute( - str(self._func_name), - num_outputs=self._num_outputs, - inputs=tensor_inputs + self._extra_inputs, - attrs=None, - ctx=ctx) return self._build_call_outputs(result) @@ -666,7 +666,7 @@ def _defun_internal(name, func, args, kwds): if x not in all_ignored_ops) # Register any other functions defined in the graph # TODO(ashankar): Oh lord, forgive me for this lint travesty. - if context.in_eager_mode(): + if context.executing_eagerly(): for f in tmp_graph._functions.values(): # pylint: disable=protected-access # TODO(ashankar): What about the gradient registry? _register(f._c_func) # pylint: disable=protected-access @@ -906,7 +906,7 @@ class AutomaticControlDependencies(object): return tensor def __enter__(self): - if context.in_eager_mode(): + if context.executing_eagerly(): return self # This code assumes no other thread is adding ops to the graph while # we're adding ops to the graph. @@ -977,7 +977,7 @@ class AutomaticControlDependencies(object): merge_for_resource[o] = new_merge[0].op def __exit__(self, unused_type, unused_value, unused_traceback): - if context.in_eager_mode(): + if context.executing_eagerly(): return if self._graph is not ops.get_default_graph(): diff --git a/tensorflow/python/eager/graph_callable.py b/tensorflow/python/eager/graph_callable.py index 623f3564ad..ee5d87f083 100644 --- a/tensorflow/python/eager/graph_callable.py +++ b/tensorflow/python/eager/graph_callable.py @@ -406,7 +406,7 @@ def graph_callable(shape_and_dtypes): A callable graph object. """ # TODO(alive,apassos): support initialized_value and friends from tf.Variable. 
- assert context.in_eager_mode(), ( + assert context.executing_eagerly(), ( "graph_callable can only be used when Eager execution is enabled.") def decorator(func): return tf_decorator.make_decorator(func, diff --git a/tensorflow/python/eager/python_eager_op_gen.cc b/tensorflow/python/eager/python_eager_op_gen.cc index 3de7445a50..c2ce8efd7f 100644 --- a/tensorflow/python/eager/python_eager_op_gen.cc +++ b/tensorflow/python/eager/python_eager_op_gen.cc @@ -367,7 +367,7 @@ void GenEagerPythonOp::HandleGraphMode(const string& function_setup) { // Handle graph-mode case strings::StrAppend(&result_, " _ctx = _context.context()\n" - " if _ctx.in_graph_mode():\n", + " if not _ctx.executing_eagerly():\n", function_setup, " _, _, _op = _op_def_lib._apply_op_helper(\n"); AddBodyNoReturn(" "); diff --git a/tensorflow/python/eager/pywrap_tfe_test.py b/tensorflow/python/eager/pywrap_tfe_test.py index 46c5601f47..faaae40b3f 100644 --- a/tensorflow/python/eager/pywrap_tfe_test.py +++ b/tensorflow/python/eager/pywrap_tfe_test.py @@ -169,7 +169,7 @@ class Tests(test.TestCase): def testFastpathExecute_InvalidInputs(self): a_2_by_2 = random_ops.random_uniform((2, 2)) ctx = context.context() - assert not ctx.in_graph_mode( + assert ctx.executing_eagerly( ), "The prototype doesn't contain C code for graph construction" ctx_handle = ctx._handle # pylint: disable=protected-access diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 3e20fc2c74..8ed3e4cd19 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -166,7 +166,7 @@ class Estimator(object): ValueError: if this is called via a subclass and if that class overrides a member of `Estimator`. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( 'Estimators are not supported when eager execution is enabled.') diff --git a/tensorflow/python/framework/constant_op.py b/tensorflow/python/framework/constant_op.py index d3d8c9c154..782b505d6c 100644 --- a/tensorflow/python/framework/constant_op.py +++ b/tensorflow/python/framework/constant_op.py @@ -181,7 +181,7 @@ def constant(value, dtype=None, shape=None, name="Const", verify_shape=False): TypeError: if shape is incorrectly specified or unsupported. """ ctx = context.context() - if not ctx.in_graph_mode(): + if ctx.executing_eagerly(): t = convert_to_eager_tensor(value, ctx, dtype) if shape is None: return t diff --git a/tensorflow/python/framework/function.py b/tensorflow/python/framework/function.py index caa604999c..14d72d8a3d 100644 --- a/tensorflow/python/framework/function.py +++ b/tensorflow/python/framework/function.py @@ -489,10 +489,10 @@ class _DefinedFunction(object): # Adds this function into 'g'. # pylint: disable=protected-access - if context.in_graph_mode(): - g._add_function(self) - else: + if context.executing_eagerly(): context.context().add_function_def(self.definition) + else: + g._add_function(self) # pylint: enable=protected-access # Ensures related sub-routines are defined in 'g', too. diff --git a/tensorflow/python/framework/meta_graph.py b/tensorflow/python/framework/meta_graph.py index 4c1bd736d7..4bb9941bb7 100644 --- a/tensorflow/python/framework/meta_graph.py +++ b/tensorflow/python/framework/meta_graph.py @@ -695,7 +695,7 @@ def import_scoped_meta_graph(meta_graph_or_file, Raises: ValueError: If the graph_def contains unbound inputs. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError("Exporting/importing meta graphs is not supported when " "eager execution is enabled.") if isinstance(meta_graph_or_file, meta_graph_pb2.MetaGraphDef): @@ -856,7 +856,7 @@ def export_scoped_meta_graph(filename=None, Raises: ValueError: When the `GraphDef` is larger than 2GB. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError("Exporting/importing meta graphs is not supported when " "Eager Execution is enabled.") graph = graph or ops.get_default_graph() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 2a8319a19f..8ff247fdb1 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -395,10 +395,10 @@ class Tensor(_TensorLike): "Tensor._shape cannot be assigned, use Tensor.set_shape instead.") def __iter__(self): - if context.in_graph_mode(): + if not context.executing_eagerly(): raise TypeError( - "`Tensor` objects are not iterable when eager execution is not " - "enabled. To iterate over this tensor use `tf.map_fn`.") + "Tensor objects are not iterable when eager execution is not " + "enabled. To iterate over this tensor use tf.map_fn.") shape = self._shape_tuple() if shape is None: raise TypeError("Cannot iterate over a tensor with unknown shape.") @@ -772,7 +772,7 @@ class _EagerTensorBase(Tensor): six.raise_from(core._status_to_exception(e.code, e.message), None) # Record the copy on tape and define backprop copy as well. - if not context.in_graph_mode(): + if context.executing_eagerly(): self_device = self.device def grad_fun(dresult): return [dresult._copy(device_name=self_device)] @@ -993,7 +993,7 @@ def internal_convert_to_tensor(value, """ if ctx is None: ctx = context.context() - if ctx.in_eager_mode(): + if ctx.executing_eagerly(): # Fast path for EagerTensors that don't need any conversion. 
if isinstance(value, EagerTensor): # Note that we don't check that value's dtype matches the dtype @@ -4797,15 +4797,15 @@ def device(device_name_or_function): Raises: RuntimeError: If eager execution is enabled and a function is passed in. """ - if context.in_graph_mode(): - return get_default_graph().device(device_name_or_function) - else: + if context.executing_eagerly(): # TODO(agarwal): support device functions in EAGER mode. if callable(device_name_or_function): raise RuntimeError( "tf.device does not support functions when eager execution " "is enabled.") return context.device(device_name_or_function) + else: + return get_default_graph().device(device_name_or_function) @tf_export("container") @@ -4824,7 +4824,12 @@ def container(container_name): @tf_export("colocate_with") def colocate_with(op, ignore_existing=False): - if context.in_graph_mode(): + if context.executing_eagerly(): + if op is not None: + return device(op.device) + else: + return _NullContextmanager() + else: default_graph = get_default_graph() if isinstance(op, EagerTensor): if default_graph.building_function: @@ -4833,11 +4838,6 @@ def colocate_with(op, ignore_existing=False): raise ValueError("Encountered an Eager-defined Tensor during graph " "construction, but a function was not being built.") return default_graph.colocate_with(op, ignore_existing) - else: - if op is not None: - return device(op.device) - else: - return _NullContextmanager() @tf_export("control_dependencies") @@ -4857,10 +4857,10 @@ def control_dependencies(control_inputs): A context manager that specifies control dependencies for all operations constructed within the context. 
""" - if context.in_graph_mode(): - return get_default_graph().control_dependencies(control_inputs) - else: + if context.executing_eagerly(): return _NullContextmanager() + else: + return get_default_graph().control_dependencies(control_inputs) class _DefaultStack(threading.local): @@ -5123,7 +5123,7 @@ def init_scope(): """ # pylint: enable=g-doc-return-or-yield,line-too-long - if context.in_eager_mode(): + if context.executing_eagerly(): # Fastpath. with tape.stop_recording(): yield @@ -5705,7 +5705,7 @@ class name_scope(object): # pylint: disable=invalid-name self._default_name = default_name self._values = values self._ctx = context.context() - self._in_eager_mode = self._ctx.in_eager_mode() + self._in_eager_mode = self._ctx.executing_eagerly() def __enter__(self): """Start the scope block. @@ -5884,7 +5884,7 @@ def get_from_proto_function(collection_name): def _assert_collection_is_ok(collection_name): - if context.in_eager_mode(): + if context.executing_eagerly(): if collection_name in GraphKeys._VARIABLE_COLLECTIONS: # pylint: disable=protected-access raise ValueError("When Eager Execution is enabled, variable " "collections are not supported.") diff --git a/tensorflow/python/framework/ops_test.py b/tensorflow/python/framework/ops_test.py index 55576f0e88..c294f830bc 100644 --- a/tensorflow/python/framework/ops_test.py +++ b/tensorflow/python/framework/ops_test.py @@ -1763,7 +1763,13 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): return constant_op.constant(2.0) future.calls = 0 - if context.in_graph_mode(): + if context.executing_eagerly(): + a = constant_op.constant(1.0) + b = future() + with ops.control_dependencies([a, b]): + c = constant_op.constant(3.0) + self.assertEqual(future.calls, 1) + else: g = ops.Graph() with g.as_default(): a = constant_op.constant(1.0) @@ -1772,12 +1778,6 @@ class ControlDependenciesTest(test_util.TensorFlowTestCase): c = constant_op.constant(3.0) self.assertEqual(c.op.control_inputs, [a.op, b.op]) 
self.assertEqual(future.calls, 1) - else: - a = constant_op.constant(1.0) - b = future() - with ops.control_dependencies([a, b]): - c = constant_op.constant(3.0) - self.assertEqual(future.calls, 1) def testBasicWithConversion(self): g = ops.Graph() @@ -2150,11 +2150,11 @@ class InitScopeTest(test_util.TensorFlowTestCase): with ops.init_scope(): # Because g is building a function, init_scope should # escape out to the eager context. - self.assertTrue(context.in_eager_mode()) + self.assertTrue(context.executing_eagerly()) # g should be reinstated as the default graph, and the # graph context should be re-entered. self.assertIs(g, ops.get_default_graph()) - self.assertTrue(context.in_graph_mode()) + self.assertFalse(context.executing_eagerly()) def testStaysInEagerWhenOnlyEagerContextActive(self): with context.eager_mode(): @@ -2277,12 +2277,13 @@ class InitScopeTest(test_util.TensorFlowTestCase): with context.eager_mode(): def foo(): with ops.name_scope("inner"), ops.init_scope(): - if context.in_graph_mode(): - self.assertEqual(ops.get_name_scope(), "inner") - else: + if context.executing_eagerly(): # A trailing slash is always appended when eager execution is # enabled. self.assertEqual(context.context().scope_name, "inner/") + else: + self.assertEqual(ops.get_name_scope(), "inner") + foo() self.assertEqual(ops.get_name_scope(), "") foo_compiled = eager_function.defun(foo) diff --git a/tensorflow/python/framework/random_seed.py b/tensorflow/python/framework/random_seed.py index 1e74a790a3..b724432e00 100644 --- a/tensorflow/python/framework/random_seed.py +++ b/tensorflow/python/framework/random_seed.py @@ -52,20 +52,20 @@ def get_seed(op_seed): A tuple of two integers that should be used for the local seed of this operation. 
""" - is_graph_mode = context.in_graph_mode() + eager = context.executing_eagerly() - if is_graph_mode: - global_seed = ops.get_default_graph().seed - else: + if eager: global_seed = context.global_seed() + else: + global_seed = ops.get_default_graph().seed if global_seed is not None: if op_seed is None: # pylint: disable=protected-access - if is_graph_mode: - op_seed = ops.get_default_graph()._last_id - else: + if eager: op_seed = context.internal_operation_seed() + else: + op_seed = ops.get_default_graph()._last_id seeds = _truncate_seed(global_seed), _truncate_seed(op_seed) else: @@ -176,7 +176,7 @@ def set_random_seed(seed): Args: seed: integer. """ - if context.in_graph_mode(): - ops.get_default_graph().seed = seed - else: + if context.executing_eagerly(): context.set_global_seed(seed) + else: + ops.get_default_graph().seed = seed diff --git a/tensorflow/python/framework/random_seed_test.py b/tensorflow/python/framework/random_seed_test.py index b4c98ab8b2..1944922686 100644 --- a/tensorflow/python/framework/random_seed_test.py +++ b/tensorflow/python/framework/random_seed_test.py @@ -40,13 +40,13 @@ class RandomSeedTest(test.TestCase): ((2**31 - 1, 0), (0, 2**31 - 1)), # Don't wrap to (0, 0) either ((0, 2**31 - 1), (0, 2**31 - 1)), # Wrapping for the other argument ] - if context.in_graph_mode(): - # 0 will be the default_graph._lastid. - test_cases.append(((1, None), (1, 0))) - else: + if context.executing_eagerly(): # operation seed is random number generated based on global seed. # it's not tested due to possibility of platform or version difference. pass + else: + # 0 will be the default_graph._lastid. 
+ test_cases.append(((1, None), (1, 0))) for tc in test_cases: tinput, toutput = tc[0], tc[1] random_seed.set_random_seed(tinput[0]) diff --git a/tensorflow/python/framework/tensor_util.py b/tensorflow/python/framework/tensor_util.py index 135562e831..984bcecdfe 100644 --- a/tensorflow/python/framework/tensor_util.py +++ b/tensorflow/python/framework/tensor_util.py @@ -828,7 +828,7 @@ def constant_value_as_shape(tensor): # pylint: disable=invalid-name Returns: A `TensorShape` based on the constant value of the given `tensor`. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return tensor_shape.as_shape( [dim if dim != -1 else None for dim in tensor.numpy()]) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 1c8398e686..9fc1154201 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -816,7 +816,7 @@ class TensorFlowTestCase(googletest.TestCase): Returns: tensors numpy values. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return self._eval_helper(tensors) else: sess = ops.get_default_session() diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index 3d539f9a76..688dc070e6 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -343,7 +343,7 @@ def learning_phase(): Returns: Learning phase (scalar integer tensor or Python integer). """ - if context.in_eager_mode(): + if context.executing_eagerly(): if 'eager' not in _GRAPH_LEARNING_PHASES: # Fallback to inference mode as default. 
return 0 @@ -370,7 +370,7 @@ def set_learning_phase(value): global _GRAPH_LEARNING_PHASES # pylint: disable=global-variable-not-assigned if value not in {0, 1}: raise ValueError('Expected learning phase to be 0 or 1.') - if context.in_eager_mode(): + if context.executing_eagerly(): _GRAPH_LEARNING_PHASES['eager'] = value else: _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = value @@ -399,7 +399,7 @@ def learning_phase_scope(value): yield value finally: # Restore learning phase to initial value. - if context.in_eager_mode(): + if context.executing_eagerly(): _GRAPH_LEARNING_PHASES['eager'] = previous_value else: _GRAPH_LEARNING_PHASES[ops.get_default_graph()] = previous_value @@ -2625,7 +2625,7 @@ def get_value(x): Returns: A Numpy array. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return x.numpy() return x.eval(session=get_session()) @@ -2640,7 +2640,7 @@ def batch_get_value(tensors): Returns: A list of Numpy arrays. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return [x.numpy() for x in tensors] if tensors: return get_session().run(tensors) @@ -2658,7 +2658,7 @@ def set_value(x, value): (of the same shape). """ value = np.asarray(value, dtype=dtype(x)) - if context.in_eager_mode(): + if context.executing_eagerly(): x.assign(value) else: tf_dtype = dtypes_module.as_dtype(x.dtype.name.split('_')[0]) @@ -2681,7 +2681,7 @@ def batch_set_value(tuples): tuples: a list of tuples `(tensor, value)`. `value` should be a Numpy array. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): for x, value in tuples: x.assign(np.asarray(value, dtype=dtype(x))) else: @@ -3123,7 +3123,7 @@ def rnn(step_function, outputs_shape[1] = inputs_shape[1] outputs.set_shape(outputs_shape) - if not context.in_eager_mode(): + if not context.executing_eagerly(): last_output._uses_learning_phase = uses_learning_phase return last_output, outputs, new_states diff --git a/tensorflow/python/keras/_impl/keras/engine/base_layer.py b/tensorflow/python/keras/_impl/keras/engine/base_layer.py index 7f215f5645..5615241ae3 100644 --- a/tensorflow/python/keras/_impl/keras/engine/base_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/base_layer.py @@ -237,7 +237,7 @@ class Layer(tf_base_layers.Layer): """ # Actually call the layer (optionally building it). output = super(Layer, self).__call__(inputs, **kwargs) - if context.in_eager_mode(): + if context.executing_eagerly(): return output if hasattr(self, '_symbolic_set_inputs') and not self.inputs: diff --git a/tensorflow/python/keras/_impl/keras/engine/input_layer.py b/tensorflow/python/keras/_impl/keras/engine/input_layer.py index 8f9ea6f7a4..29a17555e0 100644 --- a/tensorflow/python/keras/_impl/keras/engine/input_layer.py +++ b/tensorflow/python/keras/_impl/keras/engine/input_layer.py @@ -92,7 +92,7 @@ class InputLayer(base_layer.Layer): else: batch_input_shape = None - if context.in_eager_mode(): + if context.executing_eagerly(): # In eager mode, create a temporary placeholder to call the layer on. 
input_tensor = tf_base_layers._DeferredTensor( # pylint: disable=protected-access shape=batch_input_shape, diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index 143efd97a0..bde16cdeb0 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -99,11 +99,11 @@ class Network(base_layer.Layer): self._losses = [] # Used in symbolic mode only. self._scope = None # Never used. self._reuse = None # Never used. - if context.in_eager_mode(): + if context.executing_eagerly(): self._graph = None else: self._graph = ops.get_default_graph() # Used in symbolic mode only. - # A Network does not create weights of its own, thus has no dtype. + # A Network does not create weights of its own, thus has no dtype. self._dtype = None # All layers in order of horizontal graph traversal. @@ -126,7 +126,7 @@ class Network(base_layer.Layer): self.outputs = [outputs] # User-prodived argument validation. - if context.in_eager_mode(): + if context.executing_eagerly(): # Check that all inputs/outputs are DeferredTensors. 
for tensor in self.inputs: if not isinstance(tensor, tf_base_layers._DeferredTensor): # pylint: disable=protected-access @@ -275,7 +275,7 @@ class Network(base_layer.Layer): self._feed_input_names.append(layer.name) self._feed_input_shapes.append(K.int_shape(self.inputs[i])) # layer.input gives an error in eager mode - if context.in_graph_mode(): + if not context.executing_eagerly(): self._feed_inputs.append(layer.input) for layer in self._output_layers: self.output_names.append(layer.name) @@ -317,7 +317,7 @@ class Network(base_layer.Layer): raise NotImplementedError('`add_variable` is not supported on Networks.') def add_loss(self, *args, **kwargs): - if context.in_eager_mode(): + if context.executing_eagerly(): raise NotImplementedError('`add_loss` is not supported on Networks ' 'when eager execution is enabled.') super(Network, self).add_loss(*args, **kwargs) @@ -483,7 +483,7 @@ class Network(base_layer.Layer): Returns: A list of update ops. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return [] if not self.trainable and not self.stateful: @@ -530,7 +530,7 @@ class Network(base_layer.Layer): losses = [] for layer in self.layers: losses += layer.losses - if context.in_eager_mode(): + if context.executing_eagerly(): return losses if self.inputs: @@ -623,7 +623,7 @@ class Network(base_layer.Layer): else: masks = nest.flatten(mask) - if context.in_graph_mode(): + if not context.executing_eagerly(): # Try to retrieve cached outputs if the layer has already been called # on these exact inputs. 
cache_key = (tf_layers_util.object_list_uid(inputs) @@ -829,7 +829,7 @@ class Network(base_layer.Layer): else: output_masks = [None for _ in range(len(output_tensors))] - if context.in_graph_mode(): + if not context.executing_eagerly(): if layer.activity_regularizer is not None: regularization_losses = [ layer.activity_regularizer(x) for x in output_tensors @@ -859,7 +859,7 @@ class Network(base_layer.Layer): if output_masks is not None: output_masks = output_masks[0] - if context.in_graph_mode(): + if not context.executing_eagerly(): # Update cache; # keys are based on ids on input tensors and inputs masks. cache_key = (tf_layers_util.object_list_uid(inputs) diff --git a/tensorflow/python/keras/_impl/keras/engine/topology_test.py b/tensorflow/python/keras/_impl/keras/engine/topology_test.py index 0058e66c29..b50277c8ff 100644 --- a/tensorflow/python/keras/_impl/keras/engine/topology_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/topology_test.py @@ -755,7 +755,17 @@ class TopologyConstructionTest(test.TestCase): def compute_mask(self, inputs, mask=None): return array_ops.ones_like(inputs) - if context.in_graph_mode(): + if context.executing_eagerly(): + a = constant_op.constant([2] * 32) + mask = constant_op.constant([0, 1] * 16) + a._keras_mask = mask + b = MaskedLayer().apply(a) + self.assertTrue(hasattr(b, '_keras_mask')) + self.assertAllEqual( + self.evaluate(array_ops.ones_like(mask)), + self.evaluate(getattr(b, '_keras_mask'))) + self.assertAllEqual(self.evaluate(a * mask), self.evaluate(b)) + else: x = keras.Input(shape=(32,)) y = MaskedLayer()(x) # pylint: disable=not-callable network = keras.engine.Network(x, y) @@ -769,15 +779,6 @@ class TopologyConstructionTest(test.TestCase): x_2 = array_ops.placeholder(dtype='float32', shape=(None, 32)) y_2 = network(x_2) self.assertEqual(y_2.get_shape().as_list(), [None, 32]) - else: - a = constant_op.constant([2] * 32) - mask = constant_op.constant([0, 1] * 16) - a._keras_mask = mask - b = 
MaskedLayer().apply(a) - self.assertTrue(hasattr(b, '_keras_mask')) - self.assertAllEqual(self.evaluate(array_ops.ones_like(mask)), - self.evaluate(getattr(b, '_keras_mask'))) - self.assertAllEqual(self.evaluate(a * mask), self.evaluate(b)) def test_activity_regularization_with_model_composition(self): @@ -885,13 +886,13 @@ class DeferredModeTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testSimpleNetworkBuilding(self): inputs = keras.engine.Input(shape=(32,)) - if context.in_eager_mode(): + if context.executing_eagerly(): self.assertIsInstance(inputs, tf_base_layers._DeferredTensor) self.assertEqual(inputs.dtype.name, 'float32') self.assertEqual(inputs.shape.as_list(), [None, 32]) x = keras.layers.Dense(2)(inputs) - if context.in_eager_mode(): + if context.executing_eagerly(): self.assertIsInstance(x, tf_base_layers._DeferredTensor) self.assertEqual(x.dtype.name, 'float32') self.assertEqual(x.shape.as_list(), [None, 2]) @@ -900,7 +901,7 @@ class DeferredModeTest(test.TestCase): network = keras.engine.Network(inputs, outputs) self.assertIsInstance(network, keras.engine.Network) - if context.in_eager_mode(): + if context.executing_eagerly(): # It should be possible to call such a network on EagerTensors. 
inputs = constant_op.constant( np.random.random((10, 32)).astype('float32')) @@ -925,7 +926,7 @@ class DeferredModeTest(test.TestCase): c = keras.layers.Dense(2)(c) network = keras.engine.Network([input_a, input_b], [a, c]) - if context.in_eager_mode(): + if context.executing_eagerly(): a_val = constant_op.constant( np.random.random((10, 32)).astype('float32')) b_val = constant_op.constant( diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 81ab77094e..8b82c0b313 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -162,7 +162,7 @@ class Model(Network): `optimizer`, `loss`, `metrics` or `sample_weight_mode`. """ loss = loss or {} - if context.in_eager_mode() and not isinstance( + if context.executing_eagerly() and not isinstance( optimizer, (tf_optimizer_module.Optimizer, optimizers.TFOptimizer)): raise ValueError('Only TF native optimizers are supported in Eager mode.') @@ -170,13 +170,13 @@ class Model(Network): self.loss = loss self.metrics = metrics or [] self.loss_weights = loss_weights - if context.in_eager_mode() and sample_weight_mode is not None: + if context.executing_eagerly() and sample_weight_mode is not None: raise ValueError('sample_weight_mode is not supported in Eager mode.') self.sample_weight_mode = sample_weight_mode - if context.in_eager_mode() and weighted_metrics is not None: + if context.executing_eagerly() and weighted_metrics is not None: raise ValueError('weighted_metrics is not supported in Eager mode.') self.weighted_metrics = weighted_metrics - if context.in_eager_mode() and target_tensors is not None: + if context.executing_eagerly() and target_tensors is not None: raise ValueError('target_tensors is not supported in Eager mode.') self.target_tensors = target_tensors @@ -230,7 +230,7 @@ class Model(Network): skip_target_weighing_indices.append(i) # Prepare output masks. 
- if context.in_graph_mode(): + if not context.executing_eagerly(): masks = self.compute_mask(self.inputs, mask=None) if masks is None: masks = [None for _ in self.outputs] @@ -264,7 +264,7 @@ class Model(Network): self.loss_weights_list = loss_weights_list # initialization for Eager mode execution - if context.in_eager_mode(): + if context.executing_eagerly(): if target_tensors is not None: raise ValueError('target_tensors are not currently supported in Eager' 'mode.') @@ -738,13 +738,13 @@ class Model(Network): 'TensorFlow tensors. ' 'You passed: x=' + str(x) + '; y=' + str(y)) - if context.in_graph_mode(): + if context.executing_eagerly(): + target_tensors = None + else: # Handle target tensors if any passed. if not isinstance(y, (list, tuple)): y = [y] target_tensors = [v for v in y if tensor_util.is_tensor(v)] - else: - target_tensors = None self.compile(optimizer=self.optimizer, loss=self.loss, metrics=self.metrics, @@ -761,7 +761,7 @@ class Model(Network): # What follows is input validation and standardization to list format, # in the case where all inputs are value arrays. - if context.in_eager_mode(): + if context.executing_eagerly(): # In eager mode, do not do shape validation. feed_input_names = self.input_names feed_input_shapes = None @@ -784,7 +784,7 @@ class Model(Network): exception_prefix='input') if y is not None: - if context.in_eager_mode(): + if context.executing_eagerly(): feed_output_names = self.output_names feed_output_shapes = None # Sample weighting not supported in this case. @@ -835,7 +835,7 @@ class Model(Network): ] # Check that all arrays have the same length. training_utils.check_array_lengths(x, y, sample_weights) - if self._is_graph_network and not context.in_eager_mode(): + if self._is_graph_network and not context.executing_eagerly(): # Additional checks to avoid users mistakenly using improper loss fns. 
training_utils.check_loss_and_target_compatibility( y, self._feed_loss_fns, feed_output_shapes) @@ -874,7 +874,7 @@ class Model(Network): whether to build the model's graph in inference mode (False), training mode (True), or using the Keras learning phase (None). """ - if context.in_eager_mode(): + if context.executing_eagerly(): self._eager_set_inputs(inputs) else: self._symbolic_set_inputs(inputs, training=training) @@ -903,7 +903,7 @@ class Model(Network): Raises: ValueError: If the model's inputs are already set. """ - assert context.in_eager_mode() + assert context.executing_eagerly() if self.inputs: raise ValueError('Model inputs are already set.') # On-the-fly setting of model inputs/outputs as DeferredTensors, @@ -950,7 +950,7 @@ class Model(Network): Raises: ValueError: If the model's inputs are already set. """ - assert context.in_graph_mode() + assert not context.executing_eagerly() if self.inputs: raise ValueError('Model inputs are already set.') @@ -1186,7 +1186,7 @@ class Model(Network): val_y = None val_sample_weights = None - if context.in_eager_mode(): + if context.executing_eagerly(): return training_eager.fit_loop( self, inputs=x, @@ -1289,7 +1289,7 @@ class Model(Network): sample_weight=sample_weight, batch_size=batch_size) - if context.in_eager_mode(): + if context.executing_eagerly(): return training_eager.test_loop( self, inputs=x, targets=y, sample_weights=sample_weights, batch_size=batch_size, verbose=verbose, steps=steps) @@ -1330,7 +1330,7 @@ class Model(Network): 'argument.') x, _, _ = self._standardize_user_data(x) - if context.in_eager_mode(): + if context.executing_eagerly(): return training_eager.predict_loop( self, x, batch_size=batch_size, verbose=verbose, steps=steps) else: @@ -1381,7 +1381,7 @@ class Model(Network): sample_weight=sample_weight, class_weight=class_weight) - if context.in_eager_mode(): + if context.executing_eagerly(): outputs = training_eager.train_on_batch( self, x, y, sample_weights=sample_weights) else: @@ 
-1431,7 +1431,7 @@ class Model(Network): x, y, sample_weights = self._standardize_user_data( x, y, sample_weight=sample_weight) - if context.in_eager_mode(): + if context.executing_eagerly(): outputs = training_eager.test_on_batch( self, x, y, sample_weights=sample_weights) else: @@ -1458,11 +1458,11 @@ class Model(Network): """ x, _, _ = self._standardize_user_data(x) - if context.in_eager_mode(): + if context.executing_eagerly(): inputs = [ops.convert_to_tensor(val, dtype=K.floatx()) for val in x] return self(inputs) # pylint: disable=not-callable - if context.in_graph_mode(): + if not context.executing_eagerly(): if self.uses_learning_phase and not isinstance(K.learning_phase(), int): ins = x + [0] else: diff --git a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py index c612e97a9d..f4a134b96c 100644 --- a/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/convolutional_test.py @@ -553,7 +553,7 @@ class ZeroPaddingTest(test.TestCase): layer = keras.layers.ZeroPadding1D(padding=2) layer.build(shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -564,7 +564,7 @@ class ZeroPaddingTest(test.TestCase): layer = keras.layers.ZeroPadding1D(padding=(1, 2)) layer.build(shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -610,7 +610,7 @@ class ZeroPaddingTest(test.TestCase): padding=(2, 2), data_format=data_format) layer.build(inputs.shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -629,7 +629,7 @@ class 
ZeroPaddingTest(test.TestCase): padding=((1, 2), (3, 4)), data_format=data_format) layer.build(inputs.shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -683,7 +683,7 @@ class ZeroPaddingTest(test.TestCase): layer = keras.layers.ZeroPadding3D(padding=(2, 2, 2)) layer.build(inputs.shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -737,7 +737,7 @@ class UpSamplingTest(test.TestCase): size=(length_row, length_col), data_format=data_format) layer.build(inputs.shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -790,7 +790,7 @@ class UpSamplingTest(test.TestCase): data_format=data_format) layer.build(inputs.shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -865,7 +865,7 @@ class CroppingTest(test.TestCase): cropping=cropping, data_format=data_format) layer.build(inputs.shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -892,7 +892,7 @@ class CroppingTest(test.TestCase): cropping=cropping, data_format=data_format) layer.build(inputs.shape) output = layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -937,7 +937,7 @@ class CroppingTest(test.TestCase): cropping=cropping, data_format=data_format) layer.build(inputs.shape) output = 
layer(keras.backend.variable(inputs)) - if context.in_eager_mode(): + if context.executing_eagerly(): np_output = output.numpy() else: np_output = keras.backend.eval(output) @@ -954,7 +954,7 @@ class CroppingTest(test.TestCase): cropping[2][0]:-cropping[2][1], :] np.testing.assert_allclose(np_output, expected_out) - # test incorrect use + # test incorrect use with self.assertRaises(ValueError): keras.layers.Cropping3D(cropping=(1, 1)) with self.assertRaises(ValueError): diff --git a/tensorflow/python/keras/_impl/keras/layers/core.py b/tensorflow/python/keras/_impl/keras/layers/core.py index 50a197c80c..73e4f15f7e 100644 --- a/tensorflow/python/keras/_impl/keras/layers/core.py +++ b/tensorflow/python/keras/_impl/keras/layers/core.py @@ -124,7 +124,7 @@ class Dropout(tf_core_layers.Dropout, Layer): training = K.learning_phase() output = super(Dropout, self).call(inputs, training=training) # EagerTensor object has no attribute _uses_learning_phase - if not context.in_eager_mode() and training is K.learning_phase(): + if not context.executing_eagerly() and training is K.learning_phase(): output._uses_learning_phase = True # pylint: disable=protected-access return output diff --git a/tensorflow/python/keras/_impl/keras/layers/normalization.py b/tensorflow/python/keras/_impl/keras/layers/normalization.py index 0dedd5e8da..3b44b20bf8 100644 --- a/tensorflow/python/keras/_impl/keras/layers/normalization.py +++ b/tensorflow/python/keras/_impl/keras/layers/normalization.py @@ -111,7 +111,7 @@ class BatchNormalization(tf_normalization_layers.BatchNormalization, Layer): if training is None: training = K.learning_phase() output = super(BatchNormalization, self).call(inputs, training=training) - if context.in_graph_mode() and training is K.learning_phase(): + if not context.executing_eagerly() and training is K.learning_phase(): output._uses_learning_phase = True # pylint: disable=protected-access return output diff --git 
a/tensorflow/python/keras/_impl/keras/layers/pooling_test.py b/tensorflow/python/keras/_impl/keras/layers/pooling_test.py index 70049f0976..bb003c1ddd 100644 --- a/tensorflow/python/keras/_impl/keras/layers/pooling_test.py +++ b/tensorflow/python/keras/_impl/keras/layers/pooling_test.py @@ -105,7 +105,7 @@ class Pooling2DTest(test.TestCase): # This part of the test can only run on GPU but doesn't appear # to be properly assigned to a GPU when running in eager mode. - if not context.in_eager_mode(): + if not context.executing_eagerly(): # Only runs on GPU with CUDA, channels_first is not supported on CPU. # TODO(b/62340061): Support channels_first on CPU. if test.is_gpu_available(cuda_only=True): diff --git a/tensorflow/python/keras/_impl/keras/layers/recurrent.py b/tensorflow/python/keras/_impl/keras/layers/recurrent.py index 0264c7ae01..2910719807 100644 --- a/tensorflow/python/keras/_impl/keras/layers/recurrent.py +++ b/tensorflow/python/keras/_impl/keras/layers/recurrent.py @@ -936,7 +936,7 @@ class SimpleRNNCell(Layer): # Properly set learning phase on output tensor. if 0 < self.dropout + self.recurrent_dropout: - if training is None and not context.in_eager_mode(): + if training is None and not context.executing_eagerly(): # This would be harmless to set in eager mode, but eager tensors # disallow setting arbitrary attributes. output._uses_learning_phase = True @@ -1384,7 +1384,7 @@ class GRUCell(Layer): hh = self.activation(x_h + recurrent_h) h = z * h_tm1 + (1 - z) * hh if 0 < self.dropout + self.recurrent_dropout: - if training is None and not context.in_eager_mode(): + if training is None and not context.executing_eagerly(): # This would be harmless to set in eager mode, but eager tensors # disallow setting arbitrary attributes. 
h._uses_learning_phase = True @@ -1877,7 +1877,7 @@ class LSTMCell(Layer): h = o * self.activation(c) if 0 < self.dropout + self.recurrent_dropout: - if training is None and not context.in_eager_mode(): + if training is None and not context.executing_eagerly(): # This would be harmless to set in eager mode, but eager tensors # disallow setting arbitrary attributes. h._uses_learning_phase = True diff --git a/tensorflow/python/kernel_tests/atrous_convolution_test.py b/tensorflow/python/kernel_tests/atrous_convolution_test.py index 2d1b3d9b7e..0ef08581c9 100644 --- a/tensorflow/python/kernel_tests/atrous_convolution_test.py +++ b/tensorflow/python/kernel_tests/atrous_convolution_test.py @@ -83,14 +83,14 @@ class AtrousConvolutionTest(test.TestCase): checks = [] def add_check(check, *args, **kwargs): - if context.in_eager_mode(): + if context.executing_eagerly(): args_val, kwargs_val = self.evaluate([args, kwargs]) check(*args_val, **kwargs_val) else: checks.append((check, args, kwargs)) yield add_check - if context.in_graph_mode(): + if not context.executing_eagerly(): all_values = self.evaluate([[args, kwargs] for _, args, kwargs in checks]) for (check, _, _), (args, kwargs) in zip(checks, all_values): check(*args, **kwargs) diff --git a/tensorflow/python/kernel_tests/check_ops_test.py b/tensorflow/python/kernel_tests/check_ops_test.py index 2e94603a3f..26d3df9e63 100644 --- a/tensorflow/python/kernel_tests/check_ops_test.py +++ b/tensorflow/python/kernel_tests/check_ops_test.py @@ -102,17 +102,15 @@ class AssertEqualTest(test.TestCase): with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): check_ops.assert_equal(static_big, static_small, message="fail") - # Dynamic check - if context.in_graph_mode(): - with self.test_session(): - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies( - [check_ops.assert_equal( - big, small, message="fail")]): - out = 
array_ops.identity(small) - with self.assertRaisesOpError("fail.*big.*small"): - out.eval(feed_dict={small: [1, 2], big: [3, 4]}) + def test_raises_when_greater_dynamic(self): + with self.test_session(): + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies( + [check_ops.assert_equal(big, small, message="fail")]): + out = array_ops.identity(small) + with self.assertRaisesOpError("fail.*big.*small"): + out.eval(feed_dict={small: [1, 2], big: [3, 4]}) def test_error_message_eager(self): expected_error_msg_full = r"""big does not equal small @@ -182,15 +180,14 @@ First 2 elements of y: with self.assertRaisesRegexp(errors.InvalidArgumentError, "fail"): check_ops.assert_equal(static_big, static_small, message="fail") - # Dynamic check - if context.in_graph_mode(): - with self.test_session(): - small = array_ops.placeholder(dtypes.int32, name="small") - big = array_ops.placeholder(dtypes.int32, name="big") - with ops.control_dependencies([check_ops.assert_equal(small, big)]): - out = array_ops.identity(small) - with self.assertRaisesOpError("small.*big"): - out.eval(feed_dict={small: [3, 1], big: [4, 2]}) + def test_raises_when_less_dynamic(self): + with self.test_session(): + small = array_ops.placeholder(dtypes.int32, name="small") + big = array_ops.placeholder(dtypes.int32, name="big") + with ops.control_dependencies([check_ops.assert_equal(small, big)]): + out = array_ops.identity(small) + with self.assertRaisesOpError("small.*big"): + out.eval(feed_dict={small: [3, 1], big: [4, 2]}) @test_util.run_in_graph_and_eager_modes() def test_doesnt_raise_when_equal_and_broadcastable_shapes(self): diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index 61fb3f12e4..63203a0043 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -360,7 +360,7 @@ class 
PyFuncTest(test.TestCase): raise py_exp("blah") # pylint: disable=not-callable if eager: - if context.in_eager_mode(): + if context.executing_eagerly(): with self.assertRaisesRegexp(tf_exp, "blah"): f = script_ops.eager_py_func(raise_exception, [], []) return @@ -432,7 +432,7 @@ class PyFuncTest(test.TestCase): output = script_ops.eager_py_func(no_return_value, inp=[], Tout=[]) ret = self.evaluate(output) - if context.in_eager_mode(): + if context.executing_eagerly(): self.assertEquals(len(ret), 0) else: self.assertIsNone(ret) diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index 10ba9fa674..d34b751062 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -279,15 +279,12 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): # Tests for the 'read_value' argument: assign_with_read = v.assign(3.0, read_value=True) - if context.in_graph_mode(): - self.assertEqual(3.0, assign_with_read.eval()) - else: - self.assertEqual(3.0, self.evaluate(assign_with_read)) + self.assertEqual(3.0, self.evaluate(assign_with_read)) assign_without_read = v.assign(4.0, read_value=False) - if context.in_graph_mode(): - self.assertIsInstance(assign_without_read, ops.Operation) - else: + if context.executing_eagerly(): self.assertIsNone(assign_without_read) + else: + self.assertIsInstance(assign_without_read, ops.Operation) self.evaluate(assign_without_read) self.assertEqual(4.0, self.evaluate(v.value())) @@ -355,15 +352,12 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): # Tests for the 'read_value' argument: assign_with_read = v.assign_add(1.0, read_value=True) - if context.in_graph_mode(): - self.assertEqual(3.0, assign_with_read.eval()) - else: - self.assertEqual(3.0, self.evaluate(assign_with_read)) + self.assertEqual(3.0, self.evaluate(assign_with_read)) assign_without_read = 
v.assign_add(1.0, read_value=False) - if context.in_graph_mode(): - self.assertIsInstance(assign_without_read, ops.Operation) - else: + if context.executing_eagerly(): self.assertIsNone(assign_without_read) + else: + self.assertIsInstance(assign_without_read, ops.Operation) self.evaluate(assign_without_read) self.assertEqual(4.0, self.evaluate(v.value())) @@ -376,15 +370,12 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): # Tests for the 'read_value' argument: assign_with_read = v.assign_sub(1.0, read_value=True) - if context.in_graph_mode(): - self.assertEqual(1.0, assign_with_read.eval()) - else: - self.assertEqual(1.0, self.evaluate(assign_with_read)) + self.assertEqual(1.0, self.evaluate(assign_with_read)) assign_without_read = v.assign_sub(1.0, read_value=False) - if context.in_graph_mode(): - self.assertIsInstance(assign_without_read, ops.Operation) - else: + if context.executing_eagerly(): self.assertIsNone(assign_without_read) + else: + self.assertIsInstance(assign_without_read, ops.Operation) self.evaluate(assign_without_read) self.assertEqual(0.0, self.evaluate(v.value())) @@ -485,7 +476,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): self.assertEqual("(10, 20, 35)", str(v.get_shape())) self.assertEqual("(10, 20, 35)", str(v.value().shape)) self.assertEqual("(3, 20, 35)", str(v.sparse_read([0, 1, 2]).shape)) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual( "", str(v.sparse_read(array_ops.placeholder(dtypes.int32)).shape)) diff --git a/tensorflow/python/kernel_tests/rnn_test.py b/tensorflow/python/kernel_tests/rnn_test.py index daa42938e6..9a0409c796 100644 --- a/tensorflow/python/kernel_tests/rnn_test.py +++ b/tensorflow/python/kernel_tests/rnn_test.py @@ -111,10 +111,10 @@ class RNNTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testInvalidSequenceLengthShape(self): cell = Plus1RNNCell() - if context.in_graph_mode(): - inputs = [array_ops.placeholder(dtypes.float32, 
shape=(3, 4))] - else: + if context.executing_eagerly(): inputs = [constant_op.constant(np.ones((3, 4)))] + else: + inputs = [array_ops.placeholder(dtypes.float32, shape=(3, 4))] with self.assertRaisesRegexp(ValueError, "must be a vector"): rnn.dynamic_rnn( cell, @@ -125,38 +125,30 @@ class RNNTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testBatchSizeFromInput(self): cell = Plus1RNNCell() - in_graph_mode = context.in_graph_mode() + in_eager_mode = context.executing_eagerly() # With static batch size - if in_graph_mode: - inputs = array_ops.placeholder(dtypes.float32, shape=(3, 4, 5)) - initial_state = array_ops.placeholder(dtypes.float32, shape=(3, 5)) - else: + if in_eager_mode: inputs = np.zeros((3, 4, 5), dtype=np.float32) initial_state = np.zeros((3, 5), dtype=np.float32) + else: + inputs = array_ops.placeholder(dtypes.float32, shape=(3, 4, 5)) + initial_state = array_ops.placeholder(dtypes.float32, shape=(3, 5)) # - Without initial_state outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32) - if in_graph_mode: - self.assertEqual(3, outputs.shape[0].value) - self.assertEqual(3, state.shape[0].value) - else: - self.assertEqual(3, outputs.shape[0]) - self.assertEqual(3, state.shape[0]) + self.assertEqual(3, outputs.shape[0]) + self.assertEqual(3, state.shape[0]) # - With initial_state outputs, state = rnn.dynamic_rnn( cell, inputs, initial_state=initial_state) - if in_graph_mode: - self.assertEqual(3, outputs.shape[0].value) - self.assertEqual(3, state.shape[0].value) - else: - self.assertEqual(3, outputs.shape[0]) - self.assertEqual(3, state.shape[0]) + self.assertEqual(3, outputs.shape[0]) + self.assertEqual(3, state.shape[0]) # Without static batch size - # Tensor shapes are fully determined in Eager mode, so only run this - # test in graph mode. - if in_graph_mode: + # Tensor shapes are fully determined with eager execution enabled, + # so only run this test for graph construction. 
+ if not in_eager_mode: inputs = array_ops.placeholder(dtypes.float32, shape=(None, 4, 5)) # - Without initial_state outputs, state = rnn.dynamic_rnn(cell, inputs, dtype=dtypes.float32) @@ -173,56 +165,46 @@ class RNNTest(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testScalarStateIsAccepted(self): cell = ScalarStateRNNCell() - in_graph_mode = context.in_graph_mode() + in_eager_mode = context.executing_eagerly() - if in_graph_mode: - inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1)) - else: + if in_eager_mode: inputs = np.array([[[1], [2], [3], [4]]], dtype=np.float32) + else: + inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1)) with self.test_session() as sess: outputs, state = rnn.dynamic_rnn( cell, inputs, dtype=dtypes.float32, sequence_length=[4]) - if in_graph_mode: + if not in_eager_mode: outputs, state = sess.run( [outputs, state], feed_dict={inputs: [[[1], [2], [3], [4]]]}) - if in_graph_mode: - self.assertAllEqual(outputs, np.array([[[1], [2], [3], [4]]])) - self.assertEqual(state, 4) - else: - self.assertAllEqual(outputs.numpy(), np.array([[[1], [2], [3], [4]]])) - self.assertEqual(state.numpy(), 4) + self.assertAllEqual([[[1], [2], [3], [4]]], outputs) + self.assertAllEqual(4, state) @test_util.run_in_graph_and_eager_modes() def testTensorArrayStateIsAccepted(self): cell = TensorArrayStateRNNCell() - in_graph_mode = context.in_graph_mode() + in_eager_mode = context.executing_eagerly() - if in_graph_mode: - inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1)) - else: + if in_eager_mode: inputs = np.array([[[1], [2], [3], [4]]], dtype=np.float32) + else: + inputs = array_ops.placeholder(dtypes.float32, shape=(1, 4, 1)) with self.test_session() as sess: outputs, state = rnn.dynamic_rnn( cell, inputs, dtype=dtypes.float32, sequence_length=[4]) state = (state[0], state[1].stack()) - if in_graph_mode: + if not in_eager_mode: outputs, state = sess.run( [outputs, state], feed_dict={ inputs: [[[1], [2], [3], 
[4]]] }) - if in_graph_mode: - self.assertAllEqual(outputs, np.array([[[1], [2], [3], [4]]])) - self.assertEqual(state[0], 4) - self.assertAllEqual(state[1], np.array([[[1]], [[2]], [[3]], [[4]]])) - else: - self.assertAllEqual(outputs.numpy(), np.array([[[1], [2], [3], [4]]])) - self.assertEqual(state[0].numpy(), 4) - self.assertAllEqual(state[1].numpy(), - np.array([[[1]], [[2]], [[3]], [[4]]])) + self.assertAllEqual([[[1], [2], [3], [4]]], outputs) + self.assertAllEqual(4, state[0]) + self.assertAllEqual([[[1]], [[2]], [[3]], [[4]]], state[1]) ######### Benchmarking RNN code diff --git a/tensorflow/python/kernel_tests/slice_op_test.py b/tensorflow/python/kernel_tests/slice_op_test.py index 051a25080b..5fc9bef218 100644 --- a/tensorflow/python/kernel_tests/slice_op_test.py +++ b/tensorflow/python/kernel_tests/slice_op_test.py @@ -283,7 +283,7 @@ class SliceTest(test.TestCase): # unintended behavior is prevented. c = constant_op.constant(5.0) with self.assertRaisesWithPredicateMatch( - TypeError, lambda e: "`Tensor` objects are not iterable" in str(e)): + TypeError, lambda e: "Tensor objects are not iterable" in str(e)): for _ in c: pass diff --git a/tensorflow/python/kernel_tests/template_test.py b/tensorflow/python/kernel_tests/template_test.py index c42ae5a77d..1b935d5286 100644 --- a/tensorflow/python/kernel_tests/template_test.py +++ b/tensorflow/python/kernel_tests/template_test.py @@ -562,7 +562,7 @@ class TemplateTest(test.TestCase): outputs_b, _ = linear1(inputs) self.assertEquals("foo", linear1.variable_scope.name) self.assertEquals("foo/w:0", w1.name) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEquals("foo/add:0", outputs_a.name, "First application of template should get " "same name scope as variables.") @@ -577,7 +577,7 @@ class TemplateTest(test.TestCase): "New template gets a freshly uniquified variable scope " "because 'foo' is already taken.") self.assertEquals("foo_1/w:0", w2.name) - if context.in_graph_mode(): 
+ if not context.executing_eagerly(): self.assertEquals("foo_1_1/add:0", outputs_c.name, "First application of template would get " "same name scope as variables, but 'foo_1' is already " @@ -592,7 +592,7 @@ class TemplateTest(test.TestCase): with variable_scope.variable_scope("foo"): # Create two templates with the same name, ensure scopes are made unique. ta = template.make_template("bar", variable_scoped_function, True) - if context.in_eager_mode(): + if context.executing_eagerly(): tb = template.make_template("s", function_with_side_create, trainable=False) else: diff --git a/tensorflow/python/kernel_tests/tensor_array_ops_test.py b/tensorflow/python/kernel_tests/tensor_array_ops_test.py index 8f09f3d78b..a834675828 100644 --- a/tensorflow/python/kernel_tests/tensor_array_ops_test.py +++ b/tensorflow/python/kernel_tests/tensor_array_ops_test.py @@ -399,28 +399,14 @@ class TensorArrayTest(test.TestCase): def testTensorArrayWriteWrongIndexOrDataTypeFails(self): with self.test_session(use_gpu=True): ta = _make_ta(3, "foo", dtype=dtypes.float32) - in_graph_mode = context.in_graph_mode() # Test writing the wrong datatype - if in_graph_mode: - with self.assertRaisesOpError( - "TensorArray dtype is float but Op is trying to write " - "dtype string"): - self.evaluate(ta.write(0, "wrong_type_scalar").flow) - else: - with self.assertRaisesOpError( - "TensorArray dtype is float32 but Op is trying to write " - "dtype string"): - self.evaluate(ta.write(0, "wrong_type_scalar").flow) + with self.assertRaisesOpError( + "TensorArray dtype is (float|float32) but Op is trying to write " + "dtype string"): + self.evaluate(ta.write(0, "wrong_type_scalar").flow) - if context.in_graph_mode(): - with self.assertRaisesOpError( - "Tried to write to index -1 but array is not " - "resizeable and size is: 3"): - self.evaluate(ta.write(-1, 3.0).flow) - else: - with self.assertRaisesOpError( - r"Writing to negative indices \(index -1\) is not allowed."): - self.evaluate(ta.write(-1, 
3.0).flow) + with self.assertRaisesOpError("index -1"): + self.evaluate(ta.write(-1, 3.0).flow) # Test reading from too large an index with self.assertRaisesOpError( @@ -435,8 +421,8 @@ class TensorArrayTest(test.TestCase): w0 = ta.write(0, [[4.0, 5.0]]) - # Test reading wrong datatype, which is only possible in graph mode - if context.in_graph_mode(): + # Test reading wrong datatype (only possible when constructing graphs). + if not context.executing_eagerly(): r0_bad = gen_data_flow_ops.tensor_array_read_v3( handle=w0.handle, index=0, dtype=dtypes.float64, flow_in=w0.flow) with self.assertRaisesOpError( @@ -444,14 +430,8 @@ class TensorArrayTest(test.TestCase): r0_bad.eval() # Test reading from a negative index, which is not allowed - if context.in_graph_mode(): - with self.assertRaisesOpError( - r"Tried to read from index -1 but array size is: 3"): - self.evaluate(ta.read(-1)) - else: - with self.assertRaisesOpError( - r"Reading from negative indices \(index -1\) is not allowed."): - self.evaluate(ta.read(-1)) + with self.assertRaisesOpError("index -1"): + self.evaluate(ta.read(-1)) # Test reading from too large an index with self.assertRaisesOpError( @@ -467,10 +447,7 @@ class TensorArrayTest(test.TestCase): with self.assertRaisesOpError( "Could not write to TensorArray index 2 because " "it has already been written to."): - if context.in_graph_mode(): - self.evaluate(ta.write(2, 3.0).write(2, 3.0).flow) - else: - self.evaluate(ta.write(2, 3.0).write(2, 3.0)) + self.evaluate(ta.write(2, 3.0).write(2, 3.0).flow) @test_util.run_in_graph_and_eager_modes() def testTensorArrayConcatIncompatibleShapesFails(self): @@ -499,58 +476,40 @@ class TensorArrayTest(test.TestCase): w2 = w1.write(1, [4.0]) w3 = w2.write(2, [[3.0]]) - # The eager-mode implementation just passes up array_op.concat's error - # message. - if context.in_graph_mode(): - with self.assertRaisesOpError( - r"TensorArray has inconsistent shapes. 
Index 0 has " - r"\(excepting dimension 0\) shape: \[\] but index 2 has " - r"\(excepting dimension 0\) shape: \[1\]"): - self.evaluate(w3.concat()) - else: - with self.assertRaisesOpError( - r".*Ranks of all input tensors should match: shape\[0\] " - r"= \[1\] vs\. shape\[2\] = \[1,1\].*"): - self.evaluate(w3.concat()) + # The exact error messages differ between eager execution and graph + # construction as the former bubbles up the error from array_op.concat. + with self.assertRaisesOpError("shape"): + self.evaluate(w3.concat()) @test_util.run_in_graph_and_eager_modes() def testTensorArraySplitIncompatibleShapesFails(self): with self.test_session(use_gpu=True): - in_graph_mode = context.in_graph_mode() + in_eager_mode = context.executing_eagerly() ta = _make_ta(3, "foo") with self.assertRaisesOpError( r"Expected lengths to be a vector, received shape: \[\]"): - if in_graph_mode: + if in_eager_mode: + self.evaluate(ta.split([1.0, 2.0, 3.0], 1)) + else: lengths = array_ops.placeholder(dtypes.int64) ta.split([1.0, 2.0, 3.0], lengths).flow.eval(feed_dict={lengths: 1}) - else: - self.evaluate(ta.split([1.0, 2.0, 3.0], 1)) with self.assertRaisesOpError( r"Expected sum of lengths to be equal to values.shape\[0\], " r"but sum of lengths is 1 and value's shape is: \[3\]"): - if in_graph_mode: - self.evaluate(ta.split([1.0, 2.0, 3.0], [1]).flow) - else: - self.evaluate(ta.split([1.0, 2.0, 3.0], [1])) + self.evaluate(ta.split([1.0, 2.0, 3.0], [1]).flow) ta = _make_ta(1, "baz") with self.assertRaisesOpError( r"Expected value to be at least a vector, but received shape: \[\]"): - if in_graph_mode: - self.evaluate(ta.split(1.0, [1]).flow) - else: - self.evaluate(ta.split(1.0, [1])) + self.evaluate(ta.split(1.0, [1]).flow) ta = _make_ta(2, "buz") with self.assertRaisesOpError( r"TensorArray's size is not equal to the size of lengths " r"\(2 vs. 
1\), and the TensorArray is not marked as " r"dynamically resizeable"): - if in_graph_mode: - self.evaluate(ta.split([1.0], [1]).flow) - else: - self.evaluate(ta.split([1.0], [1])) + self.evaluate(ta.split([1.0], [1]).flow) def _testTensorArrayWriteGradientAddMultipleAdds(self, dtype): with self.test_session(use_gpu=True): @@ -868,14 +827,14 @@ class TensorArrayTest(test.TestCase): vout = func(v0, state0, var) grad_val = -np.arange(3 * 5, dtype=np_dtype).reshape(3, 5) - if context.in_graph_mode(): + if context.executing_eagerly(): + grad_fn = backprop.gradients_function(func) + v0_grad, state0_grad, var_grad = grad_fn(v0, state0, var, dy=grad_val) + else: v0_grad = gradients_impl.gradients([vout], [v0], [grad_val])[0] state0_grad = gradients_impl.gradients([vout], [state0], [grad_val])[0] var_grad = gradients_impl.gradients([vout], [var], [grad_val])[0] variables.global_variables_initializer().run() - else: - grad_fn = backprop.gradients_function(func) - v0_grad, state0_grad, var_grad = grad_fn(v0, state0, var, dy=grad_val) state0_t, var_t, v0_t, vout_t, v0_grad_t, var_grad_t, state0_grad_t = ( self.evaluate( @@ -959,10 +918,10 @@ class TensorArrayTest(test.TestCase): return r x = constant_op.constant(2.0, name="x") - if context.in_graph_mode(): - grad = gradients_impl.gradients(loop(x), [x])[0] - else: + if context.executing_eagerly(): grad = backprop.gradients_function(loop)(x)[0] + else: + grad = gradients_impl.gradients(loop(x), [x])[0] self.assertAllClose(31.0, self.evaluate(grad)) def testSumOfTwoReadVariablesWithoutRepeatGrad(self): @@ -1158,14 +1117,14 @@ class TensorArrayTest(test.TestCase): infer_shape=True) w0 = ta1.split(value, [1, 2]) r0 = w0.read(0) - if context.in_graph_mode(): + if context.executing_eagerly(): + self.assertEqual((1, 2), r0.get_shape()) + self.assertEqual((2, 2), w0.read(1).get_shape()) + else: self.assertEqual(r0.get_shape().ndims, None) self.assertEqual( tensor_shape.TensorShape( ta1.handle.op.get_attr("element_shape")).ndims, 
None) - else: - self.assertEqual((1, 2), r0.get_shape()) - self.assertEqual((2, 2), w0.read(1).get_shape()) def testWriteUnknownShape(self): with self.test_session(use_gpu=True): @@ -1297,13 +1256,13 @@ class TensorArrayTest(test.TestCase): g = func(values) grad_ys = [[[2.0, 3.0], [4.0, 5.0]]] # Test combined gradients + aggregation of read(0) - if context.in_graph_mode(): - grad = gradients_impl.gradients(ys=[g], xs=[values], grad_ys=grad_ys) - g_vals, grad_vals = session.run([[g], grad]) - else: + if context.executing_eagerly(): g_vals = [g] grad_vals = backprop.gradients_function(func)( values, dy=constant_op.constant(grad_ys[0], dtype=dtypes.float32)) + else: + grad = gradients_impl.gradients(ys=[g], xs=[values], grad_ys=grad_ys) + g_vals, grad_vals = session.run([[g], grad]) # Gradients for 8 of the 10 unread components are zero. expected_grad = np.zeros((10, 2)) @@ -1453,13 +1412,13 @@ class TensorArrayTest(test.TestCase): # Tests correct properties on new TensorArrays. self.assertEqual(dtypes.float32, ta0.dtype) self.assertEqual(dtypes.int32, ta1.dtype) - if context.in_graph_mode(): - self.assertEqual(tensor_shape.unknown_shape(), read0.get_shape()) + if context.executing_eagerly(): + self.assertEqual(tensor_shape.scalar(), read0.get_shape()) else: - self.assertEqual(tensor_shape.scalar(), read1.get_shape()) + self.assertEqual(tensor_shape.unknown_shape(), read0.get_shape()) self.assertEqual(tensor_shape.scalar(), read1.get_shape()) - if context.in_graph_mode(): + if not context.executing_eagerly(): variables.global_variables_initializer().run() read0_v, read1_v, size0_v, size1_v = self.evaluate((read0, read1, size0, diff --git a/tensorflow/python/kernel_tests/variable_scope_test.py b/tensorflow/python/kernel_tests/variable_scope_test.py index 8527f116f9..531d0cdf90 100644 --- a/tensorflow/python/kernel_tests/variable_scope_test.py +++ b/tensorflow/python/kernel_tests/variable_scope_test.py @@ -166,12 +166,10 @@ class VariableScopeTest(test.TestCase): 
self.evaluate(variables_lib.variables_initializer([w])) self.assertAllClose(self.evaluate(w.value()), [1, 2, 3]) - if context.in_graph_mode(): - with self.assertRaises(TypeError): - variable_scope.get_variable("x4", initializer={}) - else: - with self.assertRaises(ValueError): - variable_scope.get_variable("x4", initializer={}) + # A quirk to be revisited? + error = ValueError if context.executing_eagerly() else TypeError + with self.assertRaises(error): + variable_scope.get_variable("x4", initializer={}) @test_util.run_in_graph_and_eager_modes() def testInitFromNonInitializer(self): @@ -267,7 +265,7 @@ class VariableScopeTest(test.TestCase): self.assertAllClose(self.evaluate(losses[2]), 0.5) with variable_scope.variable_scope("foo", reuse=True): # reuse=True is for now only supported when eager execution is disabled. - if context.in_graph_mode(): + if not context.executing_eagerly(): v = variable_scope.get_variable("v", []) # "v" is alredy there, reused losses = ops.get_collection(ops.GraphKeys.REGULARIZATION_LOSSES) @@ -374,7 +372,7 @@ class VariableScopeTest(test.TestCase): v = variable_scope.get_variable("v", []) self.evaluate(variables_lib.variables_initializer([v])) self.assertAllClose(self.evaluate(v.value()), 0.3) - if context.in_graph_mode(): + if not context.executing_eagerly(): # Check that we can set reuse. variable_scope.get_variable_scope().reuse_variables() with self.assertRaises(ValueError): # Fail, w does not exist yet. @@ -408,7 +406,7 @@ class VariableScopeTest(test.TestCase): with variable_scope.variable_scope("tower") as tower: with ops.name_scope("scope2") as sc2: self.assertEqual(sc2, "testVarScopeNameScope1/tower/scope2/") - if context.in_graph_mode(): + if not context.executing_eagerly(): with variable_scope.variable_scope( tower): # Re-entering acts like another "tower". 
with ops.name_scope("scope2") as sc2: @@ -422,7 +420,7 @@ class VariableScopeTest(test.TestCase): with variable_scope.variable_scope("tower"): with ops.name_scope("scope2") as sc2: self.assertEqual(sc2, "testVarScopeNameScope2/tower/scope2/") - if context.in_graph_mode(): + if not context.executing_eagerly(): with variable_scope.variable_scope(tower): with ops.name_scope("scope2") as sc2: self.assertEqual(sc2, "testVarScopeNameScope2/tower_1/scope2/") @@ -903,17 +901,15 @@ class VariableScopeTest(test.TestCase): "w", [], collections=["foo"]) self.assertEqual(local_var.name, "outer/w:0") - # Since variable is local, it should be in the local variable collection - # but not the trainable collection. - if context.in_graph_mode(): + if not context.executing_eagerly(): + # Since variable is local, it should be in the local variable collection + # but not the trainable collection. self.assertIn(local_var, ops.get_collection(ops.GraphKeys.LOCAL_VARIABLES)) self.assertIn(local_var, ops.get_collection("foo")) self.assertNotIn(local_var, ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) - - # Check that local variable respects `reuse`. - if context.in_graph_mode(): + # Check that local variable respects `reuse`. with variable_scope.variable_scope(outer, "default", reuse=True): self.assertEqual( variable_scope.get_local_variable("w", []).name, "outer/w:0") diff --git a/tensorflow/python/layers/base.py b/tensorflow/python/layers/base.py index 15f72786de..e9066d3fda 100644 --- a/tensorflow/python/layers/base.py +++ b/tensorflow/python/layers/base.py @@ -115,7 +115,7 @@ class Layer(checkpointable.CheckpointableBase): # Provides information about which inputs are compatible with the layer. self.input_spec = None - if activity_regularizer and context.in_eager_mode(): + if activity_regularizer and context.executing_eagerly(): raise ValueError( ('Activity regularization is not supported when executing eagerly. 
' 'Got activity_regularizer=%s') % (activity_regularizer,)) @@ -228,7 +228,7 @@ class Layer(checkpointable.CheckpointableBase): @property def updates(self): - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.updates not supported in Eager mode.') if not self.trainable and not self.stateful: return [] @@ -260,7 +260,7 @@ class Layer(checkpointable.CheckpointableBase): have is available at runtime. A step counter might fall into this category. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return # Updates already applied when in eager mode. updates = _to_list(updates) @@ -286,7 +286,7 @@ class Layer(checkpointable.CheckpointableBase): Raises: RuntimeError: If called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('`get_updates_for()` not supported in Eager mode.') # Updates disabled if layer is not trainable and not explicitly stateful. @@ -317,7 +317,7 @@ class Layer(checkpointable.CheckpointableBase): Returns: A list of tensors. """ - if context.in_eager_mode(): + if context.executing_eagerly(): # _losses may only contain variable regularization losses when executing # eagerly, and they have been saved as lambdas to be executed when # requested. @@ -355,7 +355,7 @@ class Layer(checkpointable.CheckpointableBase): Raises: RuntimeError: If called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): # TODO(fchollet): it should be possible (and highly desirable) to support # `add_loss` in eager mode. This allows great convenience and flexibility # in defining custom losses on the fly (e.g. in VAEs). @@ -389,7 +389,7 @@ class Layer(checkpointable.CheckpointableBase): Raises: RuntimeError: If called in Eager mode. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.get_losses_for not supported in Eager mode.') if inputs is None: @@ -509,7 +509,7 @@ class Layer(checkpointable.CheckpointableBase): # will occur; it should be None if and only if initialization will take # place in the eager context. init_graph = None - if context.in_graph_mode(): + if not context.executing_eagerly(): default_graph = ops.get_default_graph() if default_graph.building_function: with ops.init_scope(): @@ -517,7 +517,7 @@ class Layer(checkpointable.CheckpointableBase): # will be lifted; if initialization ops will be lifted into # the eager context, then there is nothing to retrieve, since variable # collections are not supported when eager execution is enabled. - if context.in_graph_mode(): + if not context.executing_eagerly(): init_graph = ops.get_default_graph() existing_variables = set(tf_variables.global_variables()) else: @@ -624,17 +624,17 @@ class Layer(checkpointable.CheckpointableBase): self._set_scope(kwargs.pop('scope', None)) input_list = nest.flatten(inputs) - in_graph_mode = context.in_graph_mode() + build_graph = not context.executing_eagerly() in_deferred_mode = isinstance(input_list[0], _DeferredTensor) # Ensure the Layer, if being reused, is working with inputs from # the same graph as where it was created. - if in_graph_mode: + if build_graph: try: # Set layer's "graph" at build time self._graph = ops._get_graph_from_inputs(input_list, graph=self._graph) # pylint: disable=protected-access except ValueError as e: raise ValueError('Input graph and Layer graph are not the same: %s' % e) - if in_graph_mode or in_deferred_mode: + if build_graph or in_deferred_mode: user_kwargs = copy.copy(kwargs) # Handle Keras mask propagation from previous layer to current layer. 
@@ -669,13 +669,14 @@ class Layer(checkpointable.CheckpointableBase): with scope_context_manager as scope: with ops.name_scope(self._name_scope_name(scope)): if not self.built: - if not in_graph_mode: + if not build_graph: # Activity regularization is currently unsupported in Eager mode. if self._activity_regularizer: - raise ValueError('activity_regularizer currently unsupported in ' - 'Eager mode. Found an activity_regularizer in ' - '%s(%s).' % (self.__class__.__name__, self)) - if not in_graph_mode and not in_deferred_mode: + raise ValueError( + 'activity_regularizer currently unsupported with ' + 'eager execution enabled. Found an activity_regularizer in ' + '%s(%s).' % (self.__class__.__name__, self)) + if not build_graph and not in_deferred_mode: # TODO(agarwal): support _keras_history in Eager mode. for x in input_list: if hasattr(x, '_keras_history'): @@ -706,7 +707,7 @@ class Layer(checkpointable.CheckpointableBase): if call_has_scope_arg: kwargs['scope'] = scope # Check input assumptions set after layer building, e.g. input shape. - if in_graph_mode or in_deferred_mode: + if build_graph or in_deferred_mode: self._assert_input_compatibility(inputs) if not in_deferred_mode: @@ -730,7 +731,7 @@ class Layer(checkpointable.CheckpointableBase): if len(outputs) == 1: outputs = outputs[0] - if in_graph_mode: + if build_graph: # Apply activity regularization. # Note that it should be applied every time the layer creates a new # output, since it is output-specific. @@ -752,7 +753,7 @@ class Layer(checkpointable.CheckpointableBase): else: outputs._keras_mask = output_mask # pylint: disable=protected-access - if in_graph_mode: + if build_graph: # If all input tensors have history metadata, # we update the output tensors # with corresponding history metadata, thus eventually allowing to use @@ -775,7 +776,7 @@ class Layer(checkpointable.CheckpointableBase): # Update global default collections. 
_add_elements_to_collection(self.updates, ops.GraphKeys.UPDATE_OPS) - if in_deferred_mode or in_graph_mode: + if in_deferred_mode or build_graph: if _have_all_keras_metadata(inputs): # Add an inbound node to the layer, so it can keep track of this call. # This updates the layer history of the output tensor(s). @@ -787,7 +788,7 @@ class Layer(checkpointable.CheckpointableBase): @property def graph(self): - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.graph not supported in Eager mode.') return self._graph @@ -891,7 +892,7 @@ class Layer(checkpointable.CheckpointableBase): mode. ValueError: If the index provided does not match any node. """ - assert context.in_graph_mode() + assert not context.executing_eagerly() if not self._inbound_nodes: raise RuntimeError('The layer has never been called ' 'and thus has no defined ' + attr_name + '.') @@ -921,7 +922,7 @@ class Layer(checkpointable.CheckpointableBase): Raises: RuntimeError: If called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( 'Layer.get_input_shape_at not supported in Eager mode.') return self._get_node_attribute_at_index(node_index, 'input_shapes', @@ -943,7 +944,7 @@ class Layer(checkpointable.CheckpointableBase): Raises: RuntimeError: If called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( 'Layer.get_output_shape_at not supported in Eager mode.') return self._get_node_attribute_at_index(node_index, 'output_shapes', @@ -964,7 +965,7 @@ class Layer(checkpointable.CheckpointableBase): Raises: RuntimeError: If called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.get_input_at not supported in Eager mode.') return self._get_node_attribute_at_index(node_index, 'input_tensors', 'input') @@ -984,7 +985,7 @@ class Layer(checkpointable.CheckpointableBase): Raises: RuntimeError: If called in Eager mode. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.get_output_at not supported in Eager mode.') return self._get_node_attribute_at_index(node_index, 'output_tensors', 'output') @@ -1007,7 +1008,7 @@ class Layer(checkpointable.CheckpointableBase): RuntimeError: If called in Eager mode. AttributeError: If no inbound nodes are found. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.input not supported in Eager mode.') if not self._inbound_nodes: raise AttributeError('Layer ' + self.name + @@ -1029,7 +1030,7 @@ class Layer(checkpointable.CheckpointableBase): layers. RuntimeError: if called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.output not supported in Eager mode.') if not self._inbound_nodes: raise AttributeError('Layer ' + self.name + ' has no inbound nodes.') @@ -1051,7 +1052,7 @@ class Layer(checkpointable.CheckpointableBase): AttributeError: if the layer has no defined input_shape. RuntimeError: if called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.input_shape not supported in Eager mode.') if not self._inbound_nodes: raise AttributeError('The layer has never been called ' @@ -1112,7 +1113,7 @@ class Layer(checkpointable.CheckpointableBase): AttributeError: if the layer has no defined output shape. RuntimeError: if called in Eager mode. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Layer.output_shape not supported in Eager mode.') if not self._inbound_nodes: raise AttributeError('The layer has never been called ' @@ -1470,7 +1471,7 @@ def _to_list(x): def _add_elements_to_collection(elements, collection_list): - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('Using collections from Layers not supported in Eager ' 'mode. 
Tried to add %s to %s' % (elements, collection_list)) diff --git a/tensorflow/python/layers/base_test.py b/tensorflow/python/layers/base_test.py index 1ee9ec7f7a..9ed4afeaba 100644 --- a/tensorflow/python/layers/base_test.py +++ b/tensorflow/python/layers/base_test.py @@ -44,7 +44,7 @@ class BaseLayerTest(test.TestCase): self.assertEqual(layer.variables, []) self.assertEqual(layer.trainable_variables, []) self.assertEqual(layer.non_trainable_variables, []) - if context.in_graph_mode(): + if not context.executing_eagerly(): # updates, losses only supported in GRAPH mode self.assertEqual(layer.updates, []) self.assertEqual(layer.losses, []) @@ -63,7 +63,7 @@ class BaseLayerTest(test.TestCase): self.assertEqual(layer.variables, [variable]) self.assertEqual(layer.trainable_variables, [variable]) self.assertEqual(layer.non_trainable_variables, []) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual( layer.variables, ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)) @@ -77,7 +77,7 @@ class BaseLayerTest(test.TestCase): self.assertEqual(layer.variables, [variable, variable_2]) self.assertEqual(layer.trainable_variables, [variable]) self.assertEqual(layer.non_trainable_variables, [variable_2]) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual( len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1) @@ -161,7 +161,7 @@ class BaseLayerTest(test.TestCase): inputs = random_ops.random_uniform((5,), seed=1) outputs = layer.apply(inputs) self.assertEqual(layer.built, True) - if context.in_graph_mode(): + if not context.executing_eagerly(): # op is only supported in GRAPH mode self.assertEqual(outputs.op.name, 'my_layer/Square') @@ -210,7 +210,7 @@ class BaseLayerTest(test.TestCase): inputs = random_ops.random_uniform((5,), seed=1) outputs = layer.apply(inputs) self.assertEqual(layer.built, True) - if context.in_graph_mode(): + if not context.executing_eagerly(): # op only supported in GRAPH mode. 
self.assertEqual(outputs.op.name, 'my_layer/Square') @@ -280,7 +280,7 @@ class BaseLayerTest(test.TestCase): def call(self, inputs): return inputs - if context.in_graph_mode(): + if not context.executing_eagerly(): layer = CustomerLayer() with self.assertRaisesRegexp(ValueError, r'requires a defined rank'): layer.apply(array_ops.placeholder('int32')) @@ -307,7 +307,7 @@ class BaseLayerTest(test.TestCase): def call(self, inputs): return inputs - if context.in_graph_mode(): + if not context.executing_eagerly(): layer = CustomerLayer() with self.assertRaisesRegexp(ValueError, r'requires a defined rank'): layer.apply(array_ops.placeholder('int32')) @@ -335,7 +335,7 @@ class BaseLayerTest(test.TestCase): def call(self, inputs): return inputs - if context.in_graph_mode(): + if not context.executing_eagerly(): layer = CustomerLayer() with self.assertRaisesRegexp(ValueError, r'requires a defined rank'): layer.apply(array_ops.placeholder('int32')) @@ -430,7 +430,7 @@ class BaseLayerTest(test.TestCase): layer.apply(constant_op.constant(1)) # Works - if context.in_graph_mode(): + if not context.executing_eagerly(): layer.apply(array_ops.placeholder('int32')) layer.apply(array_ops.placeholder('int32', shape=(2, 3))) @@ -453,13 +453,7 @@ class BaseLayerTest(test.TestCase): return {'l' + key: inputs[key] for key in inputs} layer = DictLayer() - if context.in_graph_mode(): - i1 = array_ops.placeholder('int32') - i2 = array_ops.placeholder('float32') - result = layer.apply({'abel': i1, 'ogits': i2}) - self.assertTrue(isinstance(result, dict)) - self.assertEqual(set(['label', 'logits']), set(result.keys())) - else: + if context.executing_eagerly(): i1 = constant_op.constant(3) i2 = constant_op.constant(4.0) result = layer.apply({'abel': i1, 'ogits': i2}) @@ -467,6 +461,12 @@ class BaseLayerTest(test.TestCase): self.assertEqual(set(['label', 'logits']), set(result.keys())) self.assertEqual(3, result['label'].numpy()) self.assertEqual(4.0, result['logits'].numpy()) + else: + i1 = 
array_ops.placeholder('int32') + i2 = array_ops.placeholder('float32') + result = layer.apply({'abel': i1, 'ogits': i2}) + self.assertTrue(isinstance(result, dict)) + self.assertEqual(set(['label', 'logits']), set(result.keys())) def testActivityRegularizer(self): regularizer = math_ops.reduce_sum diff --git a/tensorflow/python/layers/convolutional.py b/tensorflow/python/layers/convolutional.py index bb10fe5e8b..74e7c63fb3 100644 --- a/tensorflow/python/layers/convolutional.py +++ b/tensorflow/python/layers/convolutional.py @@ -1664,7 +1664,7 @@ class Conv2DTranspose(Conv2D): padding=self.padding.upper(), data_format=utils.convert_data_format(self.data_format, ndim=4)) - if context.in_graph_mode(): + if not context.executing_eagerly(): # Infer the static output shape: out_shape = inputs.get_shape().as_list() out_shape[c_axis] = self.filters @@ -1969,7 +1969,7 @@ class Conv3DTranspose(Conv3D): data_format=utils.convert_data_format(self.data_format, ndim=5), padding=self.padding.upper()) - if context.in_graph_mode(): + if not context.executing_eagerly(): # Infer the static output shape: out_shape = inputs.get_shape().as_list() out_shape[c_axis] = self.filters diff --git a/tensorflow/python/layers/core.py b/tensorflow/python/layers/core.py index bdbbc59eaf..e598d9f83a 100644 --- a/tensorflow/python/layers/core.py +++ b/tensorflow/python/layers/core.py @@ -156,7 +156,7 @@ class Dense(base.Layer): outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1], [0]]) # Reshape the output back to the original ndim of the input. 
- if context.in_graph_mode(): + if not context.executing_eagerly(): output_shape = shape[:-1] + [self.units] outputs.set_shape(output_shape) else: @@ -374,7 +374,7 @@ class Flatten(base.Layer): def call(self, inputs): outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1)) - if context.in_graph_mode(): + if not context.executing_eagerly(): outputs.set_shape(self.compute_output_shape(inputs.get_shape())) return outputs diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py index 15ce6cba21..ae19866d7a 100644 --- a/tensorflow/python/layers/core_test.py +++ b/tensorflow/python/layers/core_test.py @@ -77,7 +77,7 @@ class DenseTest(test.TestCase): self.assertListEqual(dense.trainable_variables, [dense.kernel, dense.bias]) self.assertListEqual(dense.non_trainable_variables, []) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual( len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 2) self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') @@ -98,7 +98,7 @@ class DenseTest(test.TestCase): self.assertListEqual(dense.variables, [dense.kernel]) self.assertListEqual(dense.trainable_variables, [dense.kernel]) self.assertListEqual(dense.non_trainable_variables, []) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual( len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 1) self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') @@ -113,7 +113,7 @@ class DenseTest(test.TestCase): self.assertListEqual(dense.non_trainable_variables, [dense.kernel, dense.bias]) self.assertListEqual(dense.trainable_variables, []) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual( len(ops.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES)), 0) @@ -162,13 +162,13 @@ class DenseTest(test.TestCase): dense = core_layers.Dense(2, activation=nn_ops.relu, name='dense1') inputs = random_ops.random_uniform((5, 3), seed=1) outputs = dense(inputs) 
- if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual(outputs.op.name, 'dense1/Relu') dense = core_layers.Dense(2, name='dense2') inputs = random_ops.random_uniform((5, 3), seed=1) outputs = dense(inputs) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual(outputs.op.name, 'dense2/BiasAdd') def testActivityRegularizer(self): @@ -374,7 +374,7 @@ class DropoutTest(test.TestCase): dp = core_layers.Dropout(0.5) inputs = array_ops.ones((5, 3)) dropped = dp.apply(inputs, training=True) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(variables.global_variables_initializer()) np_output = self.evaluate(dropped) self.assertAlmostEqual(0., np_output.min()) diff --git a/tensorflow/python/layers/normalization.py b/tensorflow/python/layers/normalization.py index d83292b809..c23d755a8e 100644 --- a/tensorflow/python/layers/normalization.py +++ b/tensorflow/python/layers/normalization.py @@ -338,8 +338,9 @@ class BatchNormalization(base.Layer): return var with ops.device(None): - device = ((lambda _: self.moving_mean.device) - if context.in_graph_mode() else self.moving_mean.device) + device = ( + self.moving_mean.device if context.executing_eagerly() else + (lambda _: self.moving_mean.device)) with ops.device(device): self.renorm_mean = _renorm_variable('renorm_mean', param_shape) self.renorm_mean_weight = _renorm_variable('renorm_mean_weight', ()) @@ -347,8 +348,9 @@ class BatchNormalization(base.Layer): # renorm_stddev_weight. This allows us to (1) mix the average # stddev with the minibatch stddev early in training, and (2) compute # the unbiased average stddev by dividing renorm_stddev by the weight. 
- device = ((lambda _: self.moving_variance.device) - if context.in_graph_mode() else self.moving_variance.device) + device = ( + self.moving_variance.device if context.executing_eagerly() else + (lambda _: self.moving_variance.device)) with ops.device(device): self.renorm_stddev = _renorm_variable('renorm_stddev', param_shape) self.renorm_stddev_weight = _renorm_variable( @@ -420,7 +422,7 @@ class BatchNormalization(base.Layer): one_minus_decay) variance_update = self._assign_moving_average(self.moving_variance, variance, one_minus_decay) - if context.in_graph_mode(): + if not context.executing_eagerly(): # Note that in Eager mode, the updates are already executed when running # assign_moving_averages. So we do not need to put them into # collections. @@ -493,7 +495,7 @@ class BatchNormalization(base.Layer): return (r, d, new_mean, new_variance) def call(self, inputs, training=False): - in_eager_mode = context.in_eager_mode() + in_eager_mode = context.executing_eagerly() if self.virtual_batch_size is not None: # Virtual batches (aka ghost batches) can be simulated by reshaping the # Tensor and reusing the existing batch norm implementation @@ -610,7 +612,7 @@ class BatchNormalization(base.Layer): training, lambda: _do_update(self.moving_variance, new_variance), lambda: self.moving_variance) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.add_update(mean_update, inputs=inputs) self.add_update(variance_update, inputs=inputs) diff --git a/tensorflow/python/ops/array_grad.py b/tensorflow/python/ops/array_grad.py index 925cf8ef32..3c6a5c9e56 100644 --- a/tensorflow/python/ops/array_grad.py +++ b/tensorflow/python/ops/array_grad.py @@ -80,7 +80,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): def _ExtractInputShapes(inputs): """Extract the shapes of a set of input tensors.""" - if not context.in_graph_mode(): + if context.executing_eagerly(): return array_ops.shape_n(inputs) sizes = [] fully_known = True @@ 
-106,7 +106,7 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index): out_grads = [] if isinstance(grad, ops.Tensor): - if context.in_eager_mode(): + if context.executing_eagerly(): # Using mod here for convenience since concat_dim is already verified # in concat implementation to be within the allowed [-rank, rank) range. non_neg_concat_dim = ( @@ -428,7 +428,7 @@ def _GatherV2Grad(op, grad): # For axis 0 gathers, build an appropriately shaped IndexedSlices. if axis_static == 0: - if context.in_eager_mode(): + if context.executing_eagerly(): params_tail_shape = params_shape.cpu()[1:] else: params_tail_shape = params_shape[1:] @@ -578,7 +578,7 @@ def _TileGrad(op, grad): axes = math_ops.range(0, array_ops.size(split_shape), 2) input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes) # Fix shape inference - if context.in_graph_mode(): + if not context.executing_eagerly(): input_grad.set_shape(op.inputs[0].get_shape()) return [input_grad, None] diff --git a/tensorflow/python/ops/array_ops.py b/tensorflow/python/ops/array_ops.py index 9108fe759b..b4e1b9d781 100644 --- a/tensorflow/python/ops/array_ops.py +++ b/tensorflow/python/ops/array_ops.py @@ -128,9 +128,7 @@ def identity(input, name=None): # pylint: disable=redefined-builtin Returns: A `Tensor`. Has the same type as `input`. """ - if context.in_graph_mode(): - return gen_array_ops.identity(input, name=name) - else: + if context.executing_eagerly(): input = ops.convert_to_tensor(input) in_device = input.device # TODO(ashankar): Does 'identity' need to invoke execution callbacks? 
@@ -140,6 +138,8 @@ def identity(input, name=None): # pylint: disable=redefined-builtin if context_device != in_device: return input._copy() # pylint: disable=protected-access return input + else: + return gen_array_ops.identity(input, name=name) # pylint: disable=redefined-builtin,protected-access @@ -305,7 +305,7 @@ def shape_internal(input, name=None, optimize=True, out_type=dtypes.int32): sparse_tensor.SparseTensorValue)): return gen_math_ops.cast(input.dense_shape, out_type) else: - if context.in_graph_mode(): + if not context.executing_eagerly(): input_tensor = ops.convert_to_tensor(input) input_shape = input_tensor.get_shape() if optimize and input_shape.is_fully_defined(): @@ -330,7 +330,7 @@ def shape_n(input, out_type=dtypes.int32, name=None): """ output = gen_array_ops.shape_n(input, out_type=out_type, name=name) - if context.in_graph_mode(): + if not context.executing_eagerly(): for i, input_tensor in enumerate(input): input_tensor = ops.convert_to_tensor(input_tensor) input_shape = input_tensor.get_shape() @@ -385,9 +385,8 @@ def size_internal(input, name=None, optimize=True, out_type=dtypes.int32): Returns: A `Tensor` of type `out_type`. Defaults to `tf.int32`. """ - if context.in_eager_mode() and not isinstance( - input, (sparse_tensor.SparseTensor, - sparse_tensor.SparseTensorValue)): + if context.executing_eagerly() and not isinstance( + input, (sparse_tensor.SparseTensor, sparse_tensor.SparseTensorValue)): return np.prod(ops.convert_to_tensor(input)._shape_tuple()) # pylint: disable=protected-access with ops.name_scope(name, "Size", [input]) as name: if isinstance(input, (sparse_tensor.SparseTensor, @@ -783,7 +782,7 @@ def strided_slice(input_, new_axis_mask=new_axis_mask, shrink_axis_mask=shrink_axis_mask) - if context.in_graph_mode(): + if not context.executing_eagerly(): # TODO(apassos) In eager mode assignment will be done by overriding # __setitem__ instead. 
op.assign = assign @@ -1457,7 +1456,7 @@ def transpose(a, perm=None, name="transpose", conjugate=False): ret = transpose_fn(a, perm, name=name) # NOTE(mrry): Setting the shape explicitly because # reverse is not handled by the shape function. - if context.in_graph_mode(): + if not context.executing_eagerly(): input_shape = ret.op.inputs[0].get_shape().dims if input_shape is not None: ret.set_shape(input_shape[::-1]) @@ -1622,7 +1621,7 @@ def zeros_like(tensor, dtype=None, name=None, optimize=True): with ops.name_scope(name, "zeros_like", [tensor]) as name: tensor = ops.convert_to_tensor(tensor, name="tensor") - if context.in_eager_mode(): + if context.executing_eagerly(): if dtype is not None and dtype != tensor.dtype: return zeros( shape_internal(tensor, optimize=optimize), dtype=dtype, name=name) @@ -1678,7 +1677,7 @@ def ones_like(tensor, dtype=None, name=None, optimize=True): if dtype is None: dtype = tensor.dtype ret = ones(ones_shape, dtype=dtype, name=name) - if context.in_graph_mode(): + if not context.executing_eagerly(): ret.set_shape(tensor.get_shape()) return ret @@ -1759,7 +1758,7 @@ def placeholder(dtype, shape=None, name=None): Raises: RuntimeError: if eager execution is enabled """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("tf.placeholder() is not compatible with " "eager execution.") @@ -1822,7 +1821,7 @@ def sparse_placeholder(dtype, shape=None, name=None): Raises: RuntimeError: if eager execution is enabled """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("tf.placeholder() is not compatible with " "eager execution.") @@ -1921,7 +1920,7 @@ def pad(tensor, paddings, mode="CONSTANT", name=None, constant_values=0): # pyl raise ValueError("Unknown padding mode: %s" % mode) # Restore shape information where possible. 
- if context.in_graph_mode(): + if not context.executing_eagerly(): paddings_constant = tensor_util.constant_value( result.op.inputs[1], partial=True) input_shape = result.op.inputs[0].shape diff --git a/tensorflow/python/ops/check_ops.py b/tensorflow/python/ops/check_ops.py index 0fd6e29a49..7d6e047d7c 100644 --- a/tensorflow/python/ops/check_ops.py +++ b/tensorflow/python/ops/check_ops.py @@ -169,7 +169,7 @@ def assert_negative(x, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_negative', [x, data]): x = ops.convert_to_tensor(x, name='x') if data is None: - if context.in_eager_mode(): + if context.executing_eagerly(): name = _shape_and_dtype_str(x) else: name = x.name @@ -210,7 +210,7 @@ def assert_positive(x, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_positive', [x, data]): x = ops.convert_to_tensor(x, name='x') if data is None: - if context.in_eager_mode(): + if context.executing_eagerly(): name = _shape_and_dtype_str(x) else: name = x.name @@ -251,7 +251,7 @@ def assert_non_negative(x, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_non_negative', [x, data]): x = ops.convert_to_tensor(x, name='x') if data is None: - if context.in_eager_mode(): + if context.executing_eagerly(): name = _shape_and_dtype_str(x) else: name = x.name @@ -293,7 +293,7 @@ def assert_non_positive(x, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_non_positive', [x, data]): x = ops.convert_to_tensor(x, name='x') if data is None: - if context.in_eager_mode(): + if context.executing_eagerly(): name = _shape_and_dtype_str(x) else: name = x.name @@ -343,7 +343,7 @@ def assert_equal(x, y, data=None, summarize=None, message=None, name=None): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - if context.in_eager_mode(): + if context.executing_eagerly(): eq = math_ops.equal(x, y) condition = 
math_ops.reduce_all(eq) if not condition: @@ -435,7 +435,7 @@ def assert_none_equal( with ops.name_scope(name, 'assert_none_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - if context.in_eager_mode(): + if context.executing_eagerly(): x_name = _shape_and_dtype_str(x) y_name = _shape_and_dtype_str(y) else: @@ -512,7 +512,7 @@ def assert_near( rtol = ops.convert_to_tensor(rtol, name='rtol', dtype=x.dtype) atol = ops.convert_to_tensor(atol, name='atol', dtype=x.dtype) - if context.in_eager_mode(): + if context.executing_eagerly(): x_name = _shape_and_dtype_str(x) y_name = _shape_and_dtype_str(y) else: @@ -562,7 +562,7 @@ def assert_less(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_less', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - if context.in_eager_mode(): + if context.executing_eagerly(): x_name = _shape_and_dtype_str(x) y_name = _shape_and_dtype_str(y) else: @@ -610,7 +610,7 @@ def assert_less_equal(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_less_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - if context.in_eager_mode(): + if context.executing_eagerly(): x_name = _shape_and_dtype_str(x) y_name = _shape_and_dtype_str(y) else: @@ -658,7 +658,7 @@ def assert_greater(x, y, data=None, summarize=None, message=None, name=None): with ops.name_scope(name, 'assert_greater', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, name='y') - if context.in_eager_mode(): + if context.executing_eagerly(): x_name = _shape_and_dtype_str(x) y_name = _shape_and_dtype_str(y) else: @@ -708,7 +708,7 @@ def assert_greater_equal(x, y, data=None, summarize=None, message=None, with ops.name_scope(name, 'assert_greater_equal', [x, y, data]): x = ops.convert_to_tensor(x, name='x') y = ops.convert_to_tensor(y, 
name='y') - if context.in_eager_mode(): + if context.executing_eagerly(): x_name = _shape_and_dtype_str(x) y_name = _shape_and_dtype_str(y) else: @@ -808,7 +808,7 @@ def assert_rank(x, rank, data=None, summarize=None, message=None, name=None): static_condition = lambda actual_rank, given_rank: actual_rank == given_rank dynamic_condition = math_ops.equal - if context.in_eager_mode(): + if context.executing_eagerly(): name = '' else: name = x.name @@ -873,7 +873,7 @@ def assert_rank_at_least( static_condition = lambda actual_rank, given_rank: actual_rank >= given_rank dynamic_condition = math_ops.greater_equal - if context.in_eager_mode(): + if context.executing_eagerly(): name = '' else: name = x.name @@ -1001,7 +1001,7 @@ def assert_rank_in( ranks = tuple([ops.convert_to_tensor(rank, name='rank') for rank in ranks]) message = message or '' - if context.in_eager_mode(): + if context.executing_eagerly(): name = '' else: name = x.name @@ -1054,7 +1054,7 @@ def assert_integer(x, message=None, name=None): with ops.name_scope(name, 'assert_integer', [x]): x = ops.convert_to_tensor(x, name='x') if not x.dtype.is_integer: - if context.in_eager_mode(): + if context.executing_eagerly(): name = 'tensor' else: name = x.name @@ -1087,12 +1087,11 @@ def assert_type(tensor, tf_type, message=None, name=None): with ops.name_scope(name, 'assert_type', [tensor]): tensor = ops.convert_to_tensor(tensor, name='tensor') if tensor.dtype != tf_type: - if context.in_graph_mode(): - raise TypeError( - '%s %s must be of type %s' % (message, tensor.name, tf_type)) + if context.executing_eagerly(): + raise TypeError('%s tensor must be of type %s' % (message, tf_type)) else: - raise TypeError( - '%s tensor must be of type %s' % (message, tf_type)) + raise TypeError('%s %s must be of type %s' % (message, tensor.name, + tf_type)) return control_flow_ops.no_op('statically_determined_correct_type') @@ -1240,7 +1239,7 @@ def assert_scalar(tensor, name=None): tensor = ops.convert_to_tensor(tensor, 
name=name_scope) shape = tensor.get_shape() if shape.ndims != 0: - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError('Expected scalar shape, saw shape: %s.' % (shape,)) else: diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index 4e524846cc..a2f52de749 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -152,7 +152,7 @@ def Assert(condition, data, summarize=None, name=None): @compatibility{eager} `tf.errors.InvalidArgumentError` if `condition` is not true """ - if context.in_eager_mode(): + if context.executing_eagerly(): if not condition: xs = ops.convert_n_to_tensor(data) data_str = [_summarize_eager(x, summarize) for x in xs] @@ -178,7 +178,7 @@ def Assert(condition, data, summarize=None, name=None): condition, data, summarize, name="Assert") guarded_assert = cond(condition, no_op, true_assert, name="AssertGuard") - if context.in_eager_mode(): + if context.executing_eagerly(): return return guarded_assert.op @@ -2025,7 +2025,7 @@ def cond(pred, raise TypeError("false_fn must be callable.") with ops.name_scope(name, "cond", [pred]): - if context.in_eager_mode(): + if context.executing_eagerly(): if pred: return _UnpackIfSingleton(true_fn()) return _UnpackIfSingleton(false_fn()) @@ -3177,7 +3177,7 @@ def while_loop(cond, math_ops.logical_and(i < maximum_iterations, orig_cond(*lv))) body = lambda i, lv: (i + 1, orig_body(*lv)) - if context.in_eager_mode(): + if context.executing_eagerly(): while cond(*loop_vars): loop_vars = body(*loop_vars) if maximum_iterations is not None: @@ -3271,7 +3271,7 @@ def with_dependencies(dependencies, output_tensor, name=None): Raises: TypeError: if `output_tensor` is not a `Tensor` or `IndexedSlices`. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): return output_tensor with ops.name_scope(name, "control_dependency", list(dependencies) + [output_tensor]) as name: @@ -3316,7 +3316,7 @@ def group(*inputs, **kwargs): Raises: ValueError: If an unknown keyword argument is provided. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return None name = kwargs.pop("name", None) if kwargs: @@ -3396,7 +3396,7 @@ def tuple(tensors, name=None, control_inputs=None): # pylint: disable=redefined objects. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return tensors with ops.name_scope(name, "tuple", tensors) as name: tensors = [t if (isinstance(t, ops.Operation) diff --git a/tensorflow/python/ops/custom_gradient.py b/tensorflow/python/ops/custom_gradient.py index f199ba8fd4..9eacac1b37 100644 --- a/tensorflow/python/ops/custom_gradient.py +++ b/tensorflow/python/ops/custom_gradient.py @@ -92,7 +92,7 @@ def custom_gradient(f): def decorated(*args, **kwargs): """Decorated function with custom gradient.""" - if context.in_graph_mode(): + if not context.executing_eagerly(): if kwargs: raise ValueError( "The custom_gradient decorator currently suports keywords " diff --git a/tensorflow/python/ops/data_flow_ops.py b/tensorflow/python/ops/data_flow_ops.py index 052caffd49..d2cc87555f 100644 --- a/tensorflow/python/ops/data_flow_ops.py +++ b/tensorflow/python/ops/data_flow_ops.py @@ -159,7 +159,7 @@ class QueueBase(object): ValueError: If one of the arguments is invalid. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "Queues are not supported when eager execution is enabled. 
" "Instead, please use tf.data to get data into your model.") @@ -177,10 +177,10 @@ class QueueBase(object): else: self._names = None self._queue_ref = queue_ref - if context.in_graph_mode(): - self._name = self._queue_ref.op.name.split("/")[-1] - else: + if context.executing_eagerly(): self._name = context.context().scope_name + else: + self._name = self._queue_ref.op.name.split("/")[-1] @staticmethod def from_list(index, queues): @@ -231,9 +231,9 @@ class QueueBase(object): @property def name(self): """The name of the underlying queue.""" - if context.in_graph_mode(): - return self._queue_ref.op.name - return self._name + if context.executing_eagerly(): + return self._name + return self._queue_ref.op.name @property def dtypes(self): @@ -444,7 +444,7 @@ class QueueBase(object): # NOTE(mrry): Not using a shape function because we need access to # the `QueueBase` object. - if context.in_graph_mode(): + if not context.executing_eagerly(): op = ret[0].op for output, shape in zip(op.values(), self._shapes): output.set_shape(shape) @@ -484,7 +484,7 @@ class QueueBase(object): # NOTE(mrry): Not using a shape function because we need access to # the Queue object. - if context.in_graph_mode(): + if not context.executing_eagerly(): op = ret[0].op batch_dim = tensor_shape.Dimension( tensor_util.constant_value(op.inputs[1])) @@ -528,7 +528,7 @@ class QueueBase(object): # NOTE(mrry): Not using a shape function because we need access to # the Queue object. 
- if context.in_graph_mode(): + if not context.executing_eagerly(): op = ret[0].op for output, shape in zip(op.values(), self._shapes): output.set_shape(tensor_shape.TensorShape([None]).concatenate(shape)) @@ -990,10 +990,10 @@ class Barrier(object): shapes=self._shapes, shared_name=shared_name, name=name) - if context.in_graph_mode(): - self._name = self._barrier_ref.op.name.split("/")[-1] - else: + if context.executing_eagerly(): self._name = context.context().scope_name + else: + self._name = self._barrier_ref.op.name.split("/")[-1] @property def barrier_ref(self): @@ -1003,9 +1003,9 @@ class Barrier(object): @property def name(self): """The name of the underlying barrier.""" - if context.in_graph_mode(): - return self._barrier_ref.op.name - return self._name + if context.executing_eagerly(): + return self._name + return self._barrier_ref.op.name def insert_many(self, component_index, keys, values, name=None): """For each key, assigns the respective value to the specified component. @@ -1083,7 +1083,7 @@ class Barrier(object): # NOTE(mrry): Not using a shape function because we need access to # the Barrier object. 
- if context.in_graph_mode(): + if not context.executing_eagerly(): op = ret[0].op if allow_small_batch: batch_dim = None @@ -1183,10 +1183,10 @@ class ConditionalAccumulatorBase(object): else: self._shape = tensor_shape.unknown_shape() self._accumulator_ref = accumulator_ref - if context.in_graph_mode(): - self._name = self._accumulator_ref.op.name.split("/")[-1] - else: + if context.executing_eagerly(): self._name = context.context().scope_name + else: + self._name = self._accumulator_ref.op.name.split("/")[-1] @property def accumulator_ref(self): diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index 09a0e345f2..8f5673597e 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -90,7 +90,7 @@ def foldl(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, if not callable(fn): raise TypeError("fn must be callable.") - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "foldl", [elems]): # TODO(akshayka): Remove the in_graph_mode check once caching devices are # supported in Eager @@ -178,7 +178,7 @@ def foldr(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, if not callable(fn): raise TypeError("fn must be callable.") - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "foldr", [elems]): # TODO(akshayka): Remove the in_graph_mode check once caching devices are # supported in Eager @@ -343,7 +343,7 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True, elems_flat = input_flatten(elems) - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "map", elems_flat): # TODO(akshayka): Remove the in_graph_mode check once caching devices are # supported in Eager @@ -536,7 +536,7 @@ def scan(fn, elems, initializer=None, 
parallel_iterations=10, back_prop=True, elems_flat = input_flatten(elems) - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "scan", elems_flat): # TODO(akshayka): Remove the in_graph_mode check once caching devices are # supported in Eager diff --git a/tensorflow/python/ops/gradients_impl.py b/tensorflow/python/ops/gradients_impl.py index b678090542..44473ec69c 100644 --- a/tensorflow/python/ops/gradients_impl.py +++ b/tensorflow/python/ops/gradients_impl.py @@ -86,7 +86,7 @@ def _IndexedSlicesToTensor(value, dtype=None, name=None, as_ref=False): % str(value)) # TODO(mrry): Consider adding static shape information to # IndexedSlices, to avoid using numpy here. - if context.in_graph_mode(): + if not context.executing_eagerly(): dense_shape_value = tensor_util.constant_value(value.dense_shape) if dense_shape_value is not None: num_elements = np.prod(dense_shape_value) @@ -491,9 +491,10 @@ def gradients(ys, def _GradientsHelper(ys, xs, grad_ys, name, colocate_gradients_with_ops, gate_gradients, aggregation_method, stop_gradients): """Implementation of gradients().""" - if context.in_eager_mode(): - raise RuntimeError("tf.gradients not supported in EAGER mode. Use " - "functions in tf.contrib.eager.backprop instead.") + if context.executing_eagerly(): + raise RuntimeError("tf.gradients not supported when eager execution " + "is enabled. Use tf.contrib.eager.GradientTape " + "instead.") ys = _AsList(ys) xs = _AsList(xs) stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients) diff --git a/tensorflow/python/ops/io_ops.py b/tensorflow/python/ops/io_ops.py index 7c782c12a5..f6a25610c5 100644 --- a/tensorflow/python/ops/io_ops.py +++ b/tensorflow/python/ops/io_ops.py @@ -173,7 +173,7 @@ class ReaderBase(object): Raises: RuntimeError: If eager execution is enabled. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "Readers are not supported when eager execution is enabled. " "Instead, please use tf.data to get data into your model.") diff --git a/tensorflow/python/ops/lookup_ops.py b/tensorflow/python/ops/lookup_ops.py index baf7cc19fa..6f043f60e6 100644 --- a/tensorflow/python/ops/lookup_ops.py +++ b/tensorflow/python/ops/lookup_ops.py @@ -157,10 +157,10 @@ class InitializableLookupTableBase(LookupInterface): default_value: The value to use if a key is missing in the table. initializer: The table initializer to use. """ - if context.in_graph_mode(): - name = table_ref.op.name.split("/")[-1] - else: + if context.executing_eagerly(): name = context.context().scope_name + else: + name = table_ref.op.name.split("/")[-1] super(InitializableLookupTableBase, self).__init__(initializer.key_dtype, initializer.value_dtype, name) @@ -521,7 +521,7 @@ class TextFileInitializer(TableInitializerBase): ops.add_to_collection(ops.GraphKeys.TABLE_INITIALIZERS, init_op) # If the filename tensor is anything other than a string constant (e.g., if # it is a placeholder) then it does not make sense to track it as an asset. - if context.in_graph_mode() and constant_op.is_constant(filename): + if not context.executing_eagerly() and constant_op.is_constant(filename): ops.add_to_collection(ops.GraphKeys.ASSET_FILEPATHS, filename) return init_op diff --git a/tensorflow/python/ops/losses/losses_impl.py b/tensorflow/python/ops/losses/losses_impl.py index 0cae3c1453..424fd09e09 100644 --- a/tensorflow/python/ops/losses/losses_impl.py +++ b/tensorflow/python/ops/losses/losses_impl.py @@ -136,7 +136,7 @@ def _num_present(losses, weights, per_batch=False): `[batch_size]`. Otherwise, a single scalar tensor is returned. 
""" if ((isinstance(weights, float) and weights != 0.0) or - (context.in_eager_mode() and weights._rank() == 0 # pylint: disable=protected-access + (context.executing_eagerly() and weights._rank() == 0 # pylint: disable=protected-access and not math_ops.equal(weights, 0.0))): return _num_elements(losses) with ops.name_scope(None, "num_present", (losses, weights)) as scope: diff --git a/tensorflow/python/ops/math_grad.py b/tensorflow/python/ops/math_grad.py index 55dd0c0e0d..e2ee9e4fe4 100644 --- a/tensorflow/python/ops/math_grad.py +++ b/tensorflow/python/ops/math_grad.py @@ -52,14 +52,14 @@ def _SumGrad(op, grad): if axes is not None: rank = len(input_0_shape) if np.array_equal(axes, np.arange(rank)): # Reduce all dims. - if context.in_graph_mode(): - new_shape = [1] * rank - else: + if context.executing_eagerly(): ctx = context.context() new_shape = ctx.ones_rank_cache().get(rank) if new_shape is None: new_shape = constant_op.constant([1] * rank, dtype=dtypes.int32) ctx.ones_rank_cache().put(rank, new_shape) + else: + new_shape = [1] * rank grad = array_ops.reshape(grad, new_shape) # If shape is not fully defined (but rank is), we use Shape. 
if None not in input_0_shape: @@ -997,7 +997,7 @@ def _SparseMatMulGrad(op, grad): op.inputs[0]: op.get_attr("a_is_sparse"), op.inputs[1]: op.get_attr("b_is_sparse"), # Use heuristic to figure out if grad might be sparse - grad: context.in_graph_mode() and (grad.op.type == "ReluGrad") + grad: not context.executing_eagerly() and (grad.op.type == "ReluGrad") } def _SparseMatMul(t1, t2, out_dtype, transpose_a=False, transpose_b=False): diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index c019a5851f..5130c50717 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -2007,14 +2007,14 @@ def matmul(a, if transpose_b and adjoint_b: raise ValueError("Only one of transpose_b and adjoint_b can be True.") - if context.in_graph_mode(): - a = ops.convert_to_tensor(a, name="a") - b = ops.convert_to_tensor(b, name="b") - else: + if context.executing_eagerly(): if not isinstance(a, (ops.EagerTensor, _resource_variable_type)): a = ops.convert_to_tensor(a, name="a") if not isinstance(b, (ops.EagerTensor, _resource_variable_type)): b = ops.convert_to_tensor(b, name="b") + else: + a = ops.convert_to_tensor(a, name="a") + b = ops.convert_to_tensor(b, name="b") # TODO(apassos) remove _shape_tuple here when it is not needed. a_shape = a._shape_tuple() # pylint: disable=protected-access @@ -2249,7 +2249,7 @@ def accumulate_n(inputs, shape=None, tensor_dtype=None, name=None): return inputs[0] elif len(inputs) == 1 and name is not None: return array_ops.identity(inputs[0], name=name) - elif context.in_eager_mode(): + elif context.executing_eagerly(): # TemporaryVariable not currently supported in eager mode; fall back # onto AddN for now. 
# TODO(frreiss) remove this once the lifetime of eager variables gets diff --git a/tensorflow/python/ops/math_ops_test.py b/tensorflow/python/ops/math_ops_test.py index d314124ccd..9f85188b35 100644 --- a/tensorflow/python/ops/math_ops_test.py +++ b/tensorflow/python/ops/math_ops_test.py @@ -60,7 +60,7 @@ class ReduceTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testReduceInvalidAxis(self): - if context.in_eager_mode(): + if context.executing_eagerly(): # The shape check is in run a graph construction time. In eager mode, # it misses the check, magically return result given wrong shape. return @@ -249,7 +249,7 @@ class ScalarMulTest(test_util.TensorFlowTestCase): @test_util.run_in_graph_and_eager_modes() def testAcceptsRefs(self): - if context.in_eager_mode(): + if context.executing_eagerly(): var = resource_variable_ops.ResourceVariable(10, name="var") else: var = variables.Variable(10) diff --git a/tensorflow/python/ops/metrics_impl.py b/tensorflow/python/ops/metrics_impl.py index 0123162b54..9ec4954579 100644 --- a/tensorflow/python/ops/metrics_impl.py +++ b/tensorflow/python/ops/metrics_impl.py @@ -308,7 +308,7 @@ def mean(values, or tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean is not supported when eager execution ' 'is enabled.') @@ -394,7 +394,7 @@ def accuracy(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.accuracy is not supported when eager ' 'execution is enabled.') @@ -644,7 +644,7 @@ def auc(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.auc is not supported when eager execution ' 'is enabled.') @@ -758,7 +758,7 @@ def mean_absolute_error(labels, tuple. 
RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean_absolute_error is not supported ' 'when eager execution is enabled.') @@ -818,7 +818,7 @@ def mean_cosine_distance(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean_cosine_distance is not supported when ' 'eager execution is enabled.') @@ -891,7 +891,7 @@ def mean_per_class_accuracy(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean_per_class_accuracy is not supported ' 'when eager execution is enabled.') @@ -996,7 +996,7 @@ def mean_iou(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean_iou is not supported when ' 'eager execution is enabled.') @@ -1098,7 +1098,7 @@ def mean_relative_error(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean_relative_error is not supported when ' 'eager execution is enabled.') @@ -1165,7 +1165,7 @@ def mean_squared_error(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean_squared_error is not supported when ' 'eager execution is enabled.') @@ -1223,7 +1223,7 @@ def mean_tensor(values, or tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.mean_tensor is not supported when ' 'eager execution is enabled.') @@ -1304,7 +1304,7 @@ def percentage_below(values, or tuple. RuntimeError: If eager execution is enabled. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.percentage_below is not supported when ' 'eager execution is enabled.') @@ -1397,7 +1397,7 @@ def false_negatives(labels, or tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.false_negatives is not supported when ' 'eager execution is enabled.') @@ -1453,7 +1453,7 @@ def false_negatives_at_thresholds(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.false_negatives_at_thresholds is not ' 'supported when eager execution is enabled.') @@ -1507,7 +1507,7 @@ def false_positives(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.false_positives is not supported when ' 'eager execution is enabled.') @@ -1563,7 +1563,7 @@ def false_positives_at_thresholds(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.false_positives_at_thresholds is not ' 'supported when eager execution is enabled.') @@ -1617,7 +1617,7 @@ def true_negatives(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.true_negatives is not ' 'supported when eager execution is enabled.') @@ -1673,7 +1673,7 @@ def true_negatives_at_thresholds(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.true_negatives_at_thresholds is not ' 'supported when eager execution is enabled.') @@ -1727,7 +1727,7 @@ def true_positives(labels, tuple. RuntimeError: If eager execution is enabled. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.true_positives is not ' 'supported when eager execution is enabled.') @@ -1783,7 +1783,7 @@ def true_positives_at_thresholds(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.true_positives_at_thresholds is not ' 'supported when eager execution is enabled.') @@ -1851,7 +1851,7 @@ def precision(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.precision is not ' 'supported when eager execution is enabled.') @@ -1947,7 +1947,7 @@ def precision_at_thresholds(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.precision_at_thresholds is not ' 'supported when eager execution is enabled.') @@ -2023,7 +2023,7 @@ def recall(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.recall is not supported is not ' 'supported when eager execution is enabled.') @@ -2400,7 +2400,7 @@ def recall_at_k(labels, are not a list or tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.recall_at_k is not ' 'supported when eager execution is enabled.') @@ -2549,7 +2549,7 @@ def recall_at_thresholds(labels, tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.recall_at_thresholds is not ' 'supported when eager execution is enabled.') @@ -2626,7 +2626,7 @@ def root_mean_squared_error(labels, tuple. RuntimeError: If eager execution is enabled. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.root_mean_squared_error is not ' 'supported when eager execution is enabled.') @@ -2707,7 +2707,7 @@ def sensitivity_at_specificity(labels, or `updates_collections` are not a list or tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.sensitivity_at_specificity is not ' 'supported when eager execution is enabled.') @@ -3098,7 +3098,7 @@ def average_precision_at_k(labels, ValueError: if k is invalid. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.sparse_average_precision_at_k is not ' 'supported when eager execution is enabled.') @@ -3267,7 +3267,7 @@ def precision_at_top_k(labels, are not a list or tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.precision_at_top_k is not ' 'supported when eager execution is enabled.') @@ -3396,7 +3396,7 @@ def precision_at_k(labels, are not a list or tuple. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.sparse_precision_at_k is not ' 'supported when eager execution is enabled.') @@ -3473,7 +3473,7 @@ def specificity_at_sensitivity(labels, or `updates_collections` are not a list or tuple. RuntimeError: If eager execution is enabled. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError('tf.metrics.specificity_at_sensitivity is not ' 'supported when eager execution is enabled.') diff --git a/tensorflow/python/ops/nn_grad.py b/tensorflow/python/ops/nn_grad.py index 5582daf2da..4af5bd26dd 100644 --- a/tensorflow/python/ops/nn_grad.py +++ b/tensorflow/python/ops/nn_grad.py @@ -456,7 +456,7 @@ def _SoftmaxCrossEntropyWithLogitsGrad(op, grad_loss, grad_grad): def IsZero(g): # Some introspection to check if the gradient is feeding zeros - if context.in_eager_mode(): + if context.executing_eagerly(): # TODO(apassos) add an efficient way to detect eager zeros here. return False if g.op.type in ("ZerosLike", "Zeros"): diff --git a/tensorflow/python/ops/nn_ops.py b/tensorflow/python/ops/nn_ops.py index 66a05f2228..fb3fe77b4d 100644 --- a/tensorflow/python/ops/nn_ops.py +++ b/tensorflow/python/ops/nn_ops.py @@ -1504,7 +1504,7 @@ def bias_add(value, bias, data_format=None, name=None): A `Tensor` with the same type as `value`. """ with ops.name_scope(name, "BiasAdd", [value, bias]) as name: - if context.in_graph_mode(): + if not context.executing_eagerly(): value = ops.convert_to_tensor(value, name="input") bias = ops.convert_to_tensor(bias, dtype=value.dtype, name="bias") return gen_nn_ops.bias_add(value, bias, data_format=data_format, name=name) @@ -1616,7 +1616,7 @@ def _flatten_outer_dims(logits): output = array_ops.reshape(logits, array_ops.concat([[-1], last_dim_size], 0)) # Set output shape if known. - if context.in_graph_mode(): + if not context.executing_eagerly(): shape = logits.get_shape() if shape is not None and shape.dims is not None: shape = shape.as_list() @@ -1881,7 +1881,8 @@ def softmax_cross_entropy_with_logits_v2( # Make shape inference work since reshape and transpose may erase its static # shape. 
- if context.in_graph_mode() and shape is not None and shape.dims is not None: + if not context.executing_eagerly( + ) and shape is not None and shape.dims is not None: shape = shape.as_list() del shape[dim] cost.set_shape(shape) @@ -2318,7 +2319,7 @@ def dropout(x, keep_prob, noise_shape=None, seed=None, name=None): # pylint: di # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob) binary_tensor = math_ops.floor(random_tensor) ret = math_ops.div(x, keep_prob) * binary_tensor - if context.in_graph_mode(): + if not context.executing_eagerly(): ret.set_shape(x.get_shape()) return ret diff --git a/tensorflow/python/ops/numerics.py b/tensorflow/python/ops/numerics.py index b4ce1cbf25..d348e47f57 100644 --- a/tensorflow/python/ops/numerics.py +++ b/tensorflow/python/ops/numerics.py @@ -74,7 +74,7 @@ def add_check_numerics_ops(): the checked operations. @enc_compatibility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "add_check_numerics_ops() is not compatible with eager execution. " "To check for Inf's and NaN's under eager execution, call " diff --git a/tensorflow/python/ops/resource_variable_ops.py b/tensorflow/python/ops/resource_variable_ops.py index d0578f8205..54191ee765 100644 --- a/tensorflow/python/ops/resource_variable_ops.py +++ b/tensorflow/python/ops/resource_variable_ops.py @@ -135,10 +135,10 @@ class EagerResourceDeleter(object): # valid, and so on. Printing warnings in these cases is silly # (exceptions raised from __del__ are printed as warnings to stderr). pass # 'NoneType' object is not callable when the handle has been - # partially unloaded. + # partially unloaded. except AttributeError: pass # 'NoneType' object has no attribute 'eager_mode' when context has - # been unloaded. Will catch other module unloads as well. + # been unloaded. Will catch other module unloads as well. 
def shape_safe_assign_variable_handle(handle, shape, value, name=None): @@ -267,9 +267,9 @@ class ResourceVariable(variables.Variable): if initial_value is not None: raise ValueError("variable_def and initial_value are mutually " "exclusive.") - if not context.in_graph_mode(): - raise ValueError("Creating ResourceVariable from variable_def" - " only supported in GRAPH mode.") + if context.executing_eagerly(): + raise ValueError("Creating ResourceVariable from variable_def is " + "not supported when eager execution is enabled.") self._init_from_proto(variable_def, import_scope=import_scope) else: self._init_from_args( @@ -363,7 +363,7 @@ class ResourceVariable(variables.Variable): # this graph. self._graph_key = ops.get_default_graph()._graph_key # pylint: disable=protected-access with ops.init_scope(): - self._in_graph_mode = context.in_graph_mode() + self._in_graph_mode = not context.executing_eagerly() with ops.name_scope(name, "Variable", [] if init_from_fn else [initial_value]) as name: # pylint: disable=protected-access @@ -470,7 +470,7 @@ class ResourceVariable(variables.Variable): self._cached_value = self._read_variable_op() else: self._cached_value = None - if context.in_graph_mode(): + if not context.executing_eagerly(): ops.add_to_collections(collections, self) elif ops.GraphKeys.GLOBAL_STEP in collections: ops.add_to_collections(ops.GraphKeys.GLOBAL_STEP, self) @@ -489,7 +489,7 @@ class ResourceVariable(variables.Variable): def _init_from_proto(self, variable_def, import_scope=None): """Initializes from `VariableDef` proto.""" # Note that init_from_proto is currently not supported in Eager mode. 
- assert context.in_graph_mode() + assert not context.executing_eagerly() self._in_graph_mode = True assert isinstance(variable_def, variable_pb2.VariableDef) if not variable_def.is_resource: @@ -582,7 +582,8 @@ class ResourceVariable(variables.Variable): def create(self): """The op responsible for initializing this variable.""" if not self._in_graph_mode: - raise RuntimeError("Calling create in EAGER mode not supported.") + raise RuntimeError("Calling create is not supported when eager execution" + " is enabled.") return self._initializer_op @property @@ -610,7 +611,7 @@ class ResourceVariable(variables.Variable): @property def initial_value(self): """Returns the Tensor used as the initial value for the variable.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("initial_value not supported in EAGER mode.") return self._initial_value @@ -631,15 +632,15 @@ class ResourceVariable(variables.Variable): def eval(self, session=None): """Evaluates and returns the value of this variable.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Trying to eval in EAGER mode") return self._graph_element.eval(session=session) def numpy(self): - if context.in_graph_mode(): - raise NotImplementedError( - "numpy() is only available when eager execution is enabled.") - return self.read_value().numpy() + if context.executing_eagerly(): + return self.read_value().numpy() + raise NotImplementedError( + "numpy() is only available when eager execution is enabled.") def count_up_to(self, limit): """Increments this variable until it reaches `limit`. @@ -720,7 +721,7 @@ class ResourceVariable(variables.Variable): A `VariableDef` protocol buffer, or `None` if the `Variable` is not in the specified name scope. 
""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("to_proto not supported in EAGER mode.") if export_scope is None or self.handle.name.startswith(export_scope): var_def = variable_pb2.VariableDef() @@ -747,7 +748,7 @@ class ResourceVariable(variables.Variable): @staticmethod def from_proto(variable_def, import_scope=None): - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("from_proto not supported in EAGER mode.") return ResourceVariable( variable_def=variable_def, import_scope=import_scope) @@ -984,10 +985,10 @@ class _UnreadVariable(ResourceVariable): self._is_initialized_op = None self._initializer_op = None self._parent_op = parent_op - if context.in_graph_mode(): - self._graph_element = self.read_value() - else: + if context.executing_eagerly(): self._graph_element = None + else: + self._graph_element = self.read_value() self._handle_deleter = deleter def value(self): diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index aa8d4327d2..625d433b1f 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -575,7 +575,7 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, # Create a new scope in which the caching device is either # determined by the parent scope, or is set to place the cached # Variable using the same placement as for the rest of the RNN. 
- if context.in_graph_mode(): + if not context.executing_eagerly(): if varscope.caching_device is None: varscope.set_caching_device(lambda op: op.device) @@ -616,7 +616,7 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, ["Expected shape for Tensor %s is " % x.name, packed_shape, " but saw shape: ", x_shape]) - if context.in_graph_mode() and sequence_length is not None: + if not context.executing_eagerly() and sequence_length is not None: # Perform some shape validation with ops.control_dependencies( [_assert_has_shape(sequence_length, [batch_size])]): @@ -742,7 +742,7 @@ def _dynamic_rnn_loop(cell, element_shape=element_shape, tensor_array_name=base_name + name) - in_graph_mode = context.in_graph_mode() + in_graph_mode = not context.executing_eagerly() if in_graph_mode: output_ta = tuple( _create_ta( @@ -1027,7 +1027,7 @@ def raw_rnn(cell, loop_fn, # determined by the parent scope, or is set to place the cached # Variable using the same placement as for the rest of the RNN. with vs.variable_scope(scope or "rnn") as varscope: - if context.in_graph_mode(): + if not context.executing_eagerly(): if varscope.caching_device is None: varscope.set_caching_device(lambda op: op.device) @@ -1242,7 +1242,7 @@ def static_rnn(cell, # determined by the parent scope, or is set to place the cached # Variable using the same placement as for the rest of the RNN. 
with vs.variable_scope(scope or "rnn") as varscope: - if context.in_graph_mode(): + if not context.executing_eagerly(): if varscope.caching_device is None: varscope.set_caching_device(lambda op: op.device) diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index 3ae1d1184d..e61d10835f 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -128,7 +128,7 @@ def _zero_state_tensors(state_size, batch_size, dtype): """Combine s with batch_size to get a proper tensor shape.""" c = _concat(batch_size, s) size = array_ops.zeros(c, dtype=dtype) - if context.in_graph_mode(): + if not context.executing_eagerly(): c_static = _concat(batch_size, s, static=True) size.set_shape(c_static) return size @@ -192,12 +192,13 @@ class RNNCell(base_layer.Layer): def _rnn_get_variable(self, getter, *args, **kwargs): variable = getter(*args, **kwargs) - if context.in_graph_mode(): - trainable = (variable in tf_variables.trainable_variables() or - (isinstance(variable, tf_variables.PartitionedVariable) and - list(variable)[0] in tf_variables.trainable_variables())) - else: + if context.executing_eagerly(): trainable = variable._trainable # pylint: disable=protected-access + else: + trainable = ( + variable in tf_variables.trainable_variables() or + (isinstance(variable, tf_variables.PartitionedVariable) and + list(variable)[0] in tf_variables.trainable_variables())) if trainable and variable not in self._trainable_weights: self._trainable_weights.append(variable) elif not trainable and variable not in self._non_trainable_weights: @@ -241,7 +242,7 @@ class RNNCell(base_layer.Layer): # Try to use the last cached zero_state. This is done to avoid recreating # zeros, especially when eager execution is enabled. 
state_size = self.state_size - is_eager = context.in_eager_mode() + is_eager = context.executing_eagerly() if is_eager and hasattr(self, "_last_zero_state"): (last_state_size, last_batch_size, last_dtype, last_output) = getattr(self, "_last_zero_state") diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 01f0b81684..529eebe769 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -317,7 +317,7 @@ def py_func(func, inp, Tout, stateful=True, name=None): Returns: A list of `Tensor` or a single `Tensor` which `func` computes. """ - if context.in_eager_mode(): + if context.executing_eagerly(): result = func(*[x.numpy() for x in inp]) result = nest.flatten(result) diff --git a/tensorflow/python/ops/state_ops.py b/tensorflow/python/ops/state_ops.py index fd4419640a..c3ad5831b4 100644 --- a/tensorflow/python/ops/state_ops.py +++ b/tensorflow/python/ops/state_ops.py @@ -186,7 +186,7 @@ def is_variable_initialized(ref, name=None): if ref.dtype._is_ref_dtype: return gen_state_ops.is_variable_initialized(ref=ref, name=name) # Handle resource variables. 
- if context.in_eager_mode() or ref.op.type == "VarHandleOp": + if context.executing_eagerly() or ref.op.type == "VarHandleOp": return gen_resource_variable_ops.var_is_initialized_op(ref.handle, name=name) diff --git a/tensorflow/python/ops/template.py b/tensorflow/python/ops/template.py index 70e8040512..0a391d896a 100644 --- a/tensorflow/python/ops/template.py +++ b/tensorflow/python/ops/template.py @@ -204,7 +204,7 @@ def make_template_internal(name_, if kwargs: func_ = tf_decorator.make_decorator(func_, functools.partial( func_, **kwargs)) - if context.in_eager_mode(): + if context.executing_eagerly(): if unique_name_ is not None: raise ValueError( "unique_name_ cannot be used when eager exeuction is enabled.") @@ -364,7 +364,7 @@ class Template(checkpointable.CheckpointableBase): """ def _call_next_creator_renaming_initializer(initializer, **inner_kwargs): inner_kwargs.pop("name") # Ignored; this is the scope-stripped name which - # we don't want to propagate. + # we don't want to propagate. return next_creator( initial_value=initializer, name=name, @@ -647,7 +647,7 @@ class EagerTemplate(Template): Raises: RuntimeError: if eager execution is not enabled. """ - if not context.in_eager_mode(): + if not context.executing_eagerly(): raise RuntimeError( "{} objects can only be used when eager execution is enabled, use " "tf.Template for graph construction". 
diff --git a/tensorflow/python/ops/tensor_array_ops.py b/tensorflow/python/ops/tensor_array_ops.py index 6226f426be..2f6badcb53 100644 --- a/tensorflow/python/ops/tensor_array_ops.py +++ b/tensorflow/python/ops/tensor_array_ops.py @@ -338,7 +338,7 @@ class _GraphTensorArray(object): with ops.name_scope(name, "TensorArrayScatter", [self._handle, value, indices]): value = ops.convert_to_tensor(value, name="value") - if self._infer_shape and context.in_graph_mode(): + if self._infer_shape and not context.executing_eagerly(): self._merge_element_shape(value.shape[1:]) with self._maybe_colocate_with(value): flow_out = gen_data_flow_ops.tensor_array_scatter_v3( @@ -363,7 +363,7 @@ class _GraphTensorArray(object): value = ops.convert_to_tensor(value, name="value") with self._maybe_colocate_with(value): lengths_64 = math_ops.to_int64(lengths) - if self._infer_shape and context.in_graph_mode(): + if self._infer_shape and not context.executing_eagerly(): clengths = tensor_util.constant_value(lengths_64) if value.shape.dims is not None: if clengths is not None and clengths.max() == clengths.min(): @@ -774,10 +774,10 @@ class TensorArray(object): ValueError: if both handle and tensor_array_name are provided. TypeError: if handle is provided but is not a Tensor. 
""" - if context.in_graph_mode(): - implementation = _GraphTensorArray - else: + if context.executing_eagerly(): implementation = _EagerTensorArray + else: + implementation = _GraphTensorArray self._implementation = implementation( dtype, diff --git a/tensorflow/python/ops/variable_scope.py b/tensorflow/python/ops/variable_scope.py index de4e44f60c..7f650ff6a9 100644 --- a/tensorflow/python/ops/variable_scope.py +++ b/tensorflow/python/ops/variable_scope.py @@ -321,7 +321,7 @@ class _VariableStore(object): raise ValueError( "Passed a custom_getter which is not callable: %s" % custom_getter) - if context.in_eager_mode(): + if context.executing_eagerly(): if not self._store_eager_variables and reuse: raise RuntimeError( "When eager execution is enabled variable reuse is only supported" @@ -518,7 +518,7 @@ class _VariableStore(object): when violating reuse during variable creation, or if an existing sharded variable exists for the given name but with different sharding. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise NotImplementedError("Partitioned variables are not yet supported " "when eager execution is enabled.") @@ -798,7 +798,7 @@ class _VariableStore(object): validate_shape=validate_shape, constraint=constraint, use_resource=use_resource) - if context.in_graph_mode() or self._store_eager_variables: + if not context.executing_eagerly() or self._store_eager_variables: # In eager mode we do not want to keep default references to Variable # objects as this will prevent their memory from being released. 
self._vars[name] = v @@ -811,12 +811,12 @@ class _VariableStore(object): with ops.name_scope(name + "/Regularizer/"): loss = regularizer(v) if loss is not None: - if context.in_graph_mode(): - v_name = v.name - loss_name = loss.name - else: + if context.executing_eagerly(): v_name = "v_%s" % type(v) loss_name = "loss_%s" % type(loss) + else: + v_name = v.name + loss_name = loss.name logging.vlog(1, "Applied regularizer to %s and added the result %s " "to REGULARIZATION_LOSSES.", v_name, loss_name) ops.add_to_collection(ops.GraphKeys.REGULARIZATION_LOSSES, loss) @@ -920,7 +920,7 @@ class VariableScope(object): self._dtype = dtype self._use_resource = use_resource self._constraint = constraint - if context.in_eager_mode(): + if context.executing_eagerly(): if self._caching_device is not None: raise NotImplementedError("Caching devices is not yet supported " "when eager execution is enabled.") @@ -988,7 +988,7 @@ class VariableScope(object): def set_use_resource(self, use_resource): """Sets whether to use ResourceVariables for this scope.""" - if context.in_eager_mode() and not use_resource: + if context.executing_eagerly() and not use_resource: raise ValueError("When eager execution is enabled, " "use_resource cannot be set to false.") self._use_resource = use_resource @@ -999,14 +999,14 @@ class VariableScope(object): def set_caching_device(self, caching_device): """Set caching_device for this scope.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise NotImplementedError("Caching devices are not yet supported " "when eager execution is enabled.") self._caching_device = caching_device def set_partitioner(self, partitioner): """Set partitioner for this scope.""" - if partitioner and context.in_eager_mode(): + if partitioner and context.executing_eagerly(): raise NotImplementedError("Partitioned variables are not yet supported " "when eager execution is enabled.") self._partitioner = partitioner @@ -1057,14 +1057,14 @@ class VariableScope(object): 
partitioner = self._partitioner if custom_getter is None: custom_getter = self._custom_getter - if context.in_graph_mode(): + if context.executing_eagerly(): + reuse = False + use_resource = True + else: if reuse is None: reuse = self._reuse if use_resource is None: use_resource = self._use_resource - else: - reuse = False - use_resource = True full_name = self.name + "/" + name if self.name else name # Variable names only depend on variable_scope (full_name here), @@ -1107,7 +1107,7 @@ class VariableScope(object): use_resource=None, constraint=None): """Gets an existing variable with this name or create a new one.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise NotImplementedError("Partitioned variables are not yet supported " "when eager execution is enabled.") if initializer is None: @@ -1871,7 +1871,7 @@ class variable_scope(object): raise ValueError("The reuse parameter must be True or False or None.") if self._values is None: self._values = [] - self._in_graph_mode = not context.in_eager_mode() + self._in_graph_mode = not context.executing_eagerly() if self._in_graph_mode: self._graph = ops._get_graph_from_inputs(self._values) # pylint: disable=protected-access self._cached_pure_variable_scope = None @@ -2111,13 +2111,13 @@ def default_variable_creator(next_creator=None, **kwargs): use_resource = kwargs.get("use_resource", None) if use_resource is None: use_resource = get_variable_scope().use_resource - if use_resource or (use_resource is None and context.in_eager_mode()): + if use_resource or (use_resource is None and context.executing_eagerly()): return resource_variable_ops.ResourceVariable( initial_value=initial_value, trainable=trainable, collections=collections, validate_shape=validate_shape, caching_device=caching_device, name=name, dtype=dtype, constraint=constraint) - elif not use_resource and context.in_eager_mode(): + elif not use_resource and context.executing_eagerly(): raise RuntimeError( "VariableScope should use 
resource variable when eager execution is" " enabled, but use_resource is False." diff --git a/tensorflow/python/ops/variables.py b/tensorflow/python/ops/variables.py index 643a3b7edc..5b9947f441 100644 --- a/tensorflow/python/ops/variables.py +++ b/tensorflow/python/ops/variables.py @@ -210,10 +210,11 @@ class Variable(checkpointable.CheckpointableBase): for details on how variables work in eager execution. @end_compatibility """ - if not context.in_graph_mode(): - raise RuntimeError("tf.Variable not supported in Eager mode. " - "Please use tfe.Variable instead") - self._in_graph_mode = context.in_graph_mode() + if context.executing_eagerly(): + raise RuntimeError( + "tf.Variable not supported when eager execution is enabled. " + "Please use tf.contrib.eager.Variable instead") + self._in_graph_mode = True if variable_def: # If variable_def is provided, recreates the variable from its fields. if initial_value: @@ -234,7 +235,7 @@ class Variable(checkpointable.CheckpointableBase): constraint=constraint) def __repr__(self): - if context.in_eager_mode(): + if context.executing_eagerly(): return "" % ( self.name, self.get_shape(), self.dtype.name, ops.numpy_text(self.read_value(), is_repr=True)) @@ -740,15 +741,15 @@ class Variable(checkpointable.CheckpointableBase): Raises: ValueError: Session is not passed and no default session """ - if context.in_graph_mode(): + if context.executing_eagerly(): + self.assign(value) + else: session = session or ops.get_default_session() if session is None: raise ValueError( "Either session argument should be provided or default session " "should be established") session.run(self._initializer_op, {self._initializer_op.inputs[1]: value}) - else: - self.assign(value) # Conversion to tensor. @staticmethod @@ -1248,9 +1249,9 @@ class PartitionedVariable(object): information does not match `shape`, or `partitions` has invalid values. 
RuntimeError: If eager execution is enabled """ - if not context.in_graph_mode(): - raise RuntimeError("tf.PartitionedVariable not supported in " - "eager mode. Please use tfe.Variable instead") + if context.executing_eagerly(): + raise RuntimeError( + "tf.PartitionedVariable not supported with eager execution enabled.") if not isinstance(variable_list, (list, tuple)): raise TypeError( "variable_list is not a list or tuple: %s" % variable_list) @@ -1541,7 +1542,7 @@ def variables_initializer(var_list, name="init"): Returns: An Op that run the initializers of all the specified variables. """ - if var_list and context.in_graph_mode(): + if var_list and not context.executing_eagerly(): return control_flow_ops.group(*[v.initializer for v in var_list], name=name) return control_flow_ops.no_op(name=name) @@ -1563,7 +1564,7 @@ def global_variables_initializer(): Returns: An Op that initializes global variables in the graph. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return control_flow_ops.no_op(name="global_variables_initializer") return variables_initializer(global_variables()) @@ -1585,7 +1586,7 @@ def local_variables_initializer(): Returns: An Op that initializes all local variables in the graph. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return control_flow_ops.no_op(name="local_variables_initializer") return variables_initializer(local_variables()) diff --git a/tensorflow/python/profiler/model_analyzer.py b/tensorflow/python/profiler/model_analyzer.py index 0e20ca35bb..acf02096ff 100644 --- a/tensorflow/python/profiler/model_analyzer.py +++ b/tensorflow/python/profiler/model_analyzer.py @@ -172,7 +172,7 @@ class Profiler(object): op_log: optional. tensorflow::tfprof::OpLogProto proto. Used to define extra op types. 
""" - if not graph and context.in_graph_mode(): + if not graph and not context.executing_eagerly(): graph = ops.get_default_graph() self._coverage = 0.0 self._graph = graph @@ -336,7 +336,7 @@ def profile(graph=None, If cmd is 'op' or 'code', returns MultiGraphNodeProto proto. Side effect: stdout/file/timeline.json depending on options['output'] """ - if not graph and context.in_graph_mode(): + if not graph and not context.executing_eagerly(): graph = ops.get_default_graph() if options == _DEFAULT_PROFILE_OPTIONS: diff --git a/tensorflow/python/profiler/tfprof_logger.py b/tensorflow/python/profiler/tfprof_logger.py index 8d12106496..e651de32ea 100644 --- a/tensorflow/python/profiler/tfprof_logger.py +++ b/tensorflow/python/profiler/tfprof_logger.py @@ -156,7 +156,7 @@ def merge_default_with_oplog(graph, op_log=None, run_meta=None, Returns: tmp_op_log: Merged OpLogProto proto. """ - if not graph and context.in_graph_mode(): + if not graph and not context.executing_eagerly(): graph = ops.get_default_graph() tmp_op_log = tfprof_log_pb2.OpLogProto() @@ -210,7 +210,7 @@ def write_op_log(graph, log_dir, op_log=None, run_meta=None, add_trace=True): add_trace: Whether to add python code trace information. Used to support "code" view. """ - if not graph and context.in_graph_mode(): + if not graph and not context.executing_eagerly(): graph = ops.get_default_graph() op_log = merge_default_with_oplog(graph, op_log, run_meta, add_trace) diff --git a/tensorflow/python/summary/summary.py b/tensorflow/python/summary/summary.py index 7ff633a654..2a3918b9b4 100644 --- a/tensorflow/python/summary/summary.py +++ b/tensorflow/python/summary/summary.py @@ -278,7 +278,7 @@ def merge(inputs, collections=None, name=None): @end_compatbility """ # pylint: enable=line-too-long - if _context.in_eager_mode(): + if _context.executing_eagerly(): raise RuntimeError( 'Merging tf.summary.* ops is not compatible with eager execution. 
' 'Use tf.contrib.summary instead.') @@ -311,7 +311,7 @@ def merge_all(key=_ops.GraphKeys.SUMMARIES, scope=None): summaries under eager execution, use `tf.contrib.summary` instead. @end_compatbility """ - if _context.in_eager_mode(): + if _context.executing_eagerly(): raise RuntimeError( 'Merging tf.summary.* ops is not compatible with eager execution. ' 'Use tf.contrib.summary instead.') diff --git a/tensorflow/python/summary/writer/writer.py b/tensorflow/python/summary/writer/writer.py index 1f3f228704..57f78c156b 100644 --- a/tensorflow/python/summary/writer/writer.py +++ b/tensorflow/python/summary/writer/writer.py @@ -343,7 +343,7 @@ class FileWriter(SummaryToEventTransformer): summaries under eager execution, use `tf.contrib.summary` instead. @end_compatbility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "tf.summary.FileWriter is not compatible with eager execution. " "Use tf.contrib.summary instead.") diff --git a/tensorflow/python/training/adam.py b/tensorflow/python/training/adam.py index c92f6fc301..006e360389 100644 --- a/tensorflow/python/training/adam.py +++ b/tensorflow/python/training/adam.py @@ -106,10 +106,10 @@ class AdamOptimizer(optimizer.Optimizer): self._updated_lr = None def _get_beta_accumulators(self): - if context.in_graph_mode(): - graph = ops.get_default_graph() - else: + if context.executing_eagerly(): graph = None + else: + graph = ops.get_default_graph() return (self._get_non_slot_variable("beta1_power", graph=graph), self._get_non_slot_variable("beta2_power", graph=graph)) diff --git a/tensorflow/python/training/adam_test.py b/tensorflow/python/training/adam_test.py index a521f1299e..af87d6f0e5 100644 --- a/tensorflow/python/training/adam_test.py +++ b/tensorflow/python/training/adam_test.py @@ -184,7 +184,7 @@ class AdamOptimizerTest(test.TestCase): # Shouldn't return non-slot variables from other graphs. 
self.assertEqual(0, len(opt.variables())) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) @@ -194,7 +194,7 @@ class AdamOptimizerTest(test.TestCase): # Run 3 steps of Adam for t in range(1, 4): - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(update) elif t > 1: opt.apply_gradients(zip([grads0, grads1], [var0, var1])) diff --git a/tensorflow/python/training/checkpointable.py b/tensorflow/python/training/checkpointable.py index 92e8ff3308..e49965703e 100644 --- a/tensorflow/python/training/checkpointable.py +++ b/tensorflow/python/training/checkpointable.py @@ -208,7 +208,7 @@ class _CheckpointPosition(object): # Name saveables based on the name this object had when it was checkpointed. named_saveables = {} restore_ops = [] - in_graph_mode = context.in_graph_mode() + building_graph = not context.executing_eagerly() for serialized_tensor in self.object_proto.attributes: saveable_object = saveables.get(serialized_tensor.name, None) if saveable_object is None: @@ -219,7 +219,7 @@ class _CheckpointPosition(object): self._checkpoint.unused_attributes.setdefault( self.checkpointable, []).append(serialized_tensor.name) continue - if in_graph_mode: + if building_graph: existing_ops = self._checkpoint.restore_ops_by_name.get( serialized_tensor.name, None) else: @@ -245,7 +245,7 @@ class _CheckpointPosition(object): saveable_index:saveable_index + num_specs] saveable_index += num_specs restore_op = saveable.restore(saveable_tensors, restored_shapes=None) - if in_graph_mode: + if building_graph: assert saveable.name not in self._checkpoint.restore_ops_by_name self._checkpoint.restore_ops_by_name[saveable.name] = restore_op restore_ops.append(restore_op) @@ -388,7 +388,7 @@ class CheckpointableBase(object): "Checkpointable._add_variable called to create another with " "that 
name. Variable names must be unique within a Checkpointable " "object.") % (name,)) - if context.in_eager_mode(): + if context.executing_eagerly(): # If this is a variable with a single Tensor stored in the checkpoint, we # can set that value as an initializer rather than initializing and then # assigning (when executing eagerly). This call returns None if there is diff --git a/tensorflow/python/training/gradient_descent.py b/tensorflow/python/training/gradient_descent.py index 538164adb6..6caf29d83a 100644 --- a/tensorflow/python/training/gradient_descent.py +++ b/tensorflow/python/training/gradient_descent.py @@ -71,6 +71,6 @@ class GradientDescentOptimizer(optimizer.Optimizer): return var.scatter_sub(delta, use_locking=self._use_locking) def _prepare(self): - if context.in_graph_mode() or self._learning_rate_tensor is None: + if not context.executing_eagerly() or self._learning_rate_tensor is None: self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate, name="learning_rate") diff --git a/tensorflow/python/training/input.py b/tensorflow/python/training/input.py index bd9985a7c5..44f00a96de 100644 --- a/tensorflow/python/training/input.py +++ b/tensorflow/python/training/input.py @@ -159,7 +159,7 @@ def input_producer(input_tensor, enabled. Please use the `tf.data` API to ingest data under eager execution. @end_compatibility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "Input pipelines based on Queues are not supported when eager execution" " is enabled. Please use tf.data to ingest data into your model" @@ -737,7 +737,7 @@ def _batch(tensors, batch_size, keep_input, num_threads=1, capacity=32, allow_smaller_final_batch=False, shared_name=None, name=None): """Helper function for `batch` and `maybe_batch`.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError( "Input pipelines based on Queues are not supported when eager execution" " is enabled. 
Please use tf.data to ingest data into your model" @@ -775,7 +775,7 @@ def _batch_join(tensors_list, batch_size, keep_input, capacity=32, enqueue_many=False, shapes=None, dynamic_pad=False, allow_smaller_final_batch=False, shared_name=None, name=None): """Helper function for `batch_join` and `maybe_batch_join`.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError( "Input pipelines based on Queues are not supported when eager execution" " is enabled. Please use tf.data to ingest data into your model" @@ -810,7 +810,7 @@ def _shuffle_batch(tensors, batch_size, capacity, min_after_dequeue, shapes=None, allow_smaller_final_batch=False, shared_name=None, name=None): """Helper function for `shuffle_batch` and `maybe_shuffle_batch`.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError( "Input pipelines based on Queues are not supported when eager execution" " is enabled. Please use tf.data to ingest data into your model" @@ -855,7 +855,7 @@ def _shuffle_batch_join(tensors_list, batch_size, capacity, allow_smaller_final_batch=False, shared_name=None, name=None): """Helper function for `shuffle_batch_join` and `maybe_shuffle_batch_join`.""" - if context.in_eager_mode(): + if context.executing_eagerly(): raise ValueError( "Input pipelines based on Queues are not supported when eager execution" " is enabled. Please use tf.data to ingest data into your model" diff --git a/tensorflow/python/training/learning_rate_decay_test.py b/tensorflow/python/training/learning_rate_decay_test.py index 23b30632f6..60306e4f12 100644 --- a/tensorflow/python/training/learning_rate_decay_test.py +++ b/tensorflow/python/training/learning_rate_decay_test.py @@ -113,7 +113,7 @@ class LRDecayTest(test_util.TensorFlowTestCase): learning_rate_decay.piecewise_constant(x, boundaries, values) # Test that ref types are valid. 
- if context.in_graph_mode(): + if not context.executing_eagerly(): x = variables.Variable(0.0) x_ref = x.op.outputs[0] # float32_ref tensor should be accepted boundaries, values = [1.0, 2.0], [1, 2, 3] diff --git a/tensorflow/python/training/momentum_test.py b/tensorflow/python/training/momentum_test.py index cda421cef8..297a8bbde5 100644 --- a/tensorflow/python/training/momentum_test.py +++ b/tensorflow/python/training/momentum_test.py @@ -66,7 +66,7 @@ class MomentumOptimizerTest(test.TestCase): mom_update = mom_opt.apply_gradients( zip([grads0, grads1], [var0, var1])) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(variables.global_variables_initializer()) # Fetch params to validate initial values self.assertAllClose([1.0, 2.0], self.evaluate(var0)) @@ -78,13 +78,13 @@ class MomentumOptimizerTest(test.TestCase): self.assertEquals(slot0.get_shape(), var0.get_shape()) slot1 = mom_opt.get_slot(var1, "momentum") self.assertEquals(slot1.get_shape(), var1.get_shape()) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertFalse(slot0 in variables.trainable_variables()) self.assertFalse(slot1 in variables.trainable_variables()) # Step 1: the momentum accumulators where 0. So we should see a normal # update: v -= grad * learning_rate - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(mom_update) # Check that the momentum accumulators have been updated. self.assertAllCloseAccordingToType(np.array([0.1, 0.1]), @@ -99,10 +99,10 @@ class MomentumOptimizerTest(test.TestCase): np.array([3.0 - (0.01 * 2.0), 4.0 - (0.01 * 2.0)]), self.evaluate(var1)) # Step 2: the momentum accumulators contain the previous update. - if context.in_graph_mode(): - self.evaluate(mom_update) - else: + if context.executing_eagerly(): mom_opt.apply_gradients(zip([grads0, grads1], [var0, var1])) + else: + self.evaluate(mom_update) # Check that the momentum accumulators have been updated. 
self.assertAllCloseAccordingToType( np.array([(0.9 * 0.1 + 0.1), (0.9 * 0.1 + 0.1)]), @@ -142,7 +142,7 @@ class MomentumOptimizerTest(test.TestCase): [1.0, 2.0], dtype=dtypes.float32, name="var0") var1 = resource_variable_ops.ResourceVariable( [3.0, 4.0], dtype=dtypes.float32, name="var1") - if context.in_eager_mode(): + if context.executing_eagerly(): loss = lambda: math_ops.reduce_sum(var0 + var1) else: loss = math_ops.reduce_sum(var0 + var1) @@ -157,7 +157,7 @@ class MomentumOptimizerTest(test.TestCase): [1.0, 2.0], dtype=dtypes.float32, name="var2") var3 = resource_variable_ops.ResourceVariable( [3.0, 4.0], dtype=dtypes.float32, name="var3") - if context.in_eager_mode(): + if context.executing_eagerly(): loss = lambda: math_ops.reduce_sum(var2 + var3) else: loss = math_ops.reduce_sum(var2 + var3) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index ba7e087c5a..9776b90ba4 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -42,7 +42,7 @@ from tensorflow.python.util.tf_export import tf_export def _get_variable_for(v): """Returns the ResourceVariable responsible for v, or v if not necessary.""" - if context.in_eager_mode(): + if context.executing_eagerly(): return v if v.op.type == "VarHandleOp": for var in variables.trainable_variables(): @@ -73,7 +73,7 @@ def _deduplicate_indexed_slices(values, indices): def _var_key(var): - if context.in_eager_mode(): + if context.executing_eagerly(): return var._shared_name # pylint: disable=protected-access return (var.op.graph, var.op.name) @@ -199,7 +199,7 @@ class _TensorProcessor(_OptimizableVariable): def _get_processor(v): """The processor of v.""" - if context.in_eager_mode(): + if context.executing_eagerly(): if isinstance(v, ops.Tensor): return _TensorProcessor(v) else: @@ -460,7 +460,7 @@ class Optimizer( var_list = tape.watched_variables() grads = tape.gradient(loss_value, var_list, grad_loss) return list(zip(grads, 
var_list)) - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "`loss` passed to Optimizer.compute_gradients should " "be a function when eager execution is enabled.") @@ -559,7 +559,7 @@ class Optimizer( # We colocate all ops created in _apply_dense or _apply_sparse # on the same device as the variable. # TODO(apassos): figure out how to get the variable name here. - scope_name = var.op.name if context.in_graph_mode() else "" + scope_name = "" if context.executing_eagerly() else var.op.name with ops.name_scope("update_" + scope_name), ops.colocate_with(var): update_ops.append(processor.update_op(self, grad)) if global_step is None: @@ -577,7 +577,7 @@ class Optimizer( else: apply_updates = state_ops.assign_add(global_step, 1, name=name) - if context.in_graph_mode(): + if not context.executing_eagerly(): if isinstance(apply_updates, ops.Tensor): apply_updates = apply_updates.op train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP) @@ -627,7 +627,7 @@ class Optimizer( Returns: A list of variables. 
""" - executing_eagerly = context.in_eager_mode() + executing_eagerly = context.executing_eagerly() current_graph = ops.get_default_graph() def _from_current_graph(variable): @@ -649,18 +649,15 @@ class Optimizer( def _create_non_slot_variable(self, initial_value, name, colocate_with): """Add an extra variable, not associated with a slot.""" - in_graph_mode = context.in_graph_mode() - if in_graph_mode: - graph = colocate_with.graph - else: - graph = None + eager = context.executing_eagerly() + graph = None if eager else colocate_with.graph key = (name, graph) v = self._non_slot_dict.get(key, None) if v is None: self._maybe_initialize_checkpointable() with ops.colocate_with(colocate_with): - if not in_graph_mode: + if eager: restored_initial_value = self._preload_simple_restoration( name=name, shape=None) if restored_initial_value is not None: @@ -697,10 +694,7 @@ class Optimizer( unconditional = super(Optimizer, self)._lookup_dependency(name) if unconditional is not None: return unconditional - if context.in_graph_mode(): - graph = ops.get_default_graph() - else: - graph = None + graph = None if context.executing_eagerly() else ops.get_default_graph() return self._get_non_slot_variable(name, graph=graph) def _get_non_slot_variable(self, name, graph=None): @@ -1034,9 +1028,8 @@ class Optimizer( named_slots = self._slot_dict(slot_name) variable_key = _var_key(variable) slot_variable = named_slots.get(variable_key, None) - if (slot_variable is None - and context.in_eager_mode() - and slot_variable_position.is_simple_variable()): + if (slot_variable is None and context.executing_eagerly() and + slot_variable_position.is_simple_variable()): initializer = checkpointable.CheckpointInitialValue( checkpoint_position=slot_variable_position) slot_variable = self._get_or_make_slot( diff --git a/tensorflow/python/training/queue_runner_impl.py b/tensorflow/python/training/queue_runner_impl.py index 07afba79ab..d38c5499c7 100644 --- 
a/tensorflow/python/training/queue_runner_impl.py +++ b/tensorflow/python/training/queue_runner_impl.py @@ -89,7 +89,7 @@ class QueueRunner(object): restoring from `queue_runner_def`. RuntimeError: If eager execution is enabled. """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError( "QueueRunners are not supported when eager execution is enabled. " "Instead, please use tf.data to get data into your model.") @@ -441,7 +441,7 @@ def start_queue_runners(sess=None, coord=None, daemon=True, start=True, use the `tf.data` API instead. @end_compatibility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Queues are not compatible with eager execution.") if sess is None: sess = ops.get_default_session() diff --git a/tensorflow/python/training/saver.py b/tensorflow/python/training/saver.py index df3ccce63e..2ce57c4432 100644 --- a/tensorflow/python/training/saver.py +++ b/tensorflow/python/training/saver.py @@ -582,7 +582,20 @@ class BaseSaverBuilder(object): BaseSaverBuilder.OpListToDict( list(var._gather_saveables_for_checkpoint().values()))) else: - if context.in_graph_mode(): + if context.executing_eagerly(): + if not isinstance(var, resource_variable_ops.ResourceVariable): + raise ValueError( + "Can only save/restore ResourceVariables when eager execution " + "is enabled, type: %s." % type(var)) + set_var = names_to_saveables.setdefault(var._shared_name, var) + if set_var is not var: + raise ValueError( + ("Two different ResourceVariable objects with the same " + "shared_name '%s' were passed to the Saver. 
This likely means " + "that they were created in different Graphs or isolation " + "contexts, and may not be checkpointed together.") % + (var._shared_name,)) + else: if convert_variable_to_tensor: if isinstance(var, resource_variable_ops.ResourceVariable): var = var._graph_element # pylint: disable=protected-access @@ -598,18 +611,6 @@ class BaseSaverBuilder(object): raise ValueError("At least two variables have the same name: %s" % name) names_to_saveables[name] = var - else: - if not isinstance(var, resource_variable_ops.ResourceVariable): - raise ValueError("Can only save/restore ResourceVariable eager " - "mode is enabled, type: %s." % type(var)) - set_var = names_to_saveables.setdefault(var._shared_name, var) - if set_var is not var: - raise ValueError( - ("Two different ResourceVariable objects with the same " - "shared_name '%s' were passed to the Saver. This likely means " - "that they were created in different Graphs or isolation " - "contexts, and may not be checkpointed together.") % ( - var._shared_name,)) # pylint: enable=protected-access return names_to_saveables @@ -671,7 +672,7 @@ class BaseSaverBuilder(object): # pylint: enable=protected-access else: # A variable or tensor. - if context.in_eager_mode(): + if context.executing_eagerly(): if not isinstance(op, resource_variable_ops.ResourceVariable): raise ValueError("Can only save/restore ResourceVariable eager " "mode is enabled, type: %s." 
% type(op)) @@ -778,8 +779,10 @@ class BaseSaverBuilder(object): build_save=True, build_restore=True): """build() with option to only perform save and restore.""" - if context.in_graph_mode() and (not build_save or not build_restore): - raise ValueError("Graph mode needs to build save and restore together.") + if not context.executing_eagerly() and (not build_save or + not build_restore): + raise ValueError("save and restore operations need to be built together " + " when eager execution is not enabled.") saveables = self._ValidateAndSliceInputs(names_to_saveables) if max_to_keep is None: @@ -816,22 +819,22 @@ class BaseSaverBuilder(object): # such usage model makes sense. # # assert restore_op.name.endswith("restore_all"), restore_op.name - if context.in_graph_mode(): + if context.executing_eagerly(): + # Store the tensor values to the tensor_names. + save_tensor_name = save_tensor.numpy() if build_save else "" return saver_pb2.SaverDef( - filename_tensor_name=filename_tensor.name, - save_tensor_name=save_tensor.name, - restore_op_name=restore_op.name, + filename_tensor_name=filename_tensor.numpy(), + save_tensor_name=save_tensor_name, + restore_op_name="", max_to_keep=max_to_keep, sharded=sharded, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, version=self._write_version) else: - # Store the tensor values to the tensor_names. - save_tensor_name = save_tensor.numpy() if build_save else "" return saver_pb2.SaverDef( - filename_tensor_name=filename_tensor.numpy(), - save_tensor_name=save_tensor_name, - restore_op_name="", + filename_tensor_name=filename_tensor.name, + save_tensor_name=save_tensor.name, + restore_op_name=restore_op.name, max_to_keep=max_to_keep, sharded=sharded, keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours, @@ -1280,7 +1283,7 @@ class Saver(object): raise ValueError( "If `var_list` is provided then build cannot be deferred. 
" "Either set defer_build=False or var_list=None.") - if context.in_eager_mode() and var_list is None: + if context.executing_eagerly() and var_list is None: raise RuntimeError( "When eager execution is enabled, `var_list` must specify a list or " "dict of variables to save") @@ -1301,10 +1304,10 @@ class Saver(object): self._filename = filename self._last_checkpoints = [] self._checkpoints_to_be_deleted = [] - if context.in_eager_mode(): + if context.executing_eagerly(): self._next_checkpoint_time = ( time.time() + self._keep_checkpoint_every_n_hours * 3600) - if not defer_build and context.in_graph_mode(): + elif not defer_build: self.build() if self.saver_def: self._check_saver_def() @@ -1312,7 +1315,7 @@ class Saver(object): self._save_relative_paths = save_relative_paths def build(self): - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Use save/restore instead of build in eager mode.") self._build(self._filename, build_save=True, build_restore=True) @@ -1322,12 +1325,12 @@ class Saver(object): def _build(self, checkpoint_path, build_save, build_restore): """Builds saver_def.""" - if context.in_graph_mode(): + if not context.executing_eagerly(): if self._is_built: return self._is_built = True - if not self.saver_def or context.in_eager_mode(): + if not self.saver_def or context.executing_eagerly(): if self._builder is None: self._builder = BulkSaverBuilder(self._write_version) @@ -1364,8 +1367,9 @@ class Saver(object): self.saver_def.restore_op_name, self._name) self._check_saver_def() - if context.in_graph_mode(): # Set in __init__ when executing eagerly. + if not context.executing_eagerly(): # Updates next checkpoint time. + # Set in __init__ when executing eagerly. 
self._next_checkpoint_time = ( time.time() + self.saver_def.keep_checkpoint_every_n_hours * 3600) @@ -1373,7 +1377,7 @@ class Saver(object): if not isinstance(self.saver_def, saver_pb2.SaverDef): raise ValueError("saver_def must be a saver_pb2.SaverDef: %s" % self.saver_def) - if context.in_graph_mode(): + if not context.executing_eagerly(): if not self.saver_def.save_tensor_name: raise ValueError("saver_def must specify the save_tensor_name: %s" % str(self.saver_def)) @@ -1623,7 +1627,7 @@ class Saver(object): RuntimeError: If save and restore ops weren't built. """ # pylint: enable=line-too-long - if not self._is_built and context.in_graph_mode(): + if not self._is_built and not context.executing_eagerly(): raise RuntimeError( "`build()` should be called before save if defer_build==True") if latest_filename is None: @@ -1655,21 +1659,21 @@ class Saver(object): "'latest_filename' collides with 'save_path': '%s' and '%s'" % (latest_filename, save_path)) - if (context.in_graph_mode() and + if (not context.executing_eagerly() and not isinstance(sess, session.SessionInterface)): raise TypeError("'sess' must be a Session; %s" % sess) save_path_parent = os.path.dirname(save_path) if not self._is_empty: try: - if context.in_graph_mode(): - model_checkpoint_path = sess.run( - self.saver_def.save_tensor_name, - {self.saver_def.filename_tensor_name: checkpoint_file}) - else: + if context.executing_eagerly(): self._build_eager( checkpoint_file, build_save=True, build_restore=False) model_checkpoint_path = self.saver_def.save_tensor_name + else: + model_checkpoint_path = sess.run( + self.saver_def.save_tensor_name, + {self.saver_def.filename_tensor_name: checkpoint_file}) model_checkpoint_path = compat.as_str(model_checkpoint_path) if write_state: @@ -1691,7 +1695,7 @@ class Saver(object): if write_meta_graph: meta_graph_filename = self._MetaGraphFilename( checkpoint_file, meta_graph_suffix=meta_graph_suffix) - if context.in_graph_mode(): + if not context.executing_eagerly(): 
with sess.graph.as_default(): self.export_meta_graph( meta_graph_filename, strip_default_attrs=strip_default_attrs) @@ -1764,11 +1768,11 @@ class Saver(object): if save_path is None: raise ValueError("Can't load save_path when it is None.") logging.info("Restoring parameters from %s", save_path) - if context.in_graph_mode(): + if context.executing_eagerly(): + self._build_eager(save_path, build_save=False, build_restore=True) + else: sess.run(self.saver_def.restore_op_name, {self.saver_def.filename_tensor_name: save_path}) - else: - self._build_eager(save_path, build_save=False, build_restore=True) @staticmethod def _add_collection_def(meta_graph_def, key, export_scope=None): @@ -1908,7 +1912,7 @@ def import_meta_graph(meta_graph_or_file, clear_devices=False, execution is enabled. @end_compatibility """ # pylint: disable=g-doc-exception - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Exporting/importing meta graphs is not supported when " "eager execution is enabled. No graph exists when eager " "execution is enabled.") @@ -1991,7 +1995,7 @@ def export_meta_graph(filename=None, @end_compatibility """ # pylint: enable=line-too-long - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Exporting/importing meta graphs is not supported when " "eager execution is enabled. No graph exists when eager " "execution is enabled.") diff --git a/tensorflow/python/training/saver_test.py b/tensorflow/python/training/saver_test.py index 1021ccae5f..67848f7340 100644 --- a/tensorflow/python/training/saver_test.py +++ b/tensorflow/python/training/saver_test.py @@ -91,7 +91,7 @@ class SaverTest(test.TestCase): v2_init = v2.insert("k1", 30.0) # Initialize all variables - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate([variables.global_variables_initializer(), v2_init]) # Check that the parameter nodes have been initialized. 
@@ -119,7 +119,7 @@ class SaverTest(test.TestCase): v2 = saver_test_utils.CheckpointedOp(name="v2") # Assert that the variables are not initialized. - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual( len(variables.report_uninitialized_variables().eval()), 2) self.assertEqual(0, len(v2.keys().eval())) @@ -142,7 +142,7 @@ class SaverTest(test.TestCase): v2_init = v2_2.insert("k1000", 3000.0) # Check that the parameter nodes have been initialized. - if context.in_graph_mode(): + if not context.executing_eagerly(): init_all_op = [variables.global_variables_initializer(), v2_init] self.evaluate(init_all_op) # TODO(xpan): Why _mutable_hash_table_v2 doesn't create empty @@ -251,10 +251,10 @@ class SaverTest(test.TestCase): with self.test_session(graph=ops_lib.Graph()) as sess: v = resource_variable_ops.ResourceVariable([1], caching_device="/cpu:0", name="v") - if context.in_graph_mode(): - self.evaluate(variables.global_variables_initializer()) - else: + if context.executing_eagerly(): sess = None + else: + self.evaluate(variables.global_variables_initializer()) save = saver_module.Saver([v]) save.save(sess, save_path) @@ -517,7 +517,7 @@ class SaverTest(test.TestCase): with self.test_session(graph=ops_lib.Graph()) as sess: var = resource_variable_ops.ResourceVariable(var_value, name=var_name) save = saver_module.Saver({var_name: var}) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.evaluate(var.initializer) val = save.save(sess, save_path) self.assertEqual(save_path, val) @@ -677,11 +677,11 @@ class SaverTest(test.TestCase): { var._shared_name: var }, pad_step_number=pad_step_number) - if context.in_graph_mode(): + if context.executing_eagerly(): + sess = None + else: self.evaluate(var.initializer) sess = ops_lib.get_default_session() - else: - sess = None if use_tensor: global_step = constant_op.constant(global_step_int) val = save.save(sess, save_path, global_step=global_step) @@ -1066,7 +1066,7 @@ class 
MaxToKeepTest(test.TestCase): v = variable_scope.variable(10.0, name="v") save = saver_module.Saver({"v": v}, max_to_keep=2) self.evaluate(variables.global_variables_initializer()) - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual([], save.last_checkpoints) s1 = save.save(None, os.path.join(save_dir, "s1")) @@ -1479,7 +1479,7 @@ class SaveRestoreWithVariableNameMap(test.TestCase): v0 = variable_op(-1.0, name="v0") v1 = variable_op(-1.0, name="v1") - if context.in_graph_mode(): + if not context.executing_eagerly(): with self.assertRaisesOpError("uninitialized"): self.evaluate(v0) with self.assertRaisesOpError("uninitialized"): @@ -1489,7 +1489,7 @@ class SaveRestoreWithVariableNameMap(test.TestCase): save.restore(sess, save_path) # Check that the parameter nodes have been restored. - if context.in_graph_mode(): + if not context.executing_eagerly(): self.assertEqual(10.0, self.evaluate(v0)) self.assertEqual(20.0, self.evaluate(v1)) @@ -1499,7 +1499,7 @@ class SaveRestoreWithVariableNameMap(test.TestCase): v0 = variable_op(-1.0, name="restore_prefix/v0") v1 = variable_op(-1.0, name="restore_prefix/v1") - if context.in_graph_mode(): + if not context.executing_eagerly(): with self.assertRaisesOpError("uninitialized"): self.evaluate(v0) with self.assertRaisesOpError("uninitialized"): diff --git a/tensorflow/python/training/saver_test_utils.py b/tensorflow/python/training/saver_test_utils.py index 0a8b7a09af..2bbe5b6d84 100644 --- a/tensorflow/python/training/saver_test_utils.py +++ b/tensorflow/python/training/saver_test_utils.py @@ -40,7 +40,7 @@ class CheckpointedOp(object): else: self.table_ref = table_ref self._name = name - if context.in_graph_mode(): + if not context.executing_eagerly(): self._saveable = CheckpointedOp.CustomSaveable(self, name) ops_lib.add_to_collection(ops_lib.GraphKeys.SAVEABLE_OBJECTS, self._saveable) @@ -51,10 +51,10 @@ class CheckpointedOp(object): @property def saveable(self): - if context.in_graph_mode(): 
- return self._saveable - else: + if context.executing_eagerly(): return CheckpointedOp.CustomSaveable(self, self.name) + else: + return self._saveable def insert(self, keys, values): return gen_lookup_ops.lookup_table_insert_v2(self.table_ref, keys, values) diff --git a/tensorflow/python/training/slot_creator.py b/tensorflow/python/training/slot_creator.py index 75ef3d5976..9ac52dd071 100644 --- a/tensorflow/python/training/slot_creator.py +++ b/tensorflow/python/training/slot_creator.py @@ -106,7 +106,10 @@ def create_slot(primary, val, name, colocate_with_primary=True): # and the same name has been previously used, the scope name will add '_N' # as suffix for unique identifications. validate_shape = val.get_shape().is_fully_defined() - prefix = primary.op.name if context.in_graph_mode() else primary._shared_name # pylint: disable=protected-access + if context.executing_eagerly(): + prefix = primary._shared_name # pylint: disable=protected-access + else: + prefix = primary.op.name with variable_scope.variable_scope(None, prefix + "/" + name): if colocate_with_primary: with ops.colocate_with(primary): @@ -139,7 +142,10 @@ def create_slot_with_initializer(primary, initializer, shape, dtype, name, # and the same name has been previously used, the scope name will add '_N' # as suffix for unique identifications. 
validate_shape = shape.is_fully_defined() - prefix = primary.op.name if context.in_graph_mode() else primary._shared_name # pylint: disable=protected-access + if context.executing_eagerly(): + prefix = primary._shared_name # pylint: disable=protected-access + else: + prefix = primary.op.name with variable_scope.variable_scope(None, prefix + "/" + name): if colocate_with_primary: with ops.colocate_with(primary): diff --git a/tensorflow/python/training/supervisor.py b/tensorflow/python/training/supervisor.py index 86d2f1ab0a..7389e344c7 100644 --- a/tensorflow/python/training/supervisor.py +++ b/tensorflow/python/training/supervisor.py @@ -305,7 +305,7 @@ class Supervisor(object): `Supervisor`s are not supported when eager execution is enabled. @end_compatibility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Supervisors are compatible with eager execution.") # Set default values of arguments. if graph is None: @@ -762,7 +762,7 @@ class Supervisor(object): execution is enabled, use the `tf.data` API. @end_compatibility """ - if context.in_eager_mode(): + if context.executing_eagerly(): raise RuntimeError("Queues are not compatible with eager execution.") if queue_runners is None: queue_runners = self._graph.get_collection(ops.GraphKeys.QUEUE_RUNNERS) diff --git a/tensorflow/python/training/training_util.py b/tensorflow/python/training/training_util.py index 499f1feb2d..4f1abccc96 100644 --- a/tensorflow/python/training/training_util.py +++ b/tensorflow/python/training/training_util.py @@ -64,7 +64,7 @@ def global_step(sess, global_step_tensor): Returns: The global step value. """ - if context.in_eager_mode(): + if context.executing_eagerly(): return int(global_step_tensor.numpy()) return int(sess.run(global_step_tensor)) @@ -123,7 +123,7 @@ def create_global_step(graph=None): raise ValueError('"global_step" already exists.') # Create in proper graph and base name_scope. 
with graph.as_default() as g, g.name_scope(None): - if context.in_eager_mode(): + if context.executing_eagerly(): with ops.device('cpu:0'): return variable_scope.get_variable( ops.GraphKeys.GLOBAL_STEP, diff --git a/tensorflow/python/util/tf_should_use.py b/tensorflow/python/util/tf_should_use.py index 37733152e8..28e49afa02 100644 --- a/tensorflow/python/util/tf_should_use.py +++ b/tensorflow/python/util/tf_should_use.py @@ -47,7 +47,7 @@ def _add_should_use_warning(x, fatal_error=False): if x is None or x == []: # pylint: disable=g-explicit-bool-comparison return x - if context.in_eager_mode(): + if context.executing_eagerly(): # Typically not needed when executing eagerly (the main use case is for ops # which need to be incorporated into the graph), and even the no-op wrapper # creates reference cycles which require garbage collection. -- GitLab From c6705910f782a3f07d610cb21af5cba167eaa65f Mon Sep 17 00:00:00 2001 From: RJ Ryan Date: Wed, 7 Mar 2018 12:21:17 -0800 Subject: [PATCH 541/884] Add support for padding tf.string tensors on CPU. PiperOrigin-RevId: 188215092 --- tensorflow/core/kernels/mirror_pad_op.cc | 2 ++ .../core/kernels/mirror_pad_op_cpu_impl.h | 1 + tensorflow/core/kernels/pad_op.cc | 3 ++- tensorflow/python/kernel_tests/pad_op_test.py | 23 +++++++++++++++++++ 4 files changed, 28 insertions(+), 1 deletion(-) diff --git a/tensorflow/core/kernels/mirror_pad_op.cc b/tensorflow/core/kernels/mirror_pad_op.cc index 26e1082989..1c85c744fc 100644 --- a/tensorflow/core/kernels/mirror_pad_op.cc +++ b/tensorflow/core/kernels/mirror_pad_op.cc @@ -173,6 +173,7 @@ namespace functor { DECLARE_CPU_SPEC(T, int64, 5); TF_CALL_POD_TYPES(DECLARE_CPU_SPECS); +TF_CALL_string(DECLARE_CPU_SPECS); #undef DECLARE_CPU_SPEC #undef DECLARE_CPU_SPECS @@ -194,6 +195,7 @@ TF_CALL_POD_TYPES(DECLARE_CPU_SPECS); // Note that we do register for bool type, but not in the gradient op. 
TF_CALL_POD_TYPES(REGISTER_KERNEL); +TF_CALL_string(REGISTER_KERNEL); #undef REGISTER_KERNEL #if GOOGLE_CUDA diff --git a/tensorflow/core/kernels/mirror_pad_op_cpu_impl.h b/tensorflow/core/kernels/mirror_pad_op_cpu_impl.h index 6716a26fac..f27ca139c9 100644 --- a/tensorflow/core/kernels/mirror_pad_op_cpu_impl.h +++ b/tensorflow/core/kernels/mirror_pad_op_cpu_impl.h @@ -29,6 +29,7 @@ using CpuDevice = Eigen::ThreadPoolDevice; template struct functor::MirrorPad; \ template struct functor::MirrorPad; TF_CALL_POD_TYPES(DEFINE_CPU_SPECS); +TF_CALL_string(DEFINE_CPU_SPECS); #undef DEFINE_CPU_SPECS #define DEFINE_CPU_SPECS(T) \ diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index eff3e4d92c..77c180873f 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -70,7 +70,7 @@ class PadOp : public OpKernel { "The first dimension of paddings must be the rank of inputs", in1.shape().DebugString(), " ", in0.shape().DebugString())); - T pad_value(0); + T pad_value = T(); if (context->num_inputs() == 3) { const Tensor& constant_values = context->input(2); OP_REQUIRES( @@ -186,6 +186,7 @@ class PadOp : public OpKernel { PadOp); TF_CALL_POD_TYPES(REGISTER_KERNEL); +TF_CALL_string(REGISTER_KERNEL); #undef REGISTER_KERNEL #if GOOGLE_CUDA diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py index 2c766e3640..aaeb3b199e 100644 --- a/tensorflow/python/kernel_tests/pad_op_test.py +++ b/tensorflow/python/kernel_tests/pad_op_test.py @@ -238,6 +238,29 @@ class PadOpTest(test.TestCase): x = np.random.rand(3, 2, 1, 1).astype(t) self._testAll(x + 1j * x, [[0, 0], [0, 0], [0, 0], [0, 0]], 0 + 0j) + def testString(self): + # Numpy does not support padding strings so we compare padding manually. 
+ x = ops.convert_to_tensor([["Hello", "World"], + ["Goodnight", "Moon"]]) + + constant = array_ops.pad(x, [[1, 0], [0, 1]], mode="CONSTANT", + constant_values="PAD") + reflect = array_ops.pad(x, [[1, 0], [0, 1]], mode="REFLECT", + constant_values="PAD") + symmetric = array_ops.pad(x, [[1, 0], [0, 1]], mode="SYMMETRIC", + constant_values="PAD") + with self.test_session(use_gpu=True): + self.assertAllEqual([[b"PAD", b"PAD", b"PAD"], + [b"Hello", b"World", b"PAD"], + [b"Goodnight", b"Moon", b"PAD"]], constant.eval()) + self.assertAllEqual([[b"Goodnight", b"Moon", b"Goodnight"], + [b"Hello", b"World", b"Hello"], + [b"Goodnight", b"Moon", b"Goodnight"]], + reflect.eval()) + self.assertAllEqual([[b"Hello", b"World", b"World"], + [b"Hello", b"World", b"World"], + [b"Goodnight", b"Moon", b"Moon"]], symmetric.eval()) + def testShapeFunctionEdgeCases(self): # Unknown paddings shape. inp = constant_op.constant(0.0, shape=[4, 4, 4, 4]) -- GitLab From c209eb4ceca82f6c910047f20c207e8f226e6dc9 Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 7 Mar 2018 12:30:47 -0800 Subject: [PATCH 542/884] TFE_Context gets its local devices from the source instead of a session. 
PiperOrigin-RevId: 188216178 --- tensorflow/c/c_api.cc | 10 +----- tensorflow/c/c_api_internal.h | 5 --- tensorflow/c/eager/c_api.cc | 55 ++++++++++++----------------- tensorflow/c/eager/c_api_internal.h | 18 +++++----- 4 files changed, 34 insertions(+), 54 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index e3a95a0577..8b9b3da21c 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -2462,15 +2462,7 @@ void TF_AddGradients(TF_Graph* g, TF_Output* y, int ny, TF_Output* x, int nx, // TF_Session functions ---------------------------------------------- TF_Session::TF_Session(tensorflow::Session* s, TF_Graph* g) - : session(s), - graph(g), - last_num_graph_nodes(0), - device_mgr(nullptr), - extend_before_run(true) { - if (s->LocalDeviceManager(&device_mgr).ok()) { - devices = device_mgr->ListDevices(); - } -} + : session(s), graph(g), last_num_graph_nodes(0), extend_before_run(true) {} TF_Session* TF_NewSession(TF_Graph* graph, const TF_SessionOptions* opt, TF_Status* status) { diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index 027e2d2b15..25233931de 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -129,11 +129,6 @@ struct TF_Session { tensorflow::mutex mu; int last_num_graph_nodes; - // NOTE(ashankar): Experimental fields to help keep the - // buffers of a TF_Tensor pinned in device memory. - const tensorflow::DeviceMgr* device_mgr; // Owned by session. - std::vector devices; // Owned by device_mgr. - // If true, TF_SessionRun and similar methods will call // ExtendSessionGraphHelper before running the graph (this is the default // public behavior). 
Can be set to false if the caller needs to call diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 4b619dc4e1..dfe2089d60 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -98,22 +98,15 @@ void TFE_ContextOptionsSetDevicePlacementPolicy( void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { - TF_Graph* graph = TF_NewGraph(); - TF_Session* session = TF_NewSession(graph, &opts->session_options, status); - if (status->status.ok()) { - if (session->device_mgr == nullptr || session->devices.empty()) { - status->status = tensorflow::errors::InvalidArgument( - "Provided TF_SessionOptions are not compatible with eager execution " - "(perhaps the TF_SessionOptions alluded to session execution in a " - "remote address space?)"); - } - } + std::vector devices; + status->status = tensorflow::DeviceFactory::AddDevices( + opts->session_options.options, "/job:localhost/replica:0/task:0", + &devices); if (!status->status.ok()) { - TF_DeleteGraph(graph); return nullptr; } - - return new TFE_Context(*opts, session); + return new TFE_Context(*opts, std::unique_ptr( + new tensorflow::DeviceMgr(devices))); } void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { @@ -122,15 +115,14 @@ void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); } - TF_Graph* graph = ctx->session->graph; - TF_DeleteSession(ctx->session, status); - TF_DeleteGraph(graph); ctx->rendezvous->Unref(); delete ctx; } TF_DeviceList* TFE_ContextListDevices(TFE_Context* ctx, TF_Status* status) { - return TF_SessionListDevices(ctx->session, status); + TF_DeviceList* list = new TF_DeviceList; + ctx->device_manager->ListDeviceAttributes(&list->response); + return list; } void TFE_ContextClearCaches(TFE_Context* ctx) { @@ -205,13 +197,13 @@ TFE_TensorHandle* 
TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status) { - tensorflow::Device* dstd = ctx->devices()[0]; + tensorflow::Device* dstd = ctx->devices[0]; if (device_name != nullptr && strlen(device_name) > 0) { - status->status = ctx->session->device_mgr->LookupDevice(device_name, &dstd); + status->status = ctx->device_manager->LookupDevice(device_name, &dstd); if (!status->status.ok()) return nullptr; } - tensorflow::Device* srcd = h->d == nullptr ? ctx->devices()[0] : h->d; + tensorflow::Device* srcd = h->d == nullptr ? ctx->devices[0] : h->d; bool is_same_device = (srcd == dstd) || (DeviceName(srcd) == DeviceName(dstd)); const bool dst_cpu = IsCPU(dstd); @@ -295,8 +287,7 @@ void TFE_DeleteOp(TFE_Op* op) { delete op; } void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { tensorflow::Device* d = nullptr; if (device_name != nullptr && strlen(device_name) > 0) { - status->status = - op->ctx->session->device_mgr->LookupDevice(device_name, &d); + status->status = op->ctx->device_manager->LookupDevice(device_name, &d); if (!status->status.ok()) return; } op->device = d; @@ -304,7 +295,7 @@ void TFE_OpSetDevice(TFE_Op* op, const char* device_name, TF_Status* status) { const char* TFE_OpGetDevice(TFE_Op* op, TF_Status* status) { tensorflow::Device* device = - (op->device == nullptr) ? op->ctx->devices()[0] : op->device; + (op->device == nullptr) ? 
op->ctx->devices[0] : op->device; return device->name().c_str(); } @@ -798,7 +789,7 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, TFE_Context* ctx, TF_Status* status) { tensorflow::DeviceSet ds; - for (tensorflow::Device* d : ctx->devices()) { + for (tensorflow::Device* d : ctx->devices) { ds.AddDevice(d); } tensorflow::DeviceTypeVector final_devices; @@ -812,7 +803,7 @@ tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, "Could not find valid device for node ", ndef.DebugString()); return nullptr; } - for (tensorflow::Device* d : ctx->devices()) { + for (tensorflow::Device* d : ctx->devices) { if (d->device_type() == final_devices[0].type_string()) { return d; } @@ -845,7 +836,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, if (op->inputs[i].dtype() == tensorflow::DT_RESOURCE && op->input_op_devices[i] != device) { tensorflow::Device* d = op->input_op_devices[i] == nullptr - ? ctx->devices()[0] + ? 
ctx->devices[0] : op->input_op_devices[i]; VLOG(1) << "Changing device of operation " << op->name << " to " << d->name() << " because input #" << i @@ -855,8 +846,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } } if (!ctx->soft_placement && device == nullptr) { - // TODO(ashankar): ASSUMPTION: ctx->devices()[0] is always CPU - device = ctx->devices()[0]; + // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU + device = ctx->devices[0]; } std::vector outputs(1); @@ -924,7 +915,7 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, std::vector copied_tensors; status->status = ValidateInputTypeAndPlacement( - ctx, ctx->devices()[0], device, op, kernel->kernel(), &copied_tensors); + ctx, ctx->devices[0], device, op, kernel->kernel(), &copied_tensors); output_memory_types = &kernel->kernel()->output_memory_types(); if (!status->status.ok()) { for (auto* t : copied_tensors) { @@ -963,13 +954,13 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, auto* step_stats = ctx->run_metadata.mutable_step_stats(); // Lazily initialize the RunMetadata with information about all devices if // this is the first call. - while (step_stats->dev_stats_size() < ctx->devices().size()) { + while (step_stats->dev_stats_size() < ctx->devices.size()) { step_stats->add_dev_stats(); } // Find the current device's index. 
int device_idx = 0; - for (int i = 0; i < ctx->devices().size(); ++i) { - if (ctx->devices()[i] == device) { + for (int i = 0; i < ctx->devices.size(); ++i) { + if (ctx->devices[i] == device) { device_idx = i; break; } diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 145e4c95cf..f701f3483e 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -47,14 +47,17 @@ TFE_ContextDevicePlacementPolicy PlacementPolicy( bool soft_placement, TFE_ContextDevicePlacementPolicy original_policy); struct TFE_Context { - explicit TFE_Context(const TFE_ContextOptions& opts, TF_Session* s) + explicit TFE_Context(const TFE_ContextOptions& opts, + std::unique_ptr device_mgr) : soft_placement( opts.session_options.options.config.allow_soft_placement()), policy(PlacementPolicy(soft_placement, opts.policy)), - session(s), - rendezvous(new tensorflow::IntraProcessRendezvous(s->device_mgr)), + device_manager(std::move(device_mgr)), + devices(device_manager->ListDevices()), + rendezvous( + new tensorflow::IntraProcessRendezvous(device_manager.get())), pflr(new tensorflow::ProcessFunctionLibraryRuntime( - session->device_mgr, opts.session_options.options.env, + device_manager.get(), opts.session_options.options.env, TF_GRAPH_DEF_VERSION, &func_lib_def, {})), log_device_placement( opts.session_options.options.config.log_device_placement()) {} @@ -68,8 +71,9 @@ struct TFE_Context { std::unordered_map thread_local_policies GUARDED_BY(policy_map_mu); - // TFE_Context is an extension of TF_Session. And TF_Session needs a TF_Graph. - TF_Session* const session; + std::unique_ptr device_manager; + // Devices owned by device_manager + const std::vector devices; tensorflow::Rendezvous* const rendezvous; tensorflow::mutex functions_mu; @@ -90,8 +94,6 @@ struct TFE_Context { return pflr->GetFLR(d->name()); } - const std::vector& devices() { return session->devices; } - // Whether we should compute RunMetadata. 
std::atomic should_store_metadata{false}; tensorflow::mutex metadata_mu; -- GitLab From 84898e72faa3db4d2fdf1d94518604055a887854 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 12:38:18 -0800 Subject: [PATCH 543/884] Internal Change PiperOrigin-RevId: 188217110 --- tensorflow/python/kernel_tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/python/kernel_tests/BUILD b/tensorflow/python/kernel_tests/BUILD index 23b79a24c0..5b0c38fa5d 100644 --- a/tensorflow/python/kernel_tests/BUILD +++ b/tensorflow/python/kernel_tests/BUILD @@ -1087,6 +1087,7 @@ cuda_py_test( tags = [ "no_windows", "noasan", + "notap", ], ) -- GitLab From d8809e9c94c959ad290d41a104ed0c65f434079a Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 7 Mar 2018 12:56:30 -0800 Subject: [PATCH 544/884] raise RuntimeError to catch exception --- tensorflow/contrib/tensorrt/python/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 120904b8b6..658c0c7eae 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -29,6 +29,6 @@ except: ' installation path is not in LD_LIBRARY_PATH, or because you do not have it' ' installed. If not installed, please go to' ' https://developer.nvidia.com/tensorrt to download and install' - ' TensorRT ****''') - print(no_trt_message) + ' TensorRT ****') + raise RuntimeError(no_trt_message) # pylint: enable=unused-import,line-too-long -- GitLab From eec325bee98723ae3dc07f2f9abdbc3516dab0f5 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 13:09:07 -0800 Subject: [PATCH 545/884] Further small support for quantized unfused LSTMs. 
PiperOrigin-RevId: 188221169 --- .../toco/graph_transformations/quantize.cc | 61 ++++++++++++++++--- tensorflow/contrib/lite/toco/tooling_util.cc | 5 +- 2 files changed, 56 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index 77316751bc..6c3e5fd492 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -222,7 +222,50 @@ ArrayDataType GetQuantizedDataType(const Array& array, default: LOG(FATAL) << "Unhandled final quantization type " << static_cast(array.final_data_type); - return default_type; + } +} + +void GetQuantizationParams(ArrayDataType data_type, + const ModelFlags& model_flags, const MinMax& minmax, + QuantizationParams* quantization_params) { + switch (data_type) { + case ArrayDataType::kInt8: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kUint8: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kInt16: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kUint16: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kInt32: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kUint32: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kInt64: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kUint64: + GetQuantizationParamsFromMinMax( + model_flags, minmax, quantization_params); + break; + case ArrayDataType::kFloat: + case ArrayDataType::kNone: + default: + LOG(FATAL) << "Unhandled final quantization type " + << 
static_cast(data_type); } } @@ -284,16 +327,16 @@ bool ChooseQuantizationForOperatorInput( if (op.type == OperatorType::kLstmCell) { if (input_index == LstmCellOperator::PREV_STATE_INPUT) { - GetQuantizationParamsFromMinMax( - model->flags, minmax, quantization_params); *quantized_data_type = ArrayDataType::kInt16; + GetQuantizationParams(*quantized_data_type, model->flags, minmax, + quantization_params); return true; } } - GetQuantizationParamsFromMinMax(model->flags, minmax, - quantization_params); *quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kUint8); + GetQuantizationParams(*quantized_data_type, model->flags, minmax, + quantization_params); transformation->AddMessageF( "For input array %s with min=%g" ", max=%g" @@ -416,15 +459,15 @@ bool ChooseQuantizationForOperatorOutput( if (op.type == OperatorType::kLstmCell) { if (output_index == LstmCellOperator::STATE_OUTPUT || output_index == LstmCellOperator::ACTIV_TEMP) { - GetQuantizationParamsFromMinMax( - model->flags, minmax, quantization_params); *quantized_data_type = ArrayDataType::kInt16; + GetQuantizationParams(*quantized_data_type, model->flags, minmax, + quantization_params); return true; } } - GetQuantizationParamsFromMinMax(model->flags, minmax, - quantization_params); *quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kUint8); + GetQuantizationParams(*quantized_data_type, model->flags, minmax, + quantization_params); transformation->AddMessageF( "For output array %s with min=%g, max=%g" ", chose to quantize as %s with zero_point=%d" diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index f92e10752d..48aad89b8c 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1809,7 +1809,10 @@ bool IsDiscardableArray(const Model& model, const string& array_name) { void CheckFinalDataTypesSatisfied(const Model& model) { for (const auto& array_entry : 
model.GetArrayMap()) { const auto& array = *array_entry.second; - if (array.final_data_type != ArrayDataType::kNone) { + // If the final data type is int16, the data type may be float, for example + // after dequantization. + if (array.final_data_type != ArrayDataType::kNone && + array.final_data_type != ArrayDataType::kInt16) { CHECK(array.final_data_type == array.data_type) << "Array \"" << array_entry.first << "\" has mis-matching actual and final data types (" -- GitLab From 39da23ba61084d392c89e5476060e058e6eeffce Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 7 Mar 2018 13:52:44 -0800 Subject: [PATCH 546/884] [tpu.datasets]: Improve the performance of the StreamingFilesDataset. In order to effectively pipeline the transfers, set num_parallel_calls=4. PiperOrigin-RevId: 188227890 --- tensorflow/contrib/tpu/python/tpu/datasets.py | 24 +++++++------------ .../contrib/tpu/python/tpu/datasets_test.py | 10 ++++---- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 71a3a92540..51b67bd6fa 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -92,8 +92,9 @@ def StreamingFilesDataset(files, amortize the remote function invocation overhead. Set to a very large number to increase throughput. Set to a very small number to reduce memory consumption. Set to False to skip batching. - sloppy: (Optional.) If `True`, read input data as fast as possible, without - maintaining a deterministic order. Defaults to `False`. + sloppy: (Optional.) If `False`, read input data while maintaining a + deterministic order. (This may have significant performance impacts.) + sloppy defaults to: True. 
Returns: A `tf.data.Dataset` with an infinite stream of elements generated by a parallel interleaving of the set of files matched (or generated) by `files` @@ -124,10 +125,10 @@ def StreamingFilesDataset(files, num_parallel_reads = num_parallel_reads or 8 if batch_transfer_size is None: - batch_transfer_size = 1024 + batch_transfer_size = 256 if sloppy is None: - sloppy = False + sloppy = True with ops.device('/job:%s' % file_reader_job): if isinstance(files, str): @@ -151,10 +152,7 @@ def StreamingFilesDataset(files, reader_fn, cycle_length=num_parallel_reads, sloppy=sloppy)) if batch_transfer_size: - # Note: we can safely call batch_and_drop_remainder because we have an - # infinite stream of TFRecords. - source_dataset = source_dataset.apply( - batching.batch_and_drop_remainder(batch_transfer_size)) + source_dataset = source_dataset.batch(batch_transfer_size) source_dataset = source_dataset.prefetch(1) @@ -175,14 +173,8 @@ def StreamingFilesDataset(files, target='/job:%s/replica:0/task:0/cpu:0' % file_reader_job) with ops.device('/job:%s' % worker_job): - # TODO(saeta,mrry): Switch to using _GeneratorDataset. 
- - # identity = lambda x: x - # dummy = constant_op.constant(0) - # output_dataset = dataset_ops._GeneratorDataset(dummy, identity, MapFn, - # identity) - - output_dataset = dataset_ops.Dataset.range(2).repeat().map(MapFn) + output_dataset = dataset_ops.Dataset.range(2).repeat().map( + MapFn, num_parallel_calls=4 if sloppy else None) output_dataset = output_dataset.prefetch(1) if batch_transfer_size: diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index 0173aac4f7..6e6a7ce809 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -32,7 +32,7 @@ from tensorflow.python.training import server_lib from tensorflow.python.util import compat _NUM_FILES = 10 -_NUM_ENTRIES = 200 +_NUM_ENTRIES = 20 class DatasetsTest(test.TestCase): @@ -73,7 +73,7 @@ class DatasetsTest(test.TestCase): get_next = iterator.get_next() retrieved_values = [] - for _ in range(2 * len(all_contents)): + for _ in range(4 * len(all_contents)): retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) self.assertEqual(set(all_contents), set(retrieved_values)) @@ -97,7 +97,7 @@ class DatasetsTest(test.TestCase): get_next = iterator.get_next() retrieved_values = [] - for _ in range(2 * len(all_contents)): + for _ in range(4 * len(all_contents)): retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) self.assertEqual(set(all_contents), set(retrieved_values)) @@ -124,7 +124,7 @@ class DatasetsTest(test.TestCase): get_next = iterator.get_next() retrieved_values = [] - for _ in range(2 * len(all_contents)): + for _ in range(4 * len(all_contents)): retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) self.assertEqual(set(all_contents), set(retrieved_values)) @@ -157,7 +157,7 @@ class DatasetsTest(test.TestCase): get_next = iterator.get_next() retrieved_values = [] - for _ in range(2 * len(all_contents)): + for _ in range(4 * 
len(all_contents)): retrieved_values.append(compat.as_bytes(self._sess.run(get_next))) self.assertEqual(set(all_contents), set(retrieved_values)) -- GitLab From 10fe6cae69f551408441fa275b2ff42da5d47647 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 13:56:05 -0800 Subject: [PATCH 547/884] Update graph rewrites for host compute ops PiperOrigin-RevId: 188228489 --- .../jit/encapsulate_subgraphs_pass.cc | 111 ++++++++-- .../jit/encapsulate_subgraphs_pass_test.cc | 208 ++++++++++++------ tensorflow/contrib/tpu/BUILD | 5 + .../contrib/tpu/ops/host_compute_ops.cc | 64 ++++++ 4 files changed, 302 insertions(+), 86 deletions(-) create mode 100644 tensorflow/contrib/tpu/ops/host_compute_ops.cc diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc index 9c372a0127..2d175c40f9 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass.cc @@ -381,12 +381,24 @@ class Encapsulator { Node* send_from_host = nullptr; }; + // Creates an outside_compilation subgraph for outside_compilation_id if + // none exists yet. Returns the (possible newly created) subgraph for + // outside_compilation_id. + OutsideCompilationSubgraph* LookupOrCreateOutsideCompilationSubgraph( + const string& outside_compilation_id); + // Builds a ParallelCheck op that compares the output of the original // subgraph with the encapsulated subgraph. Status BuildParallelCheckOp( const std::unordered_map& node_images, Graph* graph_out); + // Builds a placeholder node used to provide the key input to a RecvAtHost + // or SendFromHost node. This placeholder node will be removed by a later + // pass. + Status AddHostComputeKeyPlaceholder(OutsideCompilationSubgraph* oc_subgraph, + Graph* graph_out); + // Builds a _RecvAtHost node producing all the inputs of an // outside_compilation subgraph and stores it in oc_subgraph.recv_at_host. 
Status AddRecvAtHostNode(const string& subgraph_name, @@ -413,6 +425,10 @@ class Encapsulator { // NodeDef for the function call node. NodeDef call_node_def_; + // Placeholder node simulating the host compute key in the output graph. + // Not owned. + Node* host_compute_key_placeholder_ = nullptr; + // Function call node(s) in the output graph. Not owned. // If parallel_checking is enabled, 'call_node_inputs' is the function call // node to which inputs should be fed, and 'call_node_outputs' is the @@ -712,39 +728,44 @@ Status Encapsulator::Subgraph::RecordResult( return Status::OK(); } -void Encapsulator::Subgraph::RecordOutsideCompilationInputOrControl( - const string& outside_compilation_id, const Edge* edge) { +Encapsulator::Subgraph::OutsideCompilationSubgraph* +Encapsulator::Subgraph::LookupOrCreateOutsideCompilationSubgraph( + const string& outside_compilation_id) { auto iter = outside_compilation_subgraphs_ .emplace(outside_compilation_id, OutsideCompilationSubgraph()) .first; - OutsideCompilationSubgraph& outside_subgraph = iter->second; + OutsideCompilationSubgraph* outside_subgraph = &iter->second; + return outside_subgraph; +} + +void Encapsulator::Subgraph::RecordOutsideCompilationInputOrControl( + const string& outside_compilation_id, const Edge* edge) { + OutsideCompilationSubgraph* outside_subgraph = + LookupOrCreateOutsideCompilationSubgraph(outside_compilation_id); if (edge->IsControlEdge()) { - outside_subgraph.control_inputs.insert(edge->src()); + outside_subgraph->control_inputs.insert(edge->src()); } else { - int input_index = outside_subgraph.inputs.size(); - outside_subgraph.inputs.emplace(NodeSlot(edge->src(), edge->src_output()), - input_index); + int input_index = outside_subgraph->inputs.size(); + outside_subgraph->inputs.emplace(NodeSlot(edge->src(), edge->src_output()), + input_index); } } void Encapsulator::Subgraph::RecordOutsideCompilationOutputOrControl( const string& outside_compilation_id, const Edge* edge) { - auto subgraph_iter 
= - outside_compilation_subgraphs_ - .emplace(outside_compilation_id, OutsideCompilationSubgraph()) - .first; - OutsideCompilationSubgraph& outside_subgraph = subgraph_iter->second; + OutsideCompilationSubgraph* outside_subgraph = + LookupOrCreateOutsideCompilationSubgraph(outside_compilation_id); if (edge->IsControlEdge()) { - outside_subgraph.control_outputs.insert(edge->dst()); + outside_subgraph->control_outputs.insert(edge->dst()); } else { DataType dtype = edge->dst()->input_type(edge->dst_input()); auto output_iter = - outside_subgraph.outputs_by_src + outside_subgraph->outputs_by_src .emplace(NodeSlot(edge->src(), edge->src_output(), dtype), - outside_subgraph.outputs_by_src.size()) + outside_subgraph->outputs_by_src.size()) .first; int output_index = output_iter->second; - outside_subgraph.outputs_by_dst[NodeSlot(edge->dst(), edge->dst_input())] = + outside_subgraph->outputs_by_dst[NodeSlot(edge->dst(), edge->dst_input())] = output_index; } } @@ -1060,9 +1081,36 @@ Status Encapsulator::Subgraph::AddFunctionCallNode( return Status::OK(); } +Status Encapsulator::Subgraph::AddHostComputeKeyPlaceholder( + OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out) { + TensorShapeProto shape_proto; + TensorShape shape({2}); + shape.AsProto(&shape_proto); + GraphDefBuilder::Options options(graph_out, /*status=*/nullptr); + NodeDef key_def; + NodeDefBuilder builder( + strings::StrCat(call_node_def_.name(), "_key_placeholder"), + "Placeholder"); + builder.Attr("dtype", DT_STRING); + builder.Attr("shape", shape_proto); + builder.Attr("_host_compute_call_node", call_node_def_.name()); + Status s = builder.Finalize(&key_def); + if (!s.ok()) return s; + + host_compute_key_placeholder_ = graph_out->AddNode(key_def, &s); + if (!s.ok()) return s; + host_compute_key_placeholder_->set_assigned_device_name(device_); + + return Status::OK(); +} + Status Encapsulator::Subgraph::AddRecvAtHostNode( const string& subgraph_name, const string& oc_subgraph_name, 
OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out) { + if (host_compute_key_placeholder_ == nullptr) { + TF_RETURN_IF_ERROR(AddHostComputeKeyPlaceholder(oc_subgraph, graph_out)); + } + std::vector dtypes(oc_subgraph->inputs.size(), DT_INVALID); for (const auto& input : oc_subgraph->inputs) { @@ -1078,15 +1126,21 @@ Status Encapsulator::Subgraph::AddRecvAtHostNode( NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name, "_", oc_subgraph_name, "_recv"), kRecvAtHostOp); + builder.Device(device_); builder.Attr("Toutputs", dtypes); + // TODO(misard) For now we only support TPU device 0. + builder.Attr("device_ordinal", 0); builder.Attr("key", strings::StrCat("host_compute_channel_", subgraph_name, "_", oc_subgraph_name)); + builder.Input(host_compute_key_placeholder_->name(), 0, DT_STRING); Status s = builder.Finalize(&recv_def); if (!s.ok()) return s; oc_subgraph->recv_at_host = graph_out->AddNode(recv_def, &s); if (!s.ok()) return s; oc_subgraph->recv_at_host->set_assigned_device_name(device_); + graph_out->AddEdge(host_compute_key_placeholder_, 0, + oc_subgraph->recv_at_host, 0); // Add a control dependency forcing the RecvAtHost to run before the subgraph // completes. 
This has no effect on execution order but prevents the @@ -1101,6 +1155,10 @@ Status Encapsulator::Subgraph::AddSendFromHostNode( const std::unordered_map& node_images, const string& subgraph_name, const string& oc_subgraph_name, OutsideCompilationSubgraph* oc_subgraph, Graph* graph_out) { + if (host_compute_key_placeholder_ == nullptr) { + TF_RETURN_IF_ERROR(AddHostComputeKeyPlaceholder(oc_subgraph, graph_out)); + } + std::vector dtypes(oc_subgraph->outputs_by_src.size(), DT_INVALID); std::vector inputs( oc_subgraph->outputs_by_src.size()); @@ -1120,16 +1178,22 @@ Status Encapsulator::Subgraph::AddSendFromHostNode( NodeDefBuilder builder(strings::StrCat("outside_compilation_", subgraph_name, "_", oc_subgraph_name, "_send"), kSendFromHostOp); + builder.Device(device_); builder.Attr("Tinputs", dtypes); builder.Attr("key", strings::StrCat("host_compute_channel_", subgraph_name, "_", oc_subgraph_name)); + // TODO(misard) For now we only support TPU device 0. + builder.Attr("device_ordinal", 0); builder.Input(inputs); + builder.Input(host_compute_key_placeholder_->name(), 0, DT_STRING); Status s = builder.Finalize(&send_def); if (!s.ok()) return s; oc_subgraph->send_from_host = graph_out->AddNode(send_def, &s); if (!s.ok()) return s; oc_subgraph->send_from_host->set_assigned_device_name(device_); + graph_out->AddEdge(host_compute_key_placeholder_, 0, + oc_subgraph->send_from_host, inputs.size()); // Add a control dependency forcing the SendFromHost to run before the // subgraph completes. This has no effect on execution order but prevents the @@ -1709,7 +1773,9 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( std::unique_ptr graph_out(new Graph(graph_in.op_registry())); graph_out->set_versions(graph_in.versions()); - static_shape_out->resize(send_node->num_inputs()); + // The final input to the send node is the dynamic key, which we don't include + // in the static shapes. 
+ static_shape_out->resize(send_node->num_inputs() - 1); // We don't use the standard ReverseDFS because we want to cut off traversal // whenever we find an output with fully defined shape. @@ -1750,9 +1816,14 @@ Status Encapsulator::DoStaticShapeInferenceForOutsideCompilationSend( // continue. TensorShapeProto proto; context->ShapeHandleToProto(shape, &proto); - dummy_node_images[src_node] = AddDummyShapedNode( - src_node->output_type(src_port), proto, graph_out.get()); - if (n == send_node) { + if (dummy_node_images.find(src_node) == dummy_node_images.end()) { + dummy_node_images[src_node] = AddDummyShapedNode( + src_node->output_type(src_port), proto, graph_out.get()); + } + // The final input to the send node is the dynamic key, which we + // don't include in the static shapes. + if (n == send_node && + in_edge->dst_input() < static_shape_out->size()) { (*static_shape_out)[in_edge->dst_input()] = proto; } } else { diff --git a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc index aed9cae0f1..d7bea56a72 100644 --- a/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc +++ b/tensorflow/compiler/jit/encapsulate_subgraphs_pass_test.cc @@ -246,26 +246,32 @@ bool EqualFunctionDefLibrary(const FunctionDefLibrary& expected, << diff << "\nActual: " << actual.DebugString(); \ } while (false) -// TODO(misard): remove these fake registrations once there are real Ops to be -// compiled. +// These dummy Op registrations are here because the real Op registrations live +// in contrib and there can't be a dependence from this test to contrib. 
REGISTER_OP("_XlaHostCompute") .Input("inputs: Tinputs") .Output("outputs: Toutputs") .Attr("Tinputs: list(type) >= 0") .Attr("Toutputs: list(type) >= 0") .Attr("key: string") + .Attr("shape_inference_graph: string = ''") + .Attr("shapes: list(shape) >= 0") .SetShapeFn(::tensorflow::shape_inference::UnknownShape); REGISTER_OP("_XlaSendFromHost") - .Input("input: Tinputs") + .Input("inputs: Tinputs") + .Input("dynamic_key: string") .Attr("Tinputs: list(type) >= 0") .Attr("key: string") + .Attr("device_ordinal: int") .SetShapeFn(::tensorflow::shape_inference::UnknownShape); REGISTER_OP("_XlaRecvAtHost") - .Output("output: Toutputs") + .Input("dynamic_key: string") + .Output("outputs: Toutputs") .Attr("Toutputs: list(type) >= 0") .Attr("key: string") + .Attr("device_ordinal: int") .SetShapeFn(::tensorflow::shape_inference::UnknownShape); REGISTER_OP("InputTest") @@ -327,43 +333,71 @@ Node* InputShaped(const GraphDefBuilder::Options& opts) { return ops::SourceOp("InputTestShaped", opts); } -Node* KnownShape(const gtl::ArraySlice& shape, - const GraphDefBuilder::Options& opts) { +Node* KnownShapeBase(DataType dtype, const gtl::ArraySlice& shape, + const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; NodeBuilder node_builder(opts.GetNameForOp("Const"), "Const", opts.op_registry()); TensorProto value; - value.set_dtype(DT_FLOAT); + value.set_dtype(dtype); for (int dim : shape) { value.mutable_tensor_shape()->add_dim()->set_size(dim); } return opts.WithAttr("value", value) - .WithAttr("dtype", DT_FLOAT) + .WithAttr("dtype", dtype) + .FinalizeBuilder(&node_builder); +} + +Node* KnownShape(const gtl::ArraySlice& shape, + const GraphDefBuilder::Options& opts) { + return KnownShapeBase(DT_FLOAT, shape, opts); +} + +Node* KeyPlaceholderShape(const GraphDefBuilder::Options& opts) { + return KnownShapeBase(DT_STRING, {2}, opts); +} + +Node* KeyPlaceholder(const string& call_node, + const GraphDefBuilder::Options& opts) { + if (opts.HaveError()) return 
nullptr; + NodeBuilder node_builder(opts.GetNameForOp("Placeholder"), "Placeholder", + opts.op_registry()); + TensorShapeProto shape; + shape.add_dim()->set_size(2); + return opts.WithAttr("shape", shape) + .WithAttr("dtype", DT_STRING) + .WithAttr("_host_compute_call_node", call_node) .FinalizeBuilder(&node_builder); } -Node* RecvAtHost(const string& key, const gtl::ArraySlice& dtypes, +Node* RecvAtHost(ops::NodeOut key_input, const string& key, + const gtl::ArraySlice& dtypes, const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; NodeBuilder node_builder(opts.GetNameForOp("_XlaRecvAtHost"), "_XlaRecvAtHost", opts.op_registry()); + node_builder.Input(std::move(key_input)); return opts.WithAttr("Toutputs", dtypes) .WithAttr("key", key) + .WithAttr("device_ordinal", 0) .FinalizeBuilder(&node_builder); } -Node* SendFromHost(const string& key, const std::vector& inputs, +Node* SendFromHost(ops::NodeOut key_input, const string& key, + const std::vector& inputs, const GraphDefBuilder::Options& opts) { if (opts.HaveError()) return nullptr; NodeBuilder node_builder(opts.GetNameForOp("_XlaSendFromHost"), "_XlaSendFromHost", opts.op_registry()); node_builder.Input(inputs); + node_builder.Input(std::move(key_input)); std::vector dtypes; for (const auto& node : inputs) { dtypes.push_back(node.dt); } - return opts.WithAttr("key", key) - .WithAttr("Tinputs", dtypes) + return opts.WithAttr("Tinputs", dtypes) + .WithAttr("key", key) + .WithAttr("device_ordinal", 0) .FinalizeBuilder(&node_builder); } @@ -809,13 +843,16 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { string shape_string_expected; { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); Node* recv = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {DT_FLOAT, DT_FLOAT}, 
shape.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), shape.opts().WithName("E")); - SendFromHost("host_compute_channel_F1_O1", {e}, - shape.opts().WithName("outside_compilation_F1_O1_send")); + SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); GraphDef shape_graph; TF_EXPECT_OK(shape.ToGraphDef(&shape_graph)); EXPECT_TRUE(shape_graph.SerializeToString(&shape_string_expected)); @@ -855,12 +892,16 @@ TEST(EncapsulateSubgraphsTest, OneFunctionOneOutside) { node_builder.Input(a).Input(b); Node* call = b2.opts().FinalizeBuilder(&node_builder); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {DT_FLOAT, DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), b2.opts().WithName("E").WithControlInputs({recv, b})); - Node* send = SendFromHost("host_compute_channel_F1_O1", {e}, + Node* send = SendFromHost(ops::NodeOut(key_constant, 0), + "host_compute_channel_F1_O1", {e}, b2.opts() .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); @@ -921,13 +962,16 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { string shape_string_expected_1; { GraphDefBuilder shape1(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape1.opts().WithName("KnownShape/_0")); Node* recv = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {DT_FLOAT, DT_FLOAT}, shape1.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), shape1.opts().WithName("E")); - 
SendFromHost("host_compute_channel_F1_O1", {e}, - shape1.opts().WithName("outside_compilation_F1_O1_send")); + SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {e}, shape1.opts().WithName("outside_compilation_F1_O1_send")); GraphDef shape1_graph; TF_EXPECT_OK(shape1.ToGraphDef(&shape1_graph)); EXPECT_TRUE(shape1_graph.SerializeToString(&shape_string_expected_1)); @@ -936,17 +980,21 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { string shape_string_expected_2; { GraphDefBuilder shape2(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape2.opts().WithName("KnownShape/_0")); Node* recv1 = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {DT_FLOAT, DT_FLOAT}, shape2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), shape2.opts().WithName("E")); Node* recv2 = - RecvAtHost("host_compute_channel_F1_O2", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", + {DT_FLOAT, DT_FLOAT}, shape2.opts().WithName("outside_compilation_F1_O2_recv")); Node* h = Binary(ops::NodeOut(recv2, 0), e, shape2.opts().WithName("H")); - SendFromHost("host_compute_channel_F1_O2", {h}, - shape2.opts().WithName("outside_compilation_F1_O2_send")); + SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", + {h}, shape2.opts().WithName("outside_compilation_F1_O2_send")); GraphDef shape2_graph; TF_EXPECT_OK(shape2.ToGraphDef(&shape2_graph)); EXPECT_TRUE(shape2_graph.SerializeToString(&shape_string_expected_2)); @@ -997,25 +1045,30 @@ TEST(EncapsulateSubgraphsTest, OneFunctionTwoOutside) { node_builder.Input(a).Input(b); Node* call = b2.opts().FinalizeBuilder(&node_builder); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv1 = - RecvAtHost("host_compute_channel_F1_O1", 
{DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {DT_FLOAT, DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), b2.opts().WithName("E").WithControlInputs({recv1, b})); - Node* send1 = SendFromHost("host_compute_channel_F1_O1", {e}, + Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), + "host_compute_channel_F1_O1", {e}, b2.opts() .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); Node* recv2 = - RecvAtHost("host_compute_channel_F1_O2", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", + {DT_FLOAT, DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O2_recv")); Node* g = Binary(e, ops::NodeOut(recv2, 1), b2.opts().WithName("G").WithControlInputs({recv2, e})); Node* h = Binary(ops::NodeOut(recv2, 0), e, b2.opts().WithName("H")); - Node* send2 = - SendFromHost("host_compute_channel_F1_O2", {h}, - b2.opts().WithName("outside_compilation_F1_O2_send")); + Node* send2 = SendFromHost( + ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O2", {h}, + b2.opts().WithName("outside_compilation_F1_O2_send")); Node* s = NoOp(b2.opts() .WithName("F1_sequencer") @@ -1073,13 +1126,16 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { string shape_string_expected; { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); + Node* key_constant = + KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); Node* recv = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {DT_FLOAT, DT_FLOAT}, shape.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Binary(ops::NodeOut(recv, 0), ops::NodeOut(recv, 1), shape.opts().WithName("E")); - SendFromHost("host_compute_channel_F1_O1", {e}, - shape.opts().WithName("outside_compilation_F1_O1_send")); + SendFromHost(ops::NodeOut(key_constant, 
0), "host_compute_channel_F1_O1", + {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); GraphDef shape_graph; TF_EXPECT_OK(shape.ToGraphDef(&shape_graph)); EXPECT_TRUE(shape_graph.SerializeToString(&shape_string_expected)); @@ -1138,12 +1194,16 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { Node* a = InputShaped(b2.opts().WithName("A")); Node* b = InputShaped(b2.opts().WithName("B")); + Node* key_constant1 = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv1 = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT, DT_FLOAT}, + RecvAtHost(ops::NodeOut(key_constant1, 0), "host_compute_channel_F1_O1", + {DT_FLOAT, DT_FLOAT}, b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Binary(ops::NodeOut(recv1, 0), ops::NodeOut(recv1, 1), b2.opts().WithName("E").WithControlInputs({recv1, b})); - Node* send1 = SendFromHost("host_compute_channel_F1_O1", {e}, + Node* send1 = SendFromHost(ops::NodeOut(key_constant1, 0), + "host_compute_channel_F1_O1", {e}, b2.opts() .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); @@ -1153,14 +1213,16 @@ TEST(EncapsulateSubgraphsTest, TwoFunctionsTwoOutside) { Node* s1 = NoOp( b2.opts().WithName("F1_sequencer").WithControlInputs({recv1, send1})); - Node* recv2 = - RecvAtHost("host_compute_channel_F2_O1", {DT_FLOAT}, - b2.opts().WithName("outside_compilation_F2_O1_recv")); + Node* key_constant2 = + KeyPlaceholder("F2", b2.opts().WithName("F2_key_placeholder")); + Node* recv2 = RecvAtHost( + ops::NodeOut(key_constant2, 0), "host_compute_channel_F2_O1", + {DT_FLOAT}, b2.opts().WithName("outside_compilation_F2_O1_recv")); Node* h = Binary(ops::NodeOut(call1, 1), recv2, b2.opts().WithName("H").WithControlInput(s1)); - Node* send2 = - SendFromHost("host_compute_channel_F2_O1", {h}, - b2.opts().WithName("outside_compilation_F2_O1_send")); + Node* send2 = SendFromHost( + ops::NodeOut(key_constant2, 0), "host_compute_channel_F2_O1", {h}, + 
b2.opts().WithName("outside_compilation_F2_O1_send")); NodeBuilder node_builder2("F2", "F2", lib_def.get()); node_builder2.Input(e).Input(call1); @@ -1237,9 +1299,11 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoInputs) { Node* b = Input(b2.opts().WithName("B")); Node* e = Unary(a, b2.opts().WithName("E")); - Node* send1 = - SendFromHost("host_compute_channel_F1_O1", {e}, - b2.opts().WithName("outside_compilation_F1_O1_send")); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* send1 = SendFromHost( + ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, + b2.opts().WithName("outside_compilation_F1_O1_send")); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); @@ -1313,13 +1377,15 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlInput) { Node* a = InputShaped(b2.opts().WithName("A")); Node* b = Input(b2.opts().WithName("B")); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); Node* recv1 = - RecvAtHost("host_compute_channel_F1_O1", {}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); + RecvAtHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {}, b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Unary(a, b2.opts().WithName("E").WithControlInput(recv1)); - Node* send1 = - SendFromHost("host_compute_channel_F1_O1", {e}, - b2.opts().WithName("outside_compilation_F1_O1_send")); + Node* send1 = SendFromHost( + ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {e}, + b2.opts().WithName("outside_compilation_F1_O1_send")); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); Node* call1 = b2.opts().FinalizeBuilder(&node_builder1); @@ -1385,9 +1451,11 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationNoOutputs) { Node* a = Input(b2.opts().WithName("A")); Node* b = 
Input(b2.opts().WithName("B")); - Node* recv1 = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv1 = RecvAtHost( + ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, + b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Unary(recv1, b2.opts().WithName("E")); NodeBuilder node_builder1("F1", "F1", lib_def.get()); node_builder1.Input(a).Input(b); @@ -1458,11 +1526,14 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationControlOutput) { Node* a = Input(b2.opts().WithName("A")); Node* b = Input(b2.opts().WithName("B")); - Node* recv1 = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv1 = RecvAtHost( + ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, + b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = Unary(recv1, b2.opts().WithName("E")); - Node* send1 = SendFromHost("host_compute_channel_F1_O1", {}, + Node* send1 = SendFromHost(ops::NodeOut(key_constant, 0), + "host_compute_channel_F1_O1", {}, b2.opts() .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); @@ -1572,13 +1643,15 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { string shape_string_expected; { GraphDefBuilder shape(GraphDefBuilder::kFailImmediately); - Node* known = KnownShape({2}, shape.opts().WithName("KnownShape/_0")); - Node* recv = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT}, - shape.opts().WithName("outside_compilation_F1_O1_recv")); + Node* key_constant = + KeyPlaceholderShape(shape.opts().WithName("KnownShape/_0")); + Node* known = KnownShape({2}, shape.opts().WithName("KnownShape/_1")); + Node* recv = RecvAtHost( + ops::NodeOut(key_constant, 0), 
"host_compute_channel_F1_O1", {DT_FLOAT}, + shape.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = BinaryUnknownShape(known, recv, shape.opts().WithName("E")); - SendFromHost("host_compute_channel_F1_O1", {e}, - shape.opts().WithName("outside_compilation_F1_O1_send")); + SendFromHost(ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", + {e}, shape.opts().WithName("outside_compilation_F1_O1_send")); GraphDef shape_graph; TF_EXPECT_OK(shape.ToGraphDef(&shape_graph)); EXPECT_TRUE(shape_graph.SerializeToString(&shape_string_expected)); @@ -1619,13 +1692,16 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) { Node* call = b2.opts().WithControlInputs({c}).FinalizeBuilder(&node_builder); - Node* recv = - RecvAtHost("host_compute_channel_F1_O1", {DT_FLOAT}, - b2.opts().WithName("outside_compilation_F1_O1_recv")); + Node* key_constant = + KeyPlaceholder("F1", b2.opts().WithName("F1_key_placeholder")); + Node* recv = RecvAtHost( + ops::NodeOut(key_constant, 0), "host_compute_channel_F1_O1", {DT_FLOAT}, + b2.opts().WithName("outside_compilation_F1_O1_recv")); Node* e = BinaryUnknownShape( c, ops::NodeOut(recv, 0), b2.opts().WithName("E").WithControlInputs({recv, b})); - Node* send = SendFromHost("host_compute_channel_F1_O1", {e}, + Node* send = SendFromHost(ops::NodeOut(key_constant, 0), + "host_compute_channel_F1_O1", {e}, b2.opts() .WithName("outside_compilation_F1_O1_send") .WithControlInput(e)); diff --git a/tensorflow/contrib/tpu/BUILD b/tensorflow/contrib/tpu/BUILD index 095b4821f1..ed930e44e8 100644 --- a/tensorflow/contrib/tpu/BUILD +++ b/tensorflow/contrib/tpu/BUILD @@ -24,6 +24,7 @@ cc_library( name = "all_ops", deps = [ ":cross_replica_ops_op_lib", + ":host_compute_ops_op_lib", ":infeed_ops_op_lib", ":outfeed_ops_op_lib", ":replication_ops_op_lib", @@ -69,6 +70,7 @@ py_library( tf_gen_op_libs( op_lib_names = [ "cross_replica_ops", + "host_compute_ops", "infeed_ops", "outfeed_ops", "replication_ops", @@ -78,6 +80,7 @@ 
tf_gen_op_libs( deps = [ "//tensorflow/contrib/tpu/proto:tpu_embedding_config_proto_cc", "//tensorflow/core:lib_proto_parsing", + "//tensorflow/core:protos_all_cc", ], ) @@ -85,6 +88,7 @@ tf_custom_op_library( name = "python/ops/_tpu_ops.so", srcs = [ "ops/cross_replica_ops.cc", + "ops/host_compute_ops.cc", "ops/infeed_ops.cc", "ops/outfeed_ops.cc", "ops/replication_ops.cc", @@ -101,6 +105,7 @@ tf_gen_op_wrapper_py( name = "tpu_ops", deps = [ ":cross_replica_ops_op_lib", + ":host_compute_ops_op_lib", ":infeed_ops_op_lib", ":outfeed_ops_op_lib", ":replication_ops_op_lib", diff --git a/tensorflow/contrib/tpu/ops/host_compute_ops.cc b/tensorflow/contrib/tpu/ops/host_compute_ops.cc new file mode 100644 index 0000000000..48aeb81ac1 --- /dev/null +++ b/tensorflow/contrib/tpu/ops/host_compute_ops.cc @@ -0,0 +1,64 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/common_shape_fns.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" + +namespace tensorflow { + +REGISTER_OP("_XlaSendFromHost") + .Input("inputs: Tinputs") + .Input("dynamic_key: string") + .Attr("Tinputs: list(type) >= 0") + .Attr("key: string") + .Attr("device_ordinal: int") + .SetIsStateful() + .SetShapeFn(::tensorflow::shape_inference::NoOutputs) + .Doc(R"doc( +A placeholder op for multiple values that will be sent from TensorFlow to a +running XLA computation. + +inputs: A list of tensors that will be sent to the XLA computation. +dynamic_key: The key sent at runtime by the compile node to identify which +execution the transfer corresponds to. +Tinputs: The element types of each element in `inputs`. +key: A key that is unique in the computation and associates the send with the consumer in +the XLA computation. +device_ordinal: The device to use. +)doc"); + +REGISTER_OP("_XlaRecvAtHost") + .Input("dynamic_key: string") + .Output("outputs: Toutputs") + .Attr("Toutputs: list(type) >= 0") + .Attr("key: string") + .Attr("device_ordinal: int") + .SetIsStateful() + .SetShapeFn(::tensorflow::shape_inference::UnknownShape) + .Doc(R"doc( +A placeholder op for multiple values that will be sent to TensorFlow from a +running XLA computation. + +dynamic_key: The key sent at runtime by the compile node to identify which +execution the transfer corresponds to. +outputs: A list of tensors that will be received from the XLA computation. +Toutputs: The element types of each element in `outputs`. +key: A key that is unique in the computation and associates the send with the consumer in +the XLA computation. +device_ordinal: The device to use. 
+)doc"); + +} // namespace tensorflow -- GitLab From 2941052ddcc140becd43cc96da6664028217182d Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Wed, 7 Mar 2018 14:02:15 -0800 Subject: [PATCH 548/884] [tf.data] Optimize `Dataset.filter()` when the predicate returns one of its args. This change avoids the overhead of function dispatch (~10--15us) when the filter predicate simply returns one of its arguments directly. It also adds a benchmark to track the performance of this optimization. The checkpointing code required minor modifications to enable functions to be instantiated in the `FilterDatasetOp::Compute()` method when an iterator is being restored. PiperOrigin-RevId: 188229570 --- tensorflow/core/kernels/data/BUILD | 1 + .../core/kernels/data/filter_dataset_op.cc | 111 ++++++++++++++---- tensorflow/core/kernels/data/iterator_ops.cc | 24 ++-- .../kernel_tests/filter_dataset_op_test.py | 55 +++++++++ 4 files changed, 163 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/kernels/data/BUILD b/tensorflow/core/kernels/data/BUILD index 253399c1e4..484d4f88d6 100644 --- a/tensorflow/core/kernels/data/BUILD +++ b/tensorflow/core/kernels/data/BUILD @@ -162,6 +162,7 @@ tf_kernel_library( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:dataset_ops_op_lib", "//tensorflow/core:framework", + "//tensorflow/core:lib", "//tensorflow/core:lib_internal", ], ) diff --git a/tensorflow/core/kernels/data/filter_dataset_op.cc b/tensorflow/core/kernels/data/filter_dataset_op.cc index d16b5b7d41..186b1e1c6c 100644 --- a/tensorflow/core/kernels/data/filter_dataset_op.cc +++ b/tensorflow/core/kernels/data/filter_dataset_op.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/kernels/data/captured_function.h" #include "tensorflow/core/kernels/data/dataset.h" +#include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/random/random.h" namespace tensorflow { @@ -44,21 +45,45 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { other_arguments.push_back(t); } + FunctionLibraryRuntime::Handle pred_handle; + OP_REQUIRES_OK(ctx, + ctx->function_library()->Instantiate( + func_.name(), AttrSlice(&func_.attr()), &pred_handle)); + auto cleanup = gtl::MakeCleanup([ctx, pred_handle]() { + OP_REQUIRES_OK(ctx, ctx->function_library()->ReleaseHandle(pred_handle)); + }); + + const FunctionBody* pred_body = + ctx->function_library()->GetFunctionBody(pred_handle); + OP_REQUIRES(ctx, pred_body->ret_nodes.size() == 1, + errors::InvalidArgument( + "predicate function must have a single return value.")); + Node* ret_node = pred_body->ret_nodes[0]; + Node* ret_input_node; + OP_REQUIRES_OK(ctx, ret_node->input_node(0, &ret_input_node)); std::unique_ptr captured_func; OP_REQUIRES_OK(ctx, CapturedFunction::Create( func_, std::move(other_arguments), &captured_func)); - *output = new Dataset(ctx, input, func_, std::move(captured_func)); + if (ret_input_node->def().op() == "_Arg") { + int32 index = -1; + OP_REQUIRES_OK(ctx, GetNodeAttr(ret_input_node->def(), "index", &index)); + *output = new FilterTensorDataset(ctx, input, func_, + std::move(captured_func), index); + } else { + *output = new FilterFunctionDataset(ctx, input, func_, + std::move(captured_func)); + } } private: const int graph_def_version_; - class Dataset : public GraphDatasetBase { + class FilterDatasetBase : public GraphDatasetBase { public: - Dataset(OpKernelContext* ctx, const DatasetBase* input, - const NameAttrList& func, - std::unique_ptr captured_func) + FilterDatasetBase(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, + std::unique_ptr captured_func) : 
GraphDatasetBase(ctx), input_(input), func_(func), @@ -66,7 +91,7 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { input_->Ref(); } - ~Dataset() override { input_->Unref(); } + ~FilterDatasetBase() override { input_->Unref(); } std::unique_ptr MakeIterator( const string& prefix) const override { @@ -112,11 +137,15 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } + virtual Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const = 0; + private: - class Iterator : public DatasetIterator { + class Iterator : public DatasetIterator { public: explicit Iterator(const Params& params) - : DatasetIterator(params), + : DatasetIterator(params), input_impl_(params.dataset->input_->MakeIterator(params.prefix)) {} Status GetNextInternal(IteratorContext* ctx, @@ -143,18 +172,8 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { return Status::OK(); } - // TODO(mrry): Avoid blocking a threadpool thread. We will need to - // stack-rip the iterators and use async kernels. - std::vector result; - TF_RETURN_IF_ERROR(dataset()->captured_func_->RunWithBorrowedArgs( - ctx, *out_tensors, &result)); - - if (result.size() != 1 || result[0].dtype() != DT_BOOL || - result[0].NumElements() != 1) { - return errors::InvalidArgument( - "Filter predicate `f` must return a scalar bool."); - } - matched = result[0].scalar()(); + TF_RETURN_IF_ERROR( + dataset()->EvaluatePredicate(ctx, *out_tensors, &matched)); if (!matched) { // Clear the output tensor list since it didn't match. 
out_tensors->clear(); @@ -192,9 +211,61 @@ class FilterDatasetOp : public UnaryDatasetOpKernel { const DatasetBase* const input_; const NameAttrList func_; + + protected: const std::unique_ptr captured_func_; }; + class FilterFunctionDataset : public FilterDatasetBase { + public: + using FilterDatasetBase::FilterDatasetBase; + + protected: + Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const override { + // TODO(mrry): Avoid blocking a threadpool thread. We will need to + // stack-rip the iterators and use async kernels. + std::vector result; + TF_RETURN_IF_ERROR( + captured_func_->RunWithBorrowedArgs(ctx, element, &result)); + + if (result.size() != 1 || result[0].dtype() != DT_BOOL || + result[0].NumElements() != 1) { + return errors::InvalidArgument( + "Filter predicate `f` must return a scalar bool."); + } + *out_matched = result[0].scalar()(); + return Status::OK(); + } + }; + + class FilterTensorDataset : public FilterDatasetBase { + public: + FilterTensorDataset(OpKernelContext* ctx, const DatasetBase* input, + const NameAttrList& func, + std::unique_ptr captured_func, + int32 index) + : FilterDatasetBase(ctx, input, func, std::move(captured_func)), + index_(index) {} + + protected: + Status EvaluatePredicate(IteratorContext* ctx, + const std::vector& element, + bool* out_matched) const override { + const Tensor& predicate = element[index_]; + if (predicate.dtype() != DT_BOOL || predicate.NumElements() != 1) { + return errors::InvalidArgument( + "Filter predicate `f` must return a scalar bool."); + } + *out_matched = predicate.scalar()(); + return Status::OK(); + } + + private: + const int32 index_; + }; + private: NameAttrList func_; }; diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index d7d4ad5cf7..3fb96679da 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -141,14 +141,20 @@ class 
IteratorResource : public ResourceBase { std::vector outputs; GraphRunner graph_runner(ctx->env()); - // Build a new FLR that knows about the functions in the graph. - std::shared_ptr flib_def( - new FunctionLibraryDefinition( - *ctx->function_library()->GetFunctionLibraryDefinition())); + // Build a new FLR that knows about the functions in the graph, and use + // it for all operations on the restored iterator. + // NOTE(mrry): We clone the existing FLR and use it in the GraphRunner + // because some of the OpKernels in the graph might call functions that are + // only defined in the loaded GraphDef. + FunctionLibraryRuntime* lib; + std::unique_ptr device_mgr(nullptr); + std::unique_ptr flib_def(nullptr); + std::unique_ptr pflr(nullptr); + TF_RETURN_IF_ERROR(ctx->function_library()->Clone(&flib_def, &pflr, &lib)); TF_RETURN_IF_ERROR(flib_def->AddLibrary(graph_def.library())); TF_RETURN_IF_ERROR( - graph_runner.Run(&graph, lib_, {}, {output_node}, &outputs)); + graph_runner.Run(&graph, lib, {}, {output_node}, &outputs)); TF_RETURN_IF_ERROR(GetDatasetFromVariantTensor(outputs[0], &dataset)); TF_RETURN_IF_ERROR(set_iterator(dataset->MakeIterator("Iterator"))); @@ -158,9 +164,8 @@ class IteratorResource : public ResourceBase { IteratorContext::Params params; params.env = ctx->env(); params.runner = *(ctx->runner()); - params.function_library = flib_def; - params.lib = lib_; - DeviceBase* device = lib_->device(); + params.lib = lib; + DeviceBase* device = lib->device(); params.allocator_getter = [device](AllocatorAttributes attrs) { return device->GetAllocator(attrs); }; @@ -168,7 +173,10 @@ class IteratorResource : public ResourceBase { TF_RETURN_IF_ERROR(captured_iterator->Restore(&iter_ctx, reader)); mutex_lock l(mu_); + device_mgr_ = std::move(device_mgr); lib_def_ = std::move(flib_def); + pflr_ = std::move(pflr); + lib_ = lib; return Status::OK(); } else { return errors::FailedPrecondition( diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py 
b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py index b9258b720e..2c71723167 100644 --- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py @@ -17,11 +17,15 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import time + import numpy as np +from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops from tensorflow.python.framework import dtypes from tensorflow.python.framework import errors +from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import array_ops from tensorflow.python.ops import functional_ops @@ -156,6 +160,57 @@ class FilterDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testReturnComponent(self): + iterator = ( + dataset_ops.Dataset.zip( + (dataset_ops.Dataset.range(10), + dataset_ops.Dataset.from_tensors(True).repeat(None))) + .filter(lambda x, y: y).make_initializable_iterator()) + init_op = iterator.initializer + get_next = iterator.get_next() + + with self.test_session() as sess: + sess.run(init_op) + for i in range(10): + self.assertEqual((i, True), sess.run(get_next)) + with self.assertRaises(errors.OutOfRangeError): + sess.run(get_next) + + +class FilterDatasetBenchmark(test.Benchmark): + + def _benchmark(self, predicate, name): + with ops.Graph().as_default(): + dataset = ( + dataset_ops.Dataset.from_tensors(True).repeat(None).filter(predicate)) + iterator = dataset.make_one_shot_iterator() + next_element = iterator.get_next() + + with session.Session() as sess: + for _ in range(5): + sess.run(next_element.op) + deltas = [] + for _ in range(100): + start = time.time() + for _ in range(100): + sess.run(next_element.op) + end = time.time() + deltas.append(end - start) + + median_wall_time = np.median(deltas) / 100 + 
print("Filter dataset using %s. Median wall time: %f" % + (name, median_wall_time)) + self.report_benchmark( + iters=100, + wall_time=median_wall_time, + name="benchmark_filter_dataset_%s" % name) + + def benchmarkSimpleFunction(self): + self._benchmark(array_ops.identity, "simple_function") + + def benchmarkReturnComponentOptimization(self): + self._benchmark(lambda x: x, "return_component") + if __name__ == "__main__": test.main() -- GitLab From 22529af3169181c83eb2e0bb48660b8f8858bb14 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 7 Mar 2018 14:02:38 -0800 Subject: [PATCH 549/884] [TF:XLA] Bump open source llvm revision to r326829 PiperOrigin-RevId: 188229669 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 8350993cc8..38acb1a6b2 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/fce2d38e3979d1b01238c6b7df1b2c56da8569f1.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/fce2d38e3979d1b01238c6b7df1b2c56da8569f1.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/195a164675af86f390f9816e53291013d1b551d7.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/195a164675af86f390f9816e53291013d1b551d7.tar.gz", ], - sha256 = "9931112227f09b8533911174fa03f563e822d3e02d73df506fa97caa7a31363a", - strip_prefix = "llvm-fce2d38e3979d1b01238c6b7df1b2c56da8569f1", + sha256 = "57a8333f8e6095d49f1e597ca18e591aba8a89d417f4b58bceffc5fe1ffcc02b", + strip_prefix = "llvm-195a164675af86f390f9816e53291013d1b551d7", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 1e293597745c7c2e07106deb2b6fe537e6c3a7ad Mon Sep 17 00:00:00 2001 From: Austin Anderson Date: Wed, 7 Mar 2018 14:30:00 -0800 Subject: [PATCH 550/884] Create 
mobile testing rules for TF Lite known-portable targets This CL tags all known-already-portable TF Lite tests as portable, and (from those tests) tags those known as not portable. Adding tflite_portable_test_suite() to the bottom of a package marks all previous cc_tests as "intended to be portable". I've included all tests that I was able to naively make buildable on Android with my previous change that created a custom logging.h library. Most tests are buildable on Android already, but there is something in the common dependencies for the kernel tests that is not compatible with iOS. Outside of Google, this change does nothing except tag tests that are known to not be buildable on certain platforms. PiperOrigin-RevId: 188234489 --- tensorflow/contrib/lite/kernels/BUILD | 167 +++++++++++++++++++++++++- tensorflow/contrib/lite/schema/BUILD | 3 + tensorflow/contrib/lite/testing/BUILD | 15 ++- tensorflow/contrib/lite/tools/BUILD | 6 + 4 files changed, 186 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 8e9d427770..b1a29701e0 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -5,15 +5,17 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 load("//tensorflow/contrib/lite:build_def.bzl", "tflite_copts") -load( - "//tensorflow:tensorflow.bzl", - "tf_cc_test", -) +load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite") +load("//tensorflow:tensorflow.bzl", "tf_cc_test") tf_cc_test( name = "optional_tensor_test", size = "small", srcs = ["optional_tensor_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -90,6 +92,10 @@ tf_cc_test( name = "kernel_util_test", size = "small", srcs = ["kernel_util_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ 
":kernel_util", "//tensorflow/contrib/lite/testing:util", @@ -189,6 +195,10 @@ tf_cc_test( name = "activations_test", size = "small", srcs = ["activations_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -201,6 +211,10 @@ tf_cc_test( name = "add_test", size = "small", srcs = ["add_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -213,6 +227,10 @@ tf_cc_test( name = "transpose_test", size = "small", srcs = ["transpose_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -227,6 +245,10 @@ tf_cc_test( name = "space_to_batch_nd_test", size = "small", srcs = ["space_to_batch_nd_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -239,6 +261,10 @@ tf_cc_test( name = "batch_to_space_nd_test", size = "small", srcs = ["batch_to_space_nd_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -263,6 +289,10 @@ tf_cc_test( name = "concatenation_test", size = "small", srcs = ["concatenation_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -275,6 +305,10 @@ tf_cc_test( name = "conv_test", size = "small", srcs = ["conv_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -288,6 +322,10 @@ tf_cc_test( name = "depthwise_conv_test", size = "small", srcs = ["depthwise_conv_test.cc"], + tags = [ + 
"tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -313,6 +351,10 @@ tf_cc_test( name = "basic_rnn_test", size = "small", srcs = ["basic_rnn_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -325,6 +367,10 @@ tf_cc_test( name = "bidirectional_sequence_lstm_test", size = "small", srcs = ["bidirectional_sequence_lstm_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -337,6 +383,10 @@ tf_cc_test( name = "unidirectional_sequence_lstm_test", size = "small", srcs = ["unidirectional_sequence_lstm_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -349,6 +399,9 @@ tf_cc_test( name = "bidirectional_sequence_rnn_test", size = "small", srcs = ["bidirectional_sequence_rnn_test.cc"], + tags = [ + "tflite_not_portable", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -361,6 +414,10 @@ tf_cc_test( name = "unidirectional_sequence_rnn_test", size = "small", srcs = ["unidirectional_sequence_rnn_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -373,6 +430,10 @@ tf_cc_test( name = "l2norm_test", size = "small", srcs = ["l2norm_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -385,6 +446,10 @@ tf_cc_test( name = "exp_test", size = "small", srcs = ["exp_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", 
"//tensorflow/contrib/lite:framework", @@ -397,6 +462,10 @@ tf_cc_test( name = "mean_test", size = "small", srcs = ["mean_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -409,6 +478,10 @@ tf_cc_test( name = "mul_test", size = "small", srcs = ["mul_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -421,6 +494,10 @@ tf_cc_test( name = "pad_test", size = "small", srcs = ["pad_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -433,6 +510,10 @@ tf_cc_test( name = "reshape_test", size = "small", srcs = ["reshape_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -445,6 +526,10 @@ tf_cc_test( name = "gather_test", size = "small", srcs = ["gather_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:builtin_op_data", @@ -458,6 +543,10 @@ tf_cc_test( name = "topk_v2_test", size = "small", srcs = ["topk_v2_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:builtin_op_data", @@ -471,6 +560,10 @@ tf_cc_test( name = "resize_bilinear_test", size = "small", srcs = ["resize_bilinear_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -483,6 +576,10 @@ tf_cc_test( name = "svdf_test", size = "small", srcs = ["svdf_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ 
":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -495,6 +592,10 @@ tf_cc_test( name = "embedding_lookup_test", size = "small", srcs = ["embedding_lookup_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -507,6 +608,10 @@ tf_cc_test( name = "embedding_lookup_sparse_test", size = "small", srcs = ["embedding_lookup_sparse_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -519,6 +624,10 @@ tf_cc_test( name = "fully_connected_test", size = "small", srcs = ["fully_connected_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -531,6 +640,10 @@ tf_cc_test( name = "local_response_norm_test", size = "small", srcs = ["local_response_norm_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -543,6 +656,10 @@ tf_cc_test( name = "pooling_test", size = "small", srcs = ["pooling_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -555,6 +672,10 @@ tf_cc_test( name = "softmax_test", size = "small", srcs = ["softmax_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -568,6 +689,10 @@ tf_cc_test( name = "log_softmax_test", size = "small", srcs = ["log_softmax_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -581,6 +706,10 @@ tf_cc_test( name = "lsh_projection_test", size = "small", srcs = 
["lsh_projection_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -593,6 +722,10 @@ tf_cc_test( name = "hashtable_lookup_test", size = "small", srcs = ["hashtable_lookup_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -606,6 +739,10 @@ tf_cc_test( name = "lstm_test", size = "small", srcs = ["lstm_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -618,6 +755,10 @@ tf_cc_test( name = "skip_gram_test", size = "small", srcs = ["skip_gram_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -631,6 +772,10 @@ tf_cc_test( name = "space_to_depth_test", size = "small", srcs = ["space_to_depth_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -643,6 +788,10 @@ tf_cc_test( name = "split_test", size = "small", srcs = ["split_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -655,6 +804,10 @@ tf_cc_test( name = "squeeze_test", size = "small", srcs = ["squeeze_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -667,6 +820,10 @@ tf_cc_test( name = "strided_slice_test", size = "small", srcs = ["strided_slice_test.cc"], + tags = [ + "tflite_not_portable_ios_arm64", + "tflite_not_portable_ios_x86_64", + ], deps = [ ":builtin_ops", "//tensorflow/contrib/lite:framework", @@ -686,3 +843,5 @@ 
filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/schema/BUILD b/tensorflow/contrib/lite/schema/BUILD index 54167ddd9a..da65ec659c 100644 --- a/tensorflow/contrib/lite/schema/BUILD +++ b/tensorflow/contrib/lite/schema/BUILD @@ -5,6 +5,7 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite") py_binary( name = "upgrade_schema", @@ -80,3 +81,5 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + +tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/testing/BUILD b/tensorflow/contrib/lite/testing/BUILD index 83b9e21427..631601656d 100644 --- a/tensorflow/contrib/lite/testing/BUILD +++ b/tensorflow/contrib/lite/testing/BUILD @@ -8,6 +8,7 @@ load( "//tensorflow/contrib/lite:build_def.bzl", "gen_zipped_test_files", ) +load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite") load( "//tensorflow:tensorflow.bzl", "tf_cc_test", @@ -236,6 +237,9 @@ cc_test( size = "small", srcs = ["tf_driver_test.cc"], data = ["//tensorflow/contrib/lite:testdata/multi_add.pb"], + tags = [ + "tflite_not_portable", + ], deps = [ ":tf_driver", "@com_google_googletest//:gtest_main", @@ -259,6 +263,9 @@ cc_test( name = "generate_testspec_test", size = "small", srcs = ["generate_testspec_test.cc"], + tags = [ + "tflite_not_portable", + ], deps = [ ":generate_testspec", "@com_google_googletest//:gtest_main", @@ -320,6 +327,7 @@ tf_cc_test( tags = [ "no_cuda_on_cpu_tap", "no_oss", + "tflite_not_portable", ], deps = [ ":tflite_diff_flags", @@ -339,7 +347,10 @@ tf_cc_test( ], data = [":optest"], shard_count = 20, - tags = ["no_oss"], + tags = [ + "no_oss", + "tflite_not_portable", + ], deps = [ ":parse_testdata_lib", ":tflite_driver", @@ -373,3 +384,5 @@ filegroup( ), visibility = ["//tensorflow:__subpackages__"], ) + 
+tflite_portable_test_suite() diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 999ccf2ebc..54df724f79 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -4,6 +4,7 @@ package(default_visibility = [ licenses(["notice"]) # Apache 2.0 +load("//tensorflow/contrib/lite:special_rules.bzl", "tflite_portable_test_suite") load("//tensorflow:tensorflow.bzl", "tf_cc_binary") py_binary( @@ -111,6 +112,9 @@ cc_test( name = "verifier_test", size = "small", srcs = ["verifier_test.cc"], + tags = [ + "tflite_not_portable", + ], deps = [ ":mutable_op_resolver", ":verifier", @@ -124,3 +128,5 @@ cc_test( "@flatbuffers", ], ) + +tflite_portable_test_suite() -- GitLab From d622a144a5667943f11974c2fe8afc6501290837 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Wed, 7 Mar 2018 14:32:28 -0800 Subject: [PATCH 551/884] Don't populate linear_ with a logical index We use linear_ to directly emit array element access in some cases so populating it with the logical linear index seems incorrect. 
PiperOrigin-RevId: 188234902 --- tensorflow/compiler/xla/service/llvm_ir/ir_array.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index 4221a52fbe..f7821adc74 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -90,7 +90,6 @@ IrArray::Index::Index(tensorflow::gtl::ArraySlice multidim, dims_(shape.dimensions().begin(), shape.dimensions().end()) { CHECK_EQ(shape.dimensions_size(), multidim.size()); CHECK(LayoutUtil::HasLayout(shape)); - linear_ = Linearize(AsInt64Slice(shape.dimensions()), ir_builder); } IrArray::IrArray(llvm::Value* base_ptr, const Shape& shape) -- GitLab From c30a57ece6698365daf7a8a8a77c1da26a1707a4 Mon Sep 17 00:00:00 2001 From: Jonathan Hseu Date: Wed, 7 Mar 2018 14:33:07 -0800 Subject: [PATCH 552/884] Fix GCS uploads occasionally failing when retrying. GCS returns 400, invalid argument because it thinks the body is not empty. cURL, by default, sets "Transfer-Encoding: Chunked", which causes the server to ignore "Content-Length: 0": https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Transfer-Encoding The server considers the HTTP request incomplete and may non-deterministically fail. 
PiperOrigin-RevId: 188235030 --- tensorflow/core/platform/cloud/curl_http_request.cc | 8 ++++---- tensorflow/core/platform/cloud/curl_http_request_test.cc | 6 ++++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index 20d9285a70..c0d6e49af9 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -282,8 +282,8 @@ void CurlHttpRequest::SetPutEmptyBody() { method_ = RequestMethod::kPut; TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting put request"); - curl_headers_ = - libcurl_->curl_slist_append(curl_headers_, "Content-Length: 0"); + AddHeader("Content-Length", "0"); + AddHeader("Transfer-Encoding", "identity"); TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, reinterpret_cast(this)), @@ -323,8 +323,8 @@ void CurlHttpRequest::SetPostEmptyBody() { TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), "Setting POST request"); - curl_headers_ = - libcurl_->curl_slist_append(curl_headers_, "Content-Length: 0"); + AddHeader("Content-Length", "0"); + AddHeader("Transfer-Encoding", "identity"); TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, reinterpret_cast(this)), diff --git a/tensorflow/core/platform/cloud/curl_http_request_test.cc b/tensorflow/core/platform/cloud/curl_http_request_test.cc index 0f0ccba050..522b717568 100644 --- a/tensorflow/core/platform/cloud/curl_http_request_test.cc +++ b/tensorflow/core/platform/cloud/curl_http_request_test.cc @@ -476,9 +476,10 @@ TEST(CurlHttpRequestTest, PutRequest_WithoutBody) { EXPECT_TRUE(libcurl.is_initialized_); EXPECT_EQ("http://www.testuri.com", libcurl.url_); EXPECT_EQ("", libcurl.custom_request_); - EXPECT_EQ(2, libcurl.headers_->size()); + EXPECT_EQ(3, libcurl.headers_->size()); 
EXPECT_EQ("Authorization: Bearer fake-bearer", (*libcurl.headers_)[0]); EXPECT_EQ("Content-Length: 0", (*libcurl.headers_)[1]); + EXPECT_EQ("Transfer-Encoding: identity", (*libcurl.headers_)[2]); EXPECT_TRUE(libcurl.is_put_); EXPECT_EQ("", libcurl.posted_content_); } @@ -517,9 +518,10 @@ TEST(CurlHttpRequestTest, PostRequest_WithoutBody) { EXPECT_TRUE(libcurl.is_initialized_); EXPECT_EQ("http://www.testuri.com", libcurl.url_); EXPECT_EQ("", libcurl.custom_request_); - EXPECT_EQ(2, libcurl.headers_->size()); + EXPECT_EQ(3, libcurl.headers_->size()); EXPECT_EQ("Authorization: Bearer fake-bearer", (*libcurl.headers_)[0]); EXPECT_EQ("Content-Length: 0", (*libcurl.headers_)[1]); + EXPECT_EQ("Transfer-Encoding: identity", (*libcurl.headers_)[2]); EXPECT_TRUE(libcurl.is_post_); EXPECT_EQ("", libcurl.posted_content_); } -- GitLab From 3152a96ff23de6790d0faf83f823e9c8dbc51c53 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 14:42:36 -0800 Subject: [PATCH 553/884] Remove unused parameter from GetQuantizationParamsFromMinMax. 
PiperOrigin-RevId: 188236536 --- .../make_initial_dequantize_operator.cc | 4 +-- .../toco/graph_transformations/quantize.cc | 31 ++++++++----------- .../resolve_constant_fake_quant.cc | 4 +-- tensorflow/contrib/lite/toco/tooling_util.h | 3 +- 4 files changed, 18 insertions(+), 24 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc index d83603e9a2..935da9f966 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/make_initial_dequantize_operator.cc @@ -85,8 +85,8 @@ bool AddDequantizeOperatorToInput(const string& input_name, const Operator* op, auto& dequantized_input_minmax = dequantized_input_array.GetOrCreateMinMax(); dequantized_input_minmax = input_minmax; auto& input_qparams = input_array.GetOrCreateQuantizationParams(); - GetQuantizationParamsFromMinMax( - model->flags, input_minmax, &input_qparams); + GetQuantizationParamsFromMinMax(input_minmax, + &input_qparams); transformation->AddMessageF( "Created %s" diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index 6c3e5fd492..4fd26e4325 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -225,41 +225,40 @@ ArrayDataType GetQuantizedDataType(const Array& array, } } -void GetQuantizationParams(ArrayDataType data_type, - const ModelFlags& model_flags, const MinMax& minmax, +void GetQuantizationParams(ArrayDataType data_type, const MinMax& minmax, QuantizationParams* quantization_params) { switch (data_type) { case ArrayDataType::kInt8: GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kUint8: 
GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kInt16: GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kUint16: GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kInt32: GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kUint32: GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kInt64: GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kUint64: GetQuantizationParamsFromMinMax( - model_flags, minmax, quantization_params); + minmax, quantization_params); break; case ArrayDataType::kFloat: case ArrayDataType::kNone: @@ -328,15 +327,13 @@ bool ChooseQuantizationForOperatorInput( if (op.type == OperatorType::kLstmCell) { if (input_index == LstmCellOperator::PREV_STATE_INPUT) { *quantized_data_type = ArrayDataType::kInt16; - GetQuantizationParams(*quantized_data_type, model->flags, minmax, - quantization_params); + GetQuantizationParams(*quantized_data_type, minmax, quantization_params); return true; } } *quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kUint8); - GetQuantizationParams(*quantized_data_type, model->flags, minmax, - quantization_params); + GetQuantizationParams(*quantized_data_type, minmax, quantization_params); transformation->AddMessageF( "For input array %s with min=%g" ", max=%g" @@ -460,14 +457,12 @@ bool ChooseQuantizationForOperatorOutput( if (output_index == LstmCellOperator::STATE_OUTPUT || output_index == LstmCellOperator::ACTIV_TEMP) { *quantized_data_type = ArrayDataType::kInt16; - 
GetQuantizationParams(*quantized_data_type, model->flags, minmax, - quantization_params); + GetQuantizationParams(*quantized_data_type, minmax, quantization_params); return true; } } *quantized_data_type = GetQuantizedDataType(array, ArrayDataType::kUint8); - GetQuantizationParams(*quantized_data_type, model->flags, minmax, - quantization_params); + GetQuantizationParams(*quantized_data_type, minmax, quantization_params); transformation->AddMessageF( "For output array %s with min=%g, max=%g" ", chose to quantize as %s with zero_point=%d" diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc index 944901ece7..625d90205a 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_fake_quant.cc @@ -55,8 +55,8 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) { const int size = input_buffer.data.size(); output_buffer.data.resize(size); QuantizationParams qparams; - GetQuantizationParamsFromMinMax( - model->flags, *fakequant_op->minmax, &qparams); + GetQuantizationParamsFromMinMax(*fakequant_op->minmax, + &qparams); for (int i = 0; i < size; i++) { const double src_val = input_buffer.data[i]; const double unclamped_quantized_val = diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index 01917b29de..d5796486c5 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -146,8 +146,7 @@ void FixNoOrphanedArray(Model* model); void ResolveModelFlags(const ModelFlags& model_flags, Model* model); template -void GetQuantizationParamsFromMinMax(const ModelFlags& model_flags, - const MinMax& minmax, +void GetQuantizationParamsFromMinMax(const MinMax& minmax, QuantizationParams* quantization_params) { using Integer = DataType; 
const Integer qmin = std::numeric_limits::min(); -- GitLab From e3e68038271d989d7c4220a0ae17a058594188de Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 7 Mar 2018 14:47:20 -0800 Subject: [PATCH 554/884] moved try/catch to contrib/tensorrt/__init__.py to guard whole TRT; raise original error --- tensorflow/contrib/tensorrt/__init__.py | 12 +++++++++++- tensorflow/contrib/tensorrt/python/__init__.py | 15 +++------------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index fd551d70b4..faedaf29d8 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -19,5 +19,15 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,wildcard-import -from tensorflow.contrib.tensorrt.python import * +try: + from tensorflow.contrib.tensorrt.python import * +except Exception as e: + no_trt_message = ( + '**** Failed to initialize TensorRT. This is either because the TensorRT' + ' installation path is not in LD_LIBRARY_PATH, or because you do not have it' + ' installed. 
If not installed, please go to' + ' https://developer.nvidia.com/tensorrt to download and install' + ' TensorRT ****') + print(no_trt_message) + raise e # pylint: enable=unused-import,wildcard-import diff --git a/tensorflow/contrib/tensorrt/python/__init__.py b/tensorflow/contrib/tensorrt/python/__init__.py index 658c0c7eae..0b2321b5fc 100644 --- a/tensorflow/contrib/tensorrt/python/__init__.py +++ b/tensorflow/contrib/tensorrt/python/__init__.py @@ -19,16 +19,7 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long -try: - from tensorflow.contrib.tensorrt.python.ops import trt_engine_op - from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph - from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph -except: - no_trt_message = ( - '**** Failed to initialize TensorRT. This is either because the TensorRT' - ' installation path is not in LD_LIBRARY_PATH, or because you do not have it' - ' installed. If not installed, please go to' - ' https://developer.nvidia.com/tensorrt to download and install' - ' TensorRT ****') - raise RuntimeError(no_trt_message) +from tensorflow.contrib.tensorrt.python.ops import trt_engine_op +from tensorflow.contrib.tensorrt.python.trt_convert import calib_graph_to_infer_graph +from tensorflow.contrib.tensorrt.python.trt_convert import create_inference_graph # pylint: enable=unused-import,line-too-long -- GitLab From cc143645b2ec251b234ee17a52a1cff2456ce9d3 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 7 Mar 2018 14:47:28 -0800 Subject: [PATCH 555/884] Resolve more conflicts. 
--- tensorflow/python/keras/_impl/keras/estimator_test.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/estimator_test.py b/tensorflow/python/keras/_impl/keras/estimator_test.py index 32d1fd21a8..e076dc25b1 100644 --- a/tensorflow/python/keras/_impl/keras/estimator_test.py +++ b/tensorflow/python/keras/_impl/keras/estimator_test.py @@ -496,7 +496,6 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): keras_model=keras_model, model_dir=tempfile.mkdtemp(dir=self._base_dir)) -<<<<<<< HEAD def test_gpu_config(self): keras_model, (_, _), (_, _), _, _ = get_resource_for_simple_model() keras_model.compile( @@ -513,8 +512,6 @@ class TestKerasEstimator(test_util.TensorFlowTestCase): ._config.gpu_options.per_process_gpu_memory_fraction, gpu_options.per_process_gpu_memory_fraction) -======= ->>>>>>> google/r1.6 if __name__ == '__main__': test.main() -- GitLab From 6dd28ea1d1f2057fb7297f3d8e06635b1c00e977 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Wed, 7 Mar 2018 14:53:17 -0800 Subject: [PATCH 556/884] added pylint flag for build --- tensorflow/contrib/tensorrt/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index faedaf29d8..0d1c90ea64 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -20,7 +20,7 @@ from __future__ import print_function # pylint: disable=unused-import,wildcard-import try: - from tensorflow.contrib.tensorrt.python import * + from tensorflow.contrib.tensorrt.python import * # pylint: disable=import-not-at-top except Exception as e: no_trt_message = ( '**** Failed to initialize TensorRT. This is either because the TensorRT' -- GitLab From fffb7b59f5695b36af4e03c1dd8eadff3fd0024c Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Wed, 7 Mar 2018 14:53:49 -0800 Subject: [PATCH 557/884] py_func attaches full stack traces when an error is raised. 
This should help debugging errors that occur inside a py_func. PiperOrigin-RevId: 188238495 --- .../python/kernel_tests/py_func_test.py | 18 +++++- tensorflow/python/lib/core/py_util.cc | 59 ++++++++++++++++++- tensorflow/python/ops/script_ops.py | 3 + 3 files changed, 75 insertions(+), 5 deletions(-) diff --git a/tensorflow/python/kernel_tests/py_func_test.py b/tensorflow/python/kernel_tests/py_func_test.py index 63203a0043..36142801d6 100644 --- a/tensorflow/python/kernel_tests/py_func_test.py +++ b/tensorflow/python/kernel_tests/py_func_test.py @@ -19,6 +19,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import re + import numpy as np from six.moves import queue from six.moves import xrange # pylint: disable=redefined-builtin @@ -356,12 +358,22 @@ class PyFuncTest(test.TestCase): def _testExceptionHandling(self, py_exp, tf_exp, eager=False): - def raise_exception(): + def inner_exception(): raise py_exp("blah") # pylint: disable=not-callable + def raise_exception(): + inner_exception() + + expected_regexp = r": blah.*" # Error at the top + expected_regexp += r"in raise_exception.*" # Stacktrace outer + expected_regexp += r"in inner_exception.*" # Stacktrace inner + expected_regexp += r": blah" # Stacktrace of raise + def expected_error_check(exception): + return re.search(expected_regexp, str(exception), re.DOTALL) + if eager: if context.executing_eagerly(): - with self.assertRaisesRegexp(tf_exp, "blah"): + with self.assertRaisesWithPredicateMatch(tf_exp, expected_error_check): f = script_ops.eager_py_func(raise_exception, [], []) return else: @@ -370,7 +382,7 @@ class PyFuncTest(test.TestCase): f = script_ops.py_func(raise_exception, [], []) with self.test_session(): - with self.assertRaisesRegexp(tf_exp, "blah"): + with self.assertRaisesWithPredicateMatch(tf_exp, expected_error_check): self.evaluate(f) def testExceptionHandling(self): diff --git a/tensorflow/python/lib/core/py_util.cc 
b/tensorflow/python/lib/core/py_util.cc index 2635694e23..00cbf0c532 100644 --- a/tensorflow/python/lib/core/py_util.cc +++ b/tensorflow/python/lib/core/py_util.cc @@ -41,6 +41,55 @@ const char* ClassName(PyObject* py) { } // end namespace +// Appends the output of traceback.format_exception to *out; on failure returns early without appending. +void TryAppendTraceback(PyObject* ptype, PyObject* pvalue, PyObject* ptraceback, + string* out) { + // The "traceback" module is assumed to be imported already by script_ops.py. + PyObject* tb_module = PyImport_AddModule("traceback"); + + if (!tb_module) { + return; + } + + PyObject* format_exception = + PyObject_GetAttrString(tb_module, "format_exception"); + + if (!format_exception) { + return; + } + + if (!PyCallable_Check(format_exception)) { + Py_DECREF(format_exception); + return; + } + + PyObject* ret_val = PyObject_CallFunctionObjArgs(format_exception, ptype, + pvalue, ptraceback, nullptr); + Py_DECREF(format_exception); + + if (!ret_val) { + return; + } + + if (!PyList_Check(ret_val)) { + Py_DECREF(ret_val); + return; + } + + Py_ssize_t n = PyList_GET_SIZE(ret_val); + for (Py_ssize_t i = 0; i < n; ++i) { + PyObject* v = PyList_GET_ITEM(ret_val, i); +#if PY_MAJOR_VERSION < 3 + strings::StrAppend(out, PyString_AS_STRING(v), "\n"); +#else + strings::StrAppend(out, PyUnicode_AsUTF8(v), "\n"); +#endif + } + + // All entries have been appended; release the list returned by format_exception.
+ Py_DECREF(ret_val); +} + string PyExceptionFetch() { CHECK(PyErr_Occurred()) << "Must only call PyExceptionFetch after an exception."; @@ -52,14 +101,20 @@ string PyExceptionFetch() { string err = ClassName(ptype); if (pvalue) { PyObject* str = PyObject_Str(pvalue); + if (str) { #if PY_MAJOR_VERSION < 3 - strings::StrAppend(&err, ": ", PyString_AS_STRING(str)); + strings::StrAppend(&err, ": ", PyString_AS_STRING(str), "\n"); #else - strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str)); + strings::StrAppend(&err, ": ", PyUnicode_AsUTF8(str), "\n"); #endif Py_DECREF(str); + } else { + strings::StrAppend(&err, "(unknown error message)\n"); } + + TryAppendTraceback(ptype, pvalue, ptraceback, &err); + Py_DECREF(pvalue); } Py_DECREF(ptype); diff --git a/tensorflow/python/ops/script_ops.py b/tensorflow/python/ops/script_ops.py index 529eebe769..fb59bbba5e 100644 --- a/tensorflow/python/ops/script_ops.py +++ b/tensorflow/python/ops/script_ops.py @@ -25,6 +25,9 @@ from __future__ import print_function import threading +# Used by py_util.cc to get tracebacks. +import traceback # pylint: disable=unused-import + import numpy as np import six -- GitLab From 5e7b3556619a4a6450b588d8b2f173729ffc9203 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Wed, 7 Mar 2018 15:00:43 -0800 Subject: [PATCH 558/884] Migrate AIS chain into `tfp.mcmc` and modularize its interface to take a TransitionKernel. 
PiperOrigin-RevId: 188239559 --- .../bayesflow/python/kernel_tests/hmc_test.py | 132 ----------- .../contrib/bayesflow/python/ops/hmc.py | 1 - .../contrib/bayesflow/python/ops/hmc_impl.py | 217 ------------------ 3 files changed, 350 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py index 819095a060..dabadfc7b6 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/hmc_test.py @@ -462,138 +462,6 @@ class HMCTest(test.TestCase): def testKernelLeavesTargetInvariant3(self): self._kernel_leaves_target_invariant_wrapper(3) - def _ais_gets_correct_log_normalizer(self, init, independent_chain_ndims, - sess, feed_dict=None): - counter = collections.Counter() - - def proposal_log_prob(x): - counter["proposal_calls"] += 1 - event_dims = math_ops.range(independent_chain_ndims, array_ops.rank(x)) - return -0.5 * math_ops.reduce_sum(x**2. + np.log(2 * np.pi), - axis=event_dims) - - def target_log_prob(x): - counter["target_calls"] += 1 - event_dims = math_ops.range(independent_chain_ndims, array_ops.rank(x)) - return self._log_gamma_log_prob(x, event_dims) - - if feed_dict is None: - feed_dict = {} - - num_steps = 200 - - _, ais_weights, _ = hmc.sample_annealed_importance_chain( - proposal_log_prob_fn=proposal_log_prob, - num_steps=num_steps, - target_log_prob_fn=target_log_prob, - step_size=0.5, - current_state=init, - num_leapfrog_steps=2, - seed=45) - - # We have three calls because the calculation of `ais_weights` entails - # another call to the `convex_combined_log_prob_fn`. We could refactor - # things to avoid this, if needed (eg, b/72994218). 
- self.assertAllEqual(dict(target_calls=3, proposal_calls=3), counter) - - event_shape = array_ops.shape(init)[independent_chain_ndims:] - event_size = math_ops.reduce_prod(event_shape) - - log_true_normalizer = ( - -self._shape_param * math_ops.log(self._rate_param) - + math_ops.lgamma(self._shape_param)) - log_true_normalizer *= math_ops.cast(event_size, log_true_normalizer.dtype) - - log_estimated_normalizer = (math_ops.reduce_logsumexp(ais_weights) - - np.log(num_steps)) - - ratio_estimate_true = math_ops.exp(ais_weights - log_true_normalizer) - ais_weights_size = array_ops.size(ais_weights) - standard_error = math_ops.sqrt( - _reduce_variance(ratio_estimate_true) - / math_ops.cast(ais_weights_size, ratio_estimate_true.dtype)) - - [ - ratio_estimate_true_, - log_true_normalizer_, - log_estimated_normalizer_, - standard_error_, - ais_weights_size_, - event_size_, - ] = sess.run([ - ratio_estimate_true, - log_true_normalizer, - log_estimated_normalizer, - standard_error, - ais_weights_size, - event_size, - ], feed_dict) - - logging_ops.vlog(1, " log_true_normalizer: {}\n" - " log_estimated_normalizer: {}\n" - " ais_weights_size: {}\n" - " event_size: {}\n".format( - log_true_normalizer_, - log_estimated_normalizer_, - ais_weights_size_, - event_size_)) - self.assertNear(ratio_estimate_true_.mean(), 1., 4. 
* standard_error_) - - def _ais_gets_correct_log_normalizer_wrapper(self, independent_chain_ndims): - """Tests that AIS yields reasonable estimates of normalizers.""" - with self.test_session(graph=ops.Graph()) as sess: - x_ph = array_ops.placeholder(np.float32, name="x_ph") - initial_draws = np.random.normal(size=[30, 2, 1]) - self._ais_gets_correct_log_normalizer( - x_ph, - independent_chain_ndims, - sess, - feed_dict={x_ph: initial_draws}) - - def testAIS1(self): - self._ais_gets_correct_log_normalizer_wrapper(1) - - def testAIS2(self): - self._ais_gets_correct_log_normalizer_wrapper(2) - - def testAIS3(self): - self._ais_gets_correct_log_normalizer_wrapper(3) - - def testSampleAIChainSeedReproducibleWorksCorrectly(self): - with self.test_session(graph=ops.Graph()) as sess: - independent_chain_ndims = 1 - x = np.random.rand(4, 3, 2) - - def proposal_log_prob(x): - event_dims = math_ops.range(independent_chain_ndims, array_ops.rank(x)) - return -0.5 * math_ops.reduce_sum(x**2. + np.log(2 * np.pi), - axis=event_dims) - - def target_log_prob(x): - event_dims = math_ops.range(independent_chain_ndims, array_ops.rank(x)) - return self._log_gamma_log_prob(x, event_dims) - - ais_kwargs = dict( - proposal_log_prob_fn=proposal_log_prob, - num_steps=200, - target_log_prob_fn=target_log_prob, - step_size=0.5, - current_state=x, - num_leapfrog_steps=2, - seed=53) - - _, ais_weights0, _ = hmc.sample_annealed_importance_chain( - **ais_kwargs) - - _, ais_weights1, _ = hmc.sample_annealed_importance_chain( - **ais_kwargs) - - [ais_weights0_, ais_weights1_] = sess.run([ - ais_weights0, ais_weights1]) - - self.assertAllClose(ais_weights0_, ais_weights1_, - atol=1e-5, rtol=1e-5) - def testNanRejection(self): """Tests that an update that yields NaN potentials gets rejected. 
diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc.py b/tensorflow/contrib/bayesflow/python/ops/hmc.py index 7fd5652c5c..c8a5a195d3 100644 --- a/tensorflow/contrib/bayesflow/python/ops/hmc.py +++ b/tensorflow/contrib/bayesflow/python/ops/hmc.py @@ -24,7 +24,6 @@ from tensorflow.python.util import all_util _allowed_symbols = [ "sample_chain", - "sample_annealed_importance_chain", "kernel", ] diff --git a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py index 82693c2b7b..66afcc7497 100644 --- a/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/hmc_impl.py @@ -15,7 +15,6 @@ """Hamiltonian Monte Carlo, a gradient-based MCMC algorithm. @@sample_chain -@@sample_annealed_importance_chain @@kernel """ @@ -38,7 +37,6 @@ from tensorflow.python.ops.distributions import util as distributions_util __all__ = [ "sample_chain", - "sample_annealed_importance_chain", "kernel", ] @@ -330,221 +328,6 @@ def sample_chain( return functional_ops.scan(**scan_kwargs) -def sample_annealed_importance_chain( - proposal_log_prob_fn, - num_steps, - target_log_prob_fn, - current_state, - step_size, - num_leapfrog_steps, - seed=None, - name=None): - """Runs annealed importance sampling (AIS) to estimate normalizing constants. - - This function uses Hamiltonian Monte Carlo to sample from a series of - distributions that slowly interpolates between an initial "proposal" - distribution: - - `exp(proposal_log_prob_fn(x) - proposal_log_normalizer)` - - and the target distribution: - - `exp(target_log_prob_fn(x) - target_log_normalizer)`, - - accumulating importance weights along the way. The product of these - importance weights gives an unbiased estimate of the ratio of the - normalizing constants of the initial distribution and the target - distribution: - - `E[exp(ais_weights)] = exp(target_log_normalizer - proposal_log_normalizer)`. 
- - Note: `proposal_log_prob_fn` and `target_log_prob_fn` are called exactly three - times (although this may be reduced to two times, in the future). - - #### Examples: - - ##### Estimate the normalizing constant of a log-gamma distribution. - - ```python - tfd = tf.contrib.distributions - - # Run 100 AIS chains in parallel - num_chains = 100 - dims = 20 - dtype = np.float32 - - proposal = tfd.MultivatiateNormalDiag( - loc=tf.zeros([dims], dtype=dtype)) - - target = tfd.TransformedDistribution( - distribution=tfd.Gamma(concentration=dtype(2), - rate=dtype(3)), - bijector=tfd.bijectors.Invert(tfd.bijectors.Exp()), - event_shape=[dims]) - - chains_state, ais_weights, kernels_results = ( - hmc.sample_annealed_importance_chain( - proposal_log_prob_fn=proposal.log_prob, - num_steps=1000, - target_log_prob_fn=target.log_prob, - step_size=0.2, - current_state=proposal.sample(num_chains), - num_leapfrog_steps=2)) - - log_estimated_normalizer = (tf.reduce_logsumexp(ais_weights) - - np.log(num_chains)) - log_true_normalizer = tf.lgamma(2.) - 2. * tf.log(3.) - ``` - - ##### Estimate marginal likelihood of a Bayesian regression model. - - ```python - tfd = tf.contrib.distributions - - def make_prior(dims, dtype): - return tfd.MultivariateNormalDiag( - loc=tf.zeros(dims, dtype)) - - def make_likelihood(weights, x): - return tfd.MultivariateNormalDiag( - loc=tf.tensordot(weights, x, axes=[[0], [-1]])) - - # Run 100 AIS chains in parallel - num_chains = 100 - dims = 10 - dtype = np.float32 - - # Make training data. - x = np.random.randn(num_chains, dims).astype(dtype) - true_weights = np.random.randn(dims).astype(dtype) - y = np.dot(x, true_weights) + np.random.randn(num_chains) - - # Setup model. 
- prior = make_prior(dims, dtype) - def target_log_prob_fn(weights): - return prior.log_prob(weights) + make_likelihood(weights, x).log_prob(y) - - proposal = tfd.MultivariateNormalDiag( - loc=tf.zeros(dims, dtype)) - - weight_samples, ais_weights, kernel_results = ( - hmc.sample_annealed_importance_chain( - num_steps=1000, - proposal_log_prob_fn=proposal.log_prob, - target_log_prob_fn=target_log_prob_fn - current_state=tf.zeros([num_chains, dims], dtype), - step_size=0.1, - num_leapfrog_steps=2)) - log_normalizer_estimate = (tf.reduce_logsumexp(ais_weights) - - np.log(num_chains)) - ``` - - Args: - proposal_log_prob_fn: Python callable that returns the log density of the - initial distribution. - num_steps: Integer number of Markov chain updates to run. More - iterations means more expense, but smoother annealing between q - and p, which in turn means exponentially lower variance for the - normalizing constant estimator. - target_log_prob_fn: Python callable which takes an argument like - `current_state` (or `*current_state` if it's a list) and returns its - (possibly unnormalized) log-density under the target distribution. - current_state: `Tensor` or Python `list` of `Tensor`s representing the - current state(s) of the Markov chain(s). The first `r` dimensions index - independent chains, `r = tf.rank(target_log_prob_fn(*current_state))`. - step_size: `Tensor` or Python `list` of `Tensor`s representing the step size - for the leapfrog integrator. Must broadcast with the shape of - `current_state`. Larger step sizes lead to faster progress, but too-large - step sizes make rejection exponentially more likely. When possible, it's - often helpful to match per-variable step sizes to the standard deviations - of the target distribution in each variable. - num_leapfrog_steps: Integer number of steps to run the leapfrog integrator - for. Total progress per HMC step is roughly proportional to `step_size * - num_leapfrog_steps`. 
- seed: Python integer to seed the random number generator. - name: Python `str` name prefixed to Ops created by this function. - Default value: `None` (i.e., "hmc_sample_annealed_importance_chain"). - - Returns: - next_state: `Tensor` or Python list of `Tensor`s representing the - state(s) of the Markov chain(s) at the final iteration. Has same shape as - input `current_state`. - ais_weights: Tensor with the estimated weight(s). Has shape matching - `target_log_prob_fn(current_state)`. - kernel_results: `collections.namedtuple` of internal calculations used to - advance the chain. - """ - def make_convex_combined_log_prob_fn(iter_): - def _fn(*args): - p = proposal_log_prob_fn(*args) - t = target_log_prob_fn(*args) - dtype = p.dtype.base_dtype - beta = (math_ops.cast(iter_ + 1, dtype) - / math_ops.cast(num_steps, dtype)) - return (1. - beta) * p + beta * t - return _fn - - with ops.name_scope( - name, "hmc_sample_annealed_importance_chain", - [num_steps, current_state, step_size, num_leapfrog_steps, seed]): - with ops.name_scope("initialize"): - [ - current_state, - step_size, - current_log_prob, - current_grads_log_prob, - ] = _prepare_args( - make_convex_combined_log_prob_fn(iter_=0), - current_state, - step_size, - description="convex_combined_log_prob") - num_steps = ops.convert_to_tensor( - num_steps, - dtype=dtypes.int32, - name="num_steps") - num_leapfrog_steps = ops.convert_to_tensor( - num_leapfrog_steps, - dtype=dtypes.int32, - name="num_leapfrog_steps") - def _loop_body(iter_, ais_weights, current_state, kernel_results): - """Closure which implements `tf.while_loop` body.""" - current_state_parts = (list(current_state) - if _is_list_like(current_state) - else [current_state]) - # TODO(b/72994218): Consider refactoring things to avoid this unecessary - # call. 
- ais_weights += ((target_log_prob_fn(*current_state_parts) - - proposal_log_prob_fn(*current_state_parts)) - / math_ops.cast(num_steps, ais_weights.dtype)) - return [iter_ + 1, ais_weights] + list(kernel( - make_convex_combined_log_prob_fn(iter_), - current_state, - step_size, - num_leapfrog_steps, - seed, - kernel_results.current_target_log_prob, - kernel_results.current_grads_target_log_prob)) - - while_loop_kwargs = dict( - cond=lambda iter_, *args: iter_ < num_steps, - body=_loop_body, - loop_vars=[ - np.int32(0), # iter_ - array_ops.zeros_like(current_log_prob), # ais_weights - current_state, - _make_dummy_kernel_results(current_state, - current_log_prob, - current_grads_log_prob), - ]) - if seed is not None: - while_loop_kwargs["parallel_iterations"] = 1 - - [ais_weights, current_state, kernel_results] = control_flow_ops.while_loop( - **while_loop_kwargs)[1:] # Lop-off "iter_". - - return [current_state, ais_weights, kernel_results] - - def kernel(target_log_prob_fn, current_state, step_size, -- GitLab From faa09ad9d3eb9f7a4dcd7c11f3b1e22e13496afd Mon Sep 17 00:00:00 2001 From: Katherine Wu Date: Wed, 7 Mar 2018 15:03:32 -0800 Subject: [PATCH 559/884] Added tf.contrib.data.make_batched_features_dataset as replacement of tf.contrib.learn.io.read_batch_features. Added warning about the deprecation of tf.contrib.data.read_batch_features. 
PiperOrigin-RevId: 188240046 --- tensorflow/contrib/data/__init__.py | 2 + .../contrib/data/python/kernel_tests/BUILD | 1 + .../kernel_tests/reader_dataset_ops_test.py | 154 +++++++++++++-- tensorflow/contrib/data/python/ops/BUILD | 1 + tensorflow/contrib/data/python/ops/readers.py | 180 +++++++++++++++--- 5 files changed, 301 insertions(+), 37 deletions(-) diff --git a/tensorflow/contrib/data/__init__.py b/tensorflow/contrib/data/__init__.py index 1311119e79..f09d156832 100644 --- a/tensorflow/contrib/data/__init__.py +++ b/tensorflow/contrib/data/__init__.py @@ -31,6 +31,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview. @@enumerate_dataset @@group_by_window @@ignore_errors +@@make_batched_features_dataset @@make_saveable_from_iterator @@map_and_batch @@padded_batch_and_drop_remainder @@ -65,6 +66,7 @@ from tensorflow.contrib.data.python.ops.grouping import group_by_window from tensorflow.contrib.data.python.ops.interleave_ops import parallel_interleave from tensorflow.contrib.data.python.ops.interleave_ops import sloppy_interleave from tensorflow.contrib.data.python.ops.iterator_ops import make_saveable_from_iterator +from tensorflow.contrib.data.python.ops.readers import make_batched_features_dataset from tensorflow.contrib.data.python.ops.readers import read_batch_features from tensorflow.contrib.data.python.ops.readers import SqlDataset from tensorflow.contrib.data.python.ops.resampling import rejection_resample diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 22bcf90dd4..45a0be0ddd 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -297,6 +297,7 @@ py_test( "//tensorflow/python:parsing_ops", "//tensorflow/python:util", "//tensorflow/python/data/ops:iterator_ops", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py 
b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py index 6efe97444a..15bd55bf64 100644 --- a/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py +++ b/tensorflow/contrib/data/python/kernel_tests/reader_dataset_ops_test.py @@ -21,6 +21,8 @@ import gzip import os import zlib +import numpy as np + from tensorflow.contrib.data.python.kernel_tests import dataset_serialization_test_base from tensorflow.contrib.data.python.ops import readers from tensorflow.core.example import example_pb2 @@ -262,12 +264,19 @@ class ReadBatchFeaturesTest(test.TestCase): self._num_records = 7 self.test_filenames = self._createFiles() - def _read_batch_features(self, filenames, num_epochs, batch_size): + def _read_batch_features(self, + filenames, + num_epochs, + batch_size, + reader_num_threads=1, + parser_num_threads=1, + shuffle=False, + shuffle_seed=None): self.filenames = filenames self.num_epochs = num_epochs self.batch_size = batch_size - return readers.read_batch_features( + return readers.make_batched_features_dataset( file_pattern=self.filenames, batch_size=self.batch_size, features={ @@ -276,8 +285,12 @@ class ReadBatchFeaturesTest(test.TestCase): "keywords": parsing_ops.VarLenFeature(dtypes.string) }, reader=core_readers.TFRecordDataset, - randomize_input=False, - num_epochs=self.num_epochs) + num_epochs=self.num_epochs, + shuffle=shuffle, + shuffle_seed=shuffle_seed, + reader_num_threads=reader_num_threads, + parser_num_threads=parser_num_threads).make_one_shot_iterator( + ).get_next() def _record(self, f, r): example = example_pb2.Example(features=feature_pb2.Features( @@ -312,24 +325,35 @@ class ReadBatchFeaturesTest(test.TestCase): writer.close() return filenames - def _next_actual_batch(self, sess): - file_op = self.outputs["file"] - keywords_indices_op = self.outputs["keywords"].indices - keywords_values_op = self.outputs["keywords"].values - keywords_dense_shape_op = self.outputs["keywords"].dense_shape - record_op = 
self.outputs["record"] + def _run_actual_batch(self, outputs, sess): + file_op = outputs["file"] + keywords_indices_op = outputs["keywords"].indices + keywords_values_op = outputs["keywords"].values + keywords_dense_shape_op = outputs["keywords"].dense_shape + record_op = outputs["record"] return sess.run([ file_op, keywords_indices_op, keywords_values_op, keywords_dense_shape_op, record_op ]) - def _next_expected_batch(self, file_indices, batch_size, num_epochs): + def _next_actual_batch(self, sess): + return self._run_actual_batch(self.outputs, sess) + + def _next_expected_batch(self, + file_indices, + batch_size, + num_epochs, + cycle_length=1): def _next_record(file_indices): for j in file_indices: for i in range(self._num_records): yield j, i + def _next_record_interleaved(file_indices, cycle_length): + return self._interleave([_next_record([i]) for i in file_indices], + cycle_length) + file_batch = [] keywords_batch_indices = [] keywords_batch_values = [] @@ -337,7 +361,11 @@ class ReadBatchFeaturesTest(test.TestCase): record_batch = [] batch_index = 0 for _ in range(num_epochs): - for record in _next_record(file_indices): + if cycle_length == 1: + next_records = _next_record(file_indices) + else: + next_records = _next_record_interleaved(file_indices, cycle_length) + for record in next_records: f = record[0] r = record[1] file_batch.append(f) @@ -365,14 +393,41 @@ class ReadBatchFeaturesTest(test.TestCase): [len(file_batch), keywords_batch_max_len], record_batch ] - def _verify_records(self, sess, batch_size, file_index=None, num_epochs=1): + def _interleave(self, iterators, cycle_length): + pending_iterators = iterators + open_iterators = [] + num_open = 0 + for i in range(cycle_length): + if pending_iterators: + open_iterators.append(pending_iterators.pop(0)) + num_open += 1 + + while num_open: + for i in range(min(cycle_length, len(open_iterators))): + if open_iterators[i] is None: + continue + try: + yield next(open_iterators[i]) + except StopIteration: 
+ if pending_iterators: + open_iterators[i] = pending_iterators.pop(0) + else: + open_iterators[i] = None + num_open -= 1 + + def _verify_records(self, + sess, + batch_size, + file_index=None, + num_epochs=1, + interleave_cycle_length=1): if file_index is not None: file_indices = [file_index] else: file_indices = range(self._num_files) - for expected_batch in self._next_expected_batch(file_indices, batch_size, - num_epochs): + for expected_batch in self._next_expected_batch( + file_indices, batch_size, num_epochs, interleave_cycle_length): actual_batch = self._next_actual_batch(sess) for i in range(len(expected_batch)): self.assertAllEqual(expected_batch[i], actual_batch[i]) @@ -435,6 +490,75 @@ class ReadBatchFeaturesTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(next_element) + def testReadWithFusedShuffleRepeatDataset(self): + num_epochs = 5 + total_records = num_epochs * self._num_records + for batch_size in [1, 2]: + # Test that shuffling with same seed produces the same result. + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + outputs1 = self._read_batch_features( + filenames=self.test_filenames[0], + num_epochs=num_epochs, + batch_size=batch_size, + shuffle=True, + shuffle_seed=5) + outputs2 = self._read_batch_features( + filenames=self.test_filenames[0], + num_epochs=num_epochs, + batch_size=batch_size, + shuffle=True, + shuffle_seed=5) + for _ in range(total_records // batch_size): + batch1 = self._run_actual_batch(outputs1, sess) + batch2 = self._run_actual_batch(outputs2, sess) + for i in range(len(batch1)): + self.assertAllEqual(batch1[i], batch2[i]) + + # Test that shuffling with different seeds produces a different order. 
+ with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + outputs1 = self._read_batch_features( + filenames=self.test_filenames[0], + num_epochs=num_epochs, + batch_size=batch_size, + shuffle=True, + shuffle_seed=5) + outputs2 = self._read_batch_features( + filenames=self.test_filenames[0], + num_epochs=num_epochs, + batch_size=batch_size, + shuffle=True, + shuffle_seed=15) + all_equal = True + for _ in range(total_records // batch_size): + batch1 = self._run_actual_batch(outputs1, sess) + batch2 = self._run_actual_batch(outputs2, sess) + for i in range(len(batch1)): + all_equal = all_equal and np.array_equal(batch1[i], batch2[i]) + self.assertFalse(all_equal) + + def testParallelReadersAndParsers(self): + num_epochs = 5 + for batch_size in [1, 2]: + for reader_num_threads in [2, 4]: + for parser_num_threads in [2, 4]: + with ops.Graph().as_default() as g: + with self.test_session(graph=g) as sess: + self.outputs = self._read_batch_features( + filenames=self.test_filenames, + num_epochs=num_epochs, + batch_size=batch_size, + reader_num_threads=reader_num_threads, + parser_num_threads=parser_num_threads) + self._verify_records( + sess, + batch_size, + num_epochs=num_epochs, + interleave_cycle_length=reader_num_threads) + with self.assertRaises(errors.OutOfRangeError): + self._next_actual_batch(sess) + if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/data/python/ops/BUILD b/tensorflow/contrib/data/python/ops/BUILD index 16fe31675f..171948da45 100644 --- a/tensorflow/contrib/data/python/ops/BUILD +++ b/tensorflow/contrib/data/python/ops/BUILD @@ -67,6 +67,7 @@ py_library( srcs_version = "PY2AND3", deps = [ ":dataset_ops", + ":shuffle_ops", "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", diff --git a/tensorflow/contrib/data/python/ops/readers.py b/tensorflow/contrib/data/python/ops/readers.py index 57f3010277..b346bed3e6 100644 --- 
a/tensorflow/contrib/data/python/ops/readers.py +++ b/tensorflow/contrib/data/python/ops/readers.py @@ -17,7 +17,10 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.contrib.data.python.ops import interleave_ops +from tensorflow.contrib.data.python.ops import shuffle_ops from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import readers as core_readers from tensorflow.python.data.util import nest from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -25,12 +28,150 @@ from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_dataset_ops from tensorflow.python.ops import parsing_ops from tensorflow.python.platform import gfile +from tensorflow.python.util import deprecation +def make_batched_features_dataset(file_pattern, + batch_size, + features, + reader=core_readers.TFRecordDataset, + reader_args=None, + num_epochs=None, + shuffle=True, + shuffle_buffer_size=10000, + shuffle_seed=None, + prefetch_buffer_size=1, + reader_num_threads=1, + parser_num_threads=2, + sloppy_ordering=False): + """Returns a `Dataset` of feature dictionaries from `Example` protos. 
+ + Example: + + ``` + serialized_examples = [ + features { + feature { key: "age" value { int64_list { value: [ 0 ] } } } + feature { key: "gender" value { bytes_list { value: [ "f" ] } } } + feature { key: "kws" value { bytes_list { value: [ "code", "art" ] } } } + }, + features { + feature { key: "age" value { int64_list { value: [] } } } + feature { key: "gender" value { bytes_list { value: [ "f" ] } } } + feature { key: "kws" value { bytes_list { value: [ "sports" ] } } } + } + ] + ``` + + We can use arguments: + + ``` + features: { + "age": FixedLenFeature([], dtype=tf.int64, default_value=-1), + "gender": FixedLenFeature([], dtype=tf.string), + "kws": VarLenFeature(dtype=tf.string), + } + ``` + + And the expected output is: + + ```python + { + "age": [[0], [-1]], + "gender": [["f"], ["f"]], + "kws": SparseTensor( + indices=[[0, 0], [0, 1], [1, 0]], + values=["code", "art", "sports"], + dense_shape=[2, 2]), + } + ``` + + Args: + file_pattern: List of files or patterns of file paths containing + `Example` records. See `tf.gfile.Glob` for pattern rules. + batch_size: An int representing the number of consecutive elements of this + dataset to combine in a single batch. + features: A `dict` mapping feature keys to `FixedLenFeature` or + `VarLenFeature` values. See `tf.parse_example`. + reader: A function or class that can be + called with a `filenames` tensor and (optional) `reader_args` and returns + a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`. + reader_args: Additional arguments to pass to the reader class. + num_epochs: Integer specifying the number of times to read through the + dataset. If None, cycles through the dataset forever. Defaults to `None`. + shuffle: A boolean, indicates whether the input should be shuffled. Defaults + to `True`. + shuffle_buffer_size: Buffer size of the ShuffleDataset. A large capacity + ensures better shuffling but would increase memory usage and startup time. 
+ shuffle_seed: Randomization seed to use for shuffling. + prefetch_buffer_size: Number of feature batches to prefetch in order to + improve performance. Recommended value is the number of batches consumed + per training step (default is 1). + reader_num_threads: Number of threads used to read `Example` records. If >1, + the results will be interleaved. + parser_num_threads: Number of threads to use for parsing `Example` tensors + into a dictionary of `Feature` tensors. + sloppy_ordering: If `True`, reading performance will be improved at + the cost of non-deterministic ordering. If `False`, the order of elements + produced is deterministic prior to shuffling (elements are still + randomized if `shuffle=True`. Note that if the seed is set, then order + of elements after shuffling is deterministic). Defaults to `False`. + + Returns: + A dataset of `dict` elements. Each `dict` maps feature keys to + `Tensor` or `SparseTensor` objects. + """ + # Create dataset of all matching filenames + if shuffle: + dataset = dataset_ops.Dataset.list_files(file_pattern, shuffle=True) + else: + # TODO(b/73959787): Use Dataset.list_files() once ordering is deterministic. + filenames = _get_file_names(file_pattern, shuffle) + dataset = dataset_ops.Dataset.from_tensor_slices(filenames) + + # Read `Example` records from files as tensor objects. + if reader_args is None: + reader_args = [] + + # Read files sequentially (if reader_num_threads=1) or in parallel + dataset = dataset.apply( + interleave_ops.parallel_interleave( + lambda filename: reader(filename, *reader_args), + cycle_length=reader_num_threads, + sloppy=sloppy_ordering)) + + # Extract values if the `Example` tensors are stored as key-value tuples. + if dataset.output_types == (dtypes.string, dtypes.string): + dataset = dataset.map(lambda _, v: v) + + # Apply dataset repeat and shuffle transformations. 
+ repeat_dataset = (num_epochs != 1) + if repeat_dataset and shuffle: + # Use fused shuffle_and_repeat operation for better performance + dataset = dataset.apply( + shuffle_ops.shuffle_and_repeat(shuffle_buffer_size, num_epochs, + shuffle_seed)) + elif repeat_dataset: + dataset = dataset.repeat(num_epochs) + elif shuffle: + dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed) + + dataset = dataset.batch(batch_size) + + # Parse `Example` tensors to a dictionary of `Feature` tensors. + dataset = dataset.map( + lambda x: parsing_ops.parse_example(x, features), + num_parallel_calls=parser_num_threads) + dataset = dataset.prefetch(prefetch_buffer_size) + return dataset + + +@deprecation.deprecated(None, + "Use `tf.contrib.data.make_batched_features_dataset`") def read_batch_features(file_pattern, batch_size, features, - reader, + reader=core_readers.TFRecordDataset, reader_args=None, randomize_input=True, num_epochs=None, @@ -84,43 +225,38 @@ def read_batch_features(file_pattern, dataset to combine in a single batch. features: A `dict` mapping feature keys to `FixedLenFeature` or `VarLenFeature` values. See `tf.parse_example`. - reader: A function or class that can be called with a `filenames` tensor - and (optional) `reader_args` and returns a `Dataset` of Examples. + reader: A function or class that can be + called with a `filenames` tensor and (optional) `reader_args` and returns + a `Dataset` of `Example` tensors. Defaults to `tf.data.TFRecordDataset`. reader_args: Additional arguments to pass to the reader class. randomize_input: Whether the input should be randomized. num_epochs: Integer specifying the number of times to read through the dataset. If None, cycles through the dataset forever. - capacity: Capacity of the ShuffleDataset. A large capacity ensures better + capacity: Buffer size of the ShuffleDataset. A large capacity ensures better shuffling but would increase memory usage and startup time. 
- Returns: A dict from keys in features to `Tensor` or `SparseTensor` objects. """ - filenames = _get_file_names(file_pattern, randomize_input) - if reader_args: - dataset = reader(filenames, *reader_args) - else: - dataset = reader(filenames) - if dataset.output_types == (dtypes.string, dtypes.string): - dataset = dataset.map(lambda _, v: v) - if num_epochs != 1: - dataset = dataset.repeat(num_epochs) - if randomize_input: - dataset = dataset.shuffle(capacity) - dataset = dataset.batch(batch_size) - dataset = dataset.map(lambda x: parsing_ops.parse_example(x, features)) - dataset = dataset.prefetch(1) + dataset = make_batched_features_dataset( + file_pattern, + batch_size, + features, + reader=reader, + reader_args=reader_args, + shuffle=randomize_input, + num_epochs=num_epochs, + shuffle_buffer_size=capacity) iterator = dataset.make_one_shot_iterator() outputs = iterator.get_next() return outputs -def _get_file_names(file_pattern, randomize_input): +def _get_file_names(file_pattern, shuffle): """Parse list of file names from pattern, optionally shuffled. Args: file_pattern: File glob pattern, or list of glob patterns. - randomize_input: Whether to shuffle the order of file names. + shuffle: Whether to shuffle the order of file names. Returns: List of file names matching `file_pattern`. @@ -141,7 +277,7 @@ def _get_file_names(file_pattern, randomize_input): raise ValueError("No files match %s." % file_pattern) # Sort files so it will be deterministic for unit tests. 
- if not randomize_input: + if not shuffle: file_names = sorted(file_names) return file_names -- GitLab From f7b1d233ed39eed24e3c1489738df01f700112e3 Mon Sep 17 00:00:00 2001 From: gracehoney <31743510+aaroey@users.noreply.github.com> Date: Wed, 7 Mar 2018 15:26:09 -0800 Subject: [PATCH 560/884] Move the pylint message and fix comment length --- tensorflow/contrib/tensorrt/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index 0d1c90ea64..d53a05827a 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -18,16 +18,16 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -# pylint: disable=unused-import,wildcard-import +# pylint: disable=unused-import,wildcard-import,g-import-not-at-top try: - from tensorflow.contrib.tensorrt.python import * # pylint: disable=import-not-at-top + from tensorflow.contrib.tensorrt.python import * except Exception as e: no_trt_message = ( '**** Failed to initialize TensorRT. This is either because the TensorRT' - ' installation path is not in LD_LIBRARY_PATH, or because you do not have it' - ' installed. If not installed, please go to' + ' installation path is not in LD_LIBRARY_PATH, or because you do not have' + ' it installed. If not installed, please go to' ' https://developer.nvidia.com/tensorrt to download and install' ' TensorRT ****') print(no_trt_message) raise e -# pylint: enable=unused-import,wildcard-import +# pylint: enable=unused-import,wildcard-import,g-import-not-at-top -- GitLab From 5ae2d41e7a1daf4b00b24dda683fabf7c283df7c Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Wed, 7 Mar 2018 15:52:25 -0800 Subject: [PATCH 561/884] Checkpointable: Fix device placement when restoring name-based checkpoints. Just need to put the restore ops on a CPU. 
PiperOrigin-RevId: 188248198 --- .../eager/python/checkpointable_utils.py | 5 ++-- .../eager/python/checkpointable_utils_test.py | 27 ++++++++++--------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/tensorflow/contrib/eager/python/checkpointable_utils.py b/tensorflow/contrib/eager/python/checkpointable_utils.py index 1fa150f3c6..d07121df63 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils.py @@ -493,8 +493,9 @@ class NameBasedSaverStatus(_LoadStatus): """Load the name-based training checkpoint using a new `tf.train.Saver`.""" if session is None and not context.executing_eagerly(): session = ops.get_default_session() - saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access - sess=session, save_path=self._save_path) + with ops.device("/cpu:0"): + saver_lib.Saver(self._object_saver._global_variable_names()).restore( # pylint: disable=protected-access + sess=session, save_path=self._save_path) def initialize_or_restore(self, session=None): """Alias for `run_restore_ops`.""" diff --git a/tensorflow/contrib/eager/python/checkpointable_utils_test.py b/tensorflow/contrib/eager/python/checkpointable_utils_test.py index fd9fc098b3..2054878bf8 100644 --- a/tensorflow/contrib/eager/python/checkpointable_utils_test.py +++ b/tensorflow/contrib/eager/python/checkpointable_utils_test.py @@ -993,20 +993,21 @@ class CheckpointCompatibilityTests(test.TestCase): @test_util.run_in_graph_and_eager_modes() def testLoadFromNameBasedSaver(self): """Save a name-based checkpoint, load it using the object-based API.""" - save_path = self._write_name_based_checkpoint() - root = self._initialized_model() - self._set_sentinels(root) - with self.assertRaises(AssertionError): + with test_util.device(use_gpu=True): + save_path = self._write_name_based_checkpoint() + root = self._initialized_model() + self._set_sentinels(root) + with 
self.assertRaises(AssertionError): + self._check_sentinels(root) + object_saver = checkpointable_utils.CheckpointableSaver(root) + status = object_saver.restore(save_path) + with self.assertRaises(AssertionError): + status.assert_consumed() + status.run_restore_ops() + self._check_sentinels(root) + self._set_sentinels(root) + status.initialize_or_restore() self._check_sentinels(root) - object_saver = checkpointable_utils.CheckpointableSaver(root) - status = object_saver.restore(save_path) - with self.assertRaises(AssertionError): - status.assert_consumed() - status.run_restore_ops() - self._check_sentinels(root) - self._set_sentinels(root) - status.initialize_or_restore() - self._check_sentinels(root) # TODO(allenl): Test for the core name-based saver loading object-based # checkpoints once object-based checkpointing is in core. -- GitLab From 22ff6e7b89384d83556edcf78e15fdfa226371d7 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Wed, 7 Mar 2018 16:44:11 -0800 Subject: [PATCH 562/884] eager: Export tf.enable_eager_execution() and tf.executing_eagerly() PiperOrigin-RevId: 188255674 --- tensorflow/python/__init__.py | 10 +++ tensorflow/python/eager/context.py | 9 ++- tensorflow/python/framework/ops.py | 69 +++++++++++++------- tensorflow/tools/api/golden/tensorflow.pbtxt | 8 +++ 4 files changed, 70 insertions(+), 26 deletions(-) diff --git a/tensorflow/python/__init__.py b/tensorflow/python/__init__.py index d6715fa522..5a9cd7531d 100644 --- a/tensorflow/python/__init__.py +++ b/tensorflow/python/__init__.py @@ -139,6 +139,10 @@ from tensorflow.python.ops import state_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import tensor_array_ops +# Eager execution +from tensorflow.python.eager.context import executing_eagerly +from tensorflow.python.framework.ops import enable_eager_execution + # Symbols whitelisted for export without documentation. # TODO(cwhipkey): review these and move to contrib, expose through # documentation, or remove. 
@@ -290,6 +294,12 @@ _allowed_symbols.extend([ 'MONOLITHIC_BUILD', ]) +# Eager execution +_allowed_symbols.extend([ + 'enable_eager_execution', + 'executing_eagerly', +]) + # Remove all extra symbols that don't have a docstring or are not explicitly # referenced in the whitelist. remove_undocumented(__name__, _allowed_symbols, [ diff --git a/tensorflow/python/eager/context.py b/tensorflow/python/eager/context.py index 5d13aada63..87d3ed880a 100644 --- a/tensorflow/python/eager/context.py +++ b/tensorflow/python/eager/context.py @@ -32,6 +32,7 @@ from tensorflow.python.framework import errors from tensorflow.python.util import compat from tensorflow.python.util import is_in_graph_mode from tensorflow.python.util import tf_contextlib +from tensorflow.python.util.tf_export import tf_export GRAPH_MODE = 0 EAGER_MODE = 1 @@ -518,8 +519,14 @@ def internal_operation_seed(): return context()._internal_operation_seed() # pylint: disable=protected-access +@tf_export("executing_eagerly") def executing_eagerly(): - """Returns True if the current thread has eager execution enabled.""" + """Returns True if the current thread has eager execution enabled. + + Eager execution is typically enabled via @{tf.enable_eager_execution}, + but may also be enabled within the context of a Python function via + tf.contrib.eager.py_func. + """ return context().executing_eagerly() diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index 8ff247fdb1..f5dde3a358 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5169,41 +5169,60 @@ def init_scope(): yield +@tf_export("enable_eager_execution") def enable_eager_execution(config=None, device_policy=None): - """Enables, for the rest of the lifetime of this program, eager execution. + """Enables eager execution for the lifetime of this program. - If not called immediately on startup risks creating breakage and bugs. 
+ Eager execution provides an imperative interface to TensorFlow. With eager + execution enabled, TensorFlow functions execute operations immediately (as + opposed to adding to a graph to be executed later in a @{tf.Session}) and + return concrete values (as opposed to symbolic references to a node in a + computational graph). - Example: + For example: ```python - tfe.enable_eager_execution() + tf.enable_eager_execution() # After eager execution is enabled, operations are executed as they are - # defined and `Tensor`s hold concrete values, which can be accessed as - # `numpy.ndarray`s through the `numpy()` method. + # defined and Tensor objects hold concrete values, which can be accessed as + # numpy.ndarrays through the numpy() method. assert tf.multiply(6, 7).numpy() == 42 ``` + Eager execution cannot be enabled after TensorFlow APIs have been used to + create or execute graphs. It is typically recommended to invoke this function + at program startup and not in a library (as most libraries should be usable + both with and without eager execution). + Args: - config: (Optional.) A `ConfigProto` protocol buffer with configuration - options for the Context. Note that a lot of these options may be - currently unimplemented or irrelevant when eager execution is enabled. - device_policy: (Optional.) What policy to use when trying to run an - operation on a device with inputs which are not on that device. + config: (Optional.) A @{tf.ConfigProto} to use to configure the environment + in which operations are executed. Note that @{tf.ConfigProto} is also + used to configure graph execution (via @{tf.Session}) and many options + within `tf.ConfigProto` are not implemented (or are irrelevant) when + eager execution is enabled. + device_policy: (Optional.) Policy controlling how operations requiring + inputs on a specific device (e.g., a GPU 0) handle inputs on a different + device (e.g. GPU 1 or CPU). 
Valid values: - tfe.DEVICE_PLACEMENT_EXPLICIT: raises an error if the placement is not - correct. - tfe.DEVICE_PLACEMENT_WARN: copies the tensors which are not on the - right device but raises a warning. - tfe.DEVICE_PLACEMENT_SILENT: silently copies the tensors. This might - hide performance problems. - tfe.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies int32 tensors, - raising errors on the other ones. + + - tf.contrib.eager.DEVICE_PLACEMENT_EXPLICIT: raises an error if the + placement is not correct. + + - tf.contrib.eager.DEVICE_PLACEMENT_WARN: copies the tensors which are not + on the right device but logs a warning. + + - tf.contrib.eager.DEVICE_PLACEMENT_SILENT: silently copies the tensors. + Note that this may hide performance problems as there is no notification + provided when operations are blocked on the tensor being copied between + devices. + + - tf.contrib.eager.DEVICE_PLACEMENT_SILENT_FOR_INT32: silently copies + int32 tensors, raising errors on the other ones. Raises: - ValueError: If trying to create a context after using graph operations - or if trying to create a context with nontrivial options which differ - from those of the existing context. + ValueError: If eager execution is enabled after creating/executing a + TensorFlow graph, or if options provided conflict with a previous call + to this function. 
""" if config is not None and not isinstance(config, config_pb2.ConfigProto): raise TypeError( @@ -5213,7 +5232,7 @@ def enable_eager_execution(config=None, device_policy=None): context.DEVICE_PLACEMENT_SILENT, context.DEVICE_PLACEMENT_SILENT_FOR_INT32): raise ValueError( - "device_policy must be one of None, tfe.DEVICE_PLACEMENT_*" + "device_policy must be one of None, tf.contrib.eager.DEVICE_PLACEMENT_*" ) # pylint: disable=protected-access if context._default_mode == context.GRAPH_MODE: @@ -5222,7 +5241,7 @@ def enable_eager_execution(config=None, device_policy=None): _default_graph_stack._global_default_graph is not None) if graph_mode_has_been_used: raise ValueError( - "tfe.enable_eager_execution has to be called at program startup.") + "tf.enable_eager_execution must be called at program startup.") context._default_mode = context.EAGER_MODE if context._context is None: context._context = context.Context(config=config, @@ -5245,7 +5264,7 @@ def enable_eager_execution(config=None, device_policy=None): context._context._device_policy)) else: raise ValueError( - "tfe.enable_eager_execution has to be called at program startup.") + "tf.enable_eager_execution must be called at program startup.") def eager_run(main=None, argv=None): diff --git a/tensorflow/tools/api/golden/tensorflow.pbtxt b/tensorflow/tools/api/golden/tensorflow.pbtxt index a88a87b952..bb95f34e01 100644 --- a/tensorflow/tools/api/golden/tensorflow.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.pbtxt @@ -968,6 +968,10 @@ tf_module { name: "einsum" argspec: "args=[\'equation\'], varargs=inputs, keywords=kwargs, defaults=None" } + member_method { + name: "enable_eager_execution" + argspec: "args=[\'config\', \'device_policy\'], varargs=None, keywords=None, defaults=[\'None\', \'None\'], " + } member_method { name: "encode_base64" argspec: "args=[\'input\', \'pad\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " @@ -984,6 +988,10 @@ tf_module { name: "erfc" argspec: 
"args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "executing_eagerly" + argspec: "args=[], varargs=None, keywords=None, defaults=None" + } member_method { name: "exp" argspec: "args=[\'x\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 1408f05c9a1f1180f67112d8adb9cf79b3b0ac44 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 16:54:01 -0800 Subject: [PATCH 563/884] Internal change. PiperOrigin-RevId: 188257136 --- .../contrib/lite/kernels/internal/BUILD | 5 +- .../lite/kernels/internal/quantization_util.h | 78 +++++++++++++++++++ .../internal/quantization_util_test.cc | 45 +++++++++++ .../contrib/lite/kernels/internal/types.h | 16 ++++ tensorflow/contrib/lite/toco/BUILD | 1 + tensorflow/contrib/lite/toco/model.h | 18 +---- tensorflow/contrib/lite/toco/tooling_util.h | 65 +--------------- 7 files changed, 149 insertions(+), 79 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/internal/BUILD b/tensorflow/contrib/lite/kernels/internal/BUILD index c7290c2aaa..aa3957bee1 100644 --- a/tensorflow/contrib/lite/kernels/internal/BUILD +++ b/tensorflow/contrib/lite/kernels/internal/BUILD @@ -213,7 +213,10 @@ cc_library( "compatibility.h", "quantization_util.h", ], - deps = [":round"], + deps = [ + ":round", + ":types", + ], ) cc_test( diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util.h b/tensorflow/contrib/lite/kernels/internal/quantization_util.h index b84d2f9ee1..f7706c7938 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util.h +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util.h @@ -15,10 +15,88 @@ limitations under the License. 
#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ #define TENSORFLOW_CONTRIB_LITE_KERNELS_INTERNAL_QUANTIZATION_UTIL_H_ +#include #include +#include + +#include "tensorflow/contrib/lite/kernels/internal/compatibility.h" +#include "tensorflow/contrib/lite/kernels/internal/round.h" +#include "tensorflow/contrib/lite/kernels/internal/types.h" namespace tflite { +// Given the min and max values of a float array, return +// reasonable quantization parameters to use for this array. +template +QuantizationParams ChooseQuantizationParams(double rmin, double rmax) { + const T qmin = std::numeric_limits::min(); + const T qmax = std::numeric_limits::max(); + const double qmin_double = qmin; + const double qmax_double = qmax; + // 0 should always be a representable value. Let's assume that the initial + // min,max range contains 0. + TFLITE_CHECK_LE(rmin, 0.); + TFLITE_CHECK_GE(rmax, 0.); + if (rmin == rmax) { + // Special case where the min,max range is a point. Should be {0}. + TFLITE_CHECK_EQ(rmin, 0.); + TFLITE_CHECK_EQ(rmax, 0.); + QuantizationParams quantization_params; + quantization_params.zero_point = 0; + quantization_params.scale = 0.; + return quantization_params; + } + + // General case. + // + // First determine the scale. + const double scale = (rmax - rmin) / (qmax_double - qmin_double); + + // Zero-point computation. + // First the initial floating-point computation. The zero-point can be + // determined from solving an affine equation for any known pair + // (real value, corresponding quantized value). + // We know two such pairs: (rmin, qmin) and (rmax, qmax). + // The arithmetic error on the zero point computed from either pair + // will be roughly machine_epsilon * (sum of absolute values of terms) + // so we want to use the variant that adds the smaller terms. 
+ const double zero_point_from_min = qmin_double - rmin / scale; + const double zero_point_from_max = qmax_double - rmax / scale; + const double zero_point_from_min_error = + std::abs(qmin_double) + std::abs(rmin / scale); + const double zero_point_from_max_error = + std::abs(qmax_double) + std::abs(rmax / scale); + + const double zero_point_double = + zero_point_from_min_error < zero_point_from_max_error + ? zero_point_from_min + : zero_point_from_max; + + // Now we need to nudge the zero point to be an integer + // (our zero points are integer, and this is motivated by the requirement + // to be able to represent the real value "0" exactly as a quantized value, + // which is required in multiple places, for example in Im2col with SAME + // padding). + T nudged_zero_point = 0; + if (zero_point_double < qmin_double) { + nudged_zero_point = qmin; + } else if (zero_point_double > qmax_double) { + nudged_zero_point = qmax; + } else { + nudged_zero_point = static_cast(round(zero_point_double)); + } + // The zero point should always be in the range of quantized value, + // [qmin, qmax]. + TFLITE_CHECK_GE(nudged_zero_point, qmin); + TFLITE_CHECK_LE(nudged_zero_point, qmax); + + // Finally, store the result nudged quantization params. + QuantizationParams quantization_params; + quantization_params.zero_point = nudged_zero_point; + quantization_params.scale = scale; + return quantization_params; +} + // Decompose a double multiplier into a Q0.31 int32 representation of its // significand, and shift representation of NEGATIVE its exponent --- // this is intended as a RIGHT-shift. 
diff --git a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc index 19b1b408ec..4ae2085c30 100644 --- a/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc +++ b/tensorflow/contrib/lite/kernels/internal/quantization_util_test.cc @@ -22,6 +22,51 @@ namespace { using ::testing::Pair; +// Example taken from http://www.tensorflow.org/performance/quantization +// +// Quantized | Float +// --------- | ----- +// 0 | -10.0 +// 255 | 30.0 +// 128 | 10.0 +TEST(QuantizationUtilTest, ChooseQuantizationParams) { + QuantizationParams qp = ChooseQuantizationParams(-10.0, 30.0); + EXPECT_NEAR(qp.scale, 0.156863, 1e-5); + EXPECT_EQ(qp.zero_point, 64); +} + +TEST(QuantizationUtilTest, ChooseQuantizationParamsZeroPointOnMinBoundary) { + QuantizationParams qp = ChooseQuantizationParams(0.0, 30.0); + EXPECT_NEAR(qp.scale, 0.117647, 1e-5); + EXPECT_EQ(qp.zero_point, 0); +} + +TEST(QuantizationUtilTest, ChooseQuantizationParamsZeroNotInRange) { + // Assumption is that zero is within the range. + EXPECT_DEATH(ChooseQuantizationParams(10.0, 30.0), ""); +} + +TEST(QuantizationUtilTest, ChooseQuantizationParamsEmptyRangePositive) { + // Assumption is that zero is within the range. 
+ EXPECT_DEATH(ChooseQuantizationParams(30.0, 30.0), ""); +} + +TEST(QuantizationUtilTest, ChooseQuantizationParamsEmptyRangeZero) { + QuantizationParams qp = ChooseQuantizationParams(0.0, 0.0); + EXPECT_NEAR(qp.scale, 0.0, 1e-5); + EXPECT_EQ(qp.zero_point, 0); +} + +TEST(QuantizationUtilTest, ChooseQuantizationParamsZeroPointOnMaxBoundary) { + QuantizationParams qp = ChooseQuantizationParams(-10.0, 0.0); + EXPECT_NEAR(qp.scale, 0.039216, 1e-5); + EXPECT_EQ(qp.zero_point, 255); +} + +TEST(QuantizationUtilTest, ChooseQuantizationParamsInvalidRange) { + EXPECT_DEATH(ChooseQuantizationParams(10.0, -30.0), ""); +} + TEST(QuantizationUtilTest, QuantizeMultiplierSmallerThanOne) { auto quantize = [](double d) { int32_t q; diff --git a/tensorflow/contrib/lite/kernels/internal/types.h b/tensorflow/contrib/lite/kernels/internal/types.h index afe131b06e..293538fcbb 100644 --- a/tensorflow/contrib/lite/kernels/internal/types.h +++ b/tensorflow/contrib/lite/kernels/internal/types.h @@ -21,6 +21,22 @@ namespace tflite { enum class FusedActivationFunctionType : uint8 { kNone, kRelu6, kRelu1, kRelu }; +// Quantization parameters, determining the mapping of quantized values +// to real values (i.e. determining how quantized values are mathematically +// interpreted). +// +// The correspondence is as follows: +// +// real_value = scale * (quantized_value - zero_point); +// +// In other words, zero_point designates which quantized value corresponds to +// the real 0 value, and scale designates the difference between the real values +// corresponding to consecutive quantized values differing by 1. 
+struct QuantizationParams { + int32 zero_point = 0; + double scale = 0.0; +}; + template struct Dims { int sizes[N]; diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 845bc0460f..031db2bd7c 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -329,6 +329,7 @@ cc_library( ":toco_graphviz_dump_options", ":toco_port", ":types_proto_cc", + "//tensorflow/contrib/lite/kernels/internal:quantization_util", "//tensorflow/core:lib", "@com_google_absl//absl/strings", "@protobuf_archive//:protobuf_headers", diff --git a/tensorflow/contrib/lite/toco/model.h b/tensorflow/contrib/lite/toco/model.h index cd3eb06602..3fa0089cba 100644 --- a/tensorflow/contrib/lite/toco/model.h +++ b/tensorflow/contrib/lite/toco/model.h @@ -29,6 +29,8 @@ limitations under the License. namespace toco { +using tflite::QuantizationParams; + enum class OperatorType { kNone, // General-purpose neural network operators. @@ -1463,22 +1465,6 @@ inline bool operator<(const Alloc& a, const Alloc& b) { return a.start < b.start; } -// Quantization parameters, determining the mapping of quantized values -// to real values (i.e. determining how quantized values are mathematically -// interpreted). -// -// The correspondence is as follows: -// -// real_value = scale * (quantized_value - zero_point); -// -// In other words, zero_point designates which quantized value corresponds to -// the real 0 value, and scale designates the difference between the real values -// corresponding to consecutive quantized values differing by 1. 
-struct QuantizationParams { - int32 zero_point = 0; - double scale = 0.; -}; - class Shape { public: // For Shape, we stick to half-way encapsulation for now: diff --git a/tensorflow/contrib/lite/toco/tooling_util.h b/tensorflow/contrib/lite/toco/tooling_util.h index d5796486c5..05360e3b0a 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.h +++ b/tensorflow/contrib/lite/toco/tooling_util.h @@ -28,6 +28,7 @@ limitations under the License. #if TOCO_SUPPORT_PORTABLE_PROTOS #include "third_party/protobuf/src/google/protobuf/text_format.h" #endif // TOCO_SUPPORT_PORTABLE_PROTOS +#include "tensorflow/contrib/lite/kernels/internal/quantization_util.h" #include "tensorflow/contrib/lite/toco/model.h" #include "tensorflow/contrib/lite/toco/model_flags.pb.h" #include "tensorflow/contrib/lite/toco/runtime/types.h" @@ -149,71 +150,11 @@ template void GetQuantizationParamsFromMinMax(const MinMax& minmax, QuantizationParams* quantization_params) { using Integer = DataType; - const Integer qmin = std::numeric_limits::min(); - const Integer qmax = std::numeric_limits::max(); - const double qmin_double = qmin; - const double qmax_double = qmax; const double rmin = minmax.min; const double rmax = minmax.max; - // 0 should always be a representable value. Let's assume that the initial - // min,max range contains 0. - CHECK_LE(rmin, 0.); - CHECK_GE(rmax, 0.); - if (rmin == rmax) { - // Special case where the min,max range is a point. Should be {0}. - CHECK_EQ(rmin, 0.); - CHECK_EQ(rmax, 0.); - quantization_params->zero_point = 0; - quantization_params->scale = 0.; - return; - } - // General case. - // - // First determine the scale. - const double scale = (rmax - rmin) / (qmax_double - qmin_double); - - // Zero-point computation. - // First the initial floating-point computation. The zero-point can be - // determined from solving an affine equation for any known pair - // (real value, corresponding quantized value). - // We know two such pairs: (rmin, qmin) and (rmax, qmax). 
- // The arithmetic error on the zero point computed from either pair - // will be roughly machine_epsilon * (sum of absolute values of terms) - // so we want to use the variant that adds the smaller terms. - const double zero_point_from_min = qmin_double - rmin / scale; - const double zero_point_from_max = qmax_double - rmax / scale; - const double zero_point_from_min_error = - std::abs(qmin_double) + std::abs(rmin / scale); - const double zero_point_from_max_error = - std::abs(qmax_double) + std::abs(rmax / scale); - - const double zero_point_double = - zero_point_from_min_error < zero_point_from_max_error - ? zero_point_from_min - : zero_point_from_max; - - // Now we need to nudge the zero point to be an integer - // (our zero points are integer, and this is motivated by the requirement - // to be able to represent the real value "0" exactly as a quantized value, - // which is required in multiple places, for example in Im2col with SAME - // padding). - Integer nudged_zero_point = 0; - if (zero_point_double < qmin_double) { - nudged_zero_point = qmin; - } else if (zero_point_double > qmax_double) { - nudged_zero_point = qmax; - } else { - nudged_zero_point = static_cast(std::round(zero_point_double)); - } - // The zero point should always be in the range of quantized value, - // [qmin, qmax]. - CHECK_GE(nudged_zero_point, qmin); - CHECK_LE(nudged_zero_point, qmax); - - // Finally, store the result nudged quantization params. - quantization_params->zero_point = nudged_zero_point; - quantization_params->scale = scale; + *quantization_params = + ::tflite::ChooseQuantizationParams(rmin, rmax); } void CheckIsReadyForQuantization(const Model& model); -- GitLab From c9ccad16fcac996983d30d309d7405581658f0e3 Mon Sep 17 00:00:00 2001 From: Yifei Feng Date: Wed, 7 Mar 2018 16:58:46 -0800 Subject: [PATCH 564/884] Add scan command to saved_model_cli to check for security sensitive ops. 
--- tensorflow/python/tools/saved_model_cli.py | 60 +++++++++++++++++++ .../python/tools/saved_model_cli_test.py | 22 +++++++ 2 files changed, 82 insertions(+) diff --git a/tensorflow/python/tools/saved_model_cli.py b/tensorflow/python/tools/saved_model_cli.py index b0e9e3e5ed..b88be4ae04 100644 --- a/tensorflow/python/tools/saved_model_cli.py +++ b/tensorflow/python/tools/saved_model_cli.py @@ -38,11 +38,15 @@ from tensorflow.core.example import example_pb2 from tensorflow.core.framework import types_pb2 from tensorflow.python.client import session from tensorflow.python.debug.wrappers import local_cli_wrapper +from tensorflow.python.framework import meta_graph as meta_graph_lib from tensorflow.python.framework import ops as ops_lib from tensorflow.python.platform import app # pylint: disable=unused-import from tensorflow.python.saved_model import loader from tensorflow.python.tools import saved_model_utils +# Set of ops to blacklist. +_OP_BLACKLIST = set(['WriteFile', 'ReadFile']) + def _show_tag_sets(saved_model_dir): """Prints the tag-sets stored in SavedModel directory. @@ -242,6 +246,27 @@ def get_signature_def_map(saved_model_dir, tag_set): return meta_graph.signature_def +def scan_meta_graph_def(meta_graph_def): + """Scans meta_graph_def and reports if there are ops on blacklist. + + Print ops if they are on black list, or print success if no blacklisted ops + found. + + Args: + meta_graph_def: MetaGraphDef protocol buffer. + """ + all_ops_set = set( + meta_graph_lib.ops_used_by_graph_def(meta_graph_def.graph_def)) + blacklisted_ops = _OP_BLACKLIST & all_ops_set + if blacklisted_ops: + # TODO(yifeif): print more warnings + print('MetaGraph with tag set %s contains the following blacklisted ops:' % + meta_graph_def.meta_info_def.tags, blacklisted_ops) + else: + print('MetaGraph with tag set %s does not contain blacklisted ops.' 
% + meta_graph_def.meta_info_def.tags) + + def run_saved_model_with_feed_dict(saved_model_dir, tag_set, signature_def_key, input_tensor_key_feed_dict, outdir, overwrite_flag, tf_debug=False): @@ -609,6 +634,21 @@ def run(args): args.overwrite, tf_debug=args.tf_debug) +def scan(args): + """Function triggered by scan command. + + Args: + args: A namespace parsed from command line. + """ + if args.tag_set: + scan_meta_graph_def( + saved_model_utils.get_meta_graph_def(args.dir, args.tag_set)) + else: + saved_model = reader.read_saved_model(args.dir) + for meta_graph_def in saved_model.meta_graphs: + scan_meta_graph_def(meta_graph_def) + + def create_parser(): """Creates a parser that parse the command line arguments. @@ -730,6 +770,26 @@ def create_parser(): 'SavedModel.') parser_run.set_defaults(func=run) + # scan command + scan_msg = ('Usage example:\n' + 'To scan for blacklisted ops in SavedModel:\n' + '$saved_model_cli scan --dir /tmp/saved_model\n' + 'To scan a specific MetaGraph, pass in --tag_set\n') + parser_scan = subparsers.add_parser( + 'scan', + description=scan_msg, + formatter_class=argparse.RawTextHelpFormatter) + parser_scan.add_argument( + '--dir', + type=str, + required=True, + help='directory containing the SavedModel to execute') + parser_scan.add_argument( + '--tag_set', + type=str, + help='tag-set of graph in SavedModel to scan, separated by \',\'') + parser_scan.set_defaults(func=scan) + return parser diff --git a/tensorflow/python/tools/saved_model_cli_test.py b/tensorflow/python/tools/saved_model_cli_test.py index f99c844845..eedc893a38 100644 --- a/tensorflow/python/tools/saved_model_cli_test.py +++ b/tensorflow/python/tools/saved_model_cli_test.py @@ -525,6 +525,28 @@ signature_def['serving_default']: y_expected = np.array([[2.5], [3.0]]) self.assertAllClose(y_expected, y_actual) + def testScanCommand(self): + self.parser = saved_model_cli.create_parser() + base_path = test.test_src_dir_path(SAVED_MODEL_PATH) + args = 
self.parser.parse_args(['scan', '--dir', base_path]) + with captured_output() as (out, _): + saved_model_cli.scan(args) + output = out.getvalue().strip() + self.assertTrue('does not contain blacklisted ops' in output) + + def testScanCommandFoundBlacklistedOp(self): + self.parser = saved_model_cli.create_parser() + base_path = test.test_src_dir_path(SAVED_MODEL_PATH) + args = self.parser.parse_args( + ['scan', '--dir', base_path, '--tag_set', 'serve']) + op_blacklist = saved_model_cli._OP_BLACKLIST + saved_model_cli._OP_BLACKLIST = set(['VariableV2']) + with captured_output() as (out, _): + saved_model_cli.scan(args) + saved_model_cli._OP_BLACKLIST = op_blacklist + output = out.getvalue().strip() + self.assertTrue('\'VariableV2\'' in output) + if __name__ == '__main__': test.main() -- GitLab From 708def503604a3a9be255edf36623833937c3469 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Wed, 7 Mar 2018 16:56:34 -0800 Subject: [PATCH 565/884] Remove unneeded rewrite, now that contrib.quantize is ready and better. 
PiperOrigin-RevId: 188257466 --- tensorflow/tools/graph_transforms/BUILD | 4 - .../fake_quantize_training.cc | 51 ------ .../fake_quantize_training_test.cc | 63 -------- .../tools/graph_transforms/remove_ema.cc | 146 ------------------ .../tools/graph_transforms/remove_ema_test.cc | 121 --------------- 5 files changed, 385 deletions(-) delete mode 100644 tensorflow/tools/graph_transforms/fake_quantize_training.cc delete mode 100644 tensorflow/tools/graph_transforms/fake_quantize_training_test.cc delete mode 100644 tensorflow/tools/graph_transforms/remove_ema.cc delete mode 100644 tensorflow/tools/graph_transforms/remove_ema_test.cc diff --git a/tensorflow/tools/graph_transforms/BUILD b/tensorflow/tools/graph_transforms/BUILD index ad3668fa02..fba39526b2 100644 --- a/tensorflow/tools/graph_transforms/BUILD +++ b/tensorflow/tools/graph_transforms/BUILD @@ -91,7 +91,6 @@ cc_library( srcs = [ "add_default_attributes.cc", "backports.cc", - "fake_quantize_training.cc", "flatten_atrous.cc", "fold_batch_norms.cc", "fold_constants_lib.cc", @@ -105,7 +104,6 @@ cc_library( "remove_attribute.cc", "remove_control_dependencies.cc", "remove_device.cc", - "remove_ema.cc", "remove_nodes.cc", "rename_attribute.cc", "rename_op.cc", @@ -148,7 +146,6 @@ tf_cc_test( srcs = [ "add_default_attributes_test.cc", "backports_test.cc", - "fake_quantize_training_test.cc", "flatten_atrous_test.cc", "fold_batch_norms_test.cc", "fold_constants_test.cc", @@ -161,7 +158,6 @@ tf_cc_test( "quantize_weights_test.cc", "remove_attribute_test.cc", "remove_device_test.cc", - "remove_ema_test.cc", "remove_nodes_test.cc", "rename_attribute_test.cc", "rename_op_test.cc", diff --git a/tensorflow/tools/graph_transforms/fake_quantize_training.cc b/tensorflow/tools/graph_transforms/fake_quantize_training.cc deleted file mode 100644 index 61aecc6e16..0000000000 --- a/tensorflow/tools/graph_transforms/fake_quantize_training.cc +++ /dev/null @@ -1,51 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. 
All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#define EIGEN_USE_THREADS - -#include "tensorflow/core/graph/quantize_training.h" -#include "tensorflow/tools/graph_transforms/transform_utils.h" - -namespace tensorflow { -namespace graph_transforms { - -// EXPERIMENTAL: This can change without warning. -// Rewrites the GraphDef for quantized training. -// Rewrites the forward pass to include the precision loss with quantization so -// the model can learn to deal with such loss and achieve better accuracy when -// it is quantized later for inference. -// Quantization range information is collected in FakeQuantizeWithMinMaxVars -// ops. -// -// TODO(suharshs): Provide instructions on converting the resulting graph for -// inference. -// TODO(suharshs): Implement this using the GTT rather than calling the old -// prototype function. -Status FakeQuantizeTraining(const GraphDef& input_graph_def, - const TransformFuncContext& context, - GraphDef* output_graph_def) { - // TODO(suharshs): Make num_bits a parameter. - const int32 num_bits = 8; - // TODO(suharshs): Make quantization op a parameter? 
- const string quant_op_type = "FakeQuantWithMinMaxVars"; - - return DoQuantizeTrainingOnGraphDef(input_graph_def, num_bits, quant_op_type, - output_graph_def); -} - -REGISTER_GRAPH_TRANSFORM("fake_quantize_training", FakeQuantizeTraining); - -} // namespace graph_transforms -} // namespace tensorflow diff --git a/tensorflow/tools/graph_transforms/fake_quantize_training_test.cc b/tensorflow/tools/graph_transforms/fake_quantize_training_test.cc deleted file mode 100644 index 5e4ab209e9..0000000000 --- a/tensorflow/tools/graph_transforms/fake_quantize_training_test.cc +++ /dev/null @@ -1,63 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/cc/ops/const_op.h" -#include "tensorflow/cc/ops/math_ops.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/tools/graph_transforms/transform_utils.h" - -namespace tensorflow { -namespace graph_transforms { - -// Declare here, so we don't need a public header. 
-Status FakeQuantizeTraining(const GraphDef& input_graph_def, - const TransformFuncContext& context, - GraphDef* output_graph_def); - -class FakeQuantizeTrainingTest : public ::testing::Test {}; - -// For now, since the fake_quantize_training transform just calls the -// quantize_training rewrite from tensorflow/core/graph/quantize_training.h, -// we just test that the graph has been changed by the transform. -// TODO(suharshs): Once we implement the fake_quantize_training transform -// using the GTT, write proper tests of the transform here. -TEST_F(FakeQuantizeTrainingTest, TransformOccurred) { - auto root = tensorflow::Scope::DisabledShapeInferenceScope(); - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - - Tensor a_data(DT_FLOAT, TensorShape()); - test::FillIota(&a_data, 1.0f); - Output a_const = Const(root.WithOpName("a"), Input::Initializer(a_data)); - - Tensor b_data(DT_FLOAT, TensorShape()); - test::FillIota(&b_data, 1.0f); - Output b_const = Const(root.WithOpName("b"), Input::Initializer(b_data)); - - Output matmul = MatMul(root.WithOpName("matmul"), a_const, b_const); - GraphDef graph_def; - TF_ASSERT_OK(root.ToGraphDef(&graph_def)); - - GraphDef result; - TransformFuncContext context; - TF_ASSERT_OK(FakeQuantizeTraining(graph_def, context, &result)); - - // Test that the transformation resulted in a graph with more nodes. - EXPECT_GT(result.node_size(), graph_def.node_size()); -} - -} // namespace graph_transforms -} // namespace tensorflow diff --git a/tensorflow/tools/graph_transforms/remove_ema.cc b/tensorflow/tools/graph_transforms/remove_ema.cc deleted file mode 100644 index 22e2626702..0000000000 --- a/tensorflow/tools/graph_transforms/remove_ema.cc +++ /dev/null @@ -1,146 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#define EIGEN_USE_THREADS - -#include "tensorflow/tools/graph_transforms/transform_utils.h" - -namespace tensorflow { -namespace graph_transforms { - -// EXPERIMENTAL: This can change without warning. -// Given a graph that has gone through the FakeQuantizeTraining transform and -// has been frozen afterwards, RemoveEMA simplifies the FakeQuantize estimated -// moving average subgraphs to make it compatible with the QuantizeNodes -// transform. -Status RemoveEMA(const GraphDef& input_graph_def, - const TransformFuncContext& context, - GraphDef* output_graph_def) { - TF_RETURN_IF_ERROR(ReplaceMatchingOpTypes( - input_graph_def, // clang-format off - {"FakeQuantWithMinMaxVars", - { - {"*"}, - {"Assign", - { - {"Const"}, - {"Merge", - { - {"Switch", - { - {"Min", - { - {"*"}, - {"Range", - { - {"*"}, - {"*"}, - {"*"}, - } - } - } - }, - {"IsVariableInitialized"} - } - }, - {"Sub", - { - {"Const"}, - {"Mul", - { - {"Sub"}, - {"Sub", - { - {"Const"}, - {"Const"} - } - } - } - } - } - } - } - } - } - }, - {"Assign", - { - {"Const"}, - {"Merge", - { - {"Switch", - { - {"Max"}, - {"IsVariableInitialized"} - } - }, - {"Sub", - { - {"Const"}, - {"Mul", - { - {"Sub"}, - {"Sub", - { - {"Const"}, - {"Const"} - } - } - } - } - } - } - } - } - } - }, - } - }, // clang-format on - [](const NodeMatch& match, const std::set& input_nodes, - const std::set& output_nodes, - std::vector* new_nodes) { - const NodeDef& fake_quant_node = match.node; - const NodeDef& input_node = match.inputs[0].node; - const 
NodeDef& min_var_node = match.inputs[1].inputs[0].node; - const NodeDef& max_var_node = match.inputs[2].inputs[0].node; - - // Make a new FakeQuantizeWithMinMaxVars operation that uses constants - // for its min/max arguments rather than an entire EMA subgraph. - NodeDef new_fake_quant_node; - new_fake_quant_node.set_op(fake_quant_node.op()); - new_fake_quant_node.set_name(fake_quant_node.name()); - AddNodeInput(input_node.name(), &new_fake_quant_node); - AddNodeInput(min_var_node.name(), &new_fake_quant_node); - AddNodeInput(max_var_node.name(), &new_fake_quant_node); - CopyNodeAttr(fake_quant_node, "narrow_range", "narrow_range", - &new_fake_quant_node); - CopyNodeAttr(fake_quant_node, "num_bits", "num_bits", - &new_fake_quant_node); - - new_nodes->push_back(new_fake_quant_node); - new_nodes->push_back(input_node); - new_nodes->push_back(min_var_node); - new_nodes->push_back(max_var_node); - - return Status::OK(); - }, - {}, output_graph_def)); - return Status::OK(); -} - -REGISTER_GRAPH_TRANSFORM("remove_ema", RemoveEMA); - -} // namespace graph_transforms -} // namespace tensorflow diff --git a/tensorflow/tools/graph_transforms/remove_ema_test.cc b/tensorflow/tools/graph_transforms/remove_ema_test.cc deleted file mode 100644 index 27db90e272..0000000000 --- a/tensorflow/tools/graph_transforms/remove_ema_test.cc +++ /dev/null @@ -1,121 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include "tensorflow/cc/ops/const_op.h" -#include "tensorflow/cc/ops/math_ops.h" -#include "tensorflow/core/framework/tensor_testutil.h" -#include "tensorflow/core/lib/core/status_test_util.h" -#include "tensorflow/core/platform/test.h" -#include "tensorflow/core/public/session.h" -#include "tensorflow/tools/graph_transforms/transform_utils.h" - -namespace tensorflow { -namespace graph_transforms { - -// Declare transformations here, so we don't need a public header. -Status FakeQuantizeTraining(const GraphDef& input_graph_def, - const TransformFuncContext& context, - GraphDef* output_graph_def); - -Status RemoveEMA(const GraphDef& input_graph_def, - const TransformFuncContext& context, - GraphDef* output_graph_def); - -Status QuantizeNodes(const GraphDef& input_graph_def, - const TransformFuncContext& context, - GraphDef* output_graph_def); - -class RemoveEMATest : public ::testing::Test {}; - -TEST_F(RemoveEMATest, FakeQuant_RemoveEMA_QuantizeTraining) { - // Build a small graph. - auto root = tensorflow::Scope::NewRootScope(); - using namespace ::tensorflow::ops; // NOLINT(build/namespaces) - - Tensor a_data(DT_FLOAT, TensorShape({1, 1})); - test::FillIota(&a_data, 1.0f); - Output a_const = Const(root.WithOpName("a"), Input::Initializer(a_data)); - - Tensor b_data(DT_FLOAT, TensorShape({1, 1})); - test::FillIota(&b_data, 1.0f); - Output b_const = Const(root.WithOpName("b"), Input::Initializer(b_data)); - - Output matmul = MatMul(root.WithOpName("matmul"), a_const, b_const); - GraphDef graph_def; - TF_ASSERT_OK(root.ToGraphDef(&graph_def)); - - // (1) FakeQuantize the graph. - GraphDef fake_quantized_graph_def; - TransformFuncContext context; - TF_ASSERT_OK( - FakeQuantizeTraining(graph_def, context, &fake_quantized_graph_def)); - - // Test that the transformation resulted in a graph with more nodes. 
- EXPECT_GT(fake_quantized_graph_def.node_size(), graph_def.node_size()); - - // (2) Run the graph to initialize the newly added variables. - std::unique_ptr session(NewSession(SessionOptions())); - TF_ASSERT_OK(session->Create(fake_quantized_graph_def)); - std::vector outputs; - TF_ASSERT_OK(session->Run({}, {"matmul"}, {}, &outputs)); - - // (3) Freeze the graph. Create a "frozen graph" that matches what we would - // expect if we actually froze the above graph. - // TODO(suharshs): Use a c++ freeze graph alternative, when one is available. - GraphDef frozen_graph_def; - for (const NodeDef& node : fake_quantized_graph_def.node()) { - if (node.op() == "Variable" || node.op() == "VariableV2") { - NodeDef const_node; - const_node.set_op("Const"); - const_node.set_name(node.name()); - SetNodeAttr("dtype", DT_FLOAT, &const_node); - Tensor tensor(DT_FLOAT, {}); - tensor.flat()(0) = 1.0f; - SetNodeTensorAttr("value", tensor, &const_node); - *(frozen_graph_def.mutable_node()->Add()) = const_node; - } else { - *(frozen_graph_def.mutable_node()->Add()) = node; - } - } - - // Test that freezing the graph resulted in a graph with the same number of - // nodes. - EXPECT_EQ(frozen_graph_def.node_size(), fake_quantized_graph_def.node_size()); - - // (4) RemoveEMA on the graph to make it compatible with QuantizeNodes. - GraphDef removed_ema_graph_def; - TF_ASSERT_OK(RemoveEMA(frozen_graph_def, context, &removed_ema_graph_def)); - - // Test that the transformation resulted in a graph with less nodes. - EXPECT_LT(removed_ema_graph_def.node_size(), frozen_graph_def.node_size()); - - // (5) QuantizeNodes and inspect the final graph. - // TODO(suharshs): Add a more thorough inspection of the structure of - // the output graph. - GraphDef quantized_graph_def; - TF_ASSERT_OK( - QuantizeNodes(removed_ema_graph_def, context, &quantized_graph_def)); - - // Test that the transformation resulted in a graph with more nodes. 
- EXPECT_GT(quantized_graph_def.node_size(), removed_ema_graph_def.node_size()); - - // Make sure that the FakeQuantizeWithMinMaxVars op has been removed. - for (const NodeDef& node : quantized_graph_def.node()) { - EXPECT_NE(node.op(), "FakeQuantWithMinMaxVars"); - } -} - -} // namespace graph_transforms -} // namespace tensorflow -- GitLab From 6f8ac2157c05d76ed75e6e8c0e93077d7d664457 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 7 Mar 2018 17:23:57 -0800 Subject: [PATCH 566/884] Add tracing annotations to RemoteCallOp's execution. PiperOrigin-RevId: 188260984 --- tensorflow/core/kernels/BUILD | 1 + tensorflow/core/kernels/function_ops.cc | 30 +++++++++++++++---------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 52be90ea1f..1e2a33566b 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1951,6 +1951,7 @@ tf_kernel_library( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", ], ) diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index e3c78d6b70..7c302e2fc2 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -28,6 +28,7 @@ limitations under the License. 
#include "tensorflow/core/graph/gradients.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/platform/macros.h" +#include "tensorflow/core/platform/tracing.h" #include "tensorflow/core/util/device_name_utils.h" namespace tensorflow { @@ -317,6 +318,8 @@ class RemoteCallOp : public AsyncOpKernel { if (cached_entry != handle_cache_.end()) { handle = cached_entry->second; } else { + port::Tracing::TraceMe activity(strings::StrCat( + "RemoteCall: Instantiate: ", func_.name(), " on ", target_device)); OP_REQUIRES_OK_ASYNC( ctx, lib->Instantiate(func_.name(), AttrSlice(&attr_values), @@ -344,21 +347,24 @@ class RemoteCallOp : public AsyncOpKernel { args.push_back(argument); } auto* rets = new std::vector; - lib->Run(opts, handle, args, rets, [rets, done, ctx](const Status& status) { - if (!status.ok()) { - ctx->SetStatus(status); - } else { - for (size_t i = 0; i < rets->size(); ++i) { - ctx->set_output(i, (*rets)[i]); - } - } - delete rets; - done(); - }); + auto* trace = new port::Tracing::TraceMe(strings::StrCat( + "RemoteCall: Run: ", func_.name(), " on ", target_device)); + lib->Run(opts, handle, args, rets, + [rets, trace, done, ctx](const Status& status) { + if (!status.ok()) { + ctx->SetStatus(status); + } else { + for (size_t i = 0; i < rets->size(); ++i) { + ctx->set_output(i, (*rets)[i]); + } + } + delete rets; + delete trace; + done(); + }); } private: - string target_; NameAttrList func_; mutex mu_; -- GitLab From d90b30286a6ac808371131d1f05b371f37127265 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 7 Mar 2018 17:26:18 -0800 Subject: [PATCH 567/884] Helpful ImportError message PiperOrigin-RevId: 188261273 --- .../cluster_resolver/python/training/tpu_cluster_resolver.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py index 91874f9b5c..300b19733e 
100644 --- a/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py +++ b/tensorflow/contrib/cluster_resolver/python/training/tpu_cluster_resolver.py @@ -147,7 +147,9 @@ class TPUClusterResolver(ClusterResolver): if service is None and should_resolve: if not _GOOGLE_API_CLIENT_INSTALLED: raise ImportError('googleapiclient must be installed before using the ' - 'TPU cluster resolver') + 'TPU cluster resolver. Execute: `pip install ' + '--upgrade google-api-python-client` to install with ' + 'pip.') self._service = discovery.build( 'tpu', 'v1alpha1', -- GitLab From 9cdfd3878935fb6c3c2a5da7f65ee0db6c751170 Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Wed, 7 Mar 2018 17:26:21 -0800 Subject: [PATCH 568/884] Internal-only change. PiperOrigin-RevId: 188261279 --- tensorflow/contrib/tpu/python/tpu/datasets.py | 2 +- tensorflow/contrib/tpu/python/tpu/datasets_test.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/datasets.py b/tensorflow/contrib/tpu/python/tpu/datasets.py index 51b67bd6fa..465c668fd8 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets.py @@ -117,7 +117,7 @@ def StreamingFilesDataset(files, file_reader_job = file_reader_job or 'coordinator' - worker_job = worker_job or 'tpu_worker' + worker_job = worker_job or 'worker' if filename_shuffle_buffer_size is None: filename_shuffle_buffer_size = 4096 diff --git a/tensorflow/contrib/tpu/python/tpu/datasets_test.py b/tensorflow/contrib/tpu/python/tpu/datasets_test.py index 6e6a7ce809..918cf0ed8e 100644 --- a/tensorflow/contrib/tpu/python/tpu/datasets_test.py +++ b/tensorflow/contrib/tpu/python/tpu/datasets_test.py @@ -44,7 +44,7 @@ class DatasetsTest(test.TestCase): self._cluster_def = cluster_pb2.ClusterDef() worker_job = self._cluster_def.job.add() - worker_job.name = 'tpu_worker' + worker_job.name = 'worker' worker_job.tasks[0] = self._worker.target[len('grpc://'):] coord_job 
= self._cluster_def.job.add() coord_job.name = 'coordinator' -- GitLab From 5594bc3c43f6829b7ea77f96852c98fb41e4deb2 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Wed, 7 Mar 2018 17:42:08 -0800 Subject: [PATCH 569/884] TFLite: Delegate Buffer Handle interface PiperOrigin-RevId: 188263046 --- tensorflow/contrib/lite/BUILD | 22 +++ tensorflow/contrib/lite/context.c | 7 +- tensorflow/contrib/lite/context.h | 64 +++++++- tensorflow/contrib/lite/interpreter.cc | 154 ++++++++++++++---- tensorflow/contrib/lite/interpreter.h | 45 +++++- tensorflow/contrib/lite/interpreter_test.cc | 164 ++++++++++++++++---- tensorflow/contrib/lite/util.cc | 27 ++++ tensorflow/contrib/lite/util.h | 34 ++++ tensorflow/contrib/lite/util_test.cc | 50 ++++++ 9 files changed, 496 insertions(+), 71 deletions(-) create mode 100644 tensorflow/contrib/lite/util.cc create mode 100644 tensorflow/contrib/lite/util.h create mode 100644 tensorflow/contrib/lite/util_test.cc diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 44c4a7e2ca..5cfbb544b7 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -132,6 +132,7 @@ cc_library( ":memory_planner", ":schema_fbs_version", ":simple_memory_arena", + ":util", "//tensorflow/contrib/lite/kernels:gemm_support", "//tensorflow/contrib/lite/nnapi:nnapi_lib", "//tensorflow/contrib/lite/schema:schema_fbs", @@ -232,6 +233,27 @@ cc_test( ], ) +cc_library( + name = "util", + srcs = ["util.cc"], + hdrs = ["util.h"], + deps = [ + ":context", + ], +) + +cc_test( + name = "util_test", + size = "small", + srcs = ["util_test.cc"], + deps = [ + ":context", + ":util", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + # Test the serialization of a model with optional tensors. 
# Model tests diff --git a/tensorflow/contrib/lite/context.c b/tensorflow/contrib/lite/context.c index c09e838c5c..620de5d678 100644 --- a/tensorflow/contrib/lite/context.c +++ b/tensorflow/contrib/lite/context.c @@ -17,9 +17,14 @@ limitations under the License. #include #include +int TfLiteIntArrayGetSizeInBytes(int size) { + static TfLiteIntArray dummy; + return sizeof(dummy) + sizeof(dummy.data[0]) * size; +} + TfLiteIntArray* TfLiteIntArrayCreate(int size) { TfLiteIntArray* ret = - (TfLiteIntArray*)malloc(sizeof(*ret) + sizeof(ret->data[0]) * size); + (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size)); ret->size = size; return ret; } diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index ed7f4515fa..d901b9f065 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -29,6 +29,7 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ #define TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ +#include #include #include @@ -40,6 +41,7 @@ typedef enum { kTfLiteOk = 0, kTfLiteError = 1 } TfLiteStatus; // Forward declare so GetNode can use this is in Context. typedef struct _TfLiteRegistration TfLiteRegistration; +typedef struct _TfLiteDelegate TfLiteDelegate; #define kOptionalTensor (-1) @@ -57,6 +59,10 @@ typedef struct { #endif } TfLiteIntArray; +// Given the size (number of elements) in a TfLiteIntArray, calculate its size +// in bytes. +int TfLiteIntArrayGetSizeInBytes(int size); + // Create a array of a given `size` (uninitialized entries). // This returns a pointer, that you must free using TfLiteIntArrayFree(). TfLiteIntArray* TfLiteIntArrayCreate(int size); @@ -162,6 +168,11 @@ typedef enum { kTfLiteDynamic, } TfLiteAllocationType; +// The delegates should use zero or positive integers to represent handles. +// -1 is reserved from unallocated status. 
+typedef int TfLiteDelegateBufferHandle; +const TfLiteDelegateBufferHandle kTfLiteNullBufferHandle = -1; + // An tensor in the interpreter system which is a wrapper around a buffer of // data including a dimensionality (or NULL if not currently defined). typedef struct { @@ -194,6 +205,22 @@ typedef struct { // Null-terminated name of this tensor. const char* name; + + // The delegate which knows how to handle `delegate_buffer_handle`. + // WARNING: This is an experimental interface that is subject to change. + TfLiteDelegate* delegate; + + // An integer buffer handle that can be handled by `delegate`. + // The value is valid only when delegate is not null. + // WARNING: This is an experimental interface that is subject to change. + TfLiteDelegateBufferHandle delegate_buffer_handle; + + // If the delegate uses its own buffer (e.g. GPU memory), the delegate is + // responsible to set data_is_stale to true. + // `delegate->CopyFromBufferHandle` can be called to copy the data from + // delegate buffer. + // WARNING: This is an experimental interface that is subject to change. + bool data_is_stale; } TfLiteTensor; // Free memory of tensor `t`; @@ -234,6 +261,11 @@ typedef struct { // WARNING: This is an experimental interface that is subject to change. const void* custom_initial_data; int custom_initial_data_size; + + // The pointer to the delegate. This is non-null only when the node is + // created by calling `interpreter.ModifyGraphWithDelegate`. + // WARNING: This is an experimental interface that is subject to change. + TfLiteDelegate* delegate; } TfLiteNode; typedef struct TfLiteContext { @@ -287,7 +319,7 @@ typedef struct TfLiteContext { // does not take ownership of `nodes_to_replace`.
TfLiteStatus (*ReplaceSubgraphsWithDelegateKernels)( struct TfLiteContext*, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace); + const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); // TODO(ahentz): we should create a more general mechanism for this sort of // library-global objects. @@ -338,19 +370,45 @@ typedef struct _TfLiteRegistration { } TfLiteRegistration; // WARNING: This is an experimental interface that is subject to change. -typedef struct { +typedef struct _TfLiteDelegate { // Data that delegate needs to identify itself. This data is owned by the // delegate. The delegate is owned in the user code, so the delegate is // responsible for doing this when it is destroyed. void* data_; + // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the // delegate a view of the current graph through TfLiteContext*. It typically // will look at the nodes and call ReplaceSubgraphsWithDelegateKernels() // to ask the TensorFlow lite runtime to create macro-nodes to represent // delegated subgraphs of the original graph. - TfLiteStatus (*Prepare)(TfLiteContext* context, void* data); + TfLiteStatus (*Prepare)(TfLiteContext* context, TfLiteDelegate* delegate); + + // Copy the data from delegate buffer handle to raw memory. + // This can be null if the delegate doesn't use its own buffer. + TfLiteStatus (*CopyFromBufferHandle)( + TfLiteDelegate* delegate, + TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, int size); + + // Copy the data from raw memory to delegate buffer handle. + // This can be null if the delegate doesn't use its own buffer. + TfLiteStatus (*CopyToBufferHandle)( + TfLiteDelegate* delegate, + TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, int size); + + // Free the Delegate Buffer Handle. Note: This only frees the handle, but + // this doesn't release the underlying resource (e.g. textures). The + // resources are either owned by application layer or the delegate. 
+ // This can be null if the delegate doesn't use its own buffer. + void (*FreeBufferHandle)(TfLiteDelegate* delegate, + TfLiteDelegateBufferHandle* handle); } TfLiteDelegate; +// WARNING: This is an experimental interface that is subject to change. +typedef struct { + TfLiteDelegate* delegate; + TfLiteIntArray* nodes_to_replace; +} TfLiteDelegateParams; + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 0f5e17f0de..733c47852e 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/contrib/lite/memory_planner.h" #include "tensorflow/contrib/lite/nnapi_delegate.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/util.h" namespace tflite { @@ -96,19 +97,57 @@ Interpreter::~Interpreter() { } for (int i = 0; i < context_.tensors_size; i++) { - TfLiteTensorFree(&context_.tensors[i]); + TfLiteTensor* tensor = &context_.tensors[i]; + if (tensor->delegate_buffer_handle != kTfLiteNullBufferHandle) { + tensor->delegate->FreeBufferHandle(tensor->delegate, + &tensor->delegate_buffer_handle); + } + TfLiteTensorFree(tensor); } } TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( TfLiteContext* context, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace) { + const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) { return static_cast(context->impl_) - ->ReplaceSubgraphsWithDelegateKernels(registration, nodes_to_replace); + ->ReplaceSubgraphsWithDelegateKernels(registration, nodes_to_replace, + delegate); +} + +namespace { + +// This function allocates a continuous memory space that contains a +// TfLiteDelegateParams followed by a TfLiteIntArray. The pointer will be +// deallocated by C `free` function later. 
+TfLiteDelegateParams* CreateDelegateParams( + TfLiteDelegate* delegate, const std::vector& nodes_to_replace) { + int nodes_to_replace_size_in_bytes = + TfLiteIntArrayGetSizeInBytes(nodes_to_replace.size()); + void* allocation = + malloc(sizeof(TfLiteDelegateParams) + nodes_to_replace_size_in_bytes); + TfLiteDelegateParams* params = + reinterpret_cast(allocation); + TfLiteIntArray* nodes_to_replace_arr = reinterpret_cast( + static_cast(allocation) + sizeof(TfLiteDelegateParams)); + + nodes_to_replace_arr->size = nodes_to_replace.size(); + for (int i = 0; i < nodes_to_replace.size(); ++i) { + nodes_to_replace_arr->data[i] = nodes_to_replace[i]; + } + + params->delegate = delegate; + params->nodes_to_replace = nodes_to_replace_arr; + return params; } +} // Anonymous namespace + TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( - TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace) { + TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, + TfLiteDelegate* delegate) { + // Annotate the registration as DELEGATE op. + registration.builtin_code = BuiltinOperator_DELEGATE; + // Annotate the registration as DELEGATE op. registration.builtin_code = BuiltinOperator_DELEGATE; @@ -120,30 +159,38 @@ TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( execution_plan_.clear(); for (auto& subgraph : subgraphs) { - // Turn subgraph.nodes into a TfLiteIntArray compatible data structure. - // TODO(aselle): Avoid this copy by constructing subgraph.nodes that way - // in the first place - subgraph.nodes.insert(subgraph.nodes.begin(), - static_cast(subgraph.nodes.size())); // Subgraphs calimed by the delegate should have a "macro" op created, the // other subgraphs (kTfNonPartition) just have their nodes added back to // the execution plan. 
switch (subgraph.type) { case Subgraph::kTfNonPartition: - for (auto it = subgraph.nodes.begin() + 1; it != subgraph.nodes.end(); + for (auto it = subgraph.nodes.begin(); it != subgraph.nodes.end(); ++it) { execution_plan_.push_back(*it); } break; case Subgraph::kTfPartition: { - void* builtin_data = nullptr; int node_index; - // Create a node that represents computation of this subgraph. - AddNodeWithParameters( - subgraph.input_tensors, subgraph.output_tensors, - reinterpret_cast<const char*>(subgraph.nodes.data()), - subgraph.nodes.size() * sizeof(subgraph.nodes[0]), builtin_data, - &registration, &node_index); + + TfLiteDelegateParams* params = + CreateDelegateParams(delegate, subgraph.nodes); + AddNodeWithParameters(subgraph.input_tensors, subgraph.output_tensors, + nullptr, 0, params, &registration, &node_index); + + // Initialize the output tensors' delegate-related fields. + for (int tensor_index : subgraph.output_tensors) { + TfLiteTensor* tensor = &tensors_[tensor_index]; + TF_LITE_ENSURE_EQ(&context_, tensor->delegate, nullptr); + TF_LITE_ENSURE_EQ(&context_, tensor->delegate_buffer_handle, + kTfLiteNullBufferHandle); + // delegate_buffer_handle will be filled in delegate's `Prepare` + // function. + tensor->delegate = delegate; + } + + // Associate the node with the delegate.
+ TfLiteNode* node = &nodes_and_registration_[node_index].first; + node->delegate = delegate; } break; case Subgraph::kTfUnexplored: return kTfLiteError; @@ -233,14 +280,6 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims, return kTfLiteOk; } -namespace { -TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector& x) { - TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size()); - for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i]; - return lite; -} -} // namespace - TfLiteStatus Interpreter::AllocateTensors() { next_execution_plan_index_to_prepare_ = 0; if (memory_planner_) { @@ -275,7 +314,6 @@ TfLiteStatus Interpreter::AddNodeWithParameters( int new_node_index = nodes_and_registration_.size(); if (node_index) *node_index = new_node_index; nodes_and_registration_.resize(nodes_and_registration_.size() + 1); - auto& node_and_reg = nodes_and_registration_.back(); TfLiteNode& node = node_and_reg.first; if (node.inputs) TfLiteIntArrayFree(node.inputs); @@ -285,8 +323,8 @@ TfLiteStatus Interpreter::AddNodeWithParameters( // NOTE, here we are not using move semantics yet, since our internal // representation isn't std::vector, but in the future we would like to avoid // copies, so we want the interface to take r-value references now. - node.inputs = convertVectorToTfLiteIntArray(inputs); - node.outputs = convertVectorToTfLiteIntArray(outputs); + node.inputs = ConvertVectorToTfLiteIntArray(inputs); + node.outputs = ConvertVectorToTfLiteIntArray(outputs); node.temporaries = TfLiteIntArrayCreate(0); if (init_data) { node.user_data = OpInit(*registration, init_data, init_data_size); @@ -299,6 +337,7 @@ TfLiteStatus Interpreter::AddNodeWithParameters( node.builtin_data = builtin_data_deleter.release(); // TODO(ycling): Filling `custom_initial_data` and `custom_initial_data_size` // properly for nodes generated by ReplaceSubgraphsWithDelegateKernels. 
+ if (registration->builtin_code == BuiltinOperator_CUSTOM) { // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer // `Operator` table is passed in. @@ -309,6 +348,7 @@ TfLiteStatus Interpreter::AddNodeWithParameters( node.custom_initial_data_size = 0; } + node.delegate = nullptr; node_and_reg.second = *registration; execution_plan_.push_back(new_node_index); return kTfLiteOk; @@ -322,7 +362,7 @@ TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index, TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); invokable_ = false; - TfLiteIntArray* dims_lite = convertVectorToTfLiteIntArray(dims); + TfLiteIntArray* dims_lite = ConvertVectorToTfLiteIntArray(dims); return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite); } @@ -424,11 +464,29 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + + // TODO(ycling): This is an extra loop through inputs to check if the data + // need to be copied from Delegate buffer to raw memory, which is often not + // needed. We may want to cache this in prepare to know if this needs to be + // done for a node or not. 
+ for (int i = 0; i < node.inputs->size; ++i) { + int tensor_index = node.inputs->data[i]; + if (tensor_index == kOptionalTensor) { + continue; + } + TfLiteTensor* tensor = &tensors_[tensor_index]; + if (tensor->delegate && tensor->delegate != node.delegate && + tensor->data_is_stale) { + EnsureTensorDataIsReadable(tensor_index); + } + } + EnsureTensorsVectorCapacity(); if (OpInvoke(registration, &node) == kTfLiteError) { status = kTfLiteError; } } + return status; } @@ -464,6 +522,7 @@ TfLiteStatus Interpreter::AddTensors(int tensors_to_add, tensors_.resize(tensors_.size() + tensors_to_add); for (int i = base_index; i < tensors_.size(); i++) { memset(&tensors_[i], 0, sizeof(tensors_[i])); + tensors_[i].delegate_buffer_handle = kTfLiteNullBufferHandle; } context_.tensors = tensors_.data(); context_.tensors_size = tensors_.size(); @@ -511,7 +570,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes); } invokable_ = false; - TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), quantization, const_cast(buffer), bytes, kTfLiteMmapRo, allocation, &context_.tensors[tensor_index]); return kTfLiteOk; @@ -536,7 +595,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadWrite( TF_LITE_ENSURE_OK(&context_, BytesRequired(type, dims.data(), dims.size(), &required_bytes)); } - TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), quantization, /*buffer=*/nullptr, required_bytes, type == kTfLiteString ? 
kTfLiteDynamic : kTfLiteArenaRw, @@ -613,7 +672,7 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { ReplaceSubgraphsWithDelegateKernels; context_.GetExecutionPlan = GetExecutionPlan; - TfLiteStatus status = delegate->Prepare(&context_, delegate->data_); + TfLiteStatus status = delegate->Prepare(&context_, delegate); // Remove additional context info. context_.GetNodeAndRegistration = nullptr; context_.ReplaceSubgraphsWithDelegateKernels = nullptr; @@ -621,4 +680,35 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { return status; } +TfLiteStatus Interpreter::SetDelegateBufferHandle( + int tensor_index, TfLiteDelegateBufferHandle delegate_buffer_handle, + TfLiteDelegate* delegate) { + TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); + TfLiteTensor* tensor = &tensors_[tensor_index]; + + TF_LITE_ENSURE(&context_, + tensor->delegate == nullptr || tensor->delegate == delegate); + tensor->delegate = delegate; + if (tensor->delegate_buffer_handle != kTfLiteNullBufferHandle) { + TF_LITE_ENSURE(&context_, tensor->delegate->FreeBufferHandle != nullptr); + tensor->delegate->FreeBufferHandle(tensor->delegate, + &tensor->delegate_buffer_handle); + } + tensor->delegate_buffer_handle = delegate_buffer_handle; + + return kTfLiteOk; +} + +TfLiteStatus Interpreter::GetDelegateBufferHandle( + int tensor_index, TfLiteDelegateBufferHandle* delegate_buffer_handle, + TfLiteDelegate** delegate) { + TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); + TfLiteTensor* tensor = &tensors_[tensor_index]; + + *delegate = tensor->delegate; + *delegate_buffer_handle = tensor->delegate_buffer_handle; + + return kTfLiteOk; +} + } // namespace tflite diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 04c19644a0..f5fcae90cc 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -265,6 +265,46 @@ class Interpreter { void set_model(const 
Model* model) { model_ = const_cast(model); } Model* model() const { return model_; } + // Ensure the data in `tensor.data` is readable. In case delegate is used, + // it might require to copy the data from delegate buffer to raw memory. + TfLiteStatus EnsureTensorDataIsReadable(int tensor_index) { + TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); + TfLiteTensor* tensor = &tensors_[tensor_index]; + if (tensor->data_is_stale) { + TF_LITE_ENSURE(&context_, tensor->delegate != nullptr); + TF_LITE_ENSURE(&context_, + tensor->delegate_buffer_handle != kTfLiteNullBufferHandle); + // This can be null if the delegate doesn't use its own buffer. + TF_LITE_ENSURE(&context_, + tensor->delegate->CopyFromBufferHandle != nullptr); + tensor->delegate->CopyFromBufferHandle(tensor->delegate, + tensor->delegate_buffer_handle, + tensor->data.raw, tensor->bytes); + tensor->data_is_stale = false; + } + return kTfLiteOk; + } + + // Set the delegate buffer handle to a tensor. It can be called in the + // following cases: + // 1. Set the buffer handle to a tensor that's not being written by a + // delegate. For example, feeding an OpenGL texture as the input of the + // inference graph. + // 2. Set the buffer handle to a tensor that uses the same delegate. + // For example, set an OpenGL texture as the output of inference, while + // the node which produces output is an OpenGL delegate node. + // WARNING: This is an experimental API and subject to change. + TfLiteStatus SetDelegateBufferHandle( + int tensor_index, TfLiteDelegateBufferHandle delegate_buffer_handle, + TfLiteDelegate* delegate); + + // Get the delegate buffer handle, and the delegate which can process the + // buffer handle. + // WARNING: This is an experimental API and subject to change. + TfLiteStatus GetDelegateBufferHandle( + int tensor_index, TfLiteDelegateBufferHandle* delegate_buffer_handle, + TfLiteDelegate** delegate); + // The default capacity of `tensors_` vector. 
static constexpr int kTensorsReservedCapacity = 128; // The capacity headroom of `tensors_` vector before calling ops' @@ -355,14 +395,15 @@ class Interpreter { // Entry point for C API ReplaceSubgraphsWithDelegateKernels static TfLiteStatus ReplaceSubgraphsWithDelegateKernels( TfLiteContext* context, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace); + const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); // Update the execution graph to replace some of the nodes with stub // nodes. Specifically any node index that has `nodes[index]==1` will be // slated for replacement with a delegate kernel specified by registration. // WARNING: This is an experimental interface that is subject to change. TfLiteStatus ReplaceSubgraphsWithDelegateKernels( - TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace); + TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, + TfLiteDelegate* delegate); // WARNING: This is an experimental interface that is subject to change. // Gets the internal pointer to a TensorFlow lite node by node_index. 
diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 2e6727b323..11578fcb69 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -763,24 +763,38 @@ TfLiteRegistration AddOpRegistration() { } class TestDelegate : public ::testing::Test { - public: - TestDelegate() { - interpreter_.AddTensors(5); - interpreter_.SetInputs({0, 1}); - interpreter_.SetOutputs({3, 4}); + protected: + void SetUp() override { + interpreter_ = absl::make_unique<Interpreter>(); + interpreter_->AddTensors(5); + interpreter_->SetInputs({0, 1}); + interpreter_->SetOutputs({3, 4}); TfLiteQuantizationParams quant; - interpreter_.SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, - quant); - interpreter_.SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, - quant); - interpreter_.SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, - quant); - interpreter_.SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, - quant); + interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, + quant); TfLiteRegistration reg = AddOpRegistration(); - interpreter_.AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, &reg); - interpreter_.AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, &reg); - interpreter_.AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, &reg); + interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, &reg); + interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, &reg); + interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, &reg); + } + + void TearDown() override { + // Interpreter relies on delegate_ to free the resources properly.
Thus + // the life cycle of delegate must be longer than interpreter. + interpreter_.reset(); + delegate_.reset(); + } + + TfLiteDelegateBufferHandle last_allocated_handle_ = kTfLiteNullBufferHandle; + + TfLiteDelegateBufferHandle AllocateBufferHandle() { + return ++last_allocated_handle_; } protected: @@ -791,8 +805,8 @@ class TestDelegate : public ::testing::Test { // value-copyable and compatible with TfLite. explicit SimpleDelegate(const std::vector& nodes) : nodes_(nodes) { delegate_.Prepare = [](TfLiteContext* context, - void* data) -> TfLiteStatus { - auto* simple = reinterpret_cast(data); + TfLiteDelegate* delegate) -> TfLiteStatus { + auto* simple = reinterpret_cast(delegate->data_); TfLiteIntArray* nodes_to_separate = TfLiteIntArrayCreate(simple->nodes_.size()); // Mark nodes that we want in TfLiteIntArray* structure. @@ -823,10 +837,28 @@ class TestDelegate : public ::testing::Test { } context->ReplaceSubgraphsWithDelegateKernels( - context, FakeFusedRegistration(), nodes_to_separate); + context, FakeFusedRegistration(), nodes_to_separate, delegate); TfLiteIntArrayFree(nodes_to_separate); return kTfLiteOk; }; + delegate_.CopyToBufferHandle = + [](TfLiteDelegate* delegate, + TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, + int size) -> TfLiteStatus { + // TODO(ycling): Implement tests to test buffer copying logic. + return kTfLiteOk; + }; + delegate_.CopyFromBufferHandle = + [](TfLiteDelegate* delegate, + TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, + int size) -> TfLiteStatus { + // TODO(ycling): Implement tests to test buffer copying logic. + return kTfLiteOk; + }; + delegate_.FreeBufferHandle = [](TfLiteDelegate* delegate, + TfLiteDelegateBufferHandle* handle) { + *handle = kTfLiteNullBufferHandle; + }; // Store type-punned data SimpleDelegate structure. 
delegate_.data_ = reinterpret_cast(this); } @@ -843,36 +875,102 @@ class TestDelegate : public ::testing::Test { std::vector nodes_; TfLiteDelegate delegate_; }; - Interpreter interpreter_; + std::unique_ptr interpreter_; + std::unique_ptr delegate_; }; TEST_F(TestDelegate, BasicDelegate) { - interpreter_.Invoke(); - SimpleDelegate simple({0, 1, 2}); - interpreter_.ModifyGraphWithDelegate(simple.get_tf_lite_delegate()); + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); - ASSERT_EQ(interpreter_.execution_plan().size(), 1); - int node = interpreter_.execution_plan()[0]; - const auto* node_and_reg = interpreter_.node_and_registration(node); + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + int node = interpreter_->execution_plan()[0]; + const auto* node_and_reg = interpreter_->node_and_registration(node); ASSERT_EQ(node_and_reg->second.custom_name, SimpleDelegate::FakeFusedRegistration().custom_name); } TEST_F(TestDelegate, ComplexDeligate) { - interpreter_.Invoke(); - SimpleDelegate simple({1, 2}); - interpreter_.ModifyGraphWithDelegate(simple.get_tf_lite_delegate()); + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({1, 2})); + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); - ASSERT_EQ(interpreter_.execution_plan().size(), 2); + ASSERT_EQ(interpreter_->execution_plan().size(), 2); // 0th should be a non-delegated original op - ASSERT_EQ(interpreter_.execution_plan()[0], 0); + ASSERT_EQ(interpreter_->execution_plan()[0], 0); // 1st should be a new macro op (3) which didn't exist) - ASSERT_EQ(interpreter_.execution_plan()[1], 3); - const auto* node_and_reg = interpreter_.node_and_registration(3); + ASSERT_EQ(interpreter_->execution_plan()[1], 3); + const auto* node_and_reg = interpreter_->node_and_registration(3); ASSERT_EQ(node_and_reg->second.custom_name, 
SimpleDelegate::FakeFusedRegistration().custom_name); } +TEST_F(TestDelegate, SetBufferHandleToInput) { + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); + interpreter_->ModifyGraphWithDelegate(delegate); + + constexpr int kOutputTensorIndex = 0; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + ASSERT_EQ(tensor->delegate, nullptr); + ASSERT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); + + TfLiteDelegateBufferHandle handle = AllocateBufferHandle(); + TfLiteStatus status = interpreter_->SetDelegateBufferHandle( + kOutputTensorIndex, handle, delegate); + ASSERT_EQ(status, kTfLiteOk); + EXPECT_EQ(tensor->delegate, delegate); + EXPECT_EQ(tensor->delegate_buffer_handle, handle); +} + +TEST_F(TestDelegate, SetBufferHandleToOutput) { + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); + interpreter_->ModifyGraphWithDelegate(delegate); + + constexpr int kOutputTensorIndex = 3; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + // Before setting the buffer handle, the tensor's `delegate` is already set + // because it will be written by the delegate. 
+ ASSERT_EQ(tensor->delegate, delegate); + ASSERT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); + + TfLiteDelegateBufferHandle handle = AllocateBufferHandle(); + TfLiteStatus status = interpreter_->SetDelegateBufferHandle( + kOutputTensorIndex, handle, delegate); + ASSERT_EQ(status, kTfLiteOk); + EXPECT_EQ(tensor->delegate, delegate); + EXPECT_EQ(tensor->delegate_buffer_handle, handle); +} + +TEST_F(TestDelegate, SetInvalidHandleToTensor) { + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); + interpreter_->ModifyGraphWithDelegate(delegate); + + SimpleDelegate another_simple_delegate({0, 1, 2}); + + constexpr int kOutputTensorIndex = 3; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + // Before setting the buffer handle, the tensor's `delegate` is already set + // because it will be written by the delegate. + ASSERT_EQ(tensor->delegate, delegate); + ASSERT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); + + TfLiteDelegateBufferHandle handle = AllocateBufferHandle(); + TfLiteStatus status = interpreter_->SetDelegateBufferHandle( + kOutputTensorIndex, handle, + another_simple_delegate.get_tf_lite_delegate()); + // Setting a buffer handle to a tensor with another delegate will fail. + ASSERT_EQ(status, kTfLiteError); + EXPECT_EQ(tensor->delegate, delegate); + EXPECT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc new file mode 100644 index 0000000000..b2c7e6c7a6 --- /dev/null +++ b/tensorflow/contrib/lite/util.cc @@ -0,0 +1,27 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/util.h" + +namespace tflite { + +TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input) { + TfLiteIntArray* output = TfLiteIntArrayCreate(input.size()); + for (size_t i = 0; i < input.size(); i++) { + output->data[i] = input[i]; + } + return output; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h new file mode 100644 index 0000000000..50e4fb839e --- /dev/null +++ b/tensorflow/contrib/lite/util.h @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file provides general C++ utility functions in TFLite. +// For example: Converting between `TfLiteIntArray`, `std::vector` and +// Flatbuffer vectors. These functions can't live in `context.h` since it's pure +// C. 
+ +#ifndef TENSORFLOW_CONTRIB_LITE_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_UTIL_H_ + +#include +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// Converts a `std::vector` to a `TfLiteIntArray`. +TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input); + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_UTIL_H_ diff --git a/tensorflow/contrib/lite/util_test.cc b/tensorflow/contrib/lite/util_test.cc new file mode 100644 index 0000000000..04579c53aa --- /dev/null +++ b/tensorflow/contrib/lite/util_test.cc @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/util.h" + +namespace tflite { +namespace { + +TEST(ConvertVectorToTfLiteIntArray, TestWithVector) { + std::vector input = {1, 2}; + TfLiteIntArray* output = ConvertVectorToTfLiteIntArray(input); + ASSERT_NE(output, nullptr); + EXPECT_EQ(output->size, 2); + EXPECT_EQ(output->data[0], 1); + EXPECT_EQ(output->data[1], 2); + TfLiteIntArrayFree(output); +} + +TEST(ConvertVectorToTfLiteIntArray, TestWithEmptyVector) { + std::vector input; + TfLiteIntArray* output = ConvertVectorToTfLiteIntArray(input); + ASSERT_NE(output, nullptr); + EXPECT_EQ(output->size, 0); + TfLiteIntArrayFree(output); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} -- GitLab From 988cc15b2212fb389a94edc239634eef3d10518d Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 7 Mar 2018 17:44:57 -0800 Subject: [PATCH 570/884] Fix OP_REQUIRES to be OP_REQUIRES_ASYNC PiperOrigin-RevId: 188263337 --- tensorflow/core/kernels/sendrecv_ops.cc | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/kernels/sendrecv_ops.cc b/tensorflow/core/kernels/sendrecv_ops.cc index 688e61fcad..2f87057f4e 100644 --- a/tensorflow/core/kernels/sendrecv_ops.cc +++ b/tensorflow/core/kernels/sendrecv_ops.cc @@ -169,9 +169,10 @@ Rendezvous::DoneCallback make_recv_callback(OpKernelContext* ctx, } // namespace void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) { - OP_REQUIRES( + OP_REQUIRES_ASYNC( ctx, ctx->rendezvous() != nullptr, - errors::Internal("Op kernel context needs to provide a rendezvous.")); + errors::Internal("Op kernel context needs to provide a rendezvous."), + done); Rendezvous::Args args; args.device_context = ctx->op_device_context(); -- GitLab From 7c0b967fdf77d5aa0255f2c0af58677e58937bdf Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Wed, 7 Mar 2018 17:52:04 -0800 Subject: [PATCH 571/884] Restores accumulate_n's functionality when shapes are unknown. 
PiperOrigin-RevId: 188264080 --- .../common_runtime/accumulate_n_optimizer.cc | 34 ++++++++++++++++--- .../python/kernel_tests/accumulate_n_test.py | 7 ++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc index 832a55f255..822d0065b6 100644 --- a/tensorflow/core/common_runtime/accumulate_n_optimizer.cc +++ b/tensorflow/core/common_runtime/accumulate_n_optimizer.cc @@ -114,19 +114,43 @@ class AccumulateNV2RemovePass : public GraphOptimizationPass { const string accumulator_name = strings::StrCat(n->name(), "/Internal/Accumulator"); + TensorShapeProto variable_shape; + variable_shape.add_dim()->set_size(0); TF_RETURN_IF_ERROR(make_node("TemporaryVariable") - .Attr("shape", shape) + .Attr("shape", variable_shape) .Attr("dtype", dtype) .Attr("var_name", accumulator_name) .Finalize(g, &create_accumulator)); - TF_RETURN_IF_ERROR(make_node("Const") - .Attr("value", make_zeros(dtype, shape)) - .Attr("dtype", dtype) - .Finalize(g, &initial_val)); + if (PartialTensorShape(shape).IsFullyDefined()) { + // For fully defined shapes make a constant zero tensor. + TF_RETURN_IF_ERROR(make_node("Const") + .Attr("value", make_zeros(dtype, shape)) + .Attr("dtype", dtype) + .Finalize(g, &initial_val)); + } else { + // For partial shapes make a Fill operation to make a zero tensor with the + // shape of the first input. 
+ Node* shape_node; + TF_RETURN_IF_ERROR( + make_node("Shape") + .Input(data_edges[0]->src(), data_edges[0]->src_output()) + .Finalize(g, &shape_node)); + Node* zero; + TF_RETURN_IF_ERROR( + make_node("Const") + .Attr("value", make_zeros(dtype, TensorShapeProto())) + .Attr("dtype", dtype) + .Finalize(g, &zero)); + TF_RETURN_IF_ERROR(make_node("Fill") + .Input(shape_node) + .Input(zero) + .Finalize(g, &initial_val)); + } TF_RETURN_IF_ERROR(make_node("Assign") .Attr("T", dtype) .Input(create_accumulator) // ref: Ref(T) .Input(initial_val) // value: T + .Attr("validate_shape", false) .Finalize(g, &initialize_accumulator)); for (int i = 0; i < data_edges.size(); ++i) { Node* assignAdd; diff --git a/tensorflow/python/kernel_tests/accumulate_n_test.py b/tensorflow/python/kernel_tests/accumulate_n_test.py index 0a6d4aea37..b793906fac 100644 --- a/tensorflow/python/kernel_tests/accumulate_n_test.py +++ b/tensorflow/python/kernel_tests/accumulate_n_test.py @@ -22,6 +22,7 @@ import numpy as np from tensorflow.python.framework import dtypes as dtypes_lib from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients from tensorflow.python.ops import math_ops from tensorflow.python.ops import variables @@ -49,6 +50,12 @@ class AccumulateNV2Test(test_util.TensorFlowTestCase): self.assertAllEqual(x[0] * 6, math_ops.accumulate_n([tf_x[0]] * 6).eval()) + def testUnknownShape(self): + with self.test_session(use_gpu=True): + x0 = array_ops.placeholder(dtype=dtypes_lib.int32, shape=[None]) + acc = math_ops.accumulate_n([x0, x0], shape=[None]) + self.assertAllEqual([2, 4], acc.eval(feed_dict={x0: [1, 2]})) + def testGrad(self): np.random.seed(42) for num_inputs in range(1, 10): -- GitLab From d9fa587b816f8f625633c9e5b1a428e4cca27d4c Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 7 Mar 2018 17:58:24 -0800 Subject: [PATCH 572/884] Add support for converting range, and an actual build rule for the builtins_test, which somehow got left out. PiperOrigin-RevId: 188264644 --- tensorflow/contrib/py2tf/utils/BUILD | 11 +++++++ tensorflow/contrib/py2tf/utils/builtins.py | 30 +++++++++++++++-- .../contrib/py2tf/utils/builtins_test.py | 33 +++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/py2tf/utils/BUILD b/tensorflow/contrib/py2tf/utils/BUILD index c6a894b508..d029289f5a 100644 --- a/tensorflow/contrib/py2tf/utils/BUILD +++ b/tensorflow/contrib/py2tf/utils/BUILD @@ -33,11 +33,22 @@ py_library( srcs_version = "PY2AND3", visibility = ["//tensorflow:__subpackages__"], deps = [ + "//tensorflow/python:list_ops", "//tensorflow/python:script_ops", "@six_archive//:six", ], ) +py_test( + name = "builtins_test", + srcs = ["builtins_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":utils", + "//tensorflow/python:client_testlib", + ], +) + py_test( name = "context_managers_test", srcs = ["context_managers_test.py"], diff --git a/tensorflow/contrib/py2tf/utils/builtins.py b/tensorflow/contrib/py2tf/utils/builtins.py index 0a50b80b60..3cb62b55d4 100644 --- a/tensorflow/contrib/py2tf/utils/builtins.py +++ b/tensorflow/contrib/py2tf/utils/builtins.py @@ -18,22 +18,32 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import six + from tensorflow.contrib.py2tf.utils import py_func +from tensorflow.contrib.py2tf.utils import type_check from tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import logging_ops +from tensorflow.python.ops import math_ops from tensorflow.python.util import tf_inspect def dynamic_builtin(f, *args, **kwargs): """Converts a builtin function call inline.""" - if not tf_inspect.isbuiltin(f): + # Some built-ins may be objects. 
+ if not tf_inspect.isbuiltin(f) and f not in (range,): return f(*args, **kwargs) if f is len: return dynamic_len(*args, **kwargs) + if six.PY2 and f is xrange: + return dynamic_range(*args, **kwargs) + if f is range: + return dynamic_range(*args, **kwargs) - raise NotImplementedError('The "%s" builtin is not yet supported.' % f) + raise NotImplementedError( + 'The "%s" builtin is not yet supported.' % f.__name__) def dynamic_len(list_or_tensor): @@ -48,6 +58,22 @@ def dynamic_len(list_or_tensor): return len(list_or_tensor) +def dynamic_range(start_or_stop, stop=None, step=None): + """Implementation of range using dynamic dispatch.""" + if type_check.is_tensor(start_or_stop, stop, step): + if step is not None: + return math_ops.range(start_or_stop, stop, step) + if stop is not None: + return math_ops.range(start_or_stop, stop) + return math_ops.range(start_or_stop) + + if step is not None: + return range(start_or_stop, stop, step) + elif stop is not None: + return range(start_or_stop, stop) + return range(start_or_stop) + + def is_tf_print_compatible(value): # TODO(mdan): Enable once we can reliably test this. 
# This is currently disabled because we can't capture the output of diff --git a/tensorflow/contrib/py2tf/utils/builtins_test.py b/tensorflow/contrib/py2tf/utils/builtins_test.py index 19a72c63ec..59b3573d38 100644 --- a/tensorflow/contrib/py2tf/utils/builtins_test.py +++ b/tensorflow/contrib/py2tf/utils/builtins_test.py @@ -53,6 +53,39 @@ class BuiltinsTest(test.TestCase): self.assertEqual(5, builtins.dynamic_builtin(len, a)) + def test_dynamic_range_all_python(self): + self.assertListEqual(list(builtins.dynamic_builtin(range, 3)), [0, 1, 2]) + self.assertListEqual(list(builtins.dynamic_builtin(range, 1, 3)), [1, 2]) + self.assertListEqual( + list(builtins.dynamic_builtin(range, 2, 0, -1)), [2, 1]) + + def test_dynamic_range_tf(self): + with self.test_session() as sess: + self.assertAllEqual( + sess.run(builtins.dynamic_builtin(range, constant_op.constant(3))), + [0, 1, 2]) + self.assertAllEqual( + sess.run(builtins.dynamic_builtin(range, 1, constant_op.constant(3))), + [1, 2]) + self.assertAllEqual( + sess.run( + builtins.dynamic_builtin(range, 2, 0, constant_op.constant(-1))), + [2, 1]) + + def test_dynamic_range_detection(self): + def range(x): # pylint:disable=redefined-builtin + return x + + # Functions that just have the names of builtins are ignored. + self.assertEqual(builtins.dynamic_builtin(range, 1), 1) + if six.PY2: + self.assertListEqual( + list(builtins.dynamic_builtin(xrange, 3)), [0, 1, 2]) + self.assertListEqual( + list(builtins.dynamic_builtin(six.moves.range, 3)), [0, 1, 2]) + self.assertListEqual( + list(builtins.dynamic_builtin(six.moves.xrange, 3)), [0, 1, 2]) + def test_dynamic_print_tf(self): try: out_capturer = six.StringIO() -- GitLab From 0111abf5ce79b87274d3a08a095ddf43016bf652 Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Wed, 7 Mar 2018 18:05:53 -0800 Subject: [PATCH 573/884] TFE: Implement __r*__ operators for `Dimension`. 
This lets you use Dimension objects in numerical computations; e.g., it lets you evaluate expressions like 3 + my_tensor.shape[0] when executing eagerly. At time of writing, without this change, `matplotlib.pyplot.plt(my_tensor, my_other_tensor)` fails when executing eagerly, but it works with this change. PiperOrigin-RevId: 188265500 --- tensorflow/python/framework/tensor_shape.py | 81 +++++++++++++++++-- .../python/framework/tensor_shape_test.py | 13 +++ 2 files changed, 87 insertions(+), 7 deletions(-) diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index 6f2ab8408e..d2dad313f8 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -156,7 +156,7 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A Dimension whose value is the sum of `self` and `other`. @@ -167,6 +167,17 @@ class Dimension(object): else: return Dimension(self._value + other.value) + def __radd__(self, other): + """Returns the sum of `other` and `self`. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A Dimension whose value is the sum of `self` and `other`. + """ + return self + other + def __sub__(self, other): """Returns the subtraction of `other` from `self`. @@ -180,10 +191,10 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: - A Dimension whose value is the subtraction of sum of `other` from `self`. + A Dimension whose value is the subtraction of `other` from `self`. """ other = as_dimension(other) if self._value is None or other.value is None: @@ -191,6 +202,21 @@ class Dimension(object): else: return Dimension(self._value - other.value) + def __rsub__(self, other): + """Returns the subtraction of `self` from `other`. 
+ + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A Dimension whose value is the subtraction of `self` from `other`. + """ + other = as_dimension(other) + if self._value is None or other.value is None: + return Dimension(None) + else: + return Dimension(other.value - self._value) + def __mul__(self, other): """Returns the product of `self` and `other`. @@ -204,7 +230,7 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A Dimension whose value is the product of `self` and `other`. @@ -215,6 +241,17 @@ class Dimension(object): else: return Dimension(self._value * other.value) + def __rmul__(self, other): + """Returns the product of `self` and `other`. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A Dimension whose value is the product of `self` and `other`. + """ + return self * other + def __floordiv__(self, other): """Returns the quotient of `self` and `other` rounded down. @@ -228,7 +265,7 @@ class Dimension(object): ``` Args: - other: Another `Dimension`. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A `Dimension` whose value is the integer quotient of `self` and `other`. @@ -239,6 +276,21 @@ class Dimension(object): else: return Dimension(self._value // other.value) + def __rfloordiv__(self, other): + """Returns the quotient of `other` and `self` rounded down. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A `Dimension` whose value is the integer quotient of `self` and `other`. + """ + other = as_dimension(other) + if self._value is None or other.value is None: + return Dimension(None) + else: + return Dimension(other.value // self._value) + def __div__(self, other): """DEPRECATED: Use `__floordiv__` via `x // y` instead. 
@@ -256,7 +308,7 @@ class Dimension(object): return self // other def __mod__(self, other): - """Returns `self` modulo `other. + """Returns `self` modulo `other`. Dimension moduli are computed as follows: @@ -268,7 +320,7 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A Dimension whose value is `self` modulo `other`. @@ -279,6 +331,21 @@ class Dimension(object): else: return Dimension(self._value % other.value) + def __rmod__(self, other): + """Returns `other` modulo `self`. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A Dimension whose value is `other` modulo `self`. + """ + other = as_dimension(other) + if self._value is None or other.value is None: + return Dimension(None) + else: + return Dimension(other.value % self._value) + def __lt__(self, other): """Returns True if `self` is known to be less than `other`. diff --git a/tensorflow/python/framework/tensor_shape_test.py b/tensorflow/python/framework/tensor_shape_test.py index fffd86c7a6..4cf0e9fcd6 100644 --- a/tensorflow/python/framework/tensor_shape_test.py +++ b/tensorflow/python/framework/tensor_shape_test.py @@ -34,12 +34,17 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(tensor_shape.Dimension(15), dim + tensor_shape.Dimension(3)) self.assertEqual(tensor_shape.Dimension(15), dim + 3) + self.assertEqual(tensor_shape.Dimension(15), 3 + dim) + self.assertEqual(tensor_shape.Dimension(9), dim - 3) + self.assertEqual(tensor_shape.Dimension(1), 13 - dim) self.assertEqual(tensor_shape.Dimension(24), dim * tensor_shape.Dimension(2)) self.assertEqual(tensor_shape.Dimension(24), dim * 2) + self.assertEqual(tensor_shape.Dimension(24), 2 * dim) self.assertEqual( tensor_shape.Dimension(6), dim // tensor_shape.Dimension(2)) self.assertEqual(tensor_shape.Dimension(6), dim // 2) + self.assertEqual(tensor_shape.Dimension(0), 2 // dim) 
self.assertEqual(tensor_shape.Dimension(12), dim.merge_with(tensor_shape.Dimension(12))) self.assertEqual(tensor_shape.Dimension(12), dim.merge_with(12)) @@ -176,6 +181,14 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(str(tensor_shape.Dimension(7)), "7") self.assertEqual(str(tensor_shape.Dimension(None)), "?") + def testMod(self): + four = tensor_shape.Dimension(4) + nine = tensor_shape.Dimension(9) + self.assertEqual(nine % four, 1) + # test both __mod__ and __rmod__. + self.assertEqual(nine % 4, 1) + self.assertEqual(4 % nine, 4) + class ShapeTest(test_util.TensorFlowTestCase): -- GitLab From 615eb3b1788c446cc5bfe97eed418ef9bc93cd2d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 18:26:56 -0800 Subject: [PATCH 574/884] Remove StackPush nodes where the corresponding StackPop nodes have no consumers.. PiperOrigin-RevId: 188267649 --- .../core/grappler/optimizers/loop_optimizer.cc | 9 +++++---- .../grappler/optimizers/loop_optimizer_test.cc | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer.cc b/tensorflow/core/grappler/optimizers/loop_optimizer.cc index 9e427001d5..131466430e 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer.cc @@ -51,9 +51,10 @@ std::vector GetStackPushNodesToConvert(const SimpleGraphView& graph_view, op_types_to_traverse.find(fanout_node.op()) != op_types_to_traverse.end()) { continue; - } else { - // The node is either a StackPop node or something unexpected behind which - // may hide a StackPop node, so we leave the graph alone. + } else if (!IsStackPopOp(fanout_node) || + !graph_view.outputs(fanout_idx).empty()) { + // The node is either a stack pop with consumers or something unexpected + // so we leave the graph alone. 
nodes_to_convert.clear(); break; } @@ -72,7 +73,7 @@ Status RemoveStackOps(const GraphDef& graph, GraphDef* optimized_graph) { GetStackPushNodesToConvert(graph_view, node_idx)) { // We found push nodes without corresponding pops. Convert them to // Identity passing the data through and add a control dependency from - // the op supplying the handle. + // the op supplying the stack handle. NodeDef* push_node = optimized_graph->mutable_node(push_node_idx); VLOG(1) << "Converting " << push_node_idx << " : " << push_node->DebugString(); diff --git a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc index cc9dd22b9e..3d54aa7a79 100644 --- a/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/loop_optimizer_test.cc @@ -87,6 +87,7 @@ TEST_F(LoopOptimizerTest, RemovePush_NoOp) { AddNode("stack1", "StackV2", {}, {}, &graph); AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); AddNode("pop1", "StackPopV2", {"stack1"}, {}, &graph); + AddNode("id1", "Identity", {"pop1"}, {}, &graph); // Stack with corresponding push/pop behind Enter. AddNode("stack2", "StackV2", {}, {}, &graph); AddNode("push_enter", "Enter", {"stack2"}, @@ -95,6 +96,7 @@ TEST_F(LoopOptimizerTest, RemovePush_NoOp) { AddNode("pop_enter", "Enter", {"stack2"}, {{"T", type}, {"frame_name", frame_name}}, &graph); AddNode("pop2", "StackPopV2", {"pop_enter"}, {}, &graph); + AddNode("id2", "Identity", {"pop2"}, {}, &graph); // Stack with unexpected op type in fanout of Stack. AddNode("stack3", "StackV2", {}, {}, &graph); AddNode("push3", "StackPushV2", {"stack3", "c"}, {}, &graph); @@ -114,17 +116,24 @@ TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { AttrValue type; type.set_type(DT_RESOURCE); AddNode("c", "Const", {}, {}, &graph); + // Push without Pop. 
AddNode("stack1", "StackV2", {}, {}, &graph); AddNode("push1", "StackPushV2", {"stack1", "c"}, {}, &graph); + // Push without Pop behind Enter. AddNode("stack2", "StackV2", {}, {}, &graph); AddNode("push_enter", "Enter", {"stack2"}, {{"T", type}, {"frame_name", frame_name}}, &graph); AddNode("push2", "StackPushV2", {"push_enter", "c"}, {}, &graph); + // Pop without consumer. + AddNode("stack3", "StackV2", {}, {}, &graph); + AddNode("push3", "StackPushV2", {"stack3", "c"}, {}, &graph); + AddNode("pop3", "StackPopV2", {"stack3"}, {}, &graph); + LoopOptimizer optimizer; GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(6, output.node_size()); + EXPECT_EQ(9, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); if (node.name() == "push1") { @@ -137,6 +146,11 @@ TEST_F(LoopOptimizerTest, RemovePushWithoutMatchingPop) { EXPECT_EQ(2, node.input_size()); EXPECT_EQ("c", node.input(0)); EXPECT_EQ("^push_enter", node.input(1)); + } else if (node.name() == "push3") { + EXPECT_EQ("Identity", node.op()); + EXPECT_EQ(2, node.input_size()); + EXPECT_EQ("c", node.input(0)); + EXPECT_EQ("^stack3", node.input(1)); } else { const NodeDef& orig_node = item.graph.node(i); EXPECT_EQ(orig_node.ShortDebugString(), node.ShortDebugString()); -- GitLab From 34bd27fe9aa000dd9ba09d26320a478f9bb1e865 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Wed, 7 Mar 2018 18:30:38 -0800 Subject: [PATCH 575/884] PiperOrigin-RevId: 188267957 --- tensorflow/contrib/decision_trees/proto/BUILD | 2 -- tensorflow/contrib/tensorboard/BUILD | 1 - tensorflow/contrib/training/BUILD | 1 - tensorflow/core/BUILD | 1 - tensorflow/core/profiler/BUILD | 1 - tensorflow/python/BUILD | 1 - 6 files changed, 7 deletions(-) diff --git a/tensorflow/contrib/decision_trees/proto/BUILD b/tensorflow/contrib/decision_trees/proto/BUILD index f6de5998d7..ae3847b8b6 100644 --- a/tensorflow/contrib/decision_trees/proto/BUILD +++ b/tensorflow/contrib/decision_trees/proto/BUILD @@ -25,7 +25,6 @@ tf_proto_library( name = "generic_tree_model", srcs = ["generic_tree_model.proto"], cc_api_version = 2, - go_api_version = 2, java_api_version = 2, visibility = ["//visibility:public"], ) @@ -34,7 +33,6 @@ tf_proto_library( name = "generic_tree_model_extensions", srcs = ["generic_tree_model_extensions.proto"], cc_api_version = 2, - go_api_version = 2, protodeps = [":generic_tree_model"], visibility = ["//visibility:public"], ) diff --git a/tensorflow/contrib/tensorboard/BUILD b/tensorflow/contrib/tensorboard/BUILD index 2e0a46ffe4..d833744d0c 100644 --- a/tensorflow/contrib/tensorboard/BUILD +++ b/tensorflow/contrib/tensorboard/BUILD @@ -13,7 +13,6 @@ load("//tensorflow/core:platform/default/build_config.bzl", "tf_proto_library") tf_proto_library( name = "protos_all", srcs = glob(["**/*.proto"]), - go_api_version = 2, visibility = ["//visibility:public"], ) diff --git a/tensorflow/contrib/training/BUILD b/tensorflow/contrib/training/BUILD index 6db373d2d5..6ae2f38252 100644 --- a/tensorflow/contrib/training/BUILD +++ b/tensorflow/contrib/training/BUILD @@ -324,7 +324,6 @@ tf_proto_library( name = "protos_all", srcs = glob(["**/*.proto"]), cc_api_version = 2, - go_api_version = 2, java_api_version = 2, visibility = ["//visibility:public"], ) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 619899ae95..8d556193d7 100644 
--- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -220,7 +220,6 @@ tf_proto_library( srcs = CORE_PROTO_SRCS + ADDITIONAL_CORE_PROTO_SRCS, cc_api_version = 2, default_header = True, - go_api_version = 2, j2objc_api_version = 1, java_api_version = 2, js_api_version = 2, diff --git a/tensorflow/core/profiler/BUILD b/tensorflow/core/profiler/BUILD index 35d9993018..5ce6f1046d 100644 --- a/tensorflow/core/profiler/BUILD +++ b/tensorflow/core/profiler/BUILD @@ -57,7 +57,6 @@ tf_proto_library( name = "protos_all", srcs = glob(["**/*.proto"]), cc_api_version = 2, - go_api_version = 2, java_api_version = 2, protodeps = tf_additional_all_protos(), visibility = ["//visibility:public"], diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 8e07c3e7a1..73b17e7e3c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3081,7 +3081,6 @@ tf_proto_library( "framework/cpp_shape_inference.proto", ], ), - go_api_version = 2, ) tf_proto_library_py( -- GitLab From 94c0c93b1ebedcf624d79f5f07400621fb7b236c Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Wed, 7 Mar 2018 19:04:18 -0800 Subject: [PATCH 576/884] [XLA]: Rewrite the test cases for while loop simplifier with HLO strings. 
PiperOrigin-RevId: 188270727 --- tensorflow/compiler/xla/service/BUILD | 1 + .../xla/service/while_loop_simplifier_test.cc | 547 ++++++++---------- 2 files changed, 243 insertions(+), 305 deletions(-) diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index 0e272e1eea..a0f0635e52 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1306,6 +1306,7 @@ tf_cc_test( ":while_loop_simplifier", "//tensorflow/compiler/xla:test", "//tensorflow/compiler/xla/tests:hlo_verified_test_base", + "//tensorflow/core:lib", "//tensorflow/core:test", ], ) diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index c5183f8d3a..cbea3e3cf2 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ -19,6 +19,7 @@ limitations under the License. #include "tensorflow/compiler/xla/test.h" #include "tensorflow/compiler/xla/tests/hlo_verified_test_base.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/strings/str_util.h" namespace xla { namespace { @@ -26,112 +27,78 @@ namespace { namespace op = xla::testing::opcode_matchers; class WhileLoopSimplifierTest : public HloVerifiedTestBase { - public: - // Makes a computation that contains a loop that runs num_iters times. - HloComputation* MakeSimpleLoop(int num_iters, HloModule* module); - - // Makes a computation which has one parameter, of the given shape, and always - // returns PRED[]{true}. This is useful as a dummy loop condition. - HloComputation* MakeAlwaysTrueComputation(const Shape& param_shape, - HloModule* module); + protected: + // Makes an HloModule that contains a loop with `num_iters` iteration. 
+ void MakeModuleWithSimpleLoop(int num_iters); }; -HloComputation* WhileLoopSimplifierTest::MakeSimpleLoop(int num_iters, - HloModule* module) { - HloComputation::Builder builder(TestName()); - - auto loop_iter_init = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(42))); - auto loop_data_init = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR1({0, 1, 2}))); - auto loop_init = builder.AddInstruction( - HloInstruction::CreateTuple({loop_iter_init, loop_data_init})); - - HloComputation* condition; - { - HloComputation::Builder cond_builder(TestName() + ".condition"); - auto loop_var = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "loop_var")); - auto loop_induction_var = - cond_builder.AddInstruction(HloInstruction::CreateGetTupleElement( - ShapeUtil::MakeShape(S32, {}), loop_var, 0)); - auto limit = cond_builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR0(42 + num_iters))); - cond_builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(PRED, {}), HloOpcode::kLt, loop_induction_var, - limit)); - condition = module->AddEmbeddedComputation(cond_builder.Build()); +void WhileLoopSimplifierTest::MakeModuleWithSimpleLoop(int num_iters) { + string hlo_string_template = R"( + HloModule SimpleLoop + SimpleLoop.body { + loop_var.1 = (s32[], s32[3]{0}) parameter(0) + get-tuple-element.1 = s32[] get-tuple-element(loop_var.1), index=0 + constant.1 = s32[] constant(1) + add = s32[] add(get-tuple-element.1, constant.1) + get-tuple-element.2 = s32[3]{0} get-tuple-element(loop_var.1), index=1 + multiply = s32[3]{0} multiply(get-tuple-element.2, get-tuple-element.2) + ROOT tuple = (s32[], s32[3]{0}) tuple(add, multiply) } - - HloComputation* body; - { - HloComputation::Builder body_builder(TestName() + ".body"); - auto loop_var = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "loop_var")); - auto 
loop_induction_var = - body_builder.AddInstruction(HloInstruction::CreateGetTupleElement( - ShapeUtil::MakeShape(S32, {}), loop_var, 0)); - auto new_loop_induction_var = - body_builder.AddInstruction(HloInstruction::CreateBinary( - loop_induction_var->shape(), HloOpcode::kAdd, loop_induction_var, - body_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1))))); - auto loop_data = - body_builder.AddInstruction(HloInstruction::CreateGetTupleElement( - loop_data_init->shape(), loop_var, 1)); - auto new_loop_data = - body_builder.AddInstruction(HloInstruction::CreateBinary( - loop_data_init->shape(), HloOpcode::kMultiply, loop_data, - loop_data)); - body_builder.AddInstruction( - HloInstruction::CreateTuple({new_loop_induction_var, new_loop_data})); - body = module->AddEmbeddedComputation(body_builder.Build()); + SimpleLoop.condition { + loop_var.2 = (s32[], s32[3]{0}) parameter(0) + get-tuple-element.3 = s32[] get-tuple-element(loop_var.2), index=0 + constant.2 = s32[] constant({{LOOP_BOUND}}) + ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2) } + ENTRY SimpleLoop { + constant.3 = s32[] constant(42) + constant.4 = s32[3]{0} constant({0, 1, 2}) + tuple.1 = (s32[], s32[3]{0}) tuple(constant.3, constant.4) + ROOT while = (s32[], s32[3]{0}) while(tuple.1), condition= + SimpleLoop.condition, body=SimpleLoop.body + } + )"; - builder.AddInstruction(HloInstruction::CreateWhile( - loop_init->shape(), condition, body, loop_init)); - - return module->AddEntryComputation(builder.Build()); -} - -HloComputation* WhileLoopSimplifierTest::MakeAlwaysTrueComputation( - const Shape& param_shape, HloModule* module) { - HloComputation::Builder builder(TestName() + ".always_true"); - builder.AddInstruction( - HloInstruction::CreateParameter(0, param_shape, "param")); - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(true))); - return module->AddEmbeddedComputation(builder.Build()); + string hlo_string = 
tensorflow::str_util::StringReplace( + hlo_string_template, "{{LOOP_BOUND}}", + tensorflow::strings::StrCat(42 + num_iters), + /*replace_all=*/true); + ParseAndVerifyModule(hlo_string.c_str()); } -TEST_F(WhileLoopSimplifierTest, WhileLoopWithZeroIterations) { - HloComputation* computation = MakeSimpleLoop(/*num_iters=*/0, &module()); - ASSERT_TRUE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); - EXPECT_THAT(computation->root_instruction(), +TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { + MakeModuleWithSimpleLoop(/*num_iters=*/0); + HloModule* the_module = &module(); + ASSERT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); + EXPECT_THAT(the_module->entry_computation()->root_instruction(), op::Tuple(op::Constant(), op::Constant())); } -TEST_F(WhileLoopSimplifierTest, WhileLoopWithOneIteration) { - HloComputation* computation = MakeSimpleLoop(/*num_iters=*/1, &module()); - ASSERT_TRUE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); - EXPECT_THAT(computation->root_instruction(), +TEST_F(WhileLoopSimplifierTest, LoopWithOneIterationSimplified) { + MakeModuleWithSimpleLoop(/*num_iters=*/1); + HloModule* the_module = &module(); + ASSERT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); + EXPECT_THAT(the_module->entry_computation()->root_instruction(), op::Tuple(op::Add(), op::Multiply())); } -TEST_F(WhileLoopSimplifierTest, WhileLoopWithTwoIterations) { - MakeSimpleLoop(/*num_iters=*/2, &module()); +TEST_F(WhileLoopSimplifierTest, LoopWithTwoIterationsNotSimplified) { + MakeModuleWithSimpleLoop(/*num_iters=*/2); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } -TEST_F(WhileLoopSimplifierTest, WhileLoopWithControlDependency) { - HloComputation* computation = MakeSimpleLoop(/*num_iters=*/1, &module()); +TEST_F(WhileLoopSimplifierTest, + LoopWithControlDependencySimplifiedDependencyPreserved) { + MakeModuleWithSimpleLoop(/*num_iters=*/1); + HloModule* the_module = &module(); + HloComputation* computation = 
the_module->entry_computation(); auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* true_op = while_op->while_body()->AddInstruction( HloInstruction::CreateConstant(Literal::CreateR0(true))); TF_ASSERT_OK(true_op->AddControlDependencyTo( while_op->while_body()->root_instruction())); - ASSERT_TRUE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); + ASSERT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); EXPECT_THAT(computation->root_instruction()->control_predecessors(), ElementsAre(op::Constant())) << computation->ToString(); @@ -139,8 +106,10 @@ TEST_F(WhileLoopSimplifierTest, WhileLoopWithControlDependency) { // Loops that contain send/recv nodes can't be simplified; the loop structure // around send/recv nodes must be preserved. -TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsSend) { - HloComputation* computation = MakeSimpleLoop(/*num_iters=*/1, &module()); +TEST_F(WhileLoopSimplifierTest, LoopWithSendNotSimplified) { + MakeModuleWithSimpleLoop(/*num_iters=*/1); + HloModule* the_module = &module(); + HloComputation* computation = the_module->entry_computation(); auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); @@ -149,11 +118,13 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsSend) { HloInstruction::CreateConstant(Literal::CreateR0(true))), /*channel_id=*/0)); while_body->AddInstruction(HloInstruction::CreateSendDone(send)); - EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); + EXPECT_FALSE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); } -TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsRecv) { - HloComputation* computation = MakeSimpleLoop(/*num_iters=*/1, &module()); +TEST_F(WhileLoopSimplifierTest, LoopWithRecvNotSimplified) { + MakeModuleWithSimpleLoop(/*num_iters=*/1); + HloModule* the_module = &module(); + HloComputation* computation = 
the_module->entry_computation(); auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); @@ -161,247 +132,217 @@ TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsRecv) { HloInstruction::CreateRecv(ShapeUtil::MakeShape(F32, {1}), /*channel_id=*/0)); while_body->AddInstruction(HloInstruction::CreateRecvDone(recv)); - EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); + EXPECT_FALSE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); } // The limitation on not being able to simplify loops that contain infeeds (and // other non-removable instructions) isn't fundamental -- it just stems from the // fact that our infrastructure sees simplifying such a loop as tantamount to // removing the non-removable instruction. -TEST_F(WhileLoopSimplifierTest, NotRemovedIfContainsNonRemovableInstruction) { - HloComputation* computation = MakeSimpleLoop(/*num_iters=*/1, &module()); +TEST_F(WhileLoopSimplifierTest, LoopWithInfeedNotSimplified) { + MakeModuleWithSimpleLoop(/*num_iters=*/1); + HloModule* the_module = &module(); + HloComputation* computation = the_module->entry_computation(); auto* while_op = computation->root_instruction(); ASSERT_EQ(while_op->opcode(), HloOpcode::kWhile); auto* while_body = while_op->while_body(); while_body->AddInstruction( HloInstruction::CreateInfeed(ShapeUtil::MakeShape(F32, {1}), "config")); - EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); + EXPECT_FALSE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); } -// Check that we don't crash when given a loop whose shape is not a tuple. 
-TEST_F(WhileLoopSimplifierTest, IgnoreNonTupleShapedLoop) { - HloComputation::Builder builder(TestName()); - auto loop_init = builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(42))); - - HloComputation* condition; - { - HloComputation::Builder cond_builder(TestName() + ".condition"); - auto param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "loop_var")); - cond_builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(PRED, {}), HloOpcode::kLt, param, - cond_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(100))))); - condition = module().AddEmbeddedComputation(cond_builder.Build()); +// A non-tuple shaped loop shouldn't be simplified or crash the compiler. +TEST_F(WhileLoopSimplifierTest, NonTupleShapedLoopNotSimplified) { + const string hlo_string = R"( + HloModule NonTupleShapedLoop + NonTupleShapedLoop.body { + loop_var.1 = s32[] parameter(0) + constant.1 = s32[] constant(-1) + ROOT add = s32[] add(s32[] loop_var.1, s32[] constant.1) + } + NonTupleShapedLoop.condition { + loop_var = s32[] parameter(0) + constant = s32[] constant(100) + ROOT less-than = pred[] less-than(s32[] loop_var, s32[] constant) + } + ENTRY INonTupleShapedLoop { + constant.2 = s32[] constant(42) + ROOT while = s32[] while(s32[] constant.2), + condition=NonTupleShapedLoop.condition, + body=NonTupleShapedLoop.body } + )"; - HloComputation* body; - { - HloComputation::Builder body_builder(TestName() + ".body"); - auto param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "loop_var")); - body_builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(S32, {}), HloOpcode::kAdd, param, - body_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(-1))))); - body = module().AddEmbeddedComputation(body_builder.Build()); - } - - builder.AddInstruction(HloInstruction::CreateWhile( - loop_init->shape(), condition, 
body, loop_init)); - - module().AddEntryComputation(builder.Build()); + ParseAndVerifyModule(hlo_string.c_str()); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } -// Construct a loop where we swap the tuple elements in each iteration. -// Although the tuple elements aren't used in the loop, we don't eliminate them, -// because the swapping side-effect is visible to users of the loop. -TEST_F(WhileLoopSimplifierTest, SwapTupleIndices) { - HloComputation::Builder builder(TestName()); - auto loop_init = builder.AddInstruction(HloInstruction::CreateTuple({ - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))), - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1))), - })); - - HloComputation* condition = - MakeAlwaysTrueComputation(loop_init->shape(), &module()); - HloComputation* body; - { - HloComputation::Builder body_builder(TestName() + ".body"); - auto param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "loop_var")); - auto scalar_s32 = ShapeUtil::MakeShape(S32, {}); - body_builder.AddInstruction(HloInstruction::CreateTuple({ - body_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_s32, param, 1)), - body_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_s32, param, 0)), - })); - body = module().AddEmbeddedComputation(body_builder.Build()); +// A while loop that does nothing else besides swapping tuple elements +// can't be simplified as the result of the swapping is visible to users of the +// loop. 
+TEST_F(WhileLoopSimplifierTest, LoopSwappingTupleElementsNotSimplified) { + const string hlo_string = R"( + HloModule SwappingTupleElements + SwappingTupleElements.body { + loop_var = (s32[], s32[]) parameter(0) + get-tuple-element = s32[] get-tuple-element((s32[], s32[]) loop_var),index=1 + get-tuple-element.1 = s32[] get-tuple-element((s32[], s32[]) loop_var), + index=0 + ROOT tuple = (s32[], s32[]) tuple(s32[] get-tuple-element, + s32[] get-tuple-element.1) } + SwappingTupleElements.always_true { + param = (s32[], s32[]) parameter(0) + ROOT constant = pred[] constant(true) + } + ENTRY SwappingTupleElements { + x = s32[] parameter(0) + y = s32[] parameter(1) + tuple.1 = (s32[], s32[]) tuple(s32[] x, s32[] y) + ROOT while = (s32[], s32[]) while((s32[], s32[]) tuple.1), + condition=SwappingTupleElements.always_true, + body=SwappingTupleElements.body + } + )"; - builder.AddInstruction(HloInstruction::CreateWhile( - loop_init->shape(), condition, body, loop_init)); - - module().AddEntryComputation(builder.Build()); + ParseAndVerifyModule(hlo_string.c_str()); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } // Construct a loop where we assign a constant to tuple element 0 in each // iteration. We can't eliminate tuple element 0, even though we never use its // value. 
-TEST_F(WhileLoopSimplifierTest, UnusedButModifiedTupleElement) { - HloComputation::Builder builder(TestName()); - auto loop_init = builder.AddInstruction( - HloInstruction::CreateTuple({builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0)))})); - - HloComputation* condition = - MakeAlwaysTrueComputation(loop_init->shape(), &module()); - HloComputation* body; - { - HloComputation::Builder body_builder(TestName() + ".body"); - body_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "loop_var")); - body_builder.AddInstruction(HloInstruction::CreateTuple({ - body_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1))), - })); - body = module().AddEmbeddedComputation(body_builder.Build()); +TEST_F(WhileLoopSimplifierTest, + LoopWithUnusedButModifiedTupleElementNotSimplified) { + const string hlo_string = R"( + HloModule UnusedButModifiedTupleElement + UnusedButModifiedTupleElement.body { + loop_var = (s32[]) parameter(0) + constant.1 = s32[] constant(1) + ROOT tuple = (s32[]) tuple(s32[] constant.1) } + UnusedButModifiedTupleElement.always_true { + param = (s32[]) parameter(0) + ROOT constant = pred[] constant(true) + } + ENTRY UnusedButModifiedTupleElement { + constant.2 = s32[] constant(0) + tuple.1 = (s32[]) tuple(s32[] constant.2) + ROOT while = (s32[]) while((s32[]) tuple.1), + condition=UnusedButModifiedTupleElement.always_true, + body=UnusedButModifiedTupleElement.body + } + )"; - builder.AddInstruction(HloInstruction::CreateWhile( - loop_init->shape(), condition, body, loop_init)); - - module().AddEntryComputation(builder.Build()); + ParseAndVerifyModule(hlo_string.c_str()); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } // Nothing to simplify in a while loop whose tuple has 0 elements. 
-TEST_F(WhileLoopSimplifierTest, EmptyTuple) { - HloComputation::Builder builder(TestName()); - auto loop_init = builder.AddInstruction(HloInstruction::CreateTuple({})); - - HloComputation* condition = - MakeAlwaysTrueComputation(loop_init->shape(), &module()); - HloComputation* body; - { - HloComputation::Builder body_builder(TestName() + ".body"); - body_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "loop_var")); - body_builder.AddInstruction(HloInstruction::CreateTuple({})); - body = module().AddEmbeddedComputation(body_builder.Build()); +TEST_F(WhileLoopSimplifierTest, LoopWithEmptyTupleNotSimplified) { + const string hlo_string = R"( + HloModule EmptyTuple + EmptyTuple.body { + loop_var = () parameter(0) + ROOT tuple = () tuple() + } + EmptyTuple.always_true { + param = () parameter(0) + ROOT constant = pred[] constant(true) } + ENTRY EmptyTuple { + tuple.1 = () tuple() + ROOT while = () while(() tuple.1), condition=EmptyTuple.always_true, + body=EmptyTuple.body + } + )"; - builder.AddInstruction(HloInstruction::CreateWhile( - loop_init->shape(), condition, body, loop_init)); - module().AddEntryComputation(builder.Build()); + ParseAndVerifyModule(hlo_string.c_str()); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } // While loop where one tuple element is used twice in the body, and thus can't // be simplified away. 
-TEST_F(WhileLoopSimplifierTest, ElemUsedTwice) { - HloComputation::Builder builder(TestName()); - auto loop_init = builder.AddInstruction(HloInstruction::CreateTuple({ - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))), - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1))), - })); - - HloComputation* condition = - MakeAlwaysTrueComputation(loop_init->shape(), &module()); - - auto scalar_s32 = ShapeUtil::MakeShape(S32, {}); - HloComputation* body; - { - HloComputation::Builder body_builder(TestName() + ".body"); - auto* param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_init->shape(), "param0")); - auto* gte0 = body_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_s32, param, /*index=*/0)); - // get0 is used twice in the loop body's tuple. - body_builder.AddInstruction(HloInstruction::CreateTuple({gte0, gte0})); - body = module().AddEmbeddedComputation(body_builder.Build()); +TEST_F(WhileLoopSimplifierTest, LoopWithElemUsedTwiceNotSimplified) { + const string hlo_string = R"( + HloModule ElemUsedTwice + ElemUsedTwice.body { + param0 = (s32[], s32[]) parameter(0) + get-tuple-element = s32[] get-tuple-element((s32[], s32[]) param0), index=0 + ROOT tuple = (s32[], s32[]) tuple(s32[] get-tuple-element, + s32[] get-tuple-element) + } + ElemUsedTwice.always_true { + param = (s32[], s32[]) parameter(0) + ROOT constant = pred[] constant(true) + } + ENTRY ElemUsedTwice { + x = s32[] parameter(0) + y = s32[] parameter(1) + tuple.1 = (s32[], s32[]) tuple(s32[] x, s32[] y) + ROOT while = (s32[], s32[]) while((s32[], s32[]) tuple.1), + condition=ElemUsedTwice.always_true, body=ElemUsedTwice.body } + )"; - builder.AddInstruction(HloInstruction::CreateWhile( - loop_init->shape(), condition, body, loop_init)); - module().AddEntryComputation(builder.Build()); + ParseAndVerifyModule(hlo_string.c_str()); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } // 
This while loop has three tuple elements. Element 0 is unused and should be // removed. Element 1 is used by the loop body, and element 2 is used by the // loop condition; these two should stay. -TEST_F(WhileLoopSimplifierTest, RemoveUnusedOperand) { - HloComputation::Builder builder(TestName()); - auto loop_init = builder.AddInstruction(HloInstruction::CreateTuple({ - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))), - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))), - builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))), - })); - auto loop_shape = loop_init->shape(); - auto scalar_s32 = ShapeUtil::MakeShape(S32, {}); - - HloComputation* condition; - { - HloComputation::Builder cond_builder(TestName() + ".loop_condition"); - auto param = cond_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_shape, "param0")); - cond_builder.AddInstruction(HloInstruction::CreateBinary( - ShapeUtil::MakeShape(PRED, {}), HloOpcode::kEq, - cond_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(0))), - cond_builder.AddInstruction(HloInstruction::CreateGetTupleElement( - scalar_s32, param, /*index=*/2)))); - condition = module().AddEmbeddedComputation(cond_builder.Build()); +TEST_F(WhileLoopSimplifierTest, RemoveUnusedLoopOperands) { + const string hlo_string = R"( + HloModule RemoveUnusedOperands + RemoveUnusedOperands.body { + loop_var = (s32[], s32[], s32[]) parameter(0) + get-tuple-element.1 = s32[] get-tuple-element((s32[], s32[], + s32[]) loop_var), index=0 + get-tuple-element.2 = s32[] get-tuple-element((s32[], s32[], + s32[]) loop_var), index=1 + constant.1 = s32[] constant(1) + add = s32[] add(s32[] get-tuple-element.2, s32[] constant.1) + get-tuple-element.3 = s32[] get-tuple-element((s32[], s32[], s32[]) + loop_var), index=2 + ROOT tuple = (s32[], s32[], s32[]) tuple(s32[] get-tuple-element.1, + s32[] add, s32[] get-tuple-element.3) } - - 
HloComputation* body; - { - HloComputation::Builder body_builder(TestName() + ".body"); - auto* param = body_builder.AddInstruction( - HloInstruction::CreateParameter(0, loop_shape, "loop_var")); - - auto* tuple0 = body_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_s32, param, /*index=*/0)); - auto* tuple1 = body_builder.AddInstruction(HloInstruction::CreateBinary( - scalar_s32, HloOpcode::kAdd, - body_builder.AddInstruction(HloInstruction::CreateGetTupleElement( - scalar_s32, param, /*index=*/1)), - body_builder.AddInstruction( - HloInstruction::CreateConstant(Literal::CreateR0(1))))); - auto* tuple2 = body_builder.AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_s32, param, /*index=*/2)); - body_builder.AddInstruction( - HloInstruction::CreateTuple({tuple0, tuple1, tuple2})); - - body = module().AddEmbeddedComputation(body_builder.Build()); + RemoveUnusedOperands.loop_condition { + constant.2 = s32[] constant(0) + param0 = (s32[], s32[], s32[]) parameter(0) + get-tuple-element = s32[] get-tuple-element((s32[], s32[], s32[]) param0), + index=2 + ROOT equal-to = pred[] equal-to(s32[] constant.2, s32[] get-tuple-element) } + ENTRY RemoveUnusedOperands { + x = s32[] parameter(0) + constant.3 = s32[] constant(0) + y = s32[] parameter(1) + tuple.1 = (s32[], s32[], s32[]) tuple(s32[] x, s32[] constant.3, + s32[] y) + ROOT while = (s32[], s32[], s32[]) while((s32[], s32[], s32[]) tuple.1), + condition=RemoveUnusedOperands.loop_condition, + body=RemoveUnusedOperands.body + } + )"; + + ParseAndVerifyModule(hlo_string.c_str()); + HloModule* the_module = &module(); + EXPECT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); + + // The original while instruction is still left in the module as a dead + // instruction, find a while instruction with a different name as the new + // while instruction. 
+ HloInstruction* new_while_op = + *std::find_if(the_module->entry_computation()->instructions().begin(), + the_module->entry_computation()->instructions().end(), + [&](const HloInstruction* instr) { + return (instr->opcode() == HloOpcode::kWhile && + instr->name() != "while"); + }); - auto* while_op = builder.AddInstruction(HloInstruction::CreateWhile( - loop_init->shape(), condition, body, loop_init)); - - module().AddEntryComputation(builder.Build()); - EXPECT_TRUE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); - - // We leave most of the checking to HloVerifiedTestBase, which runs the - // verifier on module() at the end of this test. - HloInstruction* new_while_op = *std::find_if( - module().entry_computation()->instructions().begin(), - module().entry_computation()->instructions().end(), - [&](const HloInstruction* instr) { - return instr != while_op && instr->opcode() == HloOpcode::kWhile; - }); + auto scalar_s32 = ShapeUtil::MakeShape(S32, {}); EXPECT_TRUE( ShapeUtil::Equal(new_while_op->shape(), ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32}))) @@ -418,31 +359,27 @@ TEST_F(WhileLoopSimplifierTest, RemoveUnusedOperand) { op::GetTupleElement(op::Parameter(0), /*tuple_index=*/1))); } -TEST_F(WhileLoopSimplifierTest, BodyHasNonTupleRoot) { - auto scalar_s32 = ShapeUtil::MakeShape(S32, {}); - Shape while_shape = ShapeUtil::MakeTupleShape({scalar_s32, scalar_s32}); - - HloComputation* while_body = [&]() { - HloComputation::Builder builder(TestName() + ".passthrough"); - HloInstruction* param = builder.AddInstruction( - HloInstruction::CreateParameter(0, while_shape, "param")); - HloComputation* result = module().AddEmbeddedComputation(builder.Build()); - - result->AddInstruction( - HloInstruction::CreateGetTupleElement(scalar_s32, param, 1)); - return result; - }(); - - HloComputation::Builder builder(TestName()); - auto* init_value = builder.AddInstruction( - HloInstruction::CreateParameter(0, while_shape, "init_value")); - 
builder.AddInstruction(HloInstruction::CreateWhile( - while_shape, MakeAlwaysTrueComputation(while_shape, &module()), - while_body, init_value)); - module().AddEntryComputation(builder.Build()); - TF_ASSERT_OK_AND_ASSIGN(bool simplified_loop, - WhileLoopSimplifier{}.Run(&module())); - EXPECT_FALSE(simplified_loop); +TEST_F(WhileLoopSimplifierTest, LoopWithNonTupleBodyShapeNotSimplified) { + const string hlo_string = R"( + HloModule BodyHasNonTupleRoot + BodyHasNonTupleRoot.passthrough { + ROOT param = (s32[], s32[]) parameter(0) + get-tuple-element = s32[] get-tuple-element((s32[], s32[]) param), index=1 + } + BodyHasNonTupleRoot.always_true { + param.1 = (s32[], s32[]) parameter(0) + ROOT constant = pred[] constant(true) + } + ENTRY BodyHasNonTupleRoot { + init_value = (s32[], s32[]) parameter(0) + ROOT while = (s32[], s32[]) while((s32[], s32[]) init_value), + condition=BodyHasNonTupleRoot.always_true, + body=BodyHasNonTupleRoot.passthrough + } + )"; + + ParseAndVerifyModule(hlo_string.c_str()); + EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } } // namespace -- GitLab From 6a3e9078acab56c1d6883d0433d841b3fde2dd16 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 19:24:20 -0800 Subject: [PATCH 577/884] Fix docstring. PiperOrigin-RevId: 188272354 --- tensorflow/contrib/py2tf/utils/type_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/py2tf/utils/type_check.py b/tensorflow/contrib/py2tf/utils/type_check.py index 9ca2dec872..b9b2b451a4 100644 --- a/tensorflow/contrib/py2tf/utils/type_check.py +++ b/tensorflow/contrib/py2tf/utils/type_check.py @@ -22,12 +22,12 @@ from tensorflow.python.framework import tensor_util def is_tensor(*args): - """Check if all arguments are tensors. + """Check if any arguments are tensors. Args: *args: Python objects that may or may not be tensors. Returns: - True if all *args are TensorFlow types, False if one or more are not. 
+ True if any *args are TensorFlow types, False if none are. """ return any([tensor_util.is_tensor(a) for a in args]) -- GitLab From cae39caf2cb4e6a5c5636a5432f7ebf888f6a5b7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 19:39:04 -0800 Subject: [PATCH 578/884] internal PiperOrigin-RevId: 188273192 --- tensorflow/core/platform/default/build_config.bzl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 2102c5cca3..e01e076bcf 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -219,7 +219,7 @@ def tf_proto_library_cc(name, srcs = [], has_services = None, cc_stubby_versions = None, cc_grpc_version = None, j2objc_api_version = 1, - cc_api_version = 2, go_api_version = 2, + cc_api_version = 2, java_api_version = 2, py_api_version = 2, js_api_version = 2, js_codegen = "jspb", default_header = False): @@ -280,7 +280,6 @@ def tf_proto_library(name, srcs = [], has_services = None, visibility = [], testonly = 0, cc_libs = [], cc_api_version = 2, cc_grpc_version = None, - go_api_version = 2, j2objc_api_version = 1, java_api_version = 2, py_api_version = 2, js_api_version = 2, js_codegen = "jspb", -- GitLab From e9ea48126a80f6edb32425ced922e899c1439937 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 7 Mar 2018 19:46:00 -0800 Subject: [PATCH 579/884] Disable the predict input warning in TPUEstimator. 
PiperOrigin-RevId: 188273641 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 11 +++++++++++ tensorflow/python/estimator/estimator.py | 7 +++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index f3c2a510fd..33251f2412 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1795,6 +1795,17 @@ class TPUEstimator(estimator_lib.Estimator): return _input_fn + def _validate_features_in_predict_input(self, result): + """Skip the validation. + + For TPUEstimator, we do not need to check the result type. `_InputPipeline` + has stronger check. Parent class's check generates confusing warning msg. + + Args: + result: `features` returned by input_fn. + """ + pass + def _augment_model_fn(self, model_fn, batch_axis): """Returns a new model_fn, which wraps the TPU support.""" diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 8ed3e4cd19..6c402d8dc9 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -516,7 +516,7 @@ class Estimator(object): allowed_overrides = set([ '_call_input_fn', '_create_global_step', '_convert_train_steps_to_hooks', '_convert_eval_steps_to_hooks', - '_tf_api_names' + '_tf_api_names', '_validate_features_in_predict_input' ]) estimator_members = set([m for m in Estimator.__dict__.keys() if not m.startswith('__')]) @@ -669,11 +669,14 @@ class Estimator(object): # Unconditionally drop the label (the second element of result). result = result[0] + self._validate_features_in_predict_input(result) + return result, input_hooks + + def _validate_features_in_predict_input(self, result): if not _has_dataset_or_queue_runner(result): logging.warning('Input graph does not use tf.data.Dataset or contain a ' 'QueueRunner. That means predict yields forever. 
' 'This is probably a mistake.') - return result, input_hooks def _get_features_and_labels_from_input_fn(self, input_fn, mode): """Extracts the `features` and labels from return values of `input_fn`.""" -- GitLab From cf3603919b16e7974087345dc5bc53c9e0edf214 Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Wed, 7 Mar 2018 22:13:05 -0800 Subject: [PATCH 580/884] Making dockerhub the primary installation location. (#17521) --- tensorflow/docs_src/install/install_linux.md | 31 ++++++++++---------- tensorflow/docs_src/install/install_mac.md | 13 ++++---- 2 files changed, 21 insertions(+), 23 deletions(-) diff --git a/tensorflow/docs_src/install/install_linux.md b/tensorflow/docs_src/install/install_linux.md index fb1e3efbc2..3e8744bf9d 100644 --- a/tensorflow/docs_src/install/install_linux.md +++ b/tensorflow/docs_src/install/install_linux.md @@ -357,24 +357,23 @@ where: to 6006. * TensorFlowCPUImage is required. It identifies the Docker container. Specify one of the following values: - * gcr.io/tensorflow/tensorflow, which is the TensorFlow CPU binary image. - * gcr.io/tensorflow/tensorflow:latest-devel, which is the latest + * tensorflow/tensorflow, which is the TensorFlow CPU binary image. + * tensorflow/tensorflow:latest-devel, which is the latest TensorFlow CPU Binary image plus source code. - * gcr.io/tensorflow/tensorflow:version, which is the + * tensorflow/tensorflow:version, which is the specified version (for example, 1.1.0rc1) of TensorFlow CPU binary image. - * gcr.io/tensorflow/tensorflow:version-devel, which is + * tensorflow/tensorflow:version-devel, which is the specified version (for example, 1.1.0rc1) of the TensorFlow GPU binary image plus source code. - gcr.io is the Google Container Registry. Note that some - TensorFlow images are also available at + TensorFlow images are available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). 
For example, the following command launches the latest TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ docker run -it gcr.io/tensorflow/tensorflow bash
+$ docker run -it tensorflow/tensorflow bash
 
The following command also launches the latest TensorFlow CPU binary image in a @@ -382,7 +381,7 @@ Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
+$ docker run -it -p 8888:8888 tensorflow/tensorflow
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -406,14 +405,14 @@ where: hostPort and containerPort to `8888`. * TensorFlowGPUImage specifies the Docker container. You must specify one of the following values: - * gcr.io/tensorflow/tensorflow:latest-gpu, which is the latest + * tensorflow/tensorflow:latest-gpu, which is the latest TensorFlow GPU binary image. - * gcr.io/tensorflow/tensorflow:latest-devel-gpu, which is + * tensorflow/tensorflow:latest-devel-gpu, which is the latest TensorFlow GPU Binary image plus source code. - * gcr.io/tensorflow/tensorflow:version-gpu, which is the + * tensorflow/tensorflow:version-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image. - * gcr.io/tensorflow/tensorflow:version-devel-gpu, which is + * tensorflow/tensorflow:version-devel-gpu, which is the specified version (for example, 0.12.1) of the TensorFlow GPU binary image plus source code. @@ -422,7 +421,7 @@ following command launches the latest TensorFlow GPU binary image in a Docker container from which you can run TensorFlow programs in a shell:
-$ nvidia-docker run -it gcr.io/tensorflow/tensorflow:latest-gpu bash
+$ nvidia-docker run -it tensorflow/tensorflow:latest-gpu bash
 
The following command also launches the latest TensorFlow GPU binary image @@ -430,13 +429,13 @@ in a Docker container. In this Docker container, you can run TensorFlow programs in a Jupyter notebook:
-$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:latest-gpu
+$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:latest-gpu
 
The following command installs an older TensorFlow version (0.12.1):
-$ nvidia-docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow:0.12.1-gpu
+$ nvidia-docker run -it -p 8888:8888 tensorflow/tensorflow:0.12.1-gpu
 
Docker will download the TensorFlow binary image the first time you launch it. @@ -506,7 +505,7 @@ If you installed through Docker, start a Docker container from which you can run bash. For example:
-$ docker run -it gcr.io/tensorflow/tensorflow bash
+$ docker run -it tensorflow/tensorflow bash
 
diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 222463023f..94defcd18c 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -292,24 +292,23 @@ where: to 6006. * TensorFlowImage is required. It identifies the Docker container. You must specify one of the following values: - * gcr.io/tensorflow/tensorflow: TensorFlow binary image. - * gcr.io/tensorflow/tensorflow:latest-devel: TensorFlow + * tensorflow/tensorflow: TensorFlow binary image. + * tensorflow/tensorflow:latest-devel: TensorFlow Binary image plus source code. -gcr.io is the Google Container Registry. Note that some -TensorFlow images are also available at +The TensorFlow images are available at [dockerhub](https://hub.docker.com/r/tensorflow/tensorflow/). For example, the following command launches a TensorFlow CPU binary image in a Docker container from which you can run TensorFlow programs in a shell: -
$ docker run -it gcr.io/tensorflow/tensorflow bash
+
$ docker run -it tensorflow/tensorflow bash
The following command also launches a TensorFlow CPU binary image in a Docker container. However, in this Docker container, you can run TensorFlow programs in a Jupyter notebook: -
$ docker run -it -p 8888:8888 gcr.io/tensorflow/tensorflow
+
$ docker run -it -p 8888:8888 tensorflow/tensorflow
Docker will download the TensorFlow binary image the first time you launch it. @@ -376,7 +375,7 @@ do the following: If you installed through Docker, start a Docker container that runs bash. For example: -
$ docker run -it gcr.io/tensorflow/tensorflow bash
+
$ docker run -it tensorflow/tensorflow bash
-- GitLab From 9cb1de8e02dd6f6e81009130a9c9fcc152ebade9 Mon Sep 17 00:00:00 2001 From: Suharsh Sivakumar Date: Wed, 7 Mar 2018 22:36:15 -0800 Subject: [PATCH 581/884] Add support for ResourceVariable weights to the quantization rewriter. PiperOrigin-RevId: 188284335 --- tensorflow/contrib/quantize/BUILD | 1 + tensorflow/contrib/quantize/python/common.py | 9 +- .../contrib/quantize/python/common_test.py | 8 + .../quantize/python/fold_batch_norms.py | 15 +- .../contrib/quantize/python/quantize.py | 5 +- .../python/quantize_parameterized_test.py | 489 ++++++++---------- 6 files changed, 256 insertions(+), 271 deletions(-) diff --git a/tensorflow/contrib/quantize/BUILD b/tensorflow/contrib/quantize/BUILD index aec9f47ccb..0b76296204 100644 --- a/tensorflow/contrib/quantize/BUILD +++ b/tensorflow/contrib/quantize/BUILD @@ -24,6 +24,7 @@ py_test( "//tensorflow/python:framework_test_lib", "//tensorflow/python:platform_test", "//tensorflow/python:session", + "//tensorflow/python:variable_scope", ], ) diff --git a/tensorflow/contrib/quantize/python/common.py b/tensorflow/contrib/quantize/python/common.py index 3a1fa61e43..3138149468 100644 --- a/tensorflow/contrib/quantize/python/common.py +++ b/tensorflow/contrib/quantize/python/common.py @@ -23,6 +23,7 @@ import re from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope @@ -101,7 +102,7 @@ def CreateOrGetQuantizationStep(): Quantization step Tensor. 
""" quantization_step_name = 'fake_quantization_step' - quantization_step_tensor_name = quantization_step_name + '/AssignAdd:0' + quantization_step_tensor_name = quantization_step_name + '/Identity:0' g = ops.get_default_graph() try: return g.get_tensor_by_name(quantization_step_tensor_name) @@ -118,5 +119,7 @@ def CreateOrGetQuantizationStep(): with g.name_scope(quantization_step_tensor.op.name + '/'): # We return the incremented variable tensor. Since this is used in conds # for quant_delay and freeze_bn_delay, it will run once per graph - # execution. - return state_ops.assign_add(quantization_step_tensor, 1) + # execution. We return an identity to force resource variables and + # normal variables to return a tensor of the same name. + return array_ops.identity( + state_ops.assign_add(quantization_step_tensor, 1)) diff --git a/tensorflow/contrib/quantize/python/common_test.py b/tensorflow/contrib/quantize/python/common_test.py index d6237fe5e3..06c62f2d26 100644 --- a/tensorflow/contrib/quantize/python/common_test.py +++ b/tensorflow/contrib/quantize/python/common_test.py @@ -22,6 +22,7 @@ from tensorflow.contrib.quantize.python import common from tensorflow.python.client import session from tensorflow.python.framework import ops from tensorflow.python.framework import test_util +from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import googletest @@ -29,8 +30,15 @@ from tensorflow.python.platform import googletest class CommonTest(test_util.TensorFlowTestCase): def testCreateOrGetQuantizationStep(self): + self._TestCreateOrGetQuantizationStep(False) + + def testCreateOrGetQuantizationStepResourceVar(self): + self._TestCreateOrGetQuantizationStep(True) + + def _TestCreateOrGetQuantizationStep(self, use_resource): g = ops.Graph() with session.Session(graph=g) as sess: + variable_scope.get_variable_scope().set_use_resource(use_resource) quantization_step_tensor = 
common.CreateOrGetQuantizationStep() # Check that operations are added to the graph. diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index 1f0648bbb6..b278265639 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -31,6 +31,7 @@ from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.util import compat @@ -502,15 +503,23 @@ def _GetBatchNormParams(graph, context, has_scaling): base_context = split_context[-1] oplist = graph.get_operations() - op_suffix_gamma = base_context + '/BatchNorm/gamma' op_suffix_mean = base_context + '/BatchNorm/moments/Squeeze' op_suffix_variance = base_context + '/BatchNorm/moments/Squeeze_1' - op_suffix_moving_variance = base_context + '/BatchNorm/moving_variance/read' - op_suffix_moving_mean = base_context + '/BatchNorm/moving_mean/read' op_suffix_epsilon = base_context + '/BatchNorm/batchnorm/add/y' op_suffix_bn_decay_mean = base_context + '/BatchNorm/AssignMovingAvg/decay' op_suffix_bn_decay_var = base_context + '/BatchNorm/AssignMovingAvg_1/decay' + if variable_scope.get_variable_scope().use_resource: + op_suffix_gamma = base_context + '/BatchNorm/gamma/Read/ReadVariableOp' + op_suffix_moving_variance = ( + base_context + '/BatchNorm/moving_variance/Read/ReadVariableOp') + op_suffix_moving_mean = ( + base_context + '/BatchNorm/moving_mean/Read/ReadVariableOp') + else: + op_suffix_gamma = base_context + '/BatchNorm/gamma' + op_suffix_moving_variance = base_context + '/BatchNorm/moving_variance/read' + op_suffix_moving_mean = base_context + '/BatchNorm/moving_mean/read' + # Parse through list of ops to find relevant ops for op in oplist: if op.name.endswith(op_suffix_mean): diff --git 
a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 5fd806d195..0608ab9302 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -35,8 +35,7 @@ _QUANTIZABLE_TYPES = {'Conv2D', 'MatMul', 'DepthwiseConv2dNative'} _ACTIVATION_TYPES = {'Relu', 'Relu6', 'Identity'} # Weight types that are supported by the quantization rewrite. -# TODO(suharshs): Add support for ResourceVariable. -_WEIGHT_TYPES = {'Variable', 'VariableV2'} +_WEIGHT_TYPES = {'Variable', 'VariableV2', 'VarHandleOp'} def Quantize(graph, @@ -137,7 +136,7 @@ def _FindLayersToQuantize(graph): input_pattern = graph_matcher.OpTypePattern('*') weight_var_pattern = graph_matcher.OpTypePattern('|'.join(_WEIGHT_TYPES)) weight_pattern = graph_matcher.OpTypePattern( - 'Identity', inputs=[weight_var_pattern]) + 'Identity|ReadVariableOp', inputs=[weight_var_pattern]) folded_weight_pattern = graph_matcher.OpTypePattern('Mul') diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index dd73f6c860..0624cc878b 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -28,6 +28,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import init_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import variable_scope from tensorflow.python.platform import googletest batch_norm = layers.batch_norm @@ -56,52 +57,46 @@ class QuantizeTest(test_util.TensorFlowTestCase): (array_ops.identity, 'Identity', True, 5000), ] for params in parameters_list: - test_fn(params[0], params[1], params[2], params[3]) - - def _TestQuantize_Conv2dWithoutBatchNorm(self, activation, activation_op_name, - with_bypass, delay): - """Tests quantization: inputs -> 
Conv2d no batch norm -> Activation. - - Args: - activation: Callable that returns an Operation, a factory method for the - Activation. - activation_op_name: String, name of the Activation operation. - with_bypass: Bool, when true there is an extra connection added from - inputs to just before Activation. - delay: Int (optional), delay in number of steps until quantization starts. - """ - graph = ops.Graph() - with graph.as_default(): - batch_size, height, width, depth = 5, 128, 128, 3 - inputs = array_ops.zeros((batch_size, height, width, depth)) - stride = 1 if with_bypass else 2 - out_depth = 3 if with_bypass else 32 - activation_fn = None if with_bypass else activation - scope = 'test/test2' if with_bypass else 'test' - node = conv2d(inputs, out_depth, [5, 5], stride=stride, padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, scope=scope) - if with_bypass: - node = math_ops.add(inputs, node, name='test/Add') - node = activation(node, name='test/' + activation_op_name) - update_barrier = control_flow_ops.no_op(name='update_barrier') - with ops.control_dependencies([update_barrier]): - array_ops.identity(node, name='control_dependency') - quantize.Quantize(graph, True, quant_delay=delay) + # Test everything with resource variables and normal variables. + test_fn(params[0], params[1], params[2], params[3], False) + test_fn(params[0], params[1], params[2], params[3], True) + def _AssertCorrectQuantizedGraphWithoutBatchNorm( + self, graph, scope, layer, activation_op_name, with_bypass, delay, + use_resource): quantization_node_name = 'FakeQuantWithMinMaxVars' weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + quantization_node_name) self.assertEqual(weights_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/weights_quant/AssignMinLast', - scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' - ] + + # Assemble the expected inputs. 
+ if use_resource: + expected_inputs = [ + scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp', + scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1', + ] + if layer == 'DepthwiseConv2dNative': + expected_inputs.append(scope + '/depthwise/ReadVariableOp') + else: + expected_inputs.append(scope + '/' + layer + '/ReadVariableOp') + else: + expected_inputs = [ + scope + '/weights_quant/AssignMinLast', + scope + '/weights_quant/AssignMaxLast', + ] + if layer == 'DepthwiseConv2dNative': + expected_inputs.append(scope + '/depthwise_weights/read') + else: + expected_inputs.append(scope + '/weights/read') + self._AssertInputOpsAre(weights_quant, expected_inputs) if delay and delay > 0: output_op_name = scope + '/weights_quant/delayed_quant/Switch_1' else: - output_op_name = scope + '/Conv2D' + if layer == 'DepthwiseConv2dNative': + output_op_name = scope + '/depthwise' + else: + output_op_name = scope + '/' + layer self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name]) @@ -109,10 +104,17 @@ class QuantizeTest(test_util.TensorFlowTestCase): conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' + quantization_node_name) self.assertEqual(conv_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' - ] + if use_resource: + expected_inputs = [ + scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp', + scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1', + scope + '/BiasAdd', + ] + else: + expected_inputs = [ + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' + ] self._AssertInputOpsAre(conv_quant, expected_inputs) output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' if delay else 'test/Add') @@ -121,23 +123,76 @@ class QuantizeTest(test_util.TensorFlowTestCase): act_quant = graph.get_operation_by_name('test/act_quant/' + quantization_node_name) 
self.assertEqual(act_quant.type, quantization_node_name) - - expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', - 'test/' + activation_op_name - ] + if use_resource: + expected_inputs = [ + 'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp', + 'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1', + 'test/' + activation_op_name, + ] + else: + expected_inputs = [ + 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', + 'test/' + activation_op_name + ] self._AssertInputOpsAre(act_quant, expected_inputs) output_op_name = ('test/act_quant/delayed_quant/Switch_1' if delay else 'control_dependency') self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) - self._TestIdempotent(graph) + self._AssertIdempotent(graph) def testQuantize_Conv2dWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( self._TestQuantize_Conv2dWithoutBatchNorm) + def _TestQuantize_Conv2dWithoutBatchNorm(self, activation, activation_op_name, + with_bypass, delay, use_resource): + """Tests quantization: inputs -> Conv2d no batch norm -> Activation. + + Args: + activation: Callable that returns an Operation, a factory method for the + Activation. + activation_op_name: String, name of the Activation operation. + with_bypass: Bool, when true there is an extra connection added from + inputs to just before Activation. + delay: Int (optional), delay in number of steps until quantization starts. + use_resource: Bool, when true uses resource variables. 
+ """ + graph = ops.Graph() + with graph.as_default(): + variable_scope.get_variable_scope().set_use_resource(use_resource) + batch_size, height, width, depth = 5, 128, 128, 3 + inputs = array_ops.zeros((batch_size, height, width, depth)) + stride = 1 if with_bypass else 2 + out_depth = 3 if with_bypass else 32 + activation_fn = None if with_bypass else activation + scope = 'test/test2' if with_bypass else 'test' + node = conv2d( + inputs, + out_depth, [5, 5], + stride=stride, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + scope=scope) + if with_bypass: + node = math_ops.add(inputs, node, name='test/Add') + node = activation(node, name='test/' + activation_op_name) + update_barrier = control_flow_ops.no_op(name='update_barrier') + with ops.control_dependencies([update_barrier]): + array_ops.identity(node, name='control_dependency') + + quantize.Quantize(graph, True, quant_delay=delay) + + self._AssertCorrectQuantizedGraphWithoutBatchNorm( + graph, scope, 'Conv2D', activation_op_name, with_bypass, delay, + use_resource) + + def testQuantize_FCWithoutBatchNorm(self): + self._RunWithoutBatchNormTestOverParameters( + self._TestQuantize_FCWithoutBatchNorm) + def _TestQuantize_FCWithoutBatchNorm(self, activation, activation_op_name, - with_bypass, delay): + with_bypass, delay, use_resource): """Tests quantization: inputs -> FC no batch norm -> Activation. Args: @@ -147,17 +202,22 @@ class QuantizeTest(test_util.TensorFlowTestCase): with_bypass: Bool, when true there is an extra connection added from inputs to just before Activation. delay: Int (optional), delay in number of steps until quantization starts. + use_resource: Bool, when true uses resource variables. 
""" graph = ops.Graph() with graph.as_default(): + variable_scope.get_variable_scope().set_use_resource(use_resource) batch_size, depth = 5, 256 inputs = array_ops.zeros((batch_size, depth)) out_depth = 256 if with_bypass else 128 activation_fn = None if with_bypass else activation scope = 'test/test2' if with_bypass else 'test' - node = fully_connected(inputs, out_depth, - weights_initializer=self._WeightInit(0.03), - activation_fn=activation_fn, scope=scope) + node = fully_connected( + inputs, + out_depth, + weights_initializer=self._WeightInit(0.03), + activation_fn=activation_fn, + scope=scope) if with_bypass: node = math_ops.add(inputs, node, name='test/Add') node = activation(node, name='test/' + activation_op_name) @@ -166,53 +226,16 @@ class QuantizeTest(test_util.TensorFlowTestCase): array_ops.identity(node, name='control_dependency') quantize.Quantize(graph, True, quant_delay=delay) - quantization_node_name = 'FakeQuantWithMinMaxVars' - weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + - quantization_node_name) - self.assertEqual(weights_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/weights_quant/AssignMinLast', - scope + '/weights_quant/AssignMaxLast', scope + '/weights/read' - ] - self._AssertInputOpsAre(weights_quant, expected_inputs) - if delay and delay > 0: - output_op_name = scope + '/weights_quant/delayed_quant/Switch_1' - else: - output_op_name = scope + '/MatMul' - self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name]) + self._AssertCorrectQuantizedGraphWithoutBatchNorm( + graph, scope, 'MatMul', activation_op_name, with_bypass, delay, + use_resource) - if with_bypass: - conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' + - quantization_node_name) - self.assertEqual(conv_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' - ] - self._AssertInputOpsAre(conv_quant, 
expected_inputs) - output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' - if delay else 'test/Add') - self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name]) - - act_quant = graph.get_operation_by_name('test/act_quant/' + - quantization_node_name) - self.assertEqual(act_quant.type, quantization_node_name) - expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', - 'test/' + activation_op_name - ] - self._AssertInputOpsAre(act_quant, expected_inputs) - output_op_name = ('test/act_quant/delayed_quant/Switch_1' - if delay else 'control_dependency') - self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) - self._TestIdempotent(graph) - - def testQuantize_FCWithoutBatchNorm(self): + def testQuantize_DepthwiseConv2dWithoutBatchNorm(self): self._RunWithoutBatchNormTestOverParameters( - self._TestQuantize_FCWithoutBatchNorm) + self._TestQuantize_DepthwiseConv2dWithoutBatchNorm) def _TestQuantize_DepthwiseConv2dWithoutBatchNorm( - self, activation, activation_op_name, with_bypass, delay): + self, activation, activation_op_name, with_bypass, delay, use_resource): """Tests quantization: inputs -> DWConv2d no batch norm -> Activation. Args: @@ -222,18 +245,25 @@ class QuantizeTest(test_util.TensorFlowTestCase): with_bypass: Bool, when true there is an extra connection added from inputs to just before Activation. delay: Int (optional), delay in number of steps until quantization starts. + use_resource: Bool, when true uses resource variables. 
""" graph = ops.Graph() with graph.as_default(): + variable_scope.get_variable_scope().set_use_resource(use_resource) batch_size, height, width, depth = 5, 128, 128, 3 inputs = array_ops.zeros((batch_size, height, width, depth)) stride = 1 if with_bypass else 2 activation_fn = None if with_bypass else activation scope = 'test/test2' if with_bypass else 'test' - node = separable_conv2d(inputs, None, [5, 5], stride=stride, - depth_multiplier=1.0, padding='SAME', - weights_initializer=self._WeightInit(0.09), - activation_fn=activation_fn, scope=scope) + node = separable_conv2d( + inputs, + None, [5, 5], + stride=stride, + depth_multiplier=1.0, + padding='SAME', + weights_initializer=self._WeightInit(0.09), + activation_fn=activation_fn, + scope=scope) if with_bypass: node = math_ops.add(inputs, node, name='test/Add') node = activation(node, name='test/' + activation_op_name) @@ -242,51 +272,9 @@ class QuantizeTest(test_util.TensorFlowTestCase): array_ops.identity(node, name='control_dependency') quantize.Quantize(graph, True, quant_delay=delay) - quantization_node_name = 'FakeQuantWithMinMaxVars' - weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + - quantization_node_name) - self.assertEqual(weights_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/weights_quant/AssignMinLast', - scope + '/weights_quant/AssignMaxLast', - scope + '/depthwise_weights/read' - ] - self._AssertInputOpsAre(weights_quant, expected_inputs) - if delay and delay > 0: - output_op_name = scope + '/weights_quant/delayed_quant/Switch_1' - else: - output_op_name = scope + '/depthwise' - self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name]) - - if with_bypass: - conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' + - quantization_node_name) - self.assertEqual(conv_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/BiasAdd' - ] - 
self._AssertInputOpsAre(conv_quant, expected_inputs) - output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' - if delay else 'test/Add') - self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name]) - - act_quant = graph.get_operation_by_name('test/act_quant/' + - quantization_node_name) - self.assertEqual(act_quant.type, quantization_node_name) - expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', - 'test/' + activation_op_name - ] - self._AssertInputOpsAre(act_quant, expected_inputs) - output_op_name = ('test/act_quant/delayed_quant/Switch_1' - if delay else 'control_dependency') - self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) - self._TestIdempotent(graph) - - def testQuantize_DepthwiseConv2dWithoutBatchNorm(self): - self._RunWithoutBatchNormTestOverParameters( - self._TestQuantize_DepthwiseConv2dWithoutBatchNorm) + self._AssertCorrectQuantizedGraphWithoutBatchNorm( + graph, scope, 'DepthwiseConv2dNative', activation_op_name, with_bypass, + delay, use_resource) def _RunBatchNormTestOverParameters(self, test_fn): # TODO(suharshs): Use parameterized test once OSS TF supports it. @@ -318,13 +306,88 @@ class QuantizeTest(test_util.TensorFlowTestCase): (array_ops.identity, 'Identity', True, 5000, True) ] for params in parameters_list: - test_fn(params[0], params[1], params[2], params[3], params[4]) + # Test everything with resource variables and normal variables. 
+ test_fn(params[0], params[1], params[2], params[3], params[4], False) + test_fn(params[0], params[1], params[2], params[3], params[4], True) + + def _AssertCorrectQuantizedGraphWithBatchNorm(self, graph, scope, layer, + activation_op_name, with_bypass, + delay, use_resource): + quantization_node_name = 'FakeQuantWithMinMaxVars' + weights_quant = graph.get_operation_by_name( + scope + '/weights_quant/' + quantization_node_name) + self.assertEqual(weights_quant.type, quantization_node_name) + if use_resource: + expected_inputs = [ + scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp', + scope + '/weights_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1', + ] + else: + expected_inputs = [ + scope + '/weights_quant/' + 'AssignMinLast', + scope + '/weights_quant/' + 'AssignMaxLast' + ] + expected_inputs.append(scope + '/mul_fold') + + self._AssertInputOpsAre(weights_quant, expected_inputs) + if layer == 'DepthwiseConv2dNative': + output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1' + if delay else '/depthwise_Fold') + else: + output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1' + if delay else '/' + layer + '_Fold') + self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name]) + + if with_bypass: + conv_quant = graph.get_operation_by_name( + scope + '/conv_quant/' + quantization_node_name) + self.assertEqual(conv_quant.type, quantization_node_name) + + if use_resource: + expected_inputs = [ + scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp', + scope + '/conv_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1', + ] + else: + expected_inputs = [ + scope + '/conv_quant/AssignMinEma', + scope + '/conv_quant/AssignMaxEma', + ] + expected_inputs.append(scope + '/add_fold') + + self._AssertInputOpsAre(conv_quant, expected_inputs) + output_op_name = ( + scope + '/conv_quant/delayed_quant/Switch_1' if delay else 'test/Add') + self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name]) + + act_quant = 
graph.get_operation_by_name( + 'test/act_quant/' + quantization_node_name) + self.assertEqual(act_quant.type, quantization_node_name) + + if use_resource: + expected_inputs = [ + 'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp', + 'test/act_quant/FakeQuantWithMinMaxVars/ReadVariableOp_1', + ] + else: + expected_inputs = [ + 'test/act_quant/AssignMinEma', + 'test/act_quant/AssignMaxEma', + ] + expected_inputs.append('test/' + activation_op_name) + + self._AssertInputOpsAre(act_quant, expected_inputs) + output_op_name = ('test/act_quant/delayed_quant/Switch_1' + if delay else 'control_dependency') + self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) + self._AssertIdempotent(graph) def testQuantize_Conv2dWithBatchNorm(self): self._RunBatchNormTestOverParameters(self._TestQuantize_Conv2dWithBatchNorm) def _TestQuantize_Conv2dWithBatchNorm(self, activation, activation_op_name, - with_bypass, delay, fused_batch_norm): + with_bypass, delay, fused_batch_norm, + use_resource): """Tests quantization: inputs -> Conv2d with batch norm -> Activation. Args: @@ -335,9 +398,11 @@ class QuantizeTest(test_util.TensorFlowTestCase): inputs to just before Activation. delay: Int (optional), delay in number of steps until quantization starts. fused_batch_norm: Bool, when true use FusedBatchNorm. + use_resource: Bool, when true uses resource variables. 
""" graph = ops.Graph() with graph.as_default(): + variable_scope.get_variable_scope().set_use_resource(use_resource) batch_size, height, width, depth = 5, 128, 128, 3 inputs = array_ops.zeros((batch_size, height, width, depth)) stride = 1 if with_bypass else 2 @@ -367,50 +432,16 @@ class QuantizeTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms(graph, is_training=True) quantize.Quantize(graph, True, quant_delay=delay) - quantization_node_name = 'FakeQuantWithMinMaxVars' - weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + - quantization_node_name) - self.assertEqual(weights_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/weights_quant/' + 'AssignMinLast', - scope + '/weights_quant/' + 'AssignMaxLast', scope + '/mul_fold' - ] - self._AssertInputOpsAre(weights_quant, expected_inputs) - output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1' - if delay else '/Conv2D_Fold') - self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name]) - - if with_bypass: - conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' + - quantization_node_name) - self.assertEqual(conv_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' - ] - self._AssertInputOpsAre(conv_quant, expected_inputs) - output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' - if delay else 'test/Add') - self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name]) - - act_quant = graph.get_operation_by_name('test/act_quant/' + - quantization_node_name) - self.assertEqual(act_quant.type, quantization_node_name) - expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', - 'test/' + activation_op_name - ] - self._AssertInputOpsAre(act_quant, expected_inputs) - output_op_name = ('test/act_quant/delayed_quant/Switch_1' - if delay else 'control_dependency') - self._AssertOutputGoesToOps(act_quant, 
graph, [output_op_name]) - self._TestIdempotent(graph) + self._AssertCorrectQuantizedGraphWithBatchNorm( + graph, scope, 'Conv2D', activation_op_name, with_bypass, delay, + use_resource) def testQuantize_FCWithBatchNorm(self): self._RunBatchNormTestOverParameters(self._TestQuantize_FCWithBatchNorm) def _TestQuantize_FCWithBatchNorm(self, activation, activation_op_name, - with_bypass, delay, fused_batch_norm): + with_bypass, delay, fused_batch_norm, + use_resource): """Tests quantization: inputs -> FC with batch norm -> Activation. Args: @@ -421,9 +452,11 @@ class QuantizeTest(test_util.TensorFlowTestCase): inputs to just before Activation. delay: Int (optional), delay in number of steps until quantization starts. fused_batch_norm: Bool, when true use FusedBatchNorm. + use_resource: Bool, when true uses resource variables. """ graph = ops.Graph() with graph.as_default(): + variable_scope.get_variable_scope().set_use_resource(use_resource) batch_size, depth = 5, 256 inputs = array_ops.zeros((batch_size, depth)) out_depth = 256 if with_bypass else 128 @@ -451,44 +484,9 @@ class QuantizeTest(test_util.TensorFlowTestCase): quantize.Quantize(graph, True, quant_delay=delay) - quantization_node_name = 'FakeQuantWithMinMaxVars' - weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + - quantization_node_name) - self.assertEqual(weights_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/weights_quant/' + 'AssignMinLast', - scope + '/weights_quant/' + 'AssignMaxLast', scope + '/mul_fold' - ] - self._AssertInputOpsAre(weights_quant, expected_inputs) - output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1' - if delay else '/MatMul_Fold') - self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name]) - - if with_bypass: - conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' + - quantization_node_name) - self.assertEqual(conv_quant.type, quantization_node_name) - expected_inputs = [ - scope + 
'/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' - ] - self._AssertInputOpsAre(conv_quant, expected_inputs) - output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' - if delay else 'test/Add') - self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name]) - - act_quant = graph.get_operation_by_name('test/act_quant/' + - quantization_node_name) - self.assertEqual(act_quant.type, quantization_node_name) - expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', - 'test/' + activation_op_name - ] - self._AssertInputOpsAre(act_quant, expected_inputs) - output_op_name = ('test/act_quant/delayed_quant/Switch_1' - if delay else 'control_dependency') - self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) - self._TestIdempotent(graph) + self._AssertCorrectQuantizedGraphWithBatchNorm( + graph, scope, 'MatMul', activation_op_name, with_bypass, delay, + use_resource) def testQuantize_DepthwiseConv2dWithBatchNorm(self): self._RunBatchNormTestOverParameters( @@ -496,7 +494,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): def _TestQuantize_DepthwiseConv2dWithBatchNorm( self, activation, activation_op_name, with_bypass, delay, - fused_batch_norm): + fused_batch_norm, use_resource): """Tests quantization: inputs -> DWConv2d with batch norm -> Activation. Args: @@ -507,9 +505,11 @@ class QuantizeTest(test_util.TensorFlowTestCase): inputs to just before Activation. delay: Int (optional), delay in number of steps until quantization starts. fused_batch_norm: Bool, when true use FusedBatchNorm. + use_resource: Bool, when true uses resource variables. 
""" graph = ops.Graph() with graph.as_default(): + variable_scope.get_variable_scope().set_use_resource(use_resource) batch_size, height, width, depth = 5, 128, 128, 3 inputs = array_ops.zeros((batch_size, height, width, depth)) stride = 1 if with_bypass else 2 @@ -539,46 +539,11 @@ class QuantizeTest(test_util.TensorFlowTestCase): fold_batch_norms.FoldBatchNorms(graph, is_training=True) quantize.Quantize(graph, True, quant_delay=delay) - quantization_node_name = 'FakeQuantWithMinMaxVars' - weights_quant = graph.get_operation_by_name(scope + '/weights_quant/' + - quantization_node_name) - self.assertEqual(weights_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/weights_quant/' + 'AssignMinLast', - scope + '/weights_quant/' + 'AssignMaxLast', scope + '/mul_fold' - ] - self._AssertInputOpsAre(weights_quant, expected_inputs) - output_op_name = scope + ('/weights_quant/delayed_quant/Switch_1' - if delay else '/depthwise_Fold') - self._AssertOutputGoesToOps(weights_quant, graph, [output_op_name]) - - if with_bypass: - conv_quant = graph.get_operation_by_name(scope + '/conv_quant/' + - quantization_node_name) - self.assertEqual(conv_quant.type, quantization_node_name) - expected_inputs = [ - scope + '/conv_quant/AssignMinEma', - scope + '/conv_quant/AssignMaxEma', scope + '/add_fold' - ] - self._AssertInputOpsAre(conv_quant, expected_inputs) - output_op_name = (scope + '/conv_quant/delayed_quant/Switch_1' - if delay else 'test/Add') - self._AssertOutputGoesToOps(conv_quant, graph, [output_op_name]) - - act_quant = graph.get_operation_by_name('test/act_quant/' + - quantization_node_name) - self.assertEqual(act_quant.type, quantization_node_name) - expected_inputs = [ - 'test/act_quant/AssignMinEma', 'test/act_quant/AssignMaxEma', - 'test/' + activation_op_name - ] - self._AssertInputOpsAre(act_quant, expected_inputs) - output_op_name = ('test/act_quant/delayed_quant/Switch_1' - if delay else 'control_dependency') - 
self._AssertOutputGoesToOps(act_quant, graph, [output_op_name]) - self._TestIdempotent(graph) + self._AssertCorrectQuantizedGraphWithBatchNorm( + graph, scope, 'DepthwiseConv2dNative', activation_op_name, + with_bypass, delay, use_resource) - def _TestIdempotent(self, graph): + def _AssertIdempotent(self, graph): # Ensure that calling the rewrite again doesn't change the graph. graph_def_before = str(graph.as_graph_def()) with graph.as_default(): -- GitLab From 6ff54600831b0af86855b492da938c0ba0e4d910 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Wed, 7 Mar 2018 22:56:41 -0800 Subject: [PATCH 582/884] Make comparison functors const In libc++ std::map and std::multimap call the comparison functor from a const object, which requires the `operator()` to be a const method. PiperOrigin-RevId: 188285407 --- tensorflow/compiler/jit/mark_for_compilation_pass.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/compiler/jit/mark_for_compilation_pass.cc b/tensorflow/compiler/jit/mark_for_compilation_pass.cc index a0211acbbe..e145a21e76 100644 --- a/tensorflow/compiler/jit/mark_for_compilation_pass.cc +++ b/tensorflow/compiler/jit/mark_for_compilation_pass.cc @@ -174,7 +174,9 @@ bool HasResourceInputOrOutput(const Node& node) { } struct NodeCompare { - bool operator()(const Node* a, const Node* b) { return a->id() < b->id(); } + bool operator()(const Node* a, const Node* b) const { + return a->id() < b->id(); + } }; using OrderedNodeSet = std::set; -- GitLab From 5fa816d17640509b19567c6d72f85fb00a8fefc0 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Wed, 7 Mar 2018 23:20:50 -0800 Subject: [PATCH 583/884] Revert "Update external protobuf codebase version for Windows cmake build" This reverts commit 07bec47ba5db4c2f2e33ecb49f23253a371bfbbe. 
--- tensorflow/contrib/cmake/external/protobuf.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/cmake/external/protobuf.cmake b/tensorflow/contrib/cmake/external/protobuf.cmake index aba8a5244e..ab464bc99a 100644 --- a/tensorflow/contrib/cmake/external/protobuf.cmake +++ b/tensorflow/contrib/cmake/external/protobuf.cmake @@ -16,7 +16,7 @@ include (ExternalProject) set(PROTOBUF_INCLUDE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/protobuf/src/protobuf/src) set(PROTOBUF_URL https://github.com/google/protobuf.git) -set(PROTOBUF_TAG 396336eb961b75f03b25824fe86cf6490fb75e3a) +set(PROTOBUF_TAG b04e5cba356212e4e8c66c61bbe0c3a20537c5b9) if(WIN32) if(${CMAKE_GENERATOR} MATCHES "Visual Studio.*") -- GitLab From f9fb7e7736423f0bd416e1949e614d302c929709 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 7 Mar 2018 23:22:17 -0800 Subject: [PATCH 584/884] Fix cmake Dockerfile issue on Linux (#17416) * Fix cmake Dockerfile issue on Linux When running cmake on Linux with (clean build with no cached docker images): ``` tensorflow/tools/ci_build/ci_build.sh CMAKE tensorflow/tools/ci_build/builds/cmake.sh ``` The following issue was encountered: ``` Step 11/13 : RUN pip install --upgrade termcolor ---> Running in 838167596eb6 Collecting termcolor Downloading termcolor-1.1.0.tar.gz ...... ...... ...... error: invalid command 'bdist_wheel' ---------------------------------------- Failed building wheel for termcolor ``` This fix adds the missing `pip install wheel` Signed-off-by: Yong Tang * Update golang installation in cmake Dockerfile This fix updates the golang installation in cmake Dockerfile. Previously, `ppa:ubuntu-lxc/lxd-stable` was used but it has been deprecated, see: http://lxc-users.linuxcontainers.narkive.com/IlHLLHqN/lxd-official-ppa-deprecation That caused the following error: ``` Step 13/14 : RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable ---> Running in 09301ba43a33 Cannot add PPA: 'ppa:~ubuntu-lxc/ubuntu/lxd-stable'.
The team named '~ubuntu-lxc' has no PPA named 'ubuntu/lxd-stable' Please choose from the following available PPAs: * 'buildd-backports': linuxcontainers.org: buildd backports * 'daily': linuxcontainers.org: development builds ...... ...... ``` This fix updates the golang installation and use backported xenial (16.04), as was suggested in the link: http://lxc-users.linuxcontainers.narkive.com/IlHLLHqN/lxd-official-ppa-deprecation Signed-off-by: Yong Tang --- tensorflow/tools/ci_build/Dockerfile.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/ci_build/Dockerfile.cmake b/tensorflow/tools/ci_build/Dockerfile.cmake index ec90c83aac..d5dea4f3e4 100644 --- a/tensorflow/tools/ci_build/Dockerfile.cmake +++ b/tensorflow/tools/ci_build/Dockerfile.cmake @@ -23,11 +23,12 @@ RUN /install/install_deb_packages.sh RUN apt-get update RUN apt-get install -y --no-install-recommends python-pip +RUN pip install --upgrade wheel RUN pip install --upgrade astor RUN pip install --upgrade gast RUN pip install --upgrade numpy RUN pip install --upgrade termcolor # Install golang -RUN add-apt-repository -y ppa:ubuntu-lxc/lxd-stable -RUN apt-get install -y golang +RUN apt-get install -t xenial-backports -y golang-1.9 +ENV PATH=${PATH}:/usr/lib/go-1.9/bin -- GitLab From 37f6d224d69edd532197d615ace872933be5d74b Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Wed, 7 Mar 2018 23:22:47 -0800 Subject: [PATCH 585/884] Fix build issue with KafkaDataset (#17418) * Fix build issue with KafkaDataset This fix tries to address the issue raised in 17210 where error of `NotFoundError: Op type not registered 'KafkaDataset' in binary.` returned from kafka ops. The issue was that the inclusion of kafka ops was removed due to the conflict merge from the other PR. This fix fixes the issue. This fix fixes 17210. Signed-off-by: Yong Tang * Change `import readers.Dataset` to `import dataset_ops.Dataset`, due to the changes in some other places. 
Signed-off-by: Yong Tang * Fix library dependency issues in bazel Signed-off-by: Yong Tang * Add dependency to bazel rules Signed-off-by: Yong Tang * Add license to lib and pip package Signed-off-by: Yong Tang * Remove unneeded changes in bazel Signed-off-by: Yong Tang * Address review feedback Signed-off-by: Yong Tang * Fix sanity check Signed-off-by: Yong Tang * Add zlib dependency and include path Signed-off-by: Yong Tang * Add copts in bazel to address the discrepancy in clang and gcc Signed-off-by: Yong Tang --- tensorflow/contrib/BUILD | 3 +- tensorflow/contrib/kafka/BUILD | 107 +++++++++++------- .../kafka/kernels/kafka_dataset_ops.cc | 4 +- .../ops/{kafka_ops.cc => dataset_ops.cc} | 0 .../kafka/python/ops/kafka_dataset_ops.py | 9 +- .../kafka/python/ops/kafka_op_loader.py | 24 ++++ tensorflow/tools/pip_package/BUILD | 1 + third_party/kafka/BUILD | 13 ++- 8 files changed, 110 insertions(+), 51 deletions(-) rename tensorflow/contrib/kafka/ops/{kafka_ops.cc => dataset_ops.cc} (100%) create mode 100644 tensorflow/contrib/kafka/python/ops/kafka_op_loader.py diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 07d7fa64cc..17ab200b28 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -123,6 +123,7 @@ cc_library( "//tensorflow/contrib/coder:all_kernels", "//tensorflow/contrib/cudnn_rnn:cudnn_rnn_kernels", "//tensorflow/contrib/data/kernels:dataset_kernels", + "//tensorflow/contrib/kafka:dataset_kernels", "//tensorflow/contrib/factorization/kernels:all_kernels", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_kernels", "//tensorflow/contrib/layers:sparse_feature_cross_op_kernel", @@ -149,7 +150,7 @@ cc_library( "//tensorflow/contrib/factorization:all_ops", "//tensorflow/contrib/framework:all_ops", "//tensorflow/contrib/input_pipeline:input_pipeline_ops_op_lib", - "//tensorflow/contrib/kafka:kafka_ops_op_lib", + "//tensorflow/contrib/kafka:dataset_ops_op_lib", "//tensorflow/contrib/layers:sparse_feature_cross_op_op_lib", 
"//tensorflow/contrib/nccl:nccl_ops_op_lib", "//tensorflow/contrib/nearest_neighbor:nearest_neighbor_ops_op_lib", diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index efb403462a..14a62fb075 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -1,66 +1,93 @@ -package( - default_visibility = ["//visibility:private"], -) +package(default_visibility = ["//tensorflow:internal"]) licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) -load("//tensorflow:tensorflow.bzl", "tf_gen_op_libs") -load("//tensorflow:tensorflow.bzl", "tf_gen_op_wrapper_py") -load("//tensorflow:tensorflow.bzl", "tf_kernel_library") -load("//tensorflow:tensorflow.bzl", "tf_py_test") +load( + "//tensorflow:tensorflow.bzl", + "tf_gen_op_wrapper_py", + "tf_kernel_library", + "tf_custom_op_library", + "tf_custom_op_py_library", + "tf_gen_op_libs", + "tf_py_test", +) -tf_kernel_library( - name = "kafka_kernels", +py_library( + name = "kafka", + srcs = ["__init__.py"], + srcs_version = "PY2AND3", + deps = [ + ":dataset_ops", + ], +) + +tf_custom_op_library( + name = "_dataset_ops.so", + srcs = ["ops/dataset_ops.cc"], + deps = [":dataset_kernels"], +) + +tf_gen_op_libs( + op_lib_names = ["dataset_ops"], +) + +cc_library( + name = "dataset_kernels", srcs = ["kernels/kafka_dataset_ops.cc"], - visibility = ["//visibility:public"], deps = [ - "//tensorflow/core:framework", - "//tensorflow/core:lib", - "//tensorflow/core:lib_internal", - "//tensorflow/core/kernels:bounds_check_lib", - "//tensorflow/core/kernels:dataset", + "//tensorflow/core:framework_headers_lib", "//third_party/eigen3", "@kafka", + "@protobuf_archive//:protobuf_headers", ], + alwayslink = 1, ) -tf_gen_op_libs( - op_lib_names = ["kafka_ops"], +py_library( + name = "dataset_ops", + srcs = [ + "python/ops/kafka_dataset_ops.py", + ], + srcs_version = "PY2AND3", deps = [ - "//tensorflow/core:lib", + ":kafka_op_loader", + "//tensorflow/python:dataset_ops_gen", + 
"//tensorflow/python:util", + "//tensorflow/python/data/ops:dataset_ops", + "//tensorflow/python/data/util:nest", ], ) tf_gen_op_wrapper_py( - name = "gen_kafka_ops", - out = "python/ops/gen_kafka_ops.py", - require_shape_functions = True, - deps = [":kafka_ops_op_lib"], + name = "gen_dataset_ops", + out = "python/ops/gen_dataset_ops.py", + deps = ["//tensorflow/contrib/kafka:dataset_ops_op_lib"], ) -py_library( - name = "kafka", - srcs = [ - "__init__.py", - "python/ops/kafka_dataset_ops.py", +tf_kernel_library( + name = "dataset_ops_kernels", + deps = [ + ":dataset_kernels", + "//tensorflow/core:framework", + ], + alwayslink = 1, +) + +tf_custom_op_py_library( + name = "kafka_op_loader", + srcs = ["python/ops/kafka_op_loader.py"], + dso = ["//tensorflow/contrib/kafka:_dataset_ops.so"], + kernels = [ + ":dataset_ops_kernels", + "//tensorflow/contrib/kafka:dataset_ops_op_lib", ], srcs_version = "PY2AND3", - visibility = ["//visibility:public"], deps = [ - ":gen_kafka_ops", + ":gen_dataset_ops", "//tensorflow/contrib/util:util_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:control_flow_ops", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", "//tensorflow/python:platform", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python/data/ops:dataset_ops", - "//tensorflow/python/data/ops:iterator_ops", - "//tensorflow/python/data/ops:readers", ], ) @@ -95,7 +122,9 @@ tf_py_test( filegroup( name = "all_files", srcs = glob( - ["**/*"], + include = [ + "**/*", + ], exclude = [ "**/METADATA", "**/OWNERS", diff --git a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc index 88ef5f3571..a4cd4a2cc4 100644 --- a/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc +++ b/tensorflow/contrib/kafka/kernels/kafka_dataset_ops.cc @@ -13,9 +13,7 @@ See the License for the specific language governing permissions and limitations under 
the License. ==============================================================================*/ -#include "tensorflow/core/kernels/dataset.h" - -#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/dataset.h" #include "src-cpp/rdkafkacpp.h" diff --git a/tensorflow/contrib/kafka/ops/kafka_ops.cc b/tensorflow/contrib/kafka/ops/dataset_ops.cc similarity index 100% rename from tensorflow/contrib/kafka/ops/kafka_ops.cc rename to tensorflow/contrib/kafka/ops/dataset_ops.cc diff --git a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py index 8e51d27a34..a1624614d1 100644 --- a/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py +++ b/tensorflow/contrib/kafka/python/ops/kafka_dataset_ops.py @@ -17,8 +17,9 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function -from tensorflow.contrib.kafka.python.ops import gen_kafka_ops -from tensorflow.python.data.ops.readers import Dataset +from tensorflow.contrib.kafka.python.ops import kafka_op_loader # pylint: disable=unused-import +from tensorflow.contrib.kafka.python.ops import gen_dataset_ops +from tensorflow.python.data.ops.dataset_ops import Dataset from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape @@ -58,8 +59,8 @@ class KafkaDataset(Dataset): timeout, dtype=dtypes.int64, name="timeout") def _as_variant_tensor(self): - return gen_kafka_ops.kafka_dataset(self._topics, self._servers, self._group, - self._eof, self._timeout) + return gen_dataset_ops.kafka_dataset(self._topics, self._servers, + self._group, self._eof, self._timeout) @property def output_classes(self): diff --git a/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py new file mode 100644 index 0000000000..ec2fdea962 --- /dev/null +++ 
b/tensorflow/contrib/kafka/python/ops/kafka_op_loader.py @@ -0,0 +1,24 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Python helper for loading kafka ops and kernels.""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.util import loader +from tensorflow.python.platform import resource_loader + +_dataset_ops = loader.load_op_library( + resource_loader.get_path_to_datafile("../../_dataset_ops.so")) diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index ed5801b8bd..9b02b2f94c 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -137,6 +137,7 @@ filegroup( "@highwayhash//:LICENSE", "@jemalloc//:COPYING", "@jpeg//:LICENSE.md", + "@kafka//:LICENSE", "@libxsmm_archive//:LICENSE", "@lmdb//:LICENSE", "@local_config_sycl//sycl:LICENSE.text", diff --git a/third_party/kafka/BUILD b/third_party/kafka/BUILD index a61a9e1f6c..a839ca717e 100644 --- a/third_party/kafka/BUILD +++ b/third_party/kafka/BUILD @@ -130,12 +130,16 @@ cc_library( ], hdrs = [ "config.h", + "src-cpp/rdkafkacpp.h", + "src-cpp/rdkafkacpp_int.h", + "src/lz4.c", + "src/snappy_compat.h", ], - defines = [ + copts = [ + "-Iexternal/kafka/src", + "-Iexternal/kafka/src-cpp", ], - includes = [ - "src", - 
"src-cpp", + defines = [ ], linkopts = [ "-lpthread", @@ -143,5 +147,6 @@ cc_library( visibility = ["//visibility:public"], deps = [ "@boringssl//:ssl", + "@zlib_archive//:zlib", ], ) -- GitLab From def9013bcb037abf9112c0a44f6bc1d4f61e59fd Mon Sep 17 00:00:00 2001 From: Harald Husum Date: Thu, 8 Mar 2018 08:24:12 +0100 Subject: [PATCH 586/884] Update TrainingSpec and EvalSpec pydoc (#17205) Bring TrainingSpec and EvalSpec pydoc in line with pydoc of estimator.train() and evaluate() --- tensorflow/python/estimator/training.py | 26 +++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/estimator/training.py b/tensorflow/python/estimator/training.py index 2cc3331a15..e38b765da5 100644 --- a/tensorflow/python/estimator/training.py +++ b/tensorflow/python/estimator/training.py @@ -128,9 +128,16 @@ class TrainSpec( """Creates a validated `TrainSpec` instance. Args: - input_fn: Training input function returning a tuple of: - features - `Tensor` or dictionary of string feature name to `Tensor`. - labels - `Tensor` or dictionary of `Tensor` with labels. + input_fn: A function that provides input data for training as minibatches. + See @{$get_started/premade_estimators#create_input_functions} for more + information. The function should construct and return one of + the following: + * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a + tuple (features, labels) with same constraints as below. + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a + `Tensor` or a dictionary of string label name to `Tensor`. + max_steps: Int. Positive number of total steps for which to train model. If `None`, train forever. The training `input_fn` is not expected to generate `OutOfRangeError` or `StopIteration` exceptions. See the @@ -185,9 +192,16 @@ class EvalSpec( """Creates a validated `EvalSpec` instance. 
Args: - input_fn: Evaluation input function returning a tuple of: - features - `Tensor` or dictionary of string feature name to `Tensor`. - labels - `Tensor` or dictionary of `Tensor` with labels. + input_fn: A function that constructs the input data for evaluation. + See @{$get_started/premade_estimators#create_input_functions} for more + information. The function should construct and return one of + the following: + * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a + tuple (features, labels) with same constraints as below. + * A tuple (features, labels): Where features is a `Tensor` or a + dictionary of string feature name to `Tensor` and labels is a + `Tensor` or a dictionary of string label name to `Tensor`. + steps: Int. Positive number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. See `Estimator.evaluate` for details. -- GitLab From 74fc896ff4e78d0bfad810e0716cf78845bae36c Mon Sep 17 00:00:00 2001 From: ImSheridan Date: Thu, 8 Mar 2018 15:24:28 +0800 Subject: [PATCH 587/884] Supplement how trained model to make predictions (#17276) --- tensorflow/docs_src/tutorials/wide.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tensorflow/docs_src/tutorials/wide.md b/tensorflow/docs_src/tutorials/wide.md index 005dc020f9..bf6b9d6cc6 100644 --- a/tensorflow/docs_src/tutorials/wide.md +++ b/tensorflow/docs_src/tutorials/wide.md @@ -247,7 +247,7 @@ hours_per_week = tf.feature_column.numeric_column('hours_per_week') ### Making Continuous Features Categorical through Bucketization Sometimes the relationship between a continuous feature and the label is not -linear. As an hypothetical example, a person's income may grow with age in the +linear. As a hypothetical example, a person's income may grow with age in the early stage of one's career, then the growth may slow at some point, and finally the income decreases after retirement. 
In this scenario, using the raw `age` as a real-valued feature column might not be a good choice because the model can @@ -361,6 +361,16 @@ The first line of the final output should be something like `accuracy: 0.83557522`, which means the accuracy is 83.6%. Feel free to try more features and transformations and see if you can do even better! +After the model is evaluated, we can use the model to predict whether an individual has an annual income of over +50,000 dollars given an individual's information input. +```python + pred_iter = model.predict(input_fn=lambda: input_fn(FLAGS.test_data, 1, False, 1)) + for pred in pred_iter: + print(pred['classes']) +``` + +The model prediction output would be like `[b'1']` or `[b'0']` which means whether corresponding individual has an annual income of over 50,000 dollars or not. + If you'd like to see a working end-to-end example, you can download our [example code](https://github.com/tensorflow/models/tree/master/official/wide_deep/wide_deep.py) and set the `model_type` flag to `wide`. -- GitLab From 9cd677c093315294eb1aa79472422616e04e63b9 Mon Sep 17 00:00:00 2001 From: cclauss Date: Thu, 8 Mar 2018 08:25:05 +0100 Subject: [PATCH 588/884] Change unicode() --> six.text_type() for Python 3 (#17225) __unicode()__ was removed in Python 3 because all str are Unicode so this PR changes four calls to __unicode()__ into calls to [__six.text_type()__](http://six.readthedocs.io/#six.text_type). 
--- tensorflow/tools/test/upload_test_benchmarks.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tensorflow/tools/test/upload_test_benchmarks.py b/tensorflow/tools/test/upload_test_benchmarks.py index 77cc9f75f7..edd093510e 100644 --- a/tensorflow/tools/test/upload_test_benchmarks.py +++ b/tensorflow/tools/test/upload_test_benchmarks.py @@ -88,6 +88,7 @@ import os import shutil from google.cloud import datastore +from six import text_type def is_real_file(dirpath, fname): @@ -150,7 +151,7 @@ def upload_benchmark_data(client, data): """ test_result = json.loads(data) - test_name = unicode(test_result["name"]) + test_name = text_type(test_result["name"]) start_time = datetime.datetime.utcfromtimestamp( float(test_result["startTime"])) batch = [] @@ -162,7 +163,7 @@ def upload_benchmark_data(client, data): t_val.update({ "test": test_name, "start": start_time, - "info": unicode(data) + "info": text_type(data) }) batch.append(t_val) @@ -170,7 +171,7 @@ def upload_benchmark_data(client, data): # the attribute to be fetched and displayed. The full entry information is # also stored as a non-indexed JSON blob. for ent in test_result["entries"].get("entry", []): - ent_name = unicode(ent["name"]) + ent_name = text_type(ent["name"]) e_key = client.key("Entry") e_val = datastore.Entity(e_key, exclude_from_indexes=["info"]) e_val.update({ @@ -178,7 +179,7 @@ def upload_benchmark_data(client, data): "start": start_time, "entry": ent_name, "timing": ent["wallTime"], - "info": unicode(json.dumps(ent)) + "info": text_type(json.dumps(ent)) }) batch.append(e_val) -- GitLab From 584aa04bfc816a6cf9f0390d33c3595837355935 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Wed, 7 Mar 2018 23:28:16 -0800 Subject: [PATCH 589/884] Fix build issues when having packed git refs. (#17162) This is a workaround to fix build failure caused by packed git refs. The tf.__git_version__ string will be "unknown" in this case. 
--- tensorflow/tools/git/gen_git_source.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/tools/git/gen_git_source.py b/tensorflow/tools/git/gen_git_source.py index 3630dbd740..cbcdbf5b80 100755 --- a/tensorflow/tools/git/gen_git_source.py +++ b/tensorflow/tools/git/gen_git_source.py @@ -114,6 +114,13 @@ def configure(src_base_path, gen_path, debug=False): for target, src in link_map.items(): if src is None: open(os.path.join(gen_path, target), "w").write("") + elif not os.path.exists(src): + # Git repo is configured in a way we don't support such as having + # packed refs. Even though in a git repo, tf.__git_version__ will not + # be accurate. + # TODO(mikecase): Support grabbing git info when using packed refs. + open(os.path.join(gen_path, target), "w").write("") + spec["git"] = False else: try: # In python 3.5, symlink function exists even on Windows. But requires -- GitLab From ab1cab51265f8b0fb38d007a1d3d93a857ca864d Mon Sep 17 00:00:00 2001 From: Scott Tseng Date: Thu, 8 Mar 2018 15:29:18 +0800 Subject: [PATCH 590/884] Fix a bug in tf.strided_slice() (#16989) The current implementation modifies TfLiteNode::builtin_data every time a loaded graph is executed. The three masks in params keep flipping, which causes the op to produce an incorrect result on every other execution.
--- tensorflow/contrib/lite/kernels/strided_slice.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/strided_slice.cc b/tensorflow/contrib/lite/kernels/strided_slice.cc index fb1e11e0ca..3907a6620d 100644 --- a/tensorflow/contrib/lite/kernels/strided_slice.cc +++ b/tensorflow/contrib/lite/kernels/strided_slice.cc @@ -48,7 +48,7 @@ struct StridedSliceContext { output = GetOutput(context, node, kOutputTensor); dims = NumDimensions(input); } - TfLiteStridedSliceParams* params; + const TfLiteStridedSliceParams* params; TfLiteTensor* input; TfLiteTensor* begin; TfLiteTensor* end; @@ -199,18 +199,18 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { strides.emplace_back(1); } - op_context.params->begin_mask = + int begin_mask = ReverseMaskBits(op_context.params->begin_mask, op_context.dims); - op_context.params->end_mask = + int end_mask = ReverseMaskBits(op_context.params->end_mask, op_context.dims); - op_context.params->shrink_axis_mask = + int shrink_axis_mask = ReverseMaskBits(op_context.params->shrink_axis_mask, op_context.dims); #define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ kernel_type::StridedSlice( \ GetTensorData(op_context.input), \ - GetTensorDims(op_context.input), op_context.params->begin_mask, \ - op_context.params->end_mask, op_context.params->shrink_axis_mask, \ + GetTensorDims(op_context.input), \ + begin_mask, end_mask, shrink_axis_mask, \ starts, stops, strides, GetTensorData(op_context.output), \ GetTensorDims(op_context.output)) -- GitLab From d2d74f0d8256730955d3015861161d3b63eccb3a Mon Sep 17 00:00:00 2001 From: David Norman Date: Thu, 8 Mar 2018 07:30:03 +0000 Subject: [PATCH 591/884] allow 3rd party backends to subclass the generic transfer manager (#16978) --- .../compiler/xla/service/generic_transfer_manager.cc | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc 
b/tensorflow/compiler/xla/service/generic_transfer_manager.cc index 78dc0ad4fc..a99e2b7794 100644 --- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc @@ -38,14 +38,7 @@ namespace xla { GenericTransferManager::GenericTransferManager(se::Platform::Id platform_id, size_t pointer_size) - : platform_id_(platform_id), pointer_size_(pointer_size) { - // We currently only support kHostPlatformId for CPU, kCudaPlatformId for - // GPU and kInterpreterPlatformId for Interpreter. Before supporting other - // platforms, we need to test this transfer manager on them. - CHECK(platform_id_ == se::host::kHostPlatformId || - platform_id_ == se::interpreter::kInterpreterPlatformId || - platform_id_ == se::cuda::kCudaPlatformId); -} + : platform_id_(platform_id), pointer_size_(pointer_size) {} se::Platform::Id GenericTransferManager::PlatformId() const { return platform_id_; -- GitLab From 9d867e0c34ea34ac74ebdab2cdcfc5b8c61fed25 Mon Sep 17 00:00:00 2001 From: David Norman Date: Thu, 8 Mar 2018 07:31:01 +0000 Subject: [PATCH 592/884] Add header and macros to allow these tests to be disabled in a manifest (#16977) --- tensorflow/compiler/xla/tests/convolution_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/convolution_test.cc b/tensorflow/compiler/xla/tests/convolution_test.cc index 99640f5bb5..72715398de 100644 --- a/tensorflow/compiler/xla/tests/convolution_test.cc +++ b/tensorflow/compiler/xla/tests/convolution_test.cc @@ -723,7 +723,7 @@ INSTANTIATE_TEST_CASE_P( ); #endif -TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { +XLA_TEST_F(ConvolutionTest, Convolve_bf16_1x1x1x2_1x1x1x2_Valid) { ComputationBuilder builder(client_, TestName()); Shape input_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); Shape filter_shape = ShapeUtil::MakeShape(BF16, {1, 1, 1, 2}); -- GitLab From f73d7c90ed05bcf9f36f6a3be0c29efa5fef0f6e Mon Sep 17 00:00:00 
2001 From: Yong Tang Date: Thu, 8 Mar 2018 00:23:00 -0800 Subject: [PATCH 593/884] Add missing `#define OMPI_SKIP_MPICXX` for consistency (#17414) This fix adds the missing `#define OMPI_SKIP_MPICXX` in `tensorflow/contrib/mpi/mpi_utils.h` so that it is consistent with other usages of `mpi.h` includes. `OMPI_SKIP_MPICXX` skip the MPI C++ bindings support. This fix fixes 17388. Signed-off-by: Yong Tang --- tensorflow/contrib/mpi/mpi_utils.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/mpi/mpi_utils.h b/tensorflow/contrib/mpi/mpi_utils.h index fa297c28cb..df055ff567 100644 --- a/tensorflow/contrib/mpi/mpi_utils.h +++ b/tensorflow/contrib/mpi/mpi_utils.h @@ -24,6 +24,8 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" +// Skip MPI C++ bindings support, this matches the usage in other places +#define OMPI_SKIP_MPICXX #include "third_party/mpi/mpi.h" #define MPI_CHECK(cmd) \ do { \ -- GitLab From e52f916b87557d6b6d28f27f570462debb5ee262 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 8 Mar 2018 00:32:17 -0800 Subject: [PATCH 594/884] Automated g4 rollback of changelist 188263046 PiperOrigin-RevId: 188293315 --- tensorflow/contrib/lite/BUILD | 22 --- tensorflow/contrib/lite/context.c | 7 +- tensorflow/contrib/lite/context.h | 64 +------- tensorflow/contrib/lite/interpreter.cc | 154 ++++-------------- tensorflow/contrib/lite/interpreter.h | 45 +----- tensorflow/contrib/lite/interpreter_test.cc | 164 ++++---------------- tensorflow/contrib/lite/util.cc | 27 ---- tensorflow/contrib/lite/util.h | 34 ---- tensorflow/contrib/lite/util_test.cc | 50 ------ 9 files changed, 71 insertions(+), 496 deletions(-) delete mode 100644 tensorflow/contrib/lite/util.cc delete mode 100644 tensorflow/contrib/lite/util.h delete mode 100644 tensorflow/contrib/lite/util_test.cc diff --git a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 5cfbb544b7..44c4a7e2ca 100644 --- a/tensorflow/contrib/lite/BUILD +++ 
b/tensorflow/contrib/lite/BUILD @@ -132,7 +132,6 @@ cc_library( ":memory_planner", ":schema_fbs_version", ":simple_memory_arena", - ":util", "//tensorflow/contrib/lite/kernels:gemm_support", "//tensorflow/contrib/lite/nnapi:nnapi_lib", "//tensorflow/contrib/lite/schema:schema_fbs", @@ -233,27 +232,6 @@ cc_test( ], ) -cc_library( - name = "util", - srcs = ["util.cc"], - hdrs = ["util.h"], - deps = [ - ":context", - ], -) - -cc_test( - name = "util_test", - size = "small", - srcs = ["util_test.cc"], - deps = [ - ":context", - ":util", - "//tensorflow/contrib/lite/testing:util", - "@com_google_googletest//:gtest", - ], -) - # Test the serialization of a model with optional tensors. # Model tests diff --git a/tensorflow/contrib/lite/context.c b/tensorflow/contrib/lite/context.c index 620de5d678..c09e838c5c 100644 --- a/tensorflow/contrib/lite/context.c +++ b/tensorflow/contrib/lite/context.c @@ -17,14 +17,9 @@ limitations under the License. #include #include -int TfLiteIntArrayGetSizeInBytes(int size) { - static TfLiteIntArray dummy; - return sizeof(dummy) + sizeof(dummy.data[0]) * size; -} - TfLiteIntArray* TfLiteIntArrayCreate(int size) { TfLiteIntArray* ret = - (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size)); + (TfLiteIntArray*)malloc(sizeof(*ret) + sizeof(ret->data[0]) * size); ret->size = size; return ret; } diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index d901b9f065..ed7f4515fa 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -29,7 +29,6 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ #define TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ -#include #include #include @@ -41,7 +40,6 @@ typedef enum { kTfLiteOk = 0, kTfLiteError = 1 } TfLiteStatus; // Forward declare so GetNode can use this is in Context. 
typedef struct _TfLiteRegistration TfLiteRegistration; -typedef struct _TfLiteDelegate TfLiteDelegate; #define kOptionalTensor (-1) @@ -59,10 +57,6 @@ typedef struct { #endif } TfLiteIntArray; -// Given the size (number of elements) in a TfLiteIntArray, calculate its size -// in bytes. -int TfLiteIntArrayGetSizeInBytes(int size); - // Create a array of a given `size` (uninitialized entries). // This returns a pointer, that you must free using TfLiteIntArrayFree(). TfLiteIntArray* TfLiteIntArrayCreate(int size); @@ -168,11 +162,6 @@ typedef enum { kTfLiteDynamic, } TfLiteAllocationType; -// The delegates should use zero or positive integers to represent handles. -// -1 is reserved from unallocated status. -typedef int TfLiteDelegateBufferHandle; -const TfLiteDelegateBufferHandle kTfLiteNullBufferHandle = -1; - // An tensor in the interpreter system which is a wrapper around a buffer of // data including a dimensionality (or NULL if not currently defined). typedef struct { @@ -205,22 +194,6 @@ typedef struct { // Null-terminated name of this tensor. const char* name; - - // The delegate which knows how to handle `delegate_buffer_handle`. - // WARNING: This is an experimental interface that is subject to change. - TfLiteDelegate* delegate; - - // An integer buffer handle that can be handled by `delegate`. - // The value is valid only when delegate is not null. - // WARNING: This is an experimental interface that is subject to change. - TfLiteDelegateBufferHandle delegate_buffer_handle; - - // If the delegate uses its own buffer (e.g. GPU memory), the delegate is - // responsible to set data_is_stale to true. - // `delegate->CopyFromBufferHandle` can be called to copy the data from - // delegate buffer. - // WARNING: This is an // experimental interface that is subject to change. - bool data_is_stale; } TfLiteTensor; // Free memory of tensor `t`; @@ -261,11 +234,6 @@ typedef struct { // WARNING: This is an experimental interface that is subject to change. 
const void* custom_initial_data; int custom_initial_data_size; - - // The pointer to the delegate. This is non-null only when the node is - // created by calling `interpreter.ModifyGraphWithDelegate`. - // WARNING: This is an experimental interface that is subject to change. - TfLiteDelegate* delegate; } TfLiteNode; typedef struct TfLiteContext { @@ -319,7 +287,7 @@ typedef struct TfLiteContext { // does not take ownership of `nodes_to_replace`. TfLiteStatus (*ReplaceSubgraphsWithDelegateKernels)( struct TfLiteContext*, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); + const TfLiteIntArray* nodes_to_replace); // TODO(ahentz): we should create a more general mechanism for this sort of // library-global objects. @@ -370,45 +338,19 @@ typedef struct _TfLiteRegistration { } TfLiteRegistration; // WARNING: This is an experimental interface that is subject to change. -typedef struct _TfLiteDelegate { +typedef struct { // Data that delegate needs to identify itself. This data is owned by the // delegate. The delegate is owned in the user code, so the delegate is // responsible for doing this when it is destroyed. void* data_; - // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the // delegate a view of the current graph through TfLiteContext*. It typically // will look at the nodes and call ReplaceSubgraphsWithDelegateKernels() // to ask the TensorFlow lite runtime to create macro-nodes to represent // delegated subgraphs of the original graph. - TfLiteStatus (*Prepare)(TfLiteContext* context, TfLiteDelegate* delegate); - - // Copy the data from delegate buffer handle to raw memory. - // This can be null if the delegate doesn't use its own buffer. - TfLiteStatus (*CopyFromBufferHandle)( - TfLiteDelegate* delegate, - TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, int size); - - // Copy the data from raw memory to delegate buffer handle. 
- // This can be null if the delegate doesn't use its own buffer. - TfLiteStatus (*CopyToBufferHandle)( - TfLiteDelegate* delegate, - TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, int size); - - // Free the Delegate Buffer Handle. Note: This only frees the handle, but - // this doesn't release the underlying resource (e.g. textures). The - // resources are either owned by application layer or the delegate. - // This can be null if the delegate doesn't use its own buffer. - void (*FreeBufferHandle)(TfLiteDelegate* delegate, - TfLiteDelegateBufferHandle* handle); + TfLiteStatus (*Prepare)(TfLiteContext* context, void* data); } TfLiteDelegate; -// WARNING: This is an experimental interface that is subject to change. -typedef struct { - TfLiteDelegate* delegate; - TfLiteIntArray* nodes_to_replace; -} TfLiteDelegateParams; - #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 733c47852e..0f5e17f0de 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -26,7 +26,6 @@ limitations under the License. 
#include "tensorflow/contrib/lite/memory_planner.h" #include "tensorflow/contrib/lite/nnapi_delegate.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" -#include "tensorflow/contrib/lite/util.h" namespace tflite { @@ -97,57 +96,19 @@ Interpreter::~Interpreter() { } for (int i = 0; i < context_.tensors_size; i++) { - TfLiteTensor* tensor = &context_.tensors[i]; - if (tensor->delegate_buffer_handle != kTfLiteNullBufferHandle) { - tensor->delegate->FreeBufferHandle(tensor->delegate, - &tensor->delegate_buffer_handle); - } - TfLiteTensorFree(tensor); + TfLiteTensorFree(&context_.tensors[i]); } } TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( TfLiteContext* context, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) { + const TfLiteIntArray* nodes_to_replace) { return static_cast(context->impl_) - ->ReplaceSubgraphsWithDelegateKernels(registration, nodes_to_replace, - delegate); -} - -namespace { - -// This function allocates a continuous memory space that contains a -// TfLiteDelegateParams followed by a TfLiteIntArray. The pointer will be -// deallocated by C `free` function later. 
-TfLiteDelegateParams* CreateDelegateParams( - TfLiteDelegate* delegate, const std::vector& nodes_to_replace) { - int nodes_to_replace_size_in_bytes = - TfLiteIntArrayGetSizeInBytes(nodes_to_replace.size()); - void* allocation = - malloc(sizeof(TfLiteDelegateParams) + nodes_to_replace_size_in_bytes); - TfLiteDelegateParams* params = - reinterpret_cast(allocation); - TfLiteIntArray* nodes_to_replace_arr = reinterpret_cast( - static_cast(allocation) + sizeof(TfLiteDelegateParams)); - - nodes_to_replace_arr->size = nodes_to_replace.size(); - for (int i = 0; i < nodes_to_replace.size(); ++i) { - nodes_to_replace_arr->data[i] = nodes_to_replace[i]; - } - - params->delegate = delegate; - params->nodes_to_replace = nodes_to_replace_arr; - return params; + ->ReplaceSubgraphsWithDelegateKernels(registration, nodes_to_replace); } -} // Anonymous namespace - TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( - TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, - TfLiteDelegate* delegate) { - // Annotate the registration as DELEGATE op. - registration.builtin_code = BuiltinOperator_DELEGATE; - + TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace) { // Annotate the registration as DELEGATE op. registration.builtin_code = BuiltinOperator_DELEGATE; @@ -159,38 +120,30 @@ TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( execution_plan_.clear(); for (auto& subgraph : subgraphs) { + // Turn subgraph.nodes into a TfLiteIntArray compatible data structure. + // TODO(aselle): Avoid this copy by constructing subgraph.nodes that way + // in the first place + subgraph.nodes.insert(subgraph.nodes.begin(), + static_cast(subgraph.nodes.size())); // Subgraphs calimed by the delegate should have a "macro" op created, the // other subgraphs (kTfNonPartition) just have their nodes added back to // the execution plan. 
switch (subgraph.type) { case Subgraph::kTfNonPartition: - for (auto it = subgraph.nodes.begin(); it != subgraph.nodes.end(); + for (auto it = subgraph.nodes.begin() + 1; it != subgraph.nodes.end(); ++it) { execution_plan_.push_back(*it); } break; case Subgraph::kTfPartition: { + void* builtin_data = nullptr; int node_index; - - TfLiteDelegateParams* params = - CreateDelegateParams(delegate, subgraph.nodes); - AddNodeWithParameters(subgraph.input_tensors, subgraph.output_tensors, - nullptr, 0, params, ®istration, &node_index); - - // Initialize the output tensors's delegate-related fields. - for (int tensor_index : subgraph.output_tensors) { - TfLiteTensor* tensor = &tensors_[tensor_index]; - TF_LITE_ENSURE_EQ(&context_, tensor->delegate, nullptr); - TF_LITE_ENSURE_EQ(&context_, tensor->delegate_buffer_handle, - kTfLiteNullBufferHandle); - // delegate_buffer_handle will be filled in delegate's `Prepare` - // function. - tensor->delegate = delegate; - } - - // Associate the node with the delegate. - TfLiteNode* node = &nodes_and_registration_[node_index].first; - node->delegate = delegate; + // Create a node that represents computation of this subgraph. 
+ AddNodeWithParameters( + subgraph.input_tensors, subgraph.output_tensors, + reinterpret_cast(subgraph.nodes.data()), + subgraph.nodes.size() * sizeof(subgraph.nodes[0]), builtin_data, + ®istration, &node_index); } break; case Subgraph::kTfUnexplored: return kTfLiteError; @@ -280,6 +233,14 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims, return kTfLiteOk; } +namespace { +TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector& x) { + TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size()); + for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i]; + return lite; +} +} // namespace + TfLiteStatus Interpreter::AllocateTensors() { next_execution_plan_index_to_prepare_ = 0; if (memory_planner_) { @@ -314,6 +275,7 @@ TfLiteStatus Interpreter::AddNodeWithParameters( int new_node_index = nodes_and_registration_.size(); if (node_index) *node_index = new_node_index; nodes_and_registration_.resize(nodes_and_registration_.size() + 1); + auto& node_and_reg = nodes_and_registration_.back(); TfLiteNode& node = node_and_reg.first; if (node.inputs) TfLiteIntArrayFree(node.inputs); @@ -323,8 +285,8 @@ TfLiteStatus Interpreter::AddNodeWithParameters( // NOTE, here we are not using move semantics yet, since our internal // representation isn't std::vector, but in the future we would like to avoid // copies, so we want the interface to take r-value references now. 
- node.inputs = ConvertVectorToTfLiteIntArray(inputs); - node.outputs = ConvertVectorToTfLiteIntArray(outputs); + node.inputs = convertVectorToTfLiteIntArray(inputs); + node.outputs = convertVectorToTfLiteIntArray(outputs); node.temporaries = TfLiteIntArrayCreate(0); if (init_data) { node.user_data = OpInit(*registration, init_data, init_data_size); @@ -337,7 +299,6 @@ TfLiteStatus Interpreter::AddNodeWithParameters( node.builtin_data = builtin_data_deleter.release(); // TODO(ycling): Filling `custom_initial_data` and `custom_initial_data_size` // properly for nodes generated by ReplaceSubgraphsWithDelegateKernels. - if (registration->builtin_code == BuiltinOperator_CUSTOM) { // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer // `Operator` table is passed in. @@ -348,7 +309,6 @@ TfLiteStatus Interpreter::AddNodeWithParameters( node.custom_initial_data_size = 0; } - node.delegate = nullptr; node_and_reg.second = *registration; execution_plan_.push_back(new_node_index); return kTfLiteOk; @@ -362,7 +322,7 @@ TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index, TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); invokable_ = false; - TfLiteIntArray* dims_lite = ConvertVectorToTfLiteIntArray(dims); + TfLiteIntArray* dims_lite = convertVectorToTfLiteIntArray(dims); return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite); } @@ -464,29 +424,11 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; - - // TODO(ycling): This is an extra loop through inputs to check if the data - // need to be copied from Delegate buffer to raw memory, which is often not - // needed. We may want to cache this in prepare to know if this needs to be - // done for a node or not. 
- for (int i = 0; i < node.inputs->size; ++i) { - int tensor_index = node.inputs->data[i]; - if (tensor_index == kOptionalTensor) { - continue; - } - TfLiteTensor* tensor = &tensors_[tensor_index]; - if (tensor->delegate && tensor->delegate != node.delegate && - tensor->data_is_stale) { - EnsureTensorDataIsReadable(tensor_index); - } - } - EnsureTensorsVectorCapacity(); if (OpInvoke(registration, &node) == kTfLiteError) { status = kTfLiteError; } } - return status; } @@ -522,7 +464,6 @@ TfLiteStatus Interpreter::AddTensors(int tensors_to_add, tensors_.resize(tensors_.size() + tensors_to_add); for (int i = base_index; i < tensors_.size(); i++) { memset(&tensors_[i], 0, sizeof(tensors_[i])); - tensors_[i].delegate_buffer_handle = kTfLiteNullBufferHandle; } context_.tensors = tensors_.data(); context_.tensors_size = tensors_.size(); @@ -570,7 +511,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes); } invokable_ = false; - TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), quantization, const_cast(buffer), bytes, kTfLiteMmapRo, allocation, &context_.tensors[tensor_index]); return kTfLiteOk; @@ -595,7 +536,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadWrite( TF_LITE_ENSURE_OK(&context_, BytesRequired(type, dims.data(), dims.size(), &required_bytes)); } - TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), quantization, /*buffer=*/nullptr, required_bytes, type == kTfLiteString ? 
kTfLiteDynamic : kTfLiteArenaRw, @@ -672,7 +613,7 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { ReplaceSubgraphsWithDelegateKernels; context_.GetExecutionPlan = GetExecutionPlan; - TfLiteStatus status = delegate->Prepare(&context_, delegate); + TfLiteStatus status = delegate->Prepare(&context_, delegate->data_); // Remove additional context info. context_.GetNodeAndRegistration = nullptr; context_.ReplaceSubgraphsWithDelegateKernels = nullptr; @@ -680,35 +621,4 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { return status; } -TfLiteStatus Interpreter::SetDelegateBufferHandle( - int tensor_index, TfLiteDelegateBufferHandle delegate_buffer_handle, - TfLiteDelegate* delegate) { - TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); - TfLiteTensor* tensor = &tensors_[tensor_index]; - - TF_LITE_ENSURE(&context_, - tensor->delegate == nullptr || tensor->delegate == delegate); - tensor->delegate = delegate; - if (tensor->delegate_buffer_handle != kTfLiteNullBufferHandle) { - TF_LITE_ENSURE(&context_, tensor->delegate->FreeBufferHandle != nullptr); - tensor->delegate->FreeBufferHandle(tensor->delegate, - &tensor->delegate_buffer_handle); - } - tensor->delegate_buffer_handle = delegate_buffer_handle; - - return kTfLiteOk; -} - -TfLiteStatus Interpreter::GetDelegateBufferHandle( - int tensor_index, TfLiteDelegateBufferHandle* delegate_buffer_handle, - TfLiteDelegate** delegate) { - TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); - TfLiteTensor* tensor = &tensors_[tensor_index]; - - *delegate = tensor->delegate; - *delegate_buffer_handle = tensor->delegate_buffer_handle; - - return kTfLiteOk; -} - } // namespace tflite diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index f5fcae90cc..04c19644a0 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -265,46 +265,6 @@ class Interpreter { void set_model(const 
Model* model) { model_ = const_cast(model); } Model* model() const { return model_; } - // Ensure the data in `tensor.data` is readable. In case delegate is used, - // it might require to copy the data from delegate buffer to raw memory. - TfLiteStatus EnsureTensorDataIsReadable(int tensor_index) { - TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); - TfLiteTensor* tensor = &tensors_[tensor_index]; - if (tensor->data_is_stale) { - TF_LITE_ENSURE(&context_, tensor->delegate != nullptr); - TF_LITE_ENSURE(&context_, - tensor->delegate_buffer_handle != kTfLiteNullBufferHandle); - // This can be null if the delegate doesn't use its own buffer. - TF_LITE_ENSURE(&context_, - tensor->delegate->CopyFromBufferHandle != nullptr); - tensor->delegate->CopyFromBufferHandle(tensor->delegate, - tensor->delegate_buffer_handle, - tensor->data.raw, tensor->bytes); - tensor->data_is_stale = false; - } - return kTfLiteOk; - } - - // Set the delegate buffer handle to a tensor. It can be called in the - // following cases: - // 1. Set the buffer handle to a tensor that's not being written by a - // delegate. For example, feeding an OpenGL texture as the input of the - // inference graph. - // 2. Set the buffer handle to a tensor that uses the same delegate. - // For example, set an OpenGL texture as the output of inference, while - // the node which produces output is an OpenGL delegate node. - // WARNING: This is an experimental API and subject to change. - TfLiteStatus SetDelegateBufferHandle( - int tensor_index, TfLiteDelegateBufferHandle delegate_buffer_handle, - TfLiteDelegate* delegate); - - // Get the delegate buffer handle, and the delegate which can process the - // buffer handle. - // WARNING: This is an experimental API and subject to change. - TfLiteStatus GetDelegateBufferHandle( - int tensor_index, TfLiteDelegateBufferHandle* delegate_buffer_handle, - TfLiteDelegate** delegate); - // The default capacity of `tensors_` vector. 
static constexpr int kTensorsReservedCapacity = 128; // The capacity headroom of `tensors_` vector before calling ops' @@ -395,15 +355,14 @@ class Interpreter { // Entry point for C API ReplaceSubgraphsWithDelegateKernels static TfLiteStatus ReplaceSubgraphsWithDelegateKernels( TfLiteContext* context, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); + const TfLiteIntArray* nodes_to_replace); // Update the execution graph to replace some of the nodes with stub // nodes. Specifically any node index that has `nodes[index]==1` will be // slated for replacement with a delegate kernel specified by registration. // WARNING: This is an experimental interface that is subject to change. TfLiteStatus ReplaceSubgraphsWithDelegateKernels( - TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, - TfLiteDelegate* delegate); + TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace); // WARNING: This is an experimental interface that is subject to change. // Gets the internal pointer to a TensorFlow lite node by node_index. 
diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 11578fcb69..2e6727b323 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -763,38 +763,24 @@ TfLiteRegistration AddOpRegistration() { } class TestDelegate : public ::testing::Test { - protected: - void SetUp() override { - interpreter_ = absl::make_unique(); - interpreter_->AddTensors(5); - interpreter_->SetInputs({0, 1}); - interpreter_->SetOutputs({3, 4}); + public: + TestDelegate() { + interpreter_.AddTensors(5); + interpreter_.SetInputs({0, 1}); + interpreter_.SetOutputs({3, 4}); TfLiteQuantizationParams quant; - interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, - quant); - interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, - quant); - interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, - quant); - interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, - quant); + interpreter_.SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, + quant); + interpreter_.SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, + quant); + interpreter_.SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, + quant); + interpreter_.SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, + quant); TfLiteRegistration reg = AddOpRegistration(); - interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, ®); - interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, ®); - interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, ®); - } - - void TearDown() override { - // Interpreter relies on delegate_ to free the resources properly. Thus - // the life cycle of delegate must be longer than interpreter. 
- interpreter_.reset(); - delegate_.reset(); - } - - TfLiteDelegateBufferHandle last_allocated_handle_ = kTfLiteNullBufferHandle; - - TfLiteDelegateBufferHandle AllocateBufferHandle() { - return ++last_allocated_handle_; + interpreter_.AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, ®); + interpreter_.AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, ®); + interpreter_.AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, ®); } protected: @@ -805,8 +791,8 @@ class TestDelegate : public ::testing::Test { // value-copyable and compatible with TfLite. explicit SimpleDelegate(const std::vector& nodes) : nodes_(nodes) { delegate_.Prepare = [](TfLiteContext* context, - TfLiteDelegate* delegate) -> TfLiteStatus { - auto* simple = reinterpret_cast(delegate->data_); + void* data) -> TfLiteStatus { + auto* simple = reinterpret_cast(data); TfLiteIntArray* nodes_to_separate = TfLiteIntArrayCreate(simple->nodes_.size()); // Mark nodes that we want in TfLiteIntArray* structure. @@ -837,28 +823,10 @@ class TestDelegate : public ::testing::Test { } context->ReplaceSubgraphsWithDelegateKernels( - context, FakeFusedRegistration(), nodes_to_separate, delegate); + context, FakeFusedRegistration(), nodes_to_separate); TfLiteIntArrayFree(nodes_to_separate); return kTfLiteOk; }; - delegate_.CopyToBufferHandle = - [](TfLiteDelegate* delegate, - TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, - int size) -> TfLiteStatus { - // TODO(ycling): Implement tests to test buffer copying logic. - return kTfLiteOk; - }; - delegate_.CopyFromBufferHandle = - [](TfLiteDelegate* delegate, - TfLiteDelegateBufferHandle delegate_buffer_handle, void* data, - int size) -> TfLiteStatus { - // TODO(ycling): Implement tests to test buffer copying logic. - return kTfLiteOk; - }; - delegate_.FreeBufferHandle = [](TfLiteDelegate* delegate, - TfLiteDelegateBufferHandle* handle) { - *handle = kTfLiteNullBufferHandle; - }; // Store type-punned data SimpleDelegate structure. 
delegate_.data_ = reinterpret_cast(this); } @@ -875,102 +843,36 @@ class TestDelegate : public ::testing::Test { std::vector nodes_; TfLiteDelegate delegate_; }; - std::unique_ptr interpreter_; - std::unique_ptr delegate_; + Interpreter interpreter_; }; TEST_F(TestDelegate, BasicDelegate) { - interpreter_->Invoke(); - delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); - interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); + interpreter_.Invoke(); + SimpleDelegate simple({0, 1, 2}); + interpreter_.ModifyGraphWithDelegate(simple.get_tf_lite_delegate()); - ASSERT_EQ(interpreter_->execution_plan().size(), 1); - int node = interpreter_->execution_plan()[0]; - const auto* node_and_reg = interpreter_->node_and_registration(node); + ASSERT_EQ(interpreter_.execution_plan().size(), 1); + int node = interpreter_.execution_plan()[0]; + const auto* node_and_reg = interpreter_.node_and_registration(node); ASSERT_EQ(node_and_reg->second.custom_name, SimpleDelegate::FakeFusedRegistration().custom_name); } TEST_F(TestDelegate, ComplexDeligate) { - interpreter_->Invoke(); - delegate_ = std::unique_ptr(new SimpleDelegate({1, 2})); - interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); + interpreter_.Invoke(); + SimpleDelegate simple({1, 2}); + interpreter_.ModifyGraphWithDelegate(simple.get_tf_lite_delegate()); - ASSERT_EQ(interpreter_->execution_plan().size(), 2); + ASSERT_EQ(interpreter_.execution_plan().size(), 2); // 0th should be a non-delegated original op - ASSERT_EQ(interpreter_->execution_plan()[0], 0); + ASSERT_EQ(interpreter_.execution_plan()[0], 0); // 1st should be a new macro op (3) which didn't exist) - ASSERT_EQ(interpreter_->execution_plan()[1], 3); - const auto* node_and_reg = interpreter_->node_and_registration(3); + ASSERT_EQ(interpreter_.execution_plan()[1], 3); + const auto* node_and_reg = interpreter_.node_and_registration(3); ASSERT_EQ(node_and_reg->second.custom_name, 
SimpleDelegate::FakeFusedRegistration().custom_name); } -TEST_F(TestDelegate, SetBufferHandleToInput) { - interpreter_->Invoke(); - delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); - TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); - interpreter_->ModifyGraphWithDelegate(delegate); - - constexpr int kOutputTensorIndex = 0; - TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); - ASSERT_EQ(tensor->delegate, nullptr); - ASSERT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); - - TfLiteDelegateBufferHandle handle = AllocateBufferHandle(); - TfLiteStatus status = interpreter_->SetDelegateBufferHandle( - kOutputTensorIndex, handle, delegate); - ASSERT_EQ(status, kTfLiteOk); - EXPECT_EQ(tensor->delegate, delegate); - EXPECT_EQ(tensor->delegate_buffer_handle, handle); -} - -TEST_F(TestDelegate, SetBufferHandleToOutput) { - interpreter_->Invoke(); - delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); - TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); - interpreter_->ModifyGraphWithDelegate(delegate); - - constexpr int kOutputTensorIndex = 3; - TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); - // Before setting the buffer handle, the tensor's `delegate` is already set - // because it will be written by the delegate. 
- ASSERT_EQ(tensor->delegate, delegate); - ASSERT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); - - TfLiteDelegateBufferHandle handle = AllocateBufferHandle(); - TfLiteStatus status = interpreter_->SetDelegateBufferHandle( - kOutputTensorIndex, handle, delegate); - ASSERT_EQ(status, kTfLiteOk); - EXPECT_EQ(tensor->delegate, delegate); - EXPECT_EQ(tensor->delegate_buffer_handle, handle); -} - -TEST_F(TestDelegate, SetInvalidHandleToTensor) { - interpreter_->Invoke(); - delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); - TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); - interpreter_->ModifyGraphWithDelegate(delegate); - - SimpleDelegate another_simple_delegate({0, 1, 2}); - - constexpr int kOutputTensorIndex = 3; - TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); - // Before setting the buffer handle, the tensor's `delegate` is already set - // because it will be written by the delegate. - ASSERT_EQ(tensor->delegate, delegate); - ASSERT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); - - TfLiteDelegateBufferHandle handle = AllocateBufferHandle(); - TfLiteStatus status = interpreter_->SetDelegateBufferHandle( - kOutputTensorIndex, handle, - another_simple_delegate.get_tf_lite_delegate()); - // Setting a buffer handle to a tensor with another delegate will fail. - ASSERT_EQ(status, kTfLiteError); - EXPECT_EQ(tensor->delegate, delegate); - EXPECT_EQ(tensor->delegate_buffer_handle, kTfLiteNullBufferHandle); -} - } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc deleted file mode 100644 index b2c7e6c7a6..0000000000 --- a/tensorflow/contrib/lite/util.cc +++ /dev/null @@ -1,27 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/contrib/lite/util.h" - -namespace tflite { - -TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input) { - TfLiteIntArray* output = TfLiteIntArrayCreate(input.size()); - for (size_t i = 0; i < input.size(); i++) { - output->data[i] = input[i]; - } - return output; -} - -} // namespace tflite diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h deleted file mode 100644 index 50e4fb839e..0000000000 --- a/tensorflow/contrib/lite/util.h +++ /dev/null @@ -1,34 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -// This file provides general C++ utility functions in TFLite. -// For example: Converting between `TfLiteIntArray`, `std::vector` and -// Flatbuffer vectors. These functions can't live in `context.h` since it's pure -// C. 
- -#ifndef TENSORFLOW_CONTRIB_LITE_UTIL_H_ -#define TENSORFLOW_CONTRIB_LITE_UTIL_H_ - -#include -#include "tensorflow/contrib/lite/context.h" - -namespace tflite { - -// Converts a `std::vector` to a `TfLiteIntArray`. -TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input); - -} // namespace tflite - -#endif // TENSORFLOW_CONTRIB_LITE_UTIL_H_ diff --git a/tensorflow/contrib/lite/util_test.cc b/tensorflow/contrib/lite/util_test.cc deleted file mode 100644 index 04579c53aa..0000000000 --- a/tensorflow/contrib/lite/util_test.cc +++ /dev/null @@ -1,50 +0,0 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-==============================================================================*/ - -#include -#include -#include - -#include "tensorflow/contrib/lite/context.h" -#include "tensorflow/contrib/lite/util.h" - -namespace tflite { -namespace { - -TEST(ConvertVectorToTfLiteIntArray, TestWithVector) { - std::vector input = {1, 2}; - TfLiteIntArray* output = ConvertVectorToTfLiteIntArray(input); - ASSERT_NE(output, nullptr); - EXPECT_EQ(output->size, 2); - EXPECT_EQ(output->data[0], 1); - EXPECT_EQ(output->data[1], 2); - TfLiteIntArrayFree(output); -} - -TEST(ConvertVectorToTfLiteIntArray, TestWithEmptyVector) { - std::vector input; - TfLiteIntArray* output = ConvertVectorToTfLiteIntArray(input); - ASSERT_NE(output, nullptr); - EXPECT_EQ(output->size, 0); - TfLiteIntArrayFree(output); -} - -} // namespace -} // namespace tflite - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} -- GitLab From 4ac1fee7f13586ce6633a45bbe88592f605583e0 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 04:11:24 -0800 Subject: [PATCH 595/884] - FisherEstimator now supports computing products with arbitrary matrix powers of the approximate Fisher - Added multi-tower support to multi/RNN fully connected layers - All op creation is now done inside functions that explicitly create ops, thus allowing fine control of their placement. One result of this is that we no longer need any colocation statements (and these have been removed) - Multi-tower computations are now handled using ParitionedTensor class, which appears to be a single tensor to the FisherFactors but actually contains a list of tensors. - To achieve the above damping values are passed around as special functions that are packaged along with "ids" that can be used to uniquely identify the computation they perform. Topohash might provide a better solution for this in the future. 
- Variable creation in the factors is now done via special methods so we can have fine control over where these are placed - FisherEstimator now has special functions to create ops and variables using different placement strategies (currently: no strategy, round-robin, and as thunks). By default this will use the round-robin strategy and manufacture the usual convenience properties ("inv_update_ops", etc). This default behavior is to preserve backwards compatibility but in the future we should deprecate this and require the user to ask for an explicit strategy. - LossFunctions no longer make any ops in their constructors. They only make ops when evaluated. LayerCollection maintains a list of tensors/ops which we can colocate LossFunction computations with (typically their inputs) - LossFunctions no longer support multi-tower/mini-batches directly. Instead LayerCollection maintains a list of these objects, one for each tower. This solution is better since now the loss function related computations can take place exclusively on the corresponding tower. - All loss functions now support multiple towers/minibatches (via LayerCollection). - tf.gradients is passed a list of loss function values instead of their sum, which will prevent extraneous gradient ops being placed on arbitrary devices. Hopefully with this change and the above one for loss functions all ops associated with gradient computations (for computing stats) will occur completely on the device that defines that part of the graph. e.g. this will do the right thing for multiple towers - I've also made sure that sensible colocation occurs for the extra ops needed by the curvature_propagation and exact estimation modes. - Variables and ops made by FisherEstimator are now placed inside of name scopes (based on the name given to FisherEstimator) - Restored old variable use count tracker implementation, thus fixing the issue with how generic registrations were handled by check_registration().
- Restored interface to FisherEstimator (which was changed in the previous CL). - Fixed bug in LazyKFacOptimizer: optional/named arguments weren't being passed in properly - Lots of other minor refactors/improvements PiperOrigin-RevId: 188310846 --- .../contrib/kfac/python/kernel_tests/BUILD | 1 + .../python/kernel_tests/estimator_test.py | 61 +- .../python/kernel_tests/fisher_blocks_test.py | 95 ++- .../kernel_tests/fisher_factors_test.py | 144 ++-- .../kernel_tests/layer_collection_test.py | 25 +- .../kernel_tests/loss_functions_test.py | 35 - .../contrib/kfac/python/ops/estimator.py | 395 +++++++-- .../contrib/kfac/python/ops/fisher_blocks.py | 624 +++++++------- .../contrib/kfac/python/ops/fisher_factors.py | 800 +++++++++--------- .../kfac/python/ops/layer_collection.py | 229 +++-- .../contrib/kfac/python/ops/loss_functions.py | 58 +- .../contrib/kfac/python/ops/optimizer.py | 251 +++++- tensorflow/contrib/kfac/python/ops/utils.py | 72 ++ 13 files changed, 1638 insertions(+), 1152 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/BUILD b/tensorflow/contrib/kfac/python/kernel_tests/BUILD index f4ed978174..146ae8b7e2 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/BUILD +++ b/tensorflow/contrib/kfac/python/kernel_tests/BUILD @@ -36,6 +36,7 @@ py_test( srcs = ["fisher_factors_test.py"], srcs_version = "PY2AND3", deps = [ + "//tensorflow/contrib/kfac/python/ops:fisher_blocks", "//tensorflow/contrib/kfac/python/ops:fisher_factors", "//tensorflow/python:array_ops", "//tensorflow/python:client_testlib", diff --git a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py b/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py index b12f7be769..c1ea296b43 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/estimator_test.py @@ -90,59 +90,75 @@ class EstimatorTest(test.TestCase): def testEstimatorInitManualRegistration(self): with 
self._graph.as_default(): # We should be able to build an estimator for only the registered vars. - estimator.FisherEstimator(lambda: 0.2, [self.weights], 0.1, + estimator.FisherEstimator([self.weights], 0.1, 0.2, self.layer_collection) # Check that we throw an error if we try to build an estimator for vars # that were not manually registered. with self.assertRaises(ValueError): - estimator.FisherEstimator(lambda: 0.2, [self.weights, self.bias], 0.1, + estimator.FisherEstimator([self.weights, self.bias], 0.1, 0.2, self.layer_collection) # Check that we throw an error if we don't include registered variables, # i.e. self.weights with self.assertRaises(ValueError): - estimator.FisherEstimator(lambda: 0.2, [], 0.1, self.layer_collection) + estimator.FisherEstimator([], 0.1, 0.2, self.layer_collection) @test.mock.patch.object(utils.SubGraph, "variable_uses", return_value=42) def testVariableWrongNumberOfUses(self, mock_uses): with self.assertRaises(ValueError): - estimator.FisherEstimator(lambda: 0.2, [self.weights], 0.1, + estimator.FisherEstimator([self.weights], 0.1, 0.2, self.layer_collection) def testInvalidEstimationMode(self): with self.assertRaises(ValueError): - estimator.FisherEstimator(lambda: 0.2, [self.weights], 0.1, - self.layer_collection, "not_a_real_mode") + estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode="not_a_real_mode") - def testModeListCorrect(self): + def testGradientsModeBuild(self): with self._graph.as_default(): - est = estimator.FisherEstimator(lambda: 0.2, [self.weights], 0.1, - self.layer_collection) - self.assertItemsEqual(_ALL_ESTIMATION_MODES, est._gradient_fns.keys()) + estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode="gradients") - def testAllModesBuild(self): - for mode in _ALL_ESTIMATION_MODES: - with self._graph.as_default(): - estimator.FisherEstimator(lambda: 0.2, [self.weights], 0.1, - self.layer_collection, mode) + def 
testEmpiricalModeBuild(self): + with self._graph.as_default(): + estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode="empirical") + + def testCurvaturePropModeBuild(self): + with self._graph.as_default(): + estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode="curvature_prop") + + def testExactModeBuild(self): + with self._graph.as_default(): + estimator.FisherEstimator([self.weights], 0.1, 0.2, + self.layer_collection, + estimation_mode="exact") def test_cov_update_thunks(self): """Ensures covariance update ops run once per global_step.""" with self._graph.as_default(), self.test_session() as sess: fisher_estimator = estimator.FisherEstimator( - damping_fn=lambda: 0.2, variables=[self.weights], layer_collection=self.layer_collection, + damping=0.2, cov_ema_decay=0.0) # Construct an op that executes one covariance update per step. global_step = training_util.get_or_create_global_step() + (cov_variable_thunks, cov_update_op_thunks, + _, _) = fisher_estimator.create_ops_and_vars_thunks() + for thunk in cov_variable_thunks: + thunk() cov_matrices = [ fisher_factor.get_cov() for fisher_factor in self.layer_collection.get_factors() ] - cov_update_op_thunks = fisher_estimator.cov_update_thunks cov_update_op = control_flow_ops.case( [(math_ops.equal(global_step, i), thunk) for i, thunk in enumerate(cov_update_op_thunks)]) @@ -178,19 +194,24 @@ class EstimatorTest(test.TestCase): """Ensures inverse update ops run once per global_step.""" with self._graph.as_default(), self.test_session() as sess: fisher_estimator = estimator.FisherEstimator( - damping_fn=lambda: 0.2, variables=[self.weights], layer_collection=self.layer_collection, + damping=0.2, cov_ema_decay=0.0) # Construct op that updates one inverse per global step. 
global_step = training_util.get_or_create_global_step() + (cov_variable_thunks, _, inv_variable_thunks, + inv_update_op_thunks) = fisher_estimator.create_ops_and_vars_thunks() + for thunk in cov_variable_thunks: + thunk() + for thunk in inv_variable_thunks: + thunk() inv_matrices = [ matrix for fisher_factor in self.layer_collection.get_factors() - for matrix in fisher_factor._inverses_by_damping.values() + for matrix in fisher_factor._matpower_by_exp_and_damping.values() ] - inv_update_op_thunks = fisher_estimator.inv_update_thunks inv_update_op = control_flow_ops.case( [(math_ops.equal(global_step, i), thunk) for i, thunk in enumerate(inv_update_op_thunks)]) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py index fb4b3a241c..c9c0f8e0ae 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py @@ -94,6 +94,9 @@ class FullFBTest(test.TestCase): block.register_additional_minibatch(32) grads = (params[0]**2, math_ops.sqrt(params[1])) block.instantiate_factors((grads,), 0.5) + block._factor.instantiate_cov_variables() + block.register_inverse() + block._factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. sess.run(tf_variables.global_variables_initializer()) @@ -112,6 +115,9 @@ class FullFBTest(test.TestCase): block.register_additional_minibatch(32) grads = params**2 block.instantiate_factors((grads,), 0.5) + block._factor.instantiate_cov_variables() + block.register_inverse() + block._factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. 
sess.run(tf_variables.global_variables_initializer()) @@ -131,6 +137,9 @@ class FullFBTest(test.TestCase): grads = (array_ops.constant([2., 3.]), array_ops.constant(4.)) damping = 0.5 block.instantiate_factors((grads,), damping) + block._factor.instantiate_cov_variables() + block.register_inverse() + block._factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. sess.run(state_ops.assign(block._factor._cov, _make_psd(3))) @@ -185,6 +194,7 @@ class NaiveDiagonalFBTest(test.TestCase): block.register_additional_minibatch(32) grads = (params[0]**2, math_ops.sqrt(params[1])) block.instantiate_factors((grads,), 0.5) + block._factor.instantiate_cov_variables() # Make sure our inverse is something other than the identity. sess.run(tf_variables.global_variables_initializer()) @@ -203,6 +213,7 @@ class NaiveDiagonalFBTest(test.TestCase): block.register_additional_minibatch(32) grads = params**2 block.instantiate_factors((grads,), 0.5) + block._factor.instantiate_cov_variables() # Make sure our inverse is something other than the identity. sess.run(tf_variables.global_variables_initializer()) @@ -221,6 +232,7 @@ class NaiveDiagonalFBTest(test.TestCase): grads = (params[0]**2, math_ops.sqrt(params[1])) damping = 0.5 block.instantiate_factors((grads,), damping) + block._factor.instantiate_cov_variables() cov = array_ops.reshape(array_ops.constant([2., 3., 4.]), [-1, 1]) sess.run(state_ops.assign(block._factor._cov, cov)) @@ -367,6 +379,7 @@ class FullyConnectedDiagonalFBTest(test.TestCase): block.register_additional_minibatch(i, o) block.instantiate_factors((output_grads,), damping=0.0) + block._factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) sess.run(block._factor.make_covariance_update_op(0.0)) @@ -394,7 +407,7 @@ class EmbeddingKFACFBTest(test.TestCase): # Instantiate factor's variables. Ensure it doesn't fail. grads = outputs**2. damping = array_ops.constant(0.) 
- block.instantiate_factors(([grads],), damping) + block.instantiate_factors(((grads,),), damping) def testMultiplyInverse(self): with ops.Graph().as_default(), self.test_session() as sess: @@ -412,7 +425,12 @@ class EmbeddingKFACFBTest(test.TestCase): # Instantiate factor's variables. Ensure it doesn't fail. grads = outputs**2. damping = array_ops.constant(0.) - block.instantiate_factors(([grads],), damping) + block.instantiate_factors(((grads,),), damping) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() # Create a sparse update. indices = array_ops.constant([1, 3, 4]) @@ -456,7 +474,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): block.register_additional_minibatch(inputs, outputs) grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) + block.instantiate_factors(((grads,),), 0.5) def testInstantiateFactorsNoBias(self): with ops.Graph().as_default(): @@ -467,7 +485,7 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): block.register_additional_minibatch(inputs, outputs) grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) + block.instantiate_factors(((grads,),), 0.5) def testMultiplyInverseTuple(self): with ops.Graph().as_default(), self.test_session() as sess: @@ -477,7 +495,13 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) block.register_additional_minibatch(inputs, outputs) grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) + block.instantiate_factors(((grads,),), 0.5) + + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. 
sess.run(tf_variables.global_variables_initializer()) @@ -503,7 +527,12 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): block = fb.FullyConnectedKFACBasicFB(lc.LayerCollection(), has_bias=False) block.register_additional_minibatch(inputs, outputs) grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) + block.instantiate_factors(((grads,),), 0.5) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. sess.run(tf_variables.global_variables_initializer()) @@ -527,10 +556,17 @@ class FullyConnectedKFACBasicFBTest(test.TestCase): block.register_additional_minibatch(inputs, outputs) grads = outputs**2 damping = 0. # This test is only valid without damping. - block.instantiate_factors(([grads],), damping) + block.instantiate_factors(((grads,),), damping) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() sess.run(state_ops.assign(block._input_factor._cov, _make_psd(3))) sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2))) + + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() + sess.run(block._input_factor.make_inverse_update_ops()) sess.run(block._output_factor.make_inverse_update_ops()) @@ -718,6 +754,7 @@ class ConvDiagonalFBTest(test.TestCase): block.register_additional_minibatch(i, o) block.instantiate_factors((output_grads,), damping=0.0) + block._factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) sess.run(block._factor.make_covariance_update_op(0.0)) @@ -759,7 +796,12 @@ class ConvKFCBasicFBTest(test.TestCase): 'SAME') block.register_additional_minibatch(inputs, outputs) grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) + 
block.instantiate_factors(((grads,),), 0.5) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. sess.run(tf_variables.global_variables_initializer()) @@ -786,7 +828,12 @@ class ConvKFCBasicFBTest(test.TestCase): block.register_additional_minibatch(inputs, outputs) self.assertFalse(block._has_bias) grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) + block.instantiate_factors(((grads,),), 0.5) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. sess.run(tf_variables.global_variables_initializer()) @@ -809,7 +856,12 @@ class ConvKFCBasicFBTest(test.TestCase): block.register_additional_minibatch(inputs, outputs) self.assertTrue(block._has_bias) grads = outputs**2 - block.instantiate_factors(([grads],), 0.5) + block.instantiate_factors(((grads,),), 0.5) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() # Make sure our inverse is something other than the identity. sess.run(tf_variables.global_variables_initializer()) @@ -832,7 +884,12 @@ class ConvKFCBasicFBTest(test.TestCase): block.register_additional_minibatch(inputs, outputs) grads = outputs**2 damping = 0. # This test is only valid without damping. 
- block.instantiate_factors(([grads],), damping) + block.instantiate_factors(((grads,),), damping) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() sess.run(state_ops.assign(block._input_factor._cov, _make_psd(8))) sess.run(state_ops.assign(block._output_factor._cov, _make_psd(2))) @@ -857,9 +914,9 @@ class FullyConnectedSeriesFBTest(test.TestCase): random_seed.set_random_seed(200) inputs = array_ops.constant([1., 2.]) outputs = array_ops.constant([3., 4.]) - block = fb.FullyConnectedSeriesFB( - lc.LayerCollection(), inputs=[inputs], outputs=[outputs]) - self.assertAllEqual([outputs], block.tensors_to_compute_grads()) + block = fb.FullyConnectedSeriesFB(lc.LayerCollection()) + block.register_additional_minibatch([inputs], [outputs]) + self.assertAllEqual([[outputs]], block.tensors_to_compute_grads()) def testInstantiateFactorsHasBias(self): with ops.Graph().as_default(): @@ -868,11 +925,10 @@ class FullyConnectedSeriesFBTest(test.TestCase): outputs = array_ops.constant([[3., 4.], [5., 6.]]) block = fb.FullyConnectedSeriesFB( lc.LayerCollection(), - inputs=[inputs], - outputs=[outputs], has_bias=True) + block.register_additional_minibatch([inputs], [outputs]) grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) + block.instantiate_factors((((grads,),),), 0.5) def testInstantiateFactorsNoBias(self): with ops.Graph().as_default(): @@ -881,11 +937,10 @@ class FullyConnectedSeriesFBTest(test.TestCase): outputs = array_ops.constant([[3., 4.], [5., 6.]]) block = fb.FullyConnectedSeriesFB( lc.LayerCollection(), - inputs=[inputs], - outputs=[outputs], has_bias=False) + block.register_additional_minibatch([inputs], [outputs]) grads = outputs**2 - block.instantiate_factors(((grads,),), 0.5) + block.instantiate_factors((((grads,),),), 0.5) def as_tensors(tensor_or_tuple): diff --git 
a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index 66e18974ab..beb427bdcc 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -21,8 +21,8 @@ from __future__ import print_function import numpy as np import numpy.random as npr +from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb from tensorflow.contrib.kfac.python.ops import fisher_factors as ff -from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops as tf_ops from tensorflow.python.framework import random_seed @@ -33,32 +33,8 @@ from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import test -class MaybeColocateTest(test.TestCase): - - def setUp(self): - self._colocate_cov_ops_with_inputs = ff.COLOCATE_COV_OPS_WITH_INPUTS - - def tearDown(self): - ff.set_global_constants( - colocate_cov_ops_with_inputs=self._colocate_cov_ops_with_inputs) - - def testFalse(self): - ff.set_global_constants(colocate_cov_ops_with_inputs=False) - with tf_ops.Graph().as_default(): - a = constant_op.constant([2.0], name='a') - with ff.maybe_colocate_with(a): - b = constant_op.constant(3.0, name='b') - self.assertEqual([b'loc:@a'], a.op.colocation_groups()) - self.assertEqual([b'loc:@b'], b.op.colocation_groups()) - - def testTrue(self): - ff.set_global_constants(colocate_cov_ops_with_inputs=True) - with tf_ops.Graph().as_default(): - a = constant_op.constant([2.0], name='a') - with ff.maybe_colocate_with(a): - b = constant_op.constant(3.0, name='b') - self.assertEqual([b'loc:@a'], a.op.colocation_groups()) - self.assertEqual([b'loc:@a'], b.op.colocation_groups()) +def make_damping_func(damping): + return fb._package_func(lambda: damping, damping) class FisherFactorTestingDummy(ff.FisherFactor): @@ -98,10 
+74,13 @@ class FisherFactorTestingDummy(ff.FisherFactor): def right_multiply(self, x, damping): return NotImplementedError - def left_multiply_inverse(self, x, damping): + def left_multiply_matpower(self, x, exp, damping): + return NotImplementedError + + def right_multiply_matpower(self, x, exp, damping): return NotImplementedError - def right_multiply_inverse(self, x, damping): + def instantiate_inv_variables(self): return NotImplementedError @@ -246,21 +225,24 @@ class InverseProvidingFactorTest(test.TestCase): factor = InverseProvidingFactorTestingDummy(shape) factor_var_scope = 'dummy/a_b_c' - dampings = 0.1, 1e-1, 0.00001, 1e-5 + damping_funcs = [make_damping_func(0.1), + make_damping_func(0.1), + make_damping_func(1e-5), + make_damping_func(1e-5)] + for damping_func in damping_funcs: + factor.register_inverse(damping_func) - for damping in dampings: - factor.register_damped_inverse(damping) + factor.instantiate_inv_variables() - self.assertEqual(set(dampings), set(factor._inverses_by_damping.keys())) - inv = factor._inverses_by_damping[dampings[0]] - self.assertEqual(inv, factor._inverses_by_damping[dampings[1]]) - self.assertNotEqual(inv, factor._inverses_by_damping[dampings[2]]) - self.assertEqual(factor._inverses_by_damping[dampings[2]], - factor._inverses_by_damping[dampings[3]]) + inv = factor.get_inverse(damping_funcs[0]) + self.assertEqual(inv, factor.get_inverse(damping_funcs[1])) + self.assertNotEqual(inv, factor.get_inverse(damping_funcs[2])) + self.assertEqual(factor.get_inverse(damping_funcs[2]), + factor.get_inverse(damping_funcs[3])) factor_vars = tf_ops.get_collection(tf_ops.GraphKeys.GLOBAL_VARIABLES, factor_var_scope) - self.assertListEqual([inv, factor._inverses_by_damping[dampings[2]]], - factor_vars) + self.assertEqual(set([inv, factor.get_inverse(damping_funcs[2])]), + set(factor_vars)) self.assertEqual(shape, inv.get_shape()) def testRegisterMatpower(self): @@ -270,17 +252,22 @@ class InverseProvidingFactorTest(test.TestCase): factor = 
InverseProvidingFactorTestingDummy(shape) factor_var_scope = 'dummy/a_b_c' - factor.register_matpower(1, 0.5) - factor.register_matpower(2, 0.5) + # TODO(b/74201126): Change to using the same func for both once + # Topohash is in place. + damping_func_1 = make_damping_func(0.5) + damping_func_2 = make_damping_func(0.5) + + factor.register_matpower(-0.5, damping_func_1) + factor.register_matpower(2, damping_func_2) + + factor.instantiate_inv_variables() - self.assertEqual( - set([(1, 0.5), (2, 0.5)]), - set(factor._matpower_by_exp_and_damping.keys())) factor_vars = tf_ops.get_collection(tf_ops.GraphKeys.GLOBAL_VARIABLES, factor_var_scope) - matpower1 = factor.get_matpower(1, 0.5) - matpower2 = factor.get_matpower(2, 0.5) - self.assertListEqual([matpower1, matpower2], factor_vars) + matpower1 = factor.get_matpower(-0.5, damping_func_1) + matpower2 = factor.get_matpower(2, damping_func_2) + + self.assertEqual(set([matpower1, matpower2]), set(factor_vars)) self.assertEqual(shape, matpower1.get_shape()) self.assertEqual(shape, matpower2.get_shape()) @@ -299,17 +286,24 @@ class InverseProvidingFactorTest(test.TestCase): factor = InverseProvidingFactorTestingDummy(cov.shape) factor._cov = array_ops.constant(cov, dtype=dtypes.float32) + damping_funcs = [] for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1): - factor.register_damped_inverse(1. / i) + damping_funcs.append(make_damping_func(1./i)) + + for i in range(ff.EIGENVALUE_DECOMPOSITION_THRESHOLD): + factor.register_inverse(damping_funcs[i]) + + factor.instantiate_inv_variables() ops = factor.make_inverse_update_ops() self.assertEqual(1, len(ops)) sess.run(tf_variables.global_variables_initializer()) new_invs = [] sess.run(ops) - for i in range(1, ff.EIGENVALUE_DECOMPOSITION_THRESHOLD + 1): + for i in range(ff.EIGENVALUE_DECOMPOSITION_THRESHOLD): # The inverse op will assign the damped inverse of cov to the inv var. - new_invs.append(sess.run(factor._inverses_by_damping[1. 
/ i])) + new_invs.append(sess.run(factor.get_inverse(damping_funcs[i]))) + # We want to see that the new invs are all different from each other. for i in range(len(new_invs)): for j in range(i + 1, len(new_invs)): @@ -324,14 +318,16 @@ class InverseProvidingFactorTest(test.TestCase): factor._cov = array_ops.constant(cov, dtype=dtypes.float32) exp = 2 # NOTE(mattjj): must be int to test with np.linalg.matrix_power damping = 0.5 + damping_func = make_damping_func(damping) - factor.register_matpower(exp, damping) + factor.register_matpower(exp, damping_func) + factor.instantiate_inv_variables() ops = factor.make_inverse_update_ops() self.assertEqual(1, len(ops)) sess.run(tf_variables.global_variables_initializer()) sess.run(ops[0]) - matpower = sess.run(factor._matpower_by_exp_and_damping[(exp, damping)]) + matpower = sess.run(factor.get_matpower(exp, damping_func)) matpower_np = np.linalg.matrix_power(cov + np.eye(2) * damping, exp) self.assertAllClose(matpower, matpower_np) @@ -342,18 +338,21 @@ class InverseProvidingFactorTest(test.TestCase): factor = InverseProvidingFactorTestingDummy(cov.shape) factor._cov = array_ops.constant(cov, dtype=dtypes.float32) - factor.register_damped_inverse(0) + damping_func = make_damping_func(0) + + factor.register_inverse(damping_func) + factor.instantiate_inv_variables() ops = factor.make_inverse_update_ops() self.assertEqual(1, len(ops)) sess.run(tf_variables.global_variables_initializer()) # The inverse op will assign the damped inverse of cov to the inv var. 
- old_inv = sess.run(factor._inverses_by_damping[0]) + old_inv = sess.run(factor.get_inverse(damping_func)) self.assertAllClose( sess.run(ff.inverse_initializer(cov.shape, dtypes.float32)), old_inv) sess.run(ops) - new_inv = sess.run(factor._inverses_by_damping[0]) + new_inv = sess.run(factor.get_inverse(damping_func)) self.assertAllClose(new_inv, np.linalg.inv(cov)) @@ -364,6 +363,7 @@ class FullFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), name='a/b/c') factor = ff.FullFactor((tensor,), 32) + factor.instantiate_cov_variables() self.assertEqual([6, 6], factor.get_cov().get_shape().as_list()) def testFullFactorInitFloat64(self): @@ -372,6 +372,7 @@ class FullFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') factor = ff.FullFactor((tensor,), 32) + factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) self.assertEqual([6, 6], cov.get_shape().as_list()) @@ -381,6 +382,7 @@ class FullFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.constant([1., 2.], name='a/b/c') factor = ff.FullFactor((tensor,), 2) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) @@ -394,6 +396,7 @@ class NaiveDiagonalFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), name='a/b/c') factor = ff.NaiveDiagonalFactor((tensor,), 32) + factor.instantiate_cov_variables() self.assertEqual([6, 1], factor.get_cov_var().get_shape().as_list()) def testNaiveDiagonalFactorInitFloat64(self): @@ -402,6 +405,7 @@ class NaiveDiagonalFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') factor = ff.NaiveDiagonalFactor((tensor,), 32) + factor.instantiate_cov_variables() cov = factor.get_cov_var() self.assertEqual(cov.dtype, dtype) 
self.assertEqual([6, 1], cov.get_shape().as_list()) @@ -411,6 +415,7 @@ class NaiveDiagonalFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.constant([1., 2.], name='a/b/c') factor = ff.NaiveDiagonalFactor((tensor,), 2) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) @@ -423,7 +428,8 @@ class EmbeddingInputKroneckerFactorTest(test.TestCase): with tf_ops.Graph().as_default(): input_ids = array_ops.constant([[0], [1], [4]]) vocab_size = 5 - factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) + factor = ff.EmbeddingInputKroneckerFactor(input_ids, vocab_size) + factor.instantiate_cov_variables() cov = factor.get_cov_var() self.assertEqual(cov.shape.as_list(), [vocab_size]) @@ -431,7 +437,8 @@ class EmbeddingInputKroneckerFactorTest(test.TestCase): with tf_ops.Graph().as_default(): input_ids = array_ops.constant([[0], [1], [4]]) vocab_size = 5 - factor = ff.EmbeddingInputKroneckerFactor((input_ids,), vocab_size) + factor = ff.EmbeddingInputKroneckerFactor(input_ids, vocab_size) + factor.instantiate_cov_variables() cov_update_op = factor.make_covariance_update_op(0.0) with self.test_session() as sess: @@ -450,6 +457,7 @@ class FullyConnectedKroneckerFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') factor = ff.FullyConnectedKroneckerFactor((tensor,), has_bias=has_bias) + factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) self.assertEqual(final_shape, cov.get_shape().as_list()) @@ -467,6 +475,7 @@ class FullyConnectedKroneckerFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') factor = ff.FullyConnectedKroneckerFactor((tensor,), has_bias=True) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = 
sess.run(factor.make_covariance_update_op(.5)) @@ -477,6 +486,7 @@ class FullyConnectedKroneckerFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') factor = ff.FullyConnectedKroneckerFactor((tensor,)) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) @@ -491,6 +501,7 @@ class ConvInputKroneckerFactorTest(test.TestCase): tensor = array_ops.ones((2, 3), name='a/b/c') factor = ff.ConvInputKroneckerFactor( tensor, (1, 2, 3, 4), 3, 2, has_bias=False) + factor.instantiate_cov_variables() self.assertEqual([1 * 2 * 3, 1 * 2 * 3], factor.get_cov().get_shape().as_list()) @@ -500,6 +511,7 @@ class ConvInputKroneckerFactorTest(test.TestCase): tensor = array_ops.ones((2, 3), name='a/b/c') factor = ff.ConvInputKroneckerFactor( tensor, (1, 2, 3, 4), 3, 2, has_bias=True) + factor.instantiate_cov_variables() self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], factor.get_cov().get_shape().as_list()) @@ -510,6 +522,7 @@ class ConvInputKroneckerFactorTest(test.TestCase): tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') factor = ff.ConvInputKroneckerFactor( tensor, (1, 2, 3, 4), 3, 2, has_bias=True) + factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], @@ -522,6 +535,7 @@ class ConvInputKroneckerFactorTest(test.TestCase): np.arange(1., 17.).reshape(2, 2, 2, 2), dtype=dtypes.float32) factor = ff.ConvInputKroneckerFactor( tensor, (1, 2, 1, 1), [1, 1, 1, 1], 'SAME', has_bias=True) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) @@ -533,8 +547,9 @@ class ConvInputKroneckerFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.constant( np.arange(1., 17.).reshape(2, 2, 2, 2), dtype=dtypes.float32) - factor 
= ff.ConvInputKroneckerFactor(tensor, (1, 2, 1, 1), [1, 1, 1, 1], - 'SAME') + factor = ff.ConvInputKroneckerFactor(tensor, (1, 2, 1, 1), + [1, 1, 1, 1], 'SAME') + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) @@ -548,6 +563,7 @@ class ConvOutputKroneckerFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3, 4, 5), name='a/b/c') factor = ff.ConvOutputKroneckerFactor((tensor,)) + factor.instantiate_cov_variables() self.assertEqual([5, 5], factor.get_cov().get_shape().as_list()) def testConvOutputKroneckerFactorInitFloat64(self): @@ -556,6 +572,7 @@ class ConvOutputKroneckerFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3, 4, 5), dtype=dtype, name='a/b/c') factor = ff.ConvOutputKroneckerFactor((tensor,)) + factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) self.assertEqual([5, 5], cov.get_shape().as_list()) @@ -565,13 +582,14 @@ class ConvOutputKroneckerFactorTest(test.TestCase): random_seed.set_random_seed(200) tensor = array_ops.ones((2, 3), name='a/b/c') with self.assertRaises(IndexError): - ff.ConvOutputKroneckerFactor(tensor) + ff.ConvOutputKroneckerFactor((tensor,)) def testMakeCovarianceUpdateOp(self): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) tensor = np.arange(1, 17).reshape(2, 2, 2, 2).astype(np.float32) factor = ff.ConvOutputKroneckerFactor((array_ops.constant(tensor),)) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) @@ -586,6 +604,7 @@ class FullyConnectedMultiKFTest(test.TestCase): tensor = array_ops.ones((2, 3), name='a/b/c') tensor_list = [tensor] factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False) + factor.instantiate_cov_variables() self.assertEqual([3, 3], 
factor.get_cov().get_shape().as_list()) def testFullyConnectedMultiKFInitFloat64(self): @@ -595,6 +614,7 @@ class FullyConnectedMultiKFTest(test.TestCase): tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') tensor_list = [tensor] factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=False) + factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) self.assertEqual([3, 3], cov.get_shape().as_list()) @@ -605,6 +625,7 @@ class FullyConnectedMultiKFTest(test.TestCase): tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') tensor_list = [tensor] factor = ff.FullyConnectedMultiKF((tensor_list,), has_bias=True) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) @@ -616,6 +637,7 @@ class FullyConnectedMultiKFTest(test.TestCase): tensor = array_ops.constant([[1., 2.], [3., 4.]], name='a/b/c') tensor_list = [tensor] factor = ff.FullyConnectedMultiKF((tensor_list,)) + factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) new_cov = sess.run(factor.make_covariance_update_op(.5)) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py index b8ccbeadd0..889f336811 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py @@ -237,16 +237,16 @@ class LayerCollectionTest(test.TestCase): # Create a new loss function by name. lc.register_categorical_predictive_distribution(logits, name='loss1') - self.assertEqual(1, len(lc.losses)) + self.assertEqual(1, len(lc.towers_by_loss)) # Add logits to same loss function. 
lc.register_categorical_predictive_distribution( logits, name='loss1', reuse=True) - self.assertEqual(1, len(lc.losses)) + self.assertEqual(1, len(lc.towers_by_loss)) # Add another new loss function. lc.register_categorical_predictive_distribution(logits, name='loss2') - self.assertEqual(2, len(lc.losses)) + self.assertEqual(2, len(lc.towers_by_loss)) def testLossFunctionWithoutName(self): """Ensure loss functions get unique names if 'name' not specified.""" @@ -298,13 +298,9 @@ class LayerCollectionTest(test.TestCase): name='loss1', reuse=layer_collection.VARIABLE_SCOPE) - self.assertEqual(len(lc.losses), 1) - loss = lc.losses[0] - + self.assertEqual(len(lc.towers_by_loss), 1) # Three successful registrations. - self.assertEqual(loss.params.shape.as_list(), - [3 * batch_size, output_size]) - self.assertEqual(loss.targets.shape.as_list(), [3 * batch_size]) + self.assertEqual(len(lc.towers_by_loss[0]), 3) def testRegisterCategoricalPredictiveDistributionBatchSize1(self): with ops.Graph().as_default(): @@ -479,17 +475,6 @@ class LayerCollectionTest(test.TestCase): variables = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) self.assertTrue(all([var.name.startswith(scope) for var in variables])) - def testGetUseCountMap(self): - """Ensure get_use_count_map() sums 'num_registered_minibatches'.""" - lc = layer_collection.LayerCollection() - lc.fisher_blocks = { - 'a': MockFisherBlock(), - ('a', 'c'): MockFisherBlock(), - ('b', 'c'): MockFisherBlock() - } - use_count_map = lc.get_use_count_map() - self.assertDictEqual({'a': 4, 'b': 2, 'c': 4}, use_count_map) - def testIdentifyLinkedParametersSomeRegisteredInOtherTuples(self): x = variable_scope.get_variable('x', shape=()) y = variable_scope.get_variable('y', shape=()) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py b/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py index ae787b6f1a..c00af5593f 100644 --- 
a/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/loss_functions_test.py @@ -24,7 +24,6 @@ from tensorflow.contrib.kfac.python.ops import loss_functions from tensorflow.python.framework import constant_op from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops -from tensorflow.python.ops import random_ops from tensorflow.python.platform import test @@ -97,22 +96,6 @@ class CategoricalLogitsNegativeLogProbLossTest(test.TestCase): # difficult to say if the output is correct or not... neg_log_prob = sess.run(neg_log_prob) - def testMultiMinibatchRegistration(self): - """Ensure this loss function supports registering multiple minibatches.""" - with ops.Graph().as_default(): - tower_logits = [] - loss = None - num_towers = 5 - for _ in range(num_towers): - logits = random_ops.random_uniform(shape=[2, 3]) - tower_logits.append(logits) - if loss is None: - loss = loss_functions.CategoricalLogitsNegativeLogProbLoss(logits) - else: - loss.register_additional_minibatch(logits) - self.assertListEqual(loss.input_minibatches, tower_logits) - self.assertEqual(loss.num_registered_minibatches, num_towers) - def testMultiplyFisherSingleVector(self): with ops.Graph().as_default(), self.test_session() as sess: logits = np.array([1., 2., 3.]) @@ -203,23 +186,5 @@ class OnehotCategoricalLogitsNegativeLogProbLossTest(test.TestCase): # difficult to say if the output is correct or not... 
neg_log_prob = sess.run(neg_log_prob) - def testMultiMinibatchRegistration(self): - """Ensure this loss function supports registering multiple minibatches.""" - with ops.Graph().as_default(): - tower_logits = [] - loss = None - num_towers = 5 - for _ in range(num_towers): - logits = random_ops.random_uniform(shape=[2, 3]) - tower_logits.append(logits) - if loss is None: - loss = loss_functions.OnehotCategoricalLogitsNegativeLogProbLoss( - logits) - else: - loss.register_additional_minibatch(logits) - self.assertListEqual(loss.input_minibatches, tower_logits) - self.assertEqual(loss.num_registered_minibatches, num_towers) - - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/kfac/python/ops/estimator.py b/tensorflow/contrib/kfac/python/ops/estimator.py index a7e268c48a..fdfd9599f4 100644 --- a/tensorflow/contrib/kfac/python/ops/estimator.py +++ b/tensorflow/contrib/kfac/python/ops/estimator.py @@ -27,6 +27,7 @@ from tensorflow.contrib.kfac.python.ops import utils from tensorflow.python.framework import ops as tf_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl +from tensorflow.python.ops import variable_scope from tensorflow.python.util import nest @@ -65,6 +66,13 @@ class _DeviceContextGenerator(object): yield +def _make_thunk_on_device(func, device): + def thunk(): + with tf_ops.device(device): + return func() + return thunk + + class FisherEstimator(object): """Fisher estimator class supporting various approximations of the Fisher. @@ -83,26 +91,35 @@ class FisherEstimator(object): """ def __init__(self, - damping_fn, variables, cov_ema_decay, + damping, layer_collection, + exps=(-1,), estimation_mode="gradients", colocate_gradients_with_ops=True, - cov_devices=None, - inv_devices=None): + name="FisherEstimator"): """Create a FisherEstimator object. Args: - damping_fn: Function, accepts no arguments and returns damping value. 
variables: A list of the variables for which to estimate the Fisher. This must match the variables registered in layer_collection (if it is not None). cov_ema_decay: The decay factor used when calculating the covariance estimate moving averages. + damping: float. The damping factor used to stabilize training due to + errors in the local approximation with the Fisher information matrix, + and to regularize the update direction by making it closer to the + gradient. (Higher damping means the update looks more like a standard + gradient update - see Tikhonov regularization.) layer_collection: The layer collection object, which holds the fisher blocks, kronecker factors, and losses associated with the graph. + exps: List of floats or ints. These represent the different matrix + powers of the approximate Fisher that the FisherEstimator will be able + to multiply vectors by. If the user asks for a matrix power other + one of these (or 1, which is always supported), there will be a + failure. (Default: (-1,)) estimation_mode: The type of estimator to use for the Fishers. Can be 'gradients', 'empirical', 'curvature_prop', or 'exact'. (Default: 'gradients'). 'gradients' is the basic estimation approach @@ -121,19 +138,15 @@ class FisherEstimator(object): equal to the output dimension, roughly speaking. colocate_gradients_with_ops: Whether we should request gradients be colocated with their respective ops. (Default: True) - cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance - computations will be placed on these devices in a round-robin fashion. - Can be None, which means that no devices are specified. - inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion - computations will be placed on these devices in a round-robin fashion. - Can be None, which means that no devices are specified. - + name: A string. A name given to this estimator, which is added to the + variable scope when constructing variables and ops. 
+ (Default: "FisherEstimator") Raises: ValueError: If no losses have been registered with layer_collection. """ - self._damping_fn = damping_fn - self._cov_ema_decay = cov_ema_decay self._variables = variables + self._cov_ema_decay = cov_ema_decay + self._damping = damping self._estimation_mode = estimation_mode self._layers = layer_collection self._layers.create_subgraph() @@ -146,30 +159,13 @@ class FisherEstimator(object): } self._colocate_gradients_with_ops = colocate_gradients_with_ops - # TODO(b/70674513): Factor device placement outside of this class. - self._cov_device_context_generator = _DeviceContextGenerator(cov_devices) - if inv_devices == cov_devices: - self._inv_device_context_generator = self._cov_device_context_generator - else: - self._inv_device_context_generator = _DeviceContextGenerator(inv_devices) + self._made_vars = False + self._exps = exps - self._instantiate_factors() - - self.cov_update_thunks = [ - self._create_cov_update_thunk(factor) - for factor in self._layers.get_factors() - ] - self.cov_update_ops = [thunk() for thunk in self.cov_update_thunks] - self.cov_update_op = control_flow_ops.group( - self.cov_update_ops, name="cov_update_op") + self._name = name - self.inv_update_thunks = [ - self._create_inv_update_thunk(factor) - for factor in self._layers.get_factors() - ] - self.inv_update_ops = [thunk() for thunk in self.inv_update_thunks] - self.inv_update_op = control_flow_ops.group( - self.inv_update_ops, name="inv_update_op") + self._instantiate_factors() + self._register_matrix_functions() @property def variables(self): @@ -177,7 +173,21 @@ class FisherEstimator(object): @property def damping(self): - return self._damping_fn() + return self._damping + + @property + def blocks(self): + """All registered FisherBlocks.""" + return self._layers.get_blocks() + + @property + def factors(self): + """All registered FisherFactors.""" + return self._layers.get_factors() + + @property + def name(self): + return self._name def 
_apply_transformation(self, vecs_and_vars, transform): """Applies an block-wise transformation to the corresponding vectors. @@ -212,9 +222,7 @@ class FisherEstimator(object): A list of (transformed vector, var) pairs in the same order as vecs_and_vars. """ - - return self._apply_transformation(vecs_and_vars, - lambda fb, vec: fb.multiply_inverse(vec)) + return self.multiply_matpower(-1, vecs_and_vars) def multiply(self, vecs_and_vars): """Multiplies the vectors by the corresponding (damped) blocks. @@ -226,9 +234,22 @@ class FisherEstimator(object): A list of (transformed vector, var) pairs in the same order as vecs_and_vars. """ + return self.multiply_matpower(1, vecs_and_vars) + + def multiply_matpower(self, exp, vecs_and_vars): + """Multiplies the vecs by the corresponding matrix powers of the blocks. - return self._apply_transformation(vecs_and_vars, - lambda fb, vec: fb.multiply(vec)) + Args: + exp: A float representing the power to raise the blocks by before + multiplying it by the vector. + vecs_and_vars: List of (vector, variable) pairs. + + Returns: + A list of (transformed vector, var) pairs in the same order as + vecs_and_vars. + """ + fcn = lambda fb, vec: fb.multiply_matpower(vec, exp) + return self._apply_transformation(vecs_and_vars, fcn) def _instantiate_factors(self): """Instantiates FisherFactors' variables. @@ -236,9 +257,9 @@ class FisherEstimator(object): Raises: ValueError: If estimation_mode was improperly specified at construction. """ - fisher_blocks_list = self._layers.get_blocks() + blocks = self.blocks tensors_to_compute_grads = [ - fb.tensors_to_compute_grads() for fb in fisher_blocks_list + block.tensors_to_compute_grads() for block in blocks ] try: @@ -248,45 +269,275 @@ class FisherEstimator(object): raise ValueError("Unrecognized value {} for estimation_mode.".format( self._estimation_mode)) - # TODO(b/68033310): This loop round-robins the "concat" operations which - # gather the inputs for the cov_updates. 
In future, we might do these - # computations locally then communicate the results, which would require a - # modification to this code. - for grads_list, fb in zip(grads_lists, fisher_blocks_list): - with self._cov_device_context_generator(): - fb.instantiate_factors(grads_list, self.damping) + for grads_list, block in zip(grads_lists, blocks): + block.instantiate_factors(grads_list, self.damping) + + def _check_vars_unmade_and_set_made_flag(self): + if self._made_vars: + raise Exception("Already made variables.") + self._made_vars = True + + def made_vars(self): + return self._made_vars + + def _register_matrix_functions(self): + for exp in self._exps: + for block in self.blocks: + block.register_matpower(exp) + + def make_ops_and_vars(self, scope=None): + """Make ops and vars with no specific device placement. + + See make_ops_and_vars_round_robin for further details. + + Args: + scope: A string or None. If None it will be set to the name of this + estimator (given by the name property). All variables will be created, + and all ops will execute, inside of a variable scope of the given + name. (Default: None) + Returns: + cov_update_ops: List of ops that compute the cov updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + cov_update_op: cov_update_ops grouped into a single op. + inv_update_ops: List of ops that compute the inv updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + inv_update_op: inv_update_ops grouped into a single op. + cov_update_thunks: Thunks that make the ops in cov_update_ops. + inv_update_thunks: Thunks that make the ops in inv_update_ops. + """ + return self.make_ops_and_vars_round_robin(scope=scope) + + # TODO(b/70674513): Factor device placement outside of this class. + def make_ops_and_vars_round_robin(self, scope=None, cov_devices=None, + inv_devices=None): + """Make ops and vars with a round-robin device placement strategy. 
+ + For each factor, all of that factor's cov variables and their associated + update ops will be placed on a particular device. A new device is chosen + for each factor by cycling through list of devices in the cov_devices + argument. If cov_devices is None then no explicit device placement occurs. + + An analogous strategy is followed for inverse update ops, with the list of + devices being given by the inv_devices argument. + + Inverse variables on the other hand are not placed on any specific device + (they will just use the current the device placement context, whatever + that happens to be). The idea is that the inverse variable belong where + they will be accessed most often, which is the device that actually applies + the preconditioner to the gradient. The user will be responsible for setting + the device context for this. + + Args: + scope: A string or None. If None it will be set to the name of this + estimator (given by the name property). All variables will be created, + and all ops will execute, inside of a variable scope of the given + name. (Default: None) + cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + + Returns: + cov_update_ops: List of ops that compute the cov updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + cov_update_op: cov_update_ops grouped into a single op. + inv_update_ops: List of ops that compute the inv updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + inv_update_op: inv_update_ops grouped into a single op. + cov_update_thunks: Thunks that make the ops in cov_update_ops. 
+ inv_update_thunks: Thunks that make the ops in inv_update_ops. + """ + (cov_update_thunks, + inv_update_thunks) = self.make_vars_and_create_op_thunks_round_robin( + scope=scope, + cov_devices=cov_devices, + inv_devices=inv_devices) + cov_update_ops = [thunk() for thunk in cov_update_thunks] + inv_update_ops = [thunk() for thunk in inv_update_thunks] + + scope = self.name if scope is None else scope + with variable_scope.variable_scope(scope): + cov_update_op = control_flow_ops.group(cov_update_ops, + name="cov_update_op") + inv_update_op = control_flow_ops.group(inv_update_ops, + name="inv_update_op") + + return (cov_update_ops, cov_update_op, inv_update_ops, inv_update_op, + cov_update_thunks, inv_update_thunks) + + def make_vars_and_create_op_thunks_round_robin(self, + scope=None, + cov_devices=None, + inv_devices=None): + """Make vars and create op thunks w/ a round-robin device placement strat. + + For each factor, all of that factor's cov variables and their associated + update ops will be placed on a particular device. A new device is chosen + for each factor by cycling through list of devices in the cov_devices + argument. If cov_devices is None then no explicit device placement occurs. + + An analogous strategy is followed for inverse update ops, with the list of + devices being given by the inv_devices argument. + + Inverse variables on the other hand are not placed on any specific device + (they will just use the current the device placement context, whatever + that happens to be). The idea is that the inverse variable belong where + they will be accessed most often, which is the device that actually applies + the preconditioner to the gradient. The user will be responsible for setting + the device context for this. + + Args: + scope: A string or None. If None it will be set to the name of this + estimator (given by the name property). All variables will be created, + and all thunks will execute, inside of a variable scope of the given + name. 
(Default: None) + cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + Returns: + cov_update_thunks: List of cov update thunks. Corresponds one-to-one with + the list of factors given by the "factors" property. + inv_update_thunks: List of inv update thunks. Corresponds one-to-one with + the list of factors given by the "factors" property. + """ + + (cov_variable_thunks_raw, cov_update_thunks_raw, inv_variable_thunks_raw, + inv_update_thunks_raw) = self.create_ops_and_vars_thunks(scope=scope) + + if cov_devices: + cov_update_thunks = [] + for cov_variable_thunk, cov_update_thunk, device in zip( + cov_variable_thunks_raw, cov_update_thunks_raw, + itertools.cycle(cov_devices)): + with tf_ops.device(device): + cov_variable_thunk() + cov_update_thunks.append(_make_thunk_on_device(cov_update_thunk, + device)) + else: + for cov_variable_thunk in cov_variable_thunks_raw: + cov_variable_thunk() + cov_update_thunks = cov_update_thunks_raw + + for inv_variable_thunk in inv_variable_thunks_raw: + inv_variable_thunk() + + if inv_devices: + inv_update_thunks = [] + for inv_update_thunk, device in zip(inv_update_thunks_raw, + itertools.cycle(inv_devices)): + inv_update_thunks.append(_make_thunk_on_device(inv_update_thunk, + device)) + else: + inv_update_thunks = inv_update_thunks_raw + + return cov_update_thunks, inv_update_thunks + + def create_ops_and_vars_thunks(self, scope=None): + """Create thunks that make the ops and vars on demand. + + This function returns 4 lists of thunks: cov_variable_thunks, + cov_update_thunks, inv_variable_thunks, and inv_update_thunks. 
+ + The length of each list is the number of factors and the i-th element of + each list corresponds to the i-th factor (given by the "factors" property). + + Note that the execution of these thunks must happen in a certain + partial order. The i-th element of cov_variable_thunks must execute + before the i-th element of cov_update_thunks (and also the i-th element + of inv_update_thunks). Similarly, the i-th element of inv_variable_thunks + must execute before the i-th element of inv_update_thunks. + + TL;DR (oversimplified): Execute the thunks according to the order that + they are returned. - def _create_cov_update_thunk(self, factor): + Args: + scope: A string or None. If None it will be set to the name of this + estimator (given by the name property). All thunks will execute inside + of a variable scope of the given name. (Default: None) + Returns: + cov_variable_thunks: A list of thunks that make the cov variables. + cov_update_thunks: A list of thunks that make the cov update ops. + inv_variable_thunks: A list of thunks that make the inv variables. + inv_update_thunks: A list of thunks that make the inv update ops. 
+ """ + self._check_vars_unmade_and_set_made_flag() + + scope = self.name if scope is None else scope + + cov_variable_thunks = [ + self._create_cov_variable_thunk(factor, scope) + for factor in self.factors + ] + cov_update_thunks = [ + self._create_cov_update_thunk(factor, scope) for factor in self.factors + ] + inv_variable_thunks = [ + self._create_inv_variable_thunk(factor, scope) + for factor in self.factors + ] + inv_update_thunks = [ + self._create_inv_update_thunk(factor, scope) for factor in self.factors + ] + + return (cov_variable_thunks, cov_update_thunks, + inv_variable_thunks, inv_update_thunks) + + def _create_cov_variable_thunk(self, factor, scope): + """Constructs a covariance variable thunk for a single FisherFactor.""" + + def thunk(): + with variable_scope.variable_scope(scope): + return factor.instantiate_cov_variables() + + return thunk + + def _create_cov_update_thunk(self, factor, scope): """Constructs a covariance update thunk for a single FisherFactor.""" def thunk(): - with tf_ops.name_scope( - "create_cov_update_thunk", values=[self._cov_ema_decay]): + with variable_scope.variable_scope(scope): return factor.make_covariance_update_op(self._cov_ema_decay) return thunk - def _create_inv_update_thunk(self, factor): + def _create_inv_variable_thunk(self, factor, scope): + """Constructs a inverse variable thunk for a single FisherFactor.""" + + def thunk(): + with variable_scope.variable_scope(scope): + return factor.instantiate_inv_variables() + + return thunk + + def _create_inv_update_thunk(self, factor, scope): """Constructs an inverse update thunk for a single FisherFactor.""" def thunk(): - with tf_ops.name_scope("create_inv_update_thunk"): - with self._inv_device_context_generator(): - return control_flow_ops.group(factor.make_inverse_update_ops()) + with variable_scope.variable_scope(scope): + return control_flow_ops.group(factor.make_inverse_update_ops()) return thunk def _get_grads_lists_gradients(self, tensors): + # Passing in a 
list of loss values is better than passing in the sum as + # the latter creates unnessesary ops on the default device grads_flat = gradients_impl.gradients( - self._layers.total_sampled_loss(), + self._layers.eval_losses_on_samples(), nest.flatten(tensors), colocate_gradients_with_ops=self._colocate_gradients_with_ops) grads_all = nest.pack_sequence_as(tensors, grads_flat) return tuple((grad,) for grad in grads_all) def _get_grads_lists_empirical(self, tensors): + # Passing in a list of loss values is better than passing in the sum as + # the latter creates unnessesary ops on the default device grads_flat = gradients_impl.gradients( - self._layers.total_loss(), + self._layers.eval_losses(), nest.flatten(tensors), colocate_gradients_with_ops=self._colocate_gradients_with_ops) grads_all = nest.pack_sequence_as(tensors, grads_flat) @@ -295,9 +546,10 @@ class FisherEstimator(object): def _get_transformed_random_signs(self): transformed_random_signs = [] for loss in self._layers.losses: - transformed_random_signs.append( - loss.multiply_fisher_factor( - utils.generate_random_signs(loss.fisher_factor_inner_shape))) + with tf_ops.colocate_with(self._layers.loss_colocation_ops[loss]): + transformed_random_signs.append( + loss.multiply_fisher_factor( + utils.generate_random_signs(loss.fisher_factor_inner_shape))) return transformed_random_signs def _get_grads_lists_curvature_prop(self, tensors): @@ -316,13 +568,14 @@ class FisherEstimator(object): # Loop over all coordinates of all losses. 
grads_all = [] for loss in self._layers.losses: - for index in np.ndindex(*loss.fisher_factor_inner_static_shape[1:]): - transformed_one_hot = loss.multiply_fisher_factor_replicated_one_hot( - index) - grads_flat = gradients_impl.gradients( - loss.inputs, - nest.flatten(tensors), - grad_ys=transformed_one_hot, - colocate_gradients_with_ops=self._colocate_gradients_with_ops) - grads_all.append(nest.pack_sequence_as(tensors, grads_flat)) + with tf_ops.colocate_with(self._layers.loss_colocation_ops[loss]): + for index in np.ndindex(*loss.fisher_factor_inner_static_shape[1:]): + transformed_one_hot = loss.multiply_fisher_factor_replicated_one_hot( + index) + grads_flat = gradients_impl.gradients( + loss.inputs, + nest.flatten(tensors), + grad_ys=transformed_one_hot, + colocate_gradients_with_ops=self._colocate_gradients_with_ops) + grads_all.append(nest.pack_sequence_as(tensors, grads_flat)) return zip(*grads_all) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index cf38d28b43..521a98866b 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -121,12 +121,44 @@ def compute_pi_adjusted_damping(left_cov, right_cov, damping): return (damping, damping) +class PackagedFunc(object): + """A Python thunk with a stable ID. + + Enables stable names for lambdas. + """ + + def __init__(self, func, func_id): + """Initializes PackagedFunc. + + Args: + func: a zero-arg Python function. + func_id: a hashable, function that produces a hashable, or a list/tuple + thereof. 
+ """ + self._func = func + func_id = func_id if isinstance(func_id, (tuple, list)) else (func_id,) + self._func_id = func_id + + def __call__(self): + return self._func() + + @property + def func_id(self): + """A hashable identifier for this function.""" + return tuple(elt() if callable(elt) else elt for elt in self._func_id) + + +def _package_func(func, func_id): + return PackagedFunc(func, func_id) + + @six.add_metaclass(abc.ABCMeta) class FisherBlock(object): """Abstract base class for objects modeling approximate Fisher matrix blocks. - Subclasses must implement multiply_inverse(), instantiate_factors(), and - tensors_to_compute_grads() methods. + Subclasses must implement register_matpower, multiply_matpower, + instantiate_factors, tensors_to_compute_grads, and num_registered_minibatches + methods. """ def __init__(self, layer_collection): @@ -145,6 +177,32 @@ class FisherBlock(object): pass @abc.abstractmethod + def register_matpower(self, exp): + """Registers a matrix power to be computed by the block. + + Args: + exp: A float representing the power to raise the block by. + """ + pass + + def register_inverse(self): + """Registers a matrix inverse to be computed by the block.""" + self.register_matpower(-1) + + @abc.abstractmethod + def multiply_matpower(self, vector, exp): + """Multiplies the vector by the (damped) matrix-power of the block. + + Args: + vector: The vector (a Tensor or tuple of Tensors) to be multiplied. + exp: A float representing the power to raise the block by before + multiplying it by the vector. + + Returns: + The vector left-multiplied by the (damped) matrix-power of the block. + """ + pass + def multiply_inverse(self, vector): """Multiplies the vector by the (damped) inverse of the block. @@ -154,9 +212,8 @@ class FisherBlock(object): Returns: The vector left-multiplied by the (damped) inverse of the block. 
""" - pass + return self.multiply_matpower(vector, -1) - @abc.abstractmethod def multiply(self, vector): """Multiplies the vector by the (damped) block. @@ -166,7 +223,7 @@ class FisherBlock(object): Returns: The vector left-multiplied by the (damped) block. """ - pass + return self.multiply_matpower(vector, 1) @abc.abstractmethod def tensors_to_compute_grads(self): @@ -207,21 +264,18 @@ class FullFB(FisherBlock): super(FullFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - self._damping = damping + self._damping_func = _package_func(lambda: damping, (damping,)) + self._factor = self._layer_collection.make_or_get_factor( fisher_factors.FullFactor, (grads_list, self._batch_size)) - self._factor.register_damped_inverse(damping) - def multiply_inverse(self, vector): - vector_flat = utils.tensors_to_column(vector) - out_flat = self._factor.left_multiply_inverse( - vector_flat, self._damping) - return utils.column_to_tensors(vector, out_flat) + def register_matpower(self, exp): + self._factor.register_matpower(exp, self._damping_func) - def multiply(self, vector): + def multiply_matpower(self, vector, exp): vector_flat = utils.tensors_to_column(vector) - out_flat = self._factor.left_multiply( - vector_flat, self._damping) + out_flat = self._factor.left_multiply_matpower( + vector_flat, exp, self._damping_func) return utils.column_to_tensors(vector, out_flat) def full_fisher_block(self): @@ -271,22 +325,20 @@ class NaiveDiagonalFB(FisherBlock): super(NaiveDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - self._damping = damping + self._damping_func = _package_func(lambda: damping, (damping,)) + self._factor = self._layer_collection.make_or_get_factor( fisher_factors.NaiveDiagonalFactor, (grads_list, self._batch_size)) - def multiply_inverse(self, vector): - vector_flat = utils.tensors_to_column(vector) - print("vector_flat: %s" % vector_flat) - out_flat = 
self._factor.left_multiply_inverse( - vector_flat, self._damping) - print("out_flat: %s" % out_flat) - return utils.column_to_tensors(vector, out_flat) + def register_matpower(self, exp): + # Not needed for this. Matrix powers are computed on demand in the + # diagonal case + pass - def multiply(self, vector): + def multiply_matpower(self, vector, exp): vector_flat = utils.tensors_to_column(vector) - out_flat = self._factor.left_multiply( - vector_flat, self._damping) + out_flat = self._factor.left_multiply_matpower( + vector_flat, exp, self._damping_func) return utils.column_to_tensors(vector, out_flat) def full_fisher_block(self): @@ -312,7 +364,89 @@ class NaiveDiagonalFB(FisherBlock): return math_ops.reduce_sum(self._batch_sizes) -class FullyConnectedDiagonalFB(FisherBlock): +class InputOutputMultiMinibatch(object): + """Mix-in class for blocks with inputs & outputs and multiple mini-batches.""" + + def __init__(self, *args, **kwargs): + self.__inputs = [] + self.__outputs = [] + super(InputOutputMultiMinibatch, self).__init__(*args, **kwargs) + + def tensors_to_compute_grads(self): + """Tensors to compute derivative of loss with respect to.""" + return self._outputs + + def register_additional_minibatch(self, inputs, outputs): + self._inputs.append(inputs) + self._outputs.append(outputs) + + @property + def num_registered_minibatches(self): + result = len(self._inputs) + assert result == len(self._outputs) + return result + + @property + def _inputs(self): + return self.__inputs + + @property + def _outputs(self): + return self.__outputs + + def _package_minibatches(self, grads_list): + """Constructs PartitionedTensor for inputs, grads_list. + + The purpose of this method is to package up the towers/minibatch dimension + of these arrays into PartitionedTensor objects. + + Args: + grads_list: 2-D list of Tensors. First index is for source, second + index for tower. + + Returns: + inputs: PartitionedTensor. 
+ grads_list: Tuple of PartitionedTensors, one per source. + """ + inputs = utils.PartitionedTensor(self._inputs) + grads_list = tuple(utils.PartitionedTensor(grads) for grads in grads_list) + + return inputs, grads_list + + def _package_minibatches_multi(self, grads_list): + """Constructs PartitionedTensors for inputs, grads_list. + + The purpose of this method is to package up the towers/minibatch dimension + of these arrays into PartitionedTensor objects. + + This version of this function is for use with FisherBlocks that deal with + multiple uses or time-steps. One PartitionedTensor is created for each + use/time-step. + + Args: + grads_list: 3-D tuple of Tensors. First index is for source, second + index is for tower, third is for use/time-step. + + Returns: + inputs: A tuple of PartitionedTensor's, one per use/time-step. + grads_list: 2-D tuple of PartitionedTensors. First index is for source, + second is for use/time-step. + """ + # self._inputs is a 2-D tuple. First index is tower/mini-batch, second is + # use/time-step. + inputs = self._inputs + num_uses = len(inputs[0]) + assert all(len(input_) == num_uses for input_ in inputs) + assert all(len(grad) == num_uses for grads in grads_list for grad in grads) + + inputs = tuple(utils.PartitionedTensor(input_) for input_ in zip(*inputs)) + grads_list = tuple(tuple(utils.PartitionedTensor(grad) + for grad in zip(*grads)) for grads in grads_list) + + return inputs, grads_list + + +class FullyConnectedDiagonalFB(InputOutputMultiMinibatch, FisherBlock): """FisherBlock for fully-connected (dense) layers using a diagonal approx. Estimates the Fisher Information matrix's diagonal entries for a fully @@ -344,79 +478,45 @@ class FullyConnectedDiagonalFB(FisherBlock): has_bias: Whether the component Kronecker factors have an additive bias. 
(Default: False) """ - self._inputs = [] - self._outputs = [] self._has_bias = has_bias super(FullyConnectedDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - inputs = _concat_along_batch_dim(self._inputs) - grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list) + inputs, grads_list = self._package_minibatches(grads_list) - self._damping = damping self._factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedDiagonalFactor, (inputs, grads_list, self._has_bias)) - def multiply_inverse(self, vector): - """Approximate damped inverse Fisher-vector product. - - Args: - vector: Tensor or 2-tuple of Tensors. if self._has_bias, Tensor of shape - [input_size, output_size] corresponding to layer's weights. If not, a - 2-tuple of the former and a Tensor of shape [output_size] corresponding - to the layer's bias. + self._damping_func = _package_func(lambda: damping, (damping,)) - Returns: - Tensor of the same shape, corresponding to the inverse Fisher-vector - product. - """ - reshaped_vec = utils.layer_params_to_mat2d(vector) - reshaped_out = self._factor.left_multiply_inverse( - reshaped_vec, self._damping) - return utils.mat2d_to_layer_params(vector, reshaped_out) + def register_matpower(self, exp): + # Not needed for this. Matrix powers are computed on demand in the + # diagonal case + pass - def multiply(self, vector): - """Approximate damped Fisher-vector product. + def multiply_matpower(self, vector, exp): + """Multiplies the vector by the (damped) matrix-power of the block. Args: vector: Tensor or 2-tuple of Tensors. if self._has_bias, Tensor of shape [input_size, output_size] corresponding to layer's weights. If not, a 2-tuple of the former and a Tensor of shape [output_size] corresponding to the layer's bias. + exp: A scalar representing the power to raise the block before multiplying + it by the vector. 
Returns: - Tensor of the same shape, corresponding to the Fisher-vector product. + The vector left-multiplied by the (damped) matrix-power of the block. """ reshaped_vec = utils.layer_params_to_mat2d(vector) - reshaped_out = self._factor.left_multiply( - reshaped_vec, self._damping) + reshaped_out = self._factor.left_multiply_matpower( + reshaped_vec, exp, self._damping_func) return utils.mat2d_to_layer_params(vector, reshaped_out) - def tensors_to_compute_grads(self): - """Tensors to compute derivative of loss with respect to.""" - return self._outputs - def register_additional_minibatch(self, inputs, outputs): - """Registers an additional minibatch to the FisherBlock. - - Args: - inputs: Tensor of shape [batch_size, input_size]. Inputs to the - matrix-multiply. - outputs: Tensor of shape [batch_size, output_size]. Layer preactivations. - """ - self._inputs.append(inputs) - self._outputs.append(outputs) - - @property - def num_registered_minibatches(self): - result = len(self._inputs) - assert result == len(self._outputs) - return result - - -class ConvDiagonalFB(FisherBlock): +class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): """FisherBlock for convolutional layers using a diagonal approx. Estimates the Fisher Information matrix's diagonal entries for a convolutional @@ -454,8 +554,6 @@ class ConvDiagonalFB(FisherBlock): strides: The stride size in this layer (1-D Tensor of length 4). padding: The padding in this layer (e.g. "SAME"). """ - self._inputs = [] - self._outputs = [] self._strides = tuple(strides) if isinstance(strides, list) else strides self._padding = padding self._has_bias = isinstance(params, (tuple, list)) @@ -466,54 +564,37 @@ class ConvDiagonalFB(FisherBlock): super(ConvDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - # Concatenate inputs, grads_list into single Tensors. 
- inputs = _concat_along_batch_dim(self._inputs) - grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list) - # Infer number of locations upon which convolution is applied. - inputs_shape = tuple(inputs.shape.as_list()) + inputs_shape = tuple(self._inputs[0].shape.as_list()) self._num_locations = ( inputs_shape[1] * inputs_shape[2] // (self._strides[1] * self._strides[2])) - self._damping = (self._num_locations - * normalize_damping(damping, self._num_locations)) + inputs, grads_list = self._package_minibatches(grads_list) self._factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvDiagonalFactor, - (inputs, grads_list, self._filter_shape, self._strides, self._padding, - self._has_bias)) + (inputs, grads_list, self._filter_shape, self._strides, + self._padding, self._has_bias)) - def multiply_inverse(self, vector): - reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = self._factor.left_multiply_inverse( - reshaped_vect, self._damping) - return utils.mat2d_to_layer_params(vector, reshaped_out) + def damping_func(): + return self._num_locations * normalize_damping(damping, + self._num_locations) - def multiply(self, vector): - reshaped_vect = utils.layer_params_to_mat2d(vector) - reshaped_out = self._factor.left_multiply( - reshaped_vect, self._damping) - return utils.mat2d_to_layer_params(vector, reshaped_out) + damping_id = (self._num_locations, "mult", "normalize_damping", damping, + self._num_locations) + self._damping_func = _package_func(damping_func, damping_id) - def tensors_to_compute_grads(self): - return self._outputs - - def register_additional_minibatch(self, inputs, outputs): - """Registers an additional minibatch to the FisherBlock. - - Args: - inputs: Tensor of shape [batch_size, height, width, input_size]. Inputs to - the convolution. - outputs: Tensor of shape [batch_size, height, width, output_size]. Layer - preactivations. 
- """ - self._inputs.append(inputs) - self._outputs.append(outputs) + def register_matpower(self, exp): + # Not needed for this. Matrix powers are computed on demand in the + # diagonal case + pass - @property - def num_registered_minibatches(self): - return len(self._inputs) + def multiply_matpower(self, vector, exp): + reshaped_vect = utils.layer_params_to_mat2d(vector) + reshaped_out = self._factor.left_multiply_matpower( + reshaped_vect, exp, self._damping_func) + return utils.mat2d_to_layer_params(vector, reshaped_out) class KroneckerProductFB(FisherBlock): @@ -523,22 +604,40 @@ class KroneckerProductFB(FisherBlock): output factors. """ - def _register_damped_input_and_output_inverses(self, damping): - """Registers damped inverses for both the input and output factors. - - Sets the instance members _input_damping and _output_damping. Requires the - instance members _input_factor and _output_factor. + def __init__(self, layer_collection): + super(KroneckerProductFB, self).__init__(layer_collection) + + def _setup_damping(self, damping, normalization=None): + """Makes functions that compute the damping values for both factors.""" + def compute_damping(): + if normalization is not None: + maybe_normalized_damping = normalize_damping(damping, normalization) + else: + maybe_normalized_damping = damping + + return compute_pi_adjusted_damping(self._input_factor.get_cov(), + self._output_factor.get_cov(), + maybe_normalized_damping**0.5) + + if normalization is not None: + damping_id = ("compute_pi_adjusted_damping", + "cov", self._input_factor.name, + "cov", self._output_factor.name, + "normalize_damping", damping, normalization, "power", 0.5) + else: + damping_id = ("compute_pi_adjusted_damping", + "cov", self._input_factor.name, + "cov", self._output_factor.name, + damping, "power", 0.5) - Args: - damping: The base damping factor (float or Tensor) for the damped inverse. 
- """ - self._input_damping, self._output_damping = compute_pi_adjusted_damping( - self._input_factor.get_cov(), - self._output_factor.get_cov(), - damping**0.5) + self._input_damping_func = _package_func(lambda: compute_damping()[0], + damping_id + ("ref", 0)) + self._output_damping_func = _package_func(lambda: compute_damping()[1], + damping_id + ("ref", 1)) - self._input_factor.register_damped_inverse(self._input_damping) - self._output_factor.register_damped_inverse(self._output_damping) + def register_matpower(self, exp): + self._input_factor.register_matpower(exp, self._input_damping_func) + self._output_factor.register_matpower(exp, self._output_damping_func) @property def _renorm_coeff(self): @@ -552,28 +651,15 @@ class KroneckerProductFB(FisherBlock): """ return 1.0 - def multiply_inverse(self, vector): + def multiply_matpower(self, vector, exp): reshaped_vector = utils.layer_params_to_mat2d(vector) - reshaped_out = self._output_factor.right_multiply_inverse( - reshaped_vector, - self._output_damping) - reshaped_out = self._input_factor.left_multiply_inverse( - reshaped_out, self._input_damping) - if self._renorm_coeff != 1.0: - reshaped_out /= math_ops.cast( - self._renorm_coeff, dtype=reshaped_out.dtype) - return utils.mat2d_to_layer_params(vector, reshaped_out) - - def multiply(self, vector): - reshaped_vector = utils.layer_params_to_mat2d(vector) - reshaped_out = self._output_factor.right_multiply( - reshaped_vector, - self._output_damping) - reshaped_out = self._input_factor.left_multiply( - reshaped_out, self._input_damping) + reshaped_out = self._output_factor.right_multiply_matpower( + reshaped_vector, exp, self._output_damping_func) + reshaped_out = self._input_factor.left_multiply_matpower( + reshaped_out, exp, self._input_damping_func) if self._renorm_coeff != 1.0: reshaped_out *= math_ops.cast( - self._renorm_coeff, dtype=reshaped_out.dtype) + self._renorm_coeff**exp, dtype=reshaped_out.dtype) return utils.mat2d_to_layer_params(vector, 
reshaped_out) def full_fisher_block(self): @@ -590,7 +676,7 @@ class KroneckerProductFB(FisherBlock): right_factor) -class EmbeddingKFACFB(KroneckerProductFB): +class EmbeddingKFACFB(InputOutputMultiMinibatch, KroneckerProductFB): """K-FAC FisherBlock for embedding layers. This FisherBlock is similar to EmbeddingKFACFB, except that its @@ -608,8 +694,6 @@ class EmbeddingKFACFB(KroneckerProductFB): Fisher information matrix to which this FisherBlock belongs. vocab_size: int. Size of vocabulary for this embedding layer. """ - self._inputs = [] - self._outputs = [] self._vocab_size = vocab_size super(EmbeddingKFACFB, self).__init__(layer_collection) @@ -624,41 +708,18 @@ class EmbeddingKFACFB(KroneckerProductFB): damping: 0-D Tensor or float. 'damping' * identity is approximately added to this FisherBlock's Fisher approximation. """ - # TODO(b/68033310): Validate which of, - # (1) summing on a single device (as below), or - # (2) on each device in isolation and aggregating - # is faster. - inputs = _concat_along_batch_dim(self._inputs) - grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list) + inputs, grads_list = self._package_minibatches(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( # fisher_factors.EmbeddingInputKroneckerFactor, # - ((inputs,), self._vocab_size)) + (inputs, self._vocab_size)) self._output_factor = self._layer_collection.make_or_get_factor( # fisher_factors.FullyConnectedKroneckerFactor, # (grads_list,)) - self._register_damped_input_and_output_inverses(damping) - - def tensors_to_compute_grads(self): - return self._outputs + self._setup_damping(damping) - def register_additional_minibatch(self, inputs, outputs): - """Registers an additional minibatch to the FisherBlock. - Args: - inputs: Tensor of shape [batch_size, input_size]. Inputs to the - matrix-multiply. - outputs: Tensor of shape [batch_size, output_size]. Layer preactivations. 
- """ - self._inputs.append(inputs) - self._outputs.append(outputs) - - @property - def num_registered_minibatches(self): - return len(self._inputs) - - -class FullyConnectedKFACBasicFB(KroneckerProductFB): +class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): """K-FAC FisherBlock for fully-connected (dense) layers. This uses the Kronecker-factorized approximation from the original @@ -674,8 +735,6 @@ class FullyConnectedKFACBasicFB(KroneckerProductFB): has_bias: Whether the component Kronecker factors have an additive bias. (Default: False) """ - self._inputs = [] - self._outputs = [] self._has_bias = has_bias super(FullyConnectedKFACBasicFB, self).__init__(layer_collection) @@ -690,12 +749,7 @@ class FullyConnectedKFACBasicFB(KroneckerProductFB): damping: 0-D Tensor or float. 'damping' * identity is approximately added to this FisherBlock's Fisher approximation. """ - # TODO(b/68033310): Validate which of, - # (1) summing on a single device (as below), or - # (2) on each device in isolation and aggregating - # is faster. - inputs = _concat_along_batch_dim(self._inputs) - grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list) + inputs, grads_list = self._package_minibatches(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( # fisher_factors.FullyConnectedKroneckerFactor, # @@ -703,28 +757,10 @@ class FullyConnectedKFACBasicFB(KroneckerProductFB): self._output_factor = self._layer_collection.make_or_get_factor( # fisher_factors.FullyConnectedKroneckerFactor, # (grads_list,)) - self._register_damped_input_and_output_inverses(damping) - - def tensors_to_compute_grads(self): - return self._outputs + self._setup_damping(damping) - def register_additional_minibatch(self, inputs, outputs): - """Registers an additional minibatch to the FisherBlock. - - Args: - inputs: Tensor of shape [batch_size, input_size]. Inputs to the - matrix-multiply. - outputs: Tensor of shape [batch_size, output_size]. 
Layer preactivations. - """ - self._inputs.append(inputs) - self._outputs.append(outputs) - - @property - def num_registered_minibatches(self): - return len(self._inputs) - -class ConvKFCBasicFB(KroneckerProductFB): +class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): """FisherBlock for 2D convolutional layers using the basic KFC approx. Estimates the Fisher Information matrix's blog for a convolutional @@ -761,8 +797,6 @@ class ConvKFCBasicFB(KroneckerProductFB): strides: The stride size in this layer (1-D Tensor of length 4). padding: The padding in this layer (1-D of Tensor length 4). """ - self._inputs = [] - self._outputs = [] self._strides = tuple(strides) if isinstance(strides, list) else strides self._padding = padding self._has_bias = isinstance(params, (tuple, list)) @@ -773,17 +807,12 @@ class ConvKFCBasicFB(KroneckerProductFB): super(ConvKFCBasicFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - # TODO(b/68033310): Validate which of, - # (1) summing on a single device (as below), or - # (2) on each device in isolation and aggregating - # is faster. - inputs = _concat_along_batch_dim(self._inputs) - grads_list = tuple(_concat_along_batch_dim(grads) for grads in grads_list) - # Infer number of locations upon which convolution is applied. 
- self._num_locations = num_conv_locations(inputs.shape.as_list(), + self._num_locations = num_conv_locations(self._inputs[0].shape.as_list(), self._strides) + inputs, grads_list = self._package_minibatches(grads_list) + self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvInputKroneckerFactor, (inputs, self._filter_shape, self._strides, self._padding, @@ -791,60 +820,12 @@ class ConvKFCBasicFB(KroneckerProductFB): self._output_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvOutputKroneckerFactor, (grads_list,)) - damping = normalize_damping(damping, self._num_locations) - self._register_damped_input_and_output_inverses(damping) - self._damping = damping + self._setup_damping(damping, normalization=self._num_locations) @property def _renorm_coeff(self): return self._num_locations - def tensors_to_compute_grads(self): - return self._outputs - - def register_additional_minibatch(self, inputs, outputs): - """Registers an additional minibatch to the FisherBlock. - - Args: - inputs: Tensor of shape [batch_size, height, width, input_size]. Inputs to - the convolution. - outputs: Tensor of shape [batch_size, height, width, output_size]. Layer - preactivations. - """ - self._inputs.append(inputs) - self._outputs.append(outputs) - - @property - def num_registered_minibatches(self): - return len(self._inputs) - - -def _concat_along_batch_dim(tensor_list): - """Concatenate tensors along batch (first) dimension. - - Args: - tensor_list: list of Tensors or list of tuples of Tensors. - - Returns: - Tensor or tuple of Tensors. - - Raises: - ValueError: If 'tensor_list' is empty. - - """ - if not tensor_list: - raise ValueError( - "Cannot concatenate Tensors if there are no Tensors to concatenate.") - - if isinstance(tensor_list[0], (tuple, list)): - # [(tensor1a, tensor1b), - # (tensor2a, tensor2b), ...] 
--> (tensor_a, tensor_b) - return tuple( - array_ops.concat(tensors, axis=0) for tensors in zip(*tensor_list)) - else: - # [tensor1, tensor2] --> tensor - return array_ops.concat(tensor_list, axis=0) - def num_conv_locations(input_shape, strides): """Returns the number of spatial locations a 2D Conv kernel is applied to. @@ -859,49 +840,35 @@ def num_conv_locations(input_shape, strides): return input_shape[1] * input_shape[2] // (strides[1] * strides[2]) -class FullyConnectedMultiIndepFB(KroneckerProductFB): +class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): """FisherBlock for fully-connected layers that share parameters. """ - def __init__(self, layer_collection, inputs, outputs, has_bias=False): + def __init__(self, layer_collection, has_bias=False): """Creates a FullyConnectedMultiIndepFB block. Args: layer_collection: LayerCollection instance. - inputs: list or tuple of Tensors. Each Tensor has shape [batch_size, - inputs_size]. - outputs: list or tuple of Tensors. Each Tensor has shape [batch_size, - outputs_size]. has_bias: bool. If True, estimates Fisher with respect to a bias parameter as well as the layer's parameters. """ - - assert len(inputs) == len(outputs) - # We need to make sure inputs and outputs are tuples and not lists so that - # they get hashed by layer_collection.make_or_get_factor properly. - self._inputs = tuple(inputs) - self._outputs = tuple(outputs) self._has_bias = has_bias - self._num_uses = len(inputs) super(FullyConnectedMultiIndepFB, self).__init__(layer_collection) - @property - def num_registered_minibatches(self): - # TODO(b/69411207): Add support for registering additional minibatches. 
- return 1 - def instantiate_factors(self, grads_list, damping): + self._num_uses = len(self._inputs[0]) + inputs, grads_list = self._package_minibatches_multi(grads_list) + self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedMultiKF, - ((self._inputs,), self._has_bias)) + ((inputs,), self._has_bias)) self._output_factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedMultiKF, (grads_list,)) - damping = normalize_damping(damping, self._num_uses) - self._register_damped_input_and_output_inverses(damping) + self._setup_damping(damping, normalization=self._num_uses) @property def _renorm_coeff(self): @@ -910,9 +877,6 @@ class FullyConnectedMultiIndepFB(KroneckerProductFB): def tensors_to_compute_grads(self): return self._outputs - def num_inputs(self): - return len(self._inputs) - class SeriesFBApproximation(enum.IntEnum): """See FullyConnectedSeriesFB.__init__ for description and usage.""" @@ -920,22 +884,20 @@ class SeriesFBApproximation(enum.IntEnum): option2 = 2 -class FullyConnectedSeriesFB(FisherBlock): +class FullyConnectedSeriesFB(InputOutputMultiMinibatch, FisherBlock): """FisherBlock for fully-connected layers that share parameters across time. See the following preprint for details: https://openreview.net/pdf?id=HyMTkQZAb See the end of the appendix of the paper for a pseudo-code of the - algorithm being implemented by multiply_inverse here. Note that we are + algorithm being implemented by multiply_matpower here. Note that we are using pre-computed versions of certain matrix-matrix products to speed things up. This is explicitly explained wherever it is done. """ def __init__(self, layer_collection, - inputs, - outputs, has_bias=False, option=SeriesFBApproximation.option2): """Constructs a new `FullyConnectedSeriesFB`. 
@@ -943,10 +905,6 @@ class FullyConnectedSeriesFB(FisherBlock): Args: layer_collection: The collection of all layers in the K-FAC approximate Fisher information matrix to which this FisherBlock belongs. - inputs: List of tensors of shape [batch_size, input_size]. - Inputs to the layer. - outputs: List of tensors of shape [batch_size, input_size]. - Outputs of the layer (before activations). has_bias: Whether the layer includes a bias parameter. option: A `SeriesFBApproximation` specifying the simplifying assumption to be used in this block. `option1` approximates the cross-covariance @@ -955,48 +913,61 @@ class FullyConnectedSeriesFB(FisherBlock): 3.5 of the paper for more details. """ - assert len(inputs) == len(outputs) - # We need to make sure inputs and outputs are tuples and not lists so that - # they get hashed by layer_collection.make_or_get_factor properly. - self._inputs = tuple(inputs) - self._outputs = tuple(outputs) self._has_bias = has_bias - self._num_timesteps = len(inputs) self._option = option super(FullyConnectedSeriesFB, self).__init__(layer_collection) - @property - def num_registered_minibatches(self): - # TODO(b/69411207): Add support for registering additional minibatches. 
- return 1 - def instantiate_factors(self, grads_list, damping): + self._num_timesteps = len(self._inputs[0]) + inputs, grads_list = self._package_minibatches_multi(grads_list) + self._input_factor = self._layer_collection.make_or_get_factor( - fisher_factors.FullyConnectedMultiKF, ((self._inputs,), self._has_bias)) + fisher_factors.FullyConnectedMultiKF, ((inputs,), self._has_bias)) + self._input_factor.register_cov_dt1() self._output_factor = self._layer_collection.make_or_get_factor( fisher_factors.FullyConnectedMultiKF, (grads_list,)) - - damping = normalize_damping(damping, self._num_timesteps) - self._damping_input, self._damping_output = compute_pi_adjusted_damping( - self._input_factor.get_cov(), - self._output_factor.get_cov(), - damping**0.5) + self._output_factor.register_cov_dt1() + + def compute_damping(): + normalized_damping = normalize_damping(damping, self._num_timesteps) + return compute_pi_adjusted_damping(self._input_factor.get_cov(), + self._output_factor.get_cov(), + normalized_damping**0.5) + + damping_id = ("compute_pi_adjusted_damping", + "cov", self._input_factor.name, + "cov", self._output_factor.name, + "normalize_damping", + damping, self._num_timesteps, "power", 0.5) + self._input_damping_func = _package_func(lambda: compute_damping()[0], + damping_id + ("ref", 0)) + self._output_damping_func = _package_func(lambda: compute_damping()[1], + damping_id + ("ref", 1)) + + def register_matpower(self, exp): + if exp != -1: + raise NotImplementedError("FullyConnectedSeriesFB only supports inverse" + "multiplications.") if self._option == SeriesFBApproximation.option1: - self._input_factor.register_option1quants(self._damping_input) - self._output_factor.register_option1quants(self._damping_output) + self._input_factor.register_option1quants(self._input_damping_func) + self._output_factor.register_option1quants(self._output_damping_func) elif self._option == SeriesFBApproximation.option2: - 
self._input_factor.register_option2quants(self._damping_input) - self._output_factor.register_option2quants(self._damping_output) + self._input_factor.register_option2quants(self._input_damping_func) + self._output_factor.register_option2quants(self._output_damping_func) else: raise ValueError( "Unrecognized FullyConnectedSeriesFB approximation: {}".format( self._option)) - def multiply_inverse(self, vector): + def multiply_matpower(self, vector, exp): + if exp != -1: + raise NotImplementedError("FullyConnectedSeriesFB only supports inverse" + "multiplications.") + # pylint: disable=invalid-name Z = utils.layer_params_to_mat2d(vector) @@ -1008,8 +979,10 @@ class FullyConnectedSeriesFB(FisherBlock): if self._option == SeriesFBApproximation.option1: # Note that L_A = A0^(-1/2) * U_A and L_G = G0^(-1/2) * U_G. - L_A, psi_A = self._input_factor.get_option1quants(self._damping_input) - L_G, psi_G = self._output_factor.get_option1quants(self._damping_output) + L_A, psi_A = self._input_factor.get_option1quants( + self._input_damping_func) + L_G, psi_G = self._output_factor.get_option1quants( + self._output_damping_func) def gamma(x): # We are assuming that each case has the same number of time-steps. @@ -1046,9 +1019,10 @@ class FullyConnectedSeriesFB(FisherBlock): # Note that P_A = A_1^T * A_0^(-1) and P_G = G_1^T * G_0^(-1), # and K_A = A_0^(-1/2) * E_A and K_G = G_0^(-1/2) * E_G. - P_A, K_A, mu_A = self._input_factor.get_option2quants(self._damping_input) + P_A, K_A, mu_A = self._input_factor.get_option2quants( + self._input_damping_func) P_G, K_G, mu_G = self._output_factor.get_option2quants( - self._damping_output) + self._output_damping_func) # Our approach differs superficially from the pseudo-code in the paper # in order to reduce the total number of matrix-matrix multiplies. 
@@ -1102,11 +1076,5 @@ class FullyConnectedSeriesFB(FisherBlock): # pylint: enable=invalid-name - def multiply(self, vector): - raise NotImplementedError - def tensors_to_compute_grads(self): return self._outputs - - def num_inputs(self): - return len(self._inputs) diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index 603d8b8b21..8ac63bc764 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import abc -import contextlib import numpy as np import six @@ -36,6 +35,7 @@ from tensorflow.python.ops import special_math_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.training import moving_averages +from tensorflow.python.util import nest # Whether to initialize covariance estimators at a zero matrix (or the identity # matrix). @@ -53,36 +53,16 @@ EIGENVALUE_DECOMPOSITION_THRESHOLD = 2 # matrix powers. Must be nonnegative. EIGENVALUE_CLIPPING_THRESHOLD = 0.0 -# Colocate the covariance ops and variables with the input tensors for each -# factor. 
-COLOCATE_COV_OPS_WITH_INPUTS = True - - -@contextlib.contextmanager -def maybe_colocate_with(op): - """Context to colocate with `op` if `COLOCATE_COV_OPS_WITH_INPUTS`.""" - if COLOCATE_COV_OPS_WITH_INPUTS: - if isinstance(op, (list, tuple)): - with tf_ops.colocate_with(op[0]): - yield - else: - with tf_ops.colocate_with(op): - yield - else: - yield - def set_global_constants(init_covariances_at_zero=None, zero_debias=None, eigenvalue_decomposition_threshold=None, - eigenvalue_clipping_threshold=None, - colocate_cov_ops_with_inputs=None): + eigenvalue_clipping_threshold=None): """Sets various global constants used by the classes in this module.""" global INIT_COVARIANCES_AT_ZERO global ZERO_DEBIAS global EIGENVALUE_DECOMPOSITION_THRESHOLD global EIGENVALUE_CLIPPING_THRESHOLD - global COLOCATE_COV_OPS_WITH_INPUTS if init_covariances_at_zero is not None: INIT_COVARIANCES_AT_ZERO = init_covariances_at_zero @@ -92,8 +72,6 @@ def set_global_constants(init_covariances_at_zero=None, EIGENVALUE_DECOMPOSITION_THRESHOLD = eigenvalue_decomposition_threshold if eigenvalue_clipping_threshold is not None: EIGENVALUE_CLIPPING_THRESHOLD = eigenvalue_clipping_threshold - if colocate_cov_ops_with_inputs is not None: - COLOCATE_COV_OPS_WITH_INPUTS = colocate_cov_ops_with_inputs def inverse_initializer(shape, dtype, partition_info=None): # pylint: disable=unused-argument @@ -190,6 +168,8 @@ def scope_string_from_params(params): name_parts.append(str(param)) elif isinstance(param, (tf_ops.Tensor, variables.Variable)): name_parts.append(scope_string_from_name(param)) + elif isinstance(param, utils.PartitionedTensor): + name_parts.append(scope_string_from_name(param.tensors)) else: raise ValueError("Encountered an unsupported param type {}".format( type(param))) @@ -207,6 +187,22 @@ def scalar_or_tensor_to_string(val): return repr(val) if np.isscalar(val) else scope_string_from_name(val) +def list_to_string(lst): + return "_".join(val if isinstance(val, six.string_types) + else 
scalar_or_tensor_to_string(val) for val in lst) + + +def graph_func_to_id(func): + """Returns a hashable object that represents func's computation.""" + # TODO(b/74201126): replace with Topohash of func's output + return func.func_id + + +def graph_func_to_string(func): + # TODO(b/74201126): replace with Topohash of func's output + return list_to_string(func.func_id) + + @six.add_metaclass(abc.ABCMeta) class FisherFactor(object): """Base class for objects modeling factors of approximate Fisher blocks. @@ -223,13 +219,10 @@ class FisherFactor(object): Note that for blocks that aren't based on approximations, a 'factor' can be the entire block itself, as is the case for the diagonal and full representations. - - Subclasses must implement the _compute_new_cov() method, and the _var_scope - and _cov_shape properties. """ def __init__(self): - self.instantiate_covariance() + self._cov = None @abc.abstractproperty def _var_scope(self): @@ -240,6 +233,10 @@ class FisherFactor(object): """ pass + @property + def name(self): + return self._var_scope + @abc.abstractproperty def _cov_shape(self): """The shape of the variable backing this FisherFactor.""" @@ -267,8 +264,9 @@ class FisherFactor(object): """Function for initializing covariance variable.""" return covariance_initializer - def instantiate_covariance(self): - """Instantiates the covariance Variable as the instance member _cov.""" + def instantiate_cov_variables(self): + """Makes the internal cov variable(s).""" + assert self._cov is None with variable_scope.variable_scope(self._var_scope): self._cov = variable_scope.get_variable( "cov", @@ -300,20 +298,17 @@ class FisherFactor(object): """ new_cov_contribs = tuple(self._compute_new_cov(idx) for idx in range(self._num_sources)) - # This gets the job done but we might want a better solution in the future. 
- # In particular, we could have a separate way of specifying where the - # the cov variables finally end up, independent of where their various - # contributions are computed. Right now these are the same thing, but in - # the future we might want to perform the cov computations on each tower, - # so that each tower will be considered a "source" (allowing us to reuse - # the existing "source" code for this). - with maybe_colocate_with(new_cov_contribs[0]): - new_cov = math_ops.add_n(new_cov_contribs) - # Synchronize value across all TPU cores. - if utils.on_tpu(): - new_cov = utils.cross_replica_mean(new_cov) - return moving_averages.assign_moving_average( - self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS) + new_cov = math_ops.add_n(new_cov_contribs) + # Synchronize value across all TPU cores. + if utils.on_tpu(): + new_cov = utils.cross_replica_mean(new_cov) + return moving_averages.assign_moving_average( + self._cov, new_cov, ema_decay, zero_debias=ZERO_DEBIAS) + + @abc.abstractmethod + def instantiate_inv_variables(self): + """Makes the internal "inverse" variable(s).""" + pass @abc.abstractmethod def make_inverse_update_ops(self): @@ -341,70 +336,47 @@ class FisherFactor(object): return self._cov @abc.abstractmethod - def left_multiply(self, x, damping): - """Multiplies 'x' by the damped covariance of this factor. + def left_multiply_matpower(self, x, exp, damping_func): + """Left multiplies 'x' by matrix power of this factor (w/ damping applied). - Let C be the covariance matrix this factor represents, and - D = C + damping * I be its damped variant. This method calculates - matmul(D, vec(x)). - - Args: - x: Tensor. Represents a single vector. Shape depends on implementation. - damping: 0-D Tensor. Damping to add to C's diagonal. + This calculation is essentially: + (C + damping * I)**exp * x + where * is matrix-multiplication, ** is matrix power, I is the identity + matrix, and C is the matrix represented by this factor. 
- Returns: - Tensor of same shape as 'x'. - """ - pass - - @abc.abstractmethod - def right_multiply(self, x, damping): - """Multiplies 'x' by the damped covariance of this factor. - - Let C be the covariance matrix this factor represents, and - D = C + damping * I be its damped variant. This method calculates - matmul(vec(x), D). + x can represent either a matrix or a vector. For some factors, 'x' might + represent a vector but actually be stored as a 2D matrix for convenience. Args: x: Tensor. Represents a single vector. Shape depends on implementation. - damping: 0-D Tensor. Damping to add to C's diagonal. + exp: float. The matrix exponent to use. + damping_func: A function that computes a 0-D Tensor or a float which will + be the damping value used. i.e. damping = damping_func(). Returns: - Tensor of same shape as 'x'. + Tensor of same shape as 'x' representing the result of the multiplication. """ pass @abc.abstractmethod - def left_multiply_inverse(self, x, damping): - """Multiplies 'x' by damped inverse of this factor. - - Let C be the covariance matrix this factor represents and - E = inv(C + damping * I) be its damped inverse. This method calculates - matmul(E, vec(x)). - - Args: - x: Tensor. Represents a single vector. Shape depends on implementation. - damping: 0-D Tensor. Damping to add to C's diagonal. + def right_multiply_matpower(self, x, exp, damping_func): + """Right multiplies 'x' by matrix power of this factor (w/ damping applied). - Returns: - Tensor of same shape as 'x'. - """ - pass - - @abc.abstractmethod - def right_multiply_inverse(self, x, damping): - """Multiplies 'x' by damped inverse of this factor. + This calculation is essentially: + x * (C + damping * I)**exp + where * is matrix-multiplication, ** is matrix power, I is the identity + matrix, and C is the matrix represented by this factor. - Let C be the covariance matrix this factor represents and - E = inv(C + damping * I) be its damped inverse. 
This method calculates - matmul(vec(x), E). + Unlike left_multiply_matpower, x will always be a matrix. Args: x: Tensor. Represents a single vector. Shape depends on implementation. - damping: 0-D Tensor. Damping to add to C's diagonal. + exp: float. The matrix exponent to use. + damping_func: A function that computes a 0-D Tensor or a float which will + be the damping value used. i.e. damping = damping_func(). Returns: - Tensor of same shape as 'x'. + Tensor of same shape as 'x' representing the result of the multiplication. """ pass @@ -428,47 +400,52 @@ class InverseProvidingFactor(FisherFactor): # the latter. def __init__(self): - self._inverses_by_damping = {} - self._matpower_by_exp_and_damping = {} + self._matpower_by_exp_and_damping = {} # { (float, hashable): variable } + self._matpower_registrations = set() # { (float, hashable) } self._eigendecomp = None + self._damping_funcs_by_id = {} # {hashable: lambda} super(InverseProvidingFactor, self).__init__() - def register_damped_inverse(self, damping): - """Registers a damped inverse needed by a FisherBlock. - - This creates a variable and signals make_inverse_update_ops to make the - corresponding update op. The variable can be read via the method - get_inverse. + def _register_damping(self, damping_func): + damping_id = graph_func_to_id(damping_func) + if damping_id not in self._damping_funcs_by_id: + self._damping_funcs_by_id[damping_id] = damping_func + return damping_id - Args: - damping: The damping value (float or Tensor) for this factor. 
- """ - if damping not in self._inverses_by_damping: - damping_string = scalar_or_tensor_to_string(damping) - with variable_scope.variable_scope(self._var_scope): - inv = variable_scope.get_variable( - "inv_damp{}".format(damping_string), - initializer=inverse_initializer, - shape=self._cov_shape, - trainable=False, - dtype=self._dtype) - self._inverses_by_damping[damping] = inv + def register_inverse(self, damping_func): + # Just for backwards compatibility of some old code and tests + self.register_matpower(-1, damping_func) - def register_matpower(self, exp, damping): - """Registers a matrix power needed by a FisherBlock. + def register_matpower(self, exp, damping_func): + """Registers a matrix power to be maintained and served on demand. This creates a variable and signals make_inverse_update_ops to make the corresponding update op. The variable can be read via the method get_matpower. Args: - exp: The exponent (float or Tensor) to raise the matrix to. - damping: The damping value (float or Tensor). + exp: float. The exponent to use in the matrix power. + damping_func: A function that computes a 0-D Tensor or a float which will + be the damping value used. i.e. damping = damping_func(). """ - if (exp, damping) not in self._matpower_by_exp_and_damping: + if exp == 1.0: + # We don't register these. The user shouldn't even be calling this + # function with exp = 1.0. 
+ return + + damping_id = self._register_damping(damping_func) + + if (exp, damping_id) not in self._matpower_registrations: + self._matpower_registrations.add((exp, damping_id)) + + def instantiate_inv_variables(self): + """Makes the internal "inverse" variable(s).""" + + for (exp, damping_id) in self._matpower_registrations: exp_string = scalar_or_tensor_to_string(exp) - damping_string = scalar_or_tensor_to_string(damping) + damping_func = self._damping_funcs_by_id[damping_id] + damping_string = graph_func_to_string(damping_func) with variable_scope.variable_scope(self._var_scope): matpower = variable_scope.get_variable( "matpower_exp{}_damp{}".format(exp_string, damping_string), @@ -476,34 +453,35 @@ class InverseProvidingFactor(FisherFactor): shape=self._cov_shape, trainable=False, dtype=self._dtype) - self._matpower_by_exp_and_damping[(exp, damping)] = matpower + assert (exp, damping_id) not in self._matpower_by_exp_and_damping + self._matpower_by_exp_and_damping[(exp, damping_id)] = matpower def make_inverse_update_ops(self): """Create and return update ops corresponding to registered computations.""" ops = [] - # We do this to ensure that we don't reuse the eigendecomp from old calls - # to make_inverse_update_ops that may be placed on different devices. This - # can happen is the user has both a permanent and lazily constructed - # version of the inverse ops (and only uses one of them). 
- self.reset_eigendecomp() + num_inverses = sum(1 for (exp, _) in self._matpower_by_exp_and_damping + if exp == -1) + + num_other_matpower = len(self._matpower_by_exp_and_damping) - num_inverses + + other_matrix_power_registered = num_other_matpower >= 1 - num_inverses = len(self._inverses_by_damping) - matrix_power_registered = bool(self._matpower_by_exp_and_damping) use_eig = ( - self._eigendecomp or matrix_power_registered or + self._eigendecomp or other_matrix_power_registered or num_inverses >= EIGENVALUE_DECOMPOSITION_THRESHOLD) + # We precompute these so we don't need to evaluate them multiple times (for + # each matrix power that uses them) + damping_value_by_id = {damping_id: self._damping_funcs_by_id[damping_id]() + for damping_id in self._damping_funcs_by_id} + if use_eig: eigenvalues, eigenvectors = self.get_eigendecomp() # pylint: disable=unpacking-non-sequence - for damping, inv in self._inverses_by_damping.items(): - ops.append( - inv.assign( - math_ops.matmul(eigenvectors / (eigenvalues + damping), - array_ops.transpose(eigenvectors)))) - - for (exp, damping), matpower in self._matpower_by_exp_and_damping.items(): + for (exp, damping_id), matpower in ( + self._matpower_by_exp_and_damping.items()): + damping = damping_value_by_id[damping_id] ops.append( matpower.assign( math_ops.matmul(eigenvectors * @@ -512,28 +490,31 @@ class InverseProvidingFactor(FisherFactor): # These ops share computation and should be run on a single device. 
ops = [control_flow_ops.group(*ops)] else: - for damping, inv in self._inverses_by_damping.items(): - ops.append(inv.assign(utils.posdef_inv(self._cov, damping))) + for (exp, damping_id), matpower in ( + self._matpower_by_exp_and_damping.items()): + assert exp == -1 + damping = damping_value_by_id[damping_id] + ops.append(matpower.assign(utils.posdef_inv(self._cov, damping))) + self._eigendecomp = False return ops - def get_damped_inverse(self, damping): - # Note that this function returns a variable which gets updated by the - # inverse ops. It may be stale / inconsistent with the latest value of - # get_cov(). - return self._inverses_by_damping[damping] + def get_inverse(self, damping_func): + # Just for backwards compatibility of some old code and tests + damping_id = graph_func_to_id(damping_func) + return self._matpower_by_exp_and_damping[(-1, damping_id)] - def get_matpower(self, exp, damping): + def get_matpower(self, exp, damping_func): # Note that this function returns a variable which gets updated by the # inverse ops. It may be stale / inconsistent with the latest value of # get_cov(). - return self._matpower_by_exp_and_damping[(exp, damping)] + damping_id = graph_func_to_id(damping_func) + return self._matpower_by_exp_and_damping[(exp, damping_id)] def get_eigendecomp(self): """Creates or retrieves eigendecomposition of self._cov.""" - # Unlike get_inverse and get_matpower this doesn't retrieve a stored - # variable, but instead always computes a fresh version from the current - # value of get_cov(). + # Unlike get_matpower this doesn't retrieve a stored variable, but instead + # always computes a fresh version from the current value of get_cov(). if not self._eigendecomp: eigenvalues, eigenvectors = linalg_ops.self_adjoint_eig(self._cov) @@ -546,63 +527,42 @@ class InverseProvidingFactor(FisherFactor): return self._eigendecomp - def reset_eigendecomp(self): - self._eigendecomp = None - def get_cov(self): # Variable contains full covariance matrix. 
return self.get_cov_var() - def left_multiply(self, x, damping): - n = self.get_cov().shape[0] - damped_cov = self.get_cov() + damping * array_ops.eye(n) - + def left_multiply_matpower(self, x, exp, damping_func): if isinstance(x, tf_ops.IndexedSlices): - raise NotImplementedError( - "Left-multiply not yet supported for IndexedSlices.") + raise ValueError("Left-multiply not yet supported for IndexedSlices.") - if len(x.shape) != 2: + if x.shape.ndims != 2: raise ValueError( "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." % (x,)) - return math_ops.matmul(damped_cov, x) + if exp == 1: + return math_ops.matmul(self.get_cov(), x) + damping_func() * x - def right_multiply(self, x, damping): - n = self.get_cov().shape[0] - damped_cov = self.get_cov() + damping * array_ops.eye(n) + return math_ops.matmul(self.get_matpower(exp, damping_func), x) + def right_multiply_matpower(self, x, exp, damping_func): if isinstance(x, tf_ops.IndexedSlices): - return utils.matmul_sparse_dense(x, damped_cov) - - if len(x.shape) != 2: - raise ValueError( - "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." - % (x,)) + if exp == 1: + n = self.get_cov().shape[0] + damped_cov = self.get_cov() + damping_func() * array_ops.eye(n) + return utils.matmul_sparse_dense(x, damped_cov) - return math_ops.matmul(x, damped_cov) - - def left_multiply_inverse(self, x, damping): - if isinstance(x, tf_ops.IndexedSlices): - raise ValueError("Left-multiply not yet supported for IndexedSlices.") + return utils.matmul_sparse_dense(x, self.get_matpower(exp, damping_func)) if x.shape.ndims != 2: raise ValueError( "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." 
% (x,)) - return math_ops.matmul(self.get_damped_inverse(damping), x) - - def right_multiply_inverse(self, x, damping): - if isinstance(x, tf_ops.IndexedSlices): - return utils.matmul_sparse_dense(x, self.get_damped_inverse(damping)) - - if x.shape.ndims != 2: - raise ValueError( - "InverseProvidingFactors apply to matrix-shaped vectors. Found: %s." - % (x,)) + if exp == 1: + return math_ops.matmul(x, self.get_cov()) + damping_func() * x - return math_ops.matmul(x, self.get_damped_inverse(damping)) + return math_ops.matmul(x, self.get_matpower(exp, damping_func)) class FullFactor(InverseProvidingFactor): @@ -622,7 +582,7 @@ class FullFactor(InverseProvidingFactor): @property def _var_scope(self): - return "ff_full/" + scope_string_from_params( + return "ff_full_" + scope_string_from_params( [self._params_grads, self._batch_size]) @property @@ -641,11 +601,10 @@ class FullFactor(InverseProvidingFactor): def _compute_new_cov(self, idx=0): # This will be a very basic rank 1 estimate - with maybe_colocate_with(self._params_grads[idx]): - params_grads_flat = utils.tensors_to_column(self._params_grads[idx]) - return ((params_grads_flat * array_ops.transpose( - params_grads_flat)) / math_ops.cast(self._batch_size, - params_grads_flat.dtype)) + params_grads_flat = utils.tensors_to_column(self._params_grads[idx]) + return ((params_grads_flat * array_ops.transpose( + params_grads_flat)) / math_ops.cast(self._batch_size, + params_grads_flat.dtype)) class DiagonalFactor(FisherFactor): @@ -656,6 +615,7 @@ class DiagonalFactor(FisherFactor): """ def __init__(self): + self._damping_funcs_by_id = {} # { hashable: lambda } super(DiagonalFactor, self).__init__() @property @@ -665,43 +625,30 @@ class DiagonalFactor(FisherFactor): def make_inverse_update_ops(self): return [] + def instantiate_inv_variables(self): + pass + def get_cov(self): # self.get_cov() could be any shape, but it must have one entry per # parameter. Flatten it into a vector. 
cov_diag_vec = array_ops.reshape(self.get_cov_var(), [-1]) return array_ops.diag(cov_diag_vec) - def left_multiply(self, x, damping): - damped_cov = self.get_cov_var() + damping - if isinstance(x, tf_ops.IndexedSlices): - return utils.matmul_diag_sparse(array_ops.reshape(damped_cov, [-1]), x) - - if x.shape != damped_cov.shape: - raise ValueError("x (%s) and cov (%s) must have same shape." % - (x, damped_cov)) - - return damped_cov * x - - def right_multiply(self, x, damping): - raise NotImplementedError("Only left-multiply is currently supported.") - - def left_multiply_inverse(self, x, damping): - inverse = 1. / (self.get_cov_var() + damping) + def left_multiply_matpower(self, x, exp, damping_func): + matpower = (self.get_cov_var() + damping_func())**exp if isinstance(x, tf_ops.IndexedSlices): - return utils.matmul_diag_sparse(array_ops.reshape(inverse, [-1]), x) + return utils.matmul_diag_sparse(array_ops.reshape(matpower, [-1]), x) - if x.shape != inverse.shape: + if x.shape != matpower.shape: raise ValueError("x (%s) and cov (%s) must have same shape." % - (x, inverse)) - - return inverse * x + (x, matpower)) + return matpower * x - def right_multiply_inverse(self, x, damping): + def right_multiply_matpower(self, x, exp, damping_func): raise NotImplementedError("Only left-multiply is currently supported.") - def register_damped_inverse(self, damping): - # DiagonalFactors don't keep explicit inverses. 
+ def register_matpower(self, exp, damping_func): pass @@ -730,7 +677,7 @@ class NaiveDiagonalFactor(DiagonalFactor): @property def _var_scope(self): - return "ff_naivediag/" + scope_string_from_params( + return "ff_naivediag_" + scope_string_from_params( [self._params_grads, self._batch_size]) @property @@ -748,10 +695,9 @@ class NaiveDiagonalFactor(DiagonalFactor): return self._params_grads[0][0].dtype def _compute_new_cov(self, idx=0): - with maybe_colocate_with(self._params_grads[idx]): - params_grads_flat = utils.tensors_to_column(self._params_grads[idx]) - return (math_ops.square(params_grads_flat) / math_ops.cast( - self._batch_size, params_grads_flat.dtype)) + params_grads_flat = utils.tensors_to_column(self._params_grads[idx]) + return (math_ops.square(params_grads_flat) / math_ops.cast( + self._batch_size, params_grads_flat.dtype)) class EmbeddingInputKroneckerFactor(DiagonalFactor): @@ -772,8 +718,8 @@ class EmbeddingInputKroneckerFactor(DiagonalFactor): """Instantiate EmbeddingInputKroneckerFactor. Args: - input_ids: Tuple of Tensors of shape [batch_size, input_size] and dtype - int32. Indices into embedding matrix. + input_ids: Tensor of shape [batch_size, input_size] and dtype int32. + Indices into embedding matrix. vocab_size: int or 0-D Tensor. Maximum value for entries in 'input_ids'. dtype: dtype for covariance statistics. Must be a floating point type. Defaults to float32. 
@@ -786,7 +732,7 @@ class EmbeddingInputKroneckerFactor(DiagonalFactor): @property def _var_scope(self): - return "ff_diag_embedding/" + scope_string_from_params(self._input_ids) + return "ff_diag_embedding_" + scope_string_from_params(self._input_ids) @property def _cov_shape(self): @@ -794,42 +740,45 @@ class EmbeddingInputKroneckerFactor(DiagonalFactor): @property def _num_sources(self): - return len(self._input_ids) + return 1 @property def _dtype(self): return self._cov_dtype def _compute_new_cov(self, idx=0): - with maybe_colocate_with(self._input_ids): - input_ids = self._input_ids[idx] - if len(input_ids.shape) > 2: - raise ValueError( - "Input to embeddings must have rank <= 2. Found rank %d." % len( - input_ids.shape)) - - batch_size = array_ops.shape(input_ids)[0] - - # Transform indices into one-hot vectors. - # - # TODO(b/72714822): There must be a faster way to construct the diagonal - # covariance matrix! This operation is O(batch_size * vocab_size), where - # it should be O(batch_size * input_size). - flat_input_ids = array_ops.reshape(input_ids, [-1]) - one_hots = array_ops.one_hot(flat_input_ids, - self._vocab_size) # [?, vocab_size] - - # Take average across examples. Note that, because all entries have - # magnitude zero or one, there's no need to square the entries. - # - # TODO(b/72714822): Support for SparseTensor, other kinds of aggregation - # within an example such as average. - # - # TODO(b/72714822): Support for partitioned embeddings. - new_cov = math_ops.reduce_sum(one_hots, axis=0) # [vocab_size] - new_cov /= math_ops.cast(batch_size, new_cov.dtype) - - return new_cov + if idx != 0: + raise ValueError("EmbeddingInputKroneckerFactor only supports idx = 0") + + input_ids = self._input_ids + + if len(input_ids.shape) > 2: + raise ValueError( + "Input to embeddings must have rank <= 2. Found rank %d." % len( + input_ids.shape)) + + batch_size = array_ops.shape(input_ids)[0] + + # Transform indices into one-hot vectors. 
+ # + # TODO(b/72714822): There must be a faster way to construct the diagonal + # covariance matrix! This operation is O(batch_size * vocab_size), where + # it should be O(batch_size * input_size). + flat_input_ids = array_ops.reshape(input_ids, [-1]) + one_hots = array_ops.one_hot(flat_input_ids, + self._vocab_size) # [?, vocab_size] + + # Take average across examples. Note that, because all entries have + # magnitude zero or one, there's no need to square the entries. + # + # TODO(b/72714822): Support for SparseTensor, other kinds of aggregation + # within an example such as average. + # + # TODO(b/72714822): Support for partitioned embeddings. + new_cov = math_ops.reduce_sum(one_hots, axis=0) # [vocab_size] + new_cov /= math_ops.cast(batch_size, new_cov.dtype) + + return new_cov class FullyConnectedDiagonalFactor(DiagonalFactor): @@ -850,23 +799,23 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): """Instantiate FullyConnectedDiagonalFactor. Args: - inputs: Tensor of shape [batch_size, input_size]. Inputs to fully - connected layer. - outputs_grads: List of Tensors of shape [batch_size, output_size]. - Gradient of loss with respect to layer's preactivations. + inputs: Tensor of shape [batch_size, input_size]. Inputs to this layer. + outputs_grads: List of Tensors, each of shape [batch_size, output_size], + which are the gradients of the loss with respect to the layer's + outputs. One Tensor for each "source". + has_bias: bool. If True, append '1' to each input. 
""" self._inputs = inputs self._has_bias = has_bias self._outputs_grads = outputs_grads - self._batch_size = array_ops.shape(inputs)[0] self._squared_inputs = None super(FullyConnectedDiagonalFactor, self).__init__() @property def _var_scope(self): - return "ff_diagfc/" + scope_string_from_params( + return "ff_diagfc_" + scope_string_from_params( (self._inputs,) + tuple(self._outputs_grads)) @property @@ -883,25 +832,30 @@ class FullyConnectedDiagonalFactor(DiagonalFactor): def _dtype(self): return self._outputs_grads[0].dtype + def make_covariance_update_op(self, ema_decay): + inputs = self._inputs + + if self._has_bias: + inputs = append_homog(inputs) + self._squared_inputs = math_ops.square(inputs) + + return super(FullyConnectedDiagonalFactor, self).make_covariance_update_op( + ema_decay) + def _compute_new_cov(self, idx=0): + batch_size = array_ops.shape(self._squared_inputs)[0] + outputs_grad = self._outputs_grads[idx] + # The well-known special formula that uses the fact that the entry-wise # square of an outer product is the outer-product of the entry-wise squares. # The gradient is the outer product of the input and the output gradients, # so we just square both and then take their outer-product. 
- with maybe_colocate_with(self._outputs_grads[idx]): - # We only need to compute squared_inputs once - if self._squared_inputs is None: - inputs = self._inputs - if self._has_bias: - inputs = append_homog(self._inputs) - self._squared_inputs = math_ops.square(inputs) - - new_cov = math_ops.matmul( - self._squared_inputs, - math_ops.square(self._outputs_grads[idx]), - transpose_a=True) - new_cov /= math_ops.cast(self._batch_size, new_cov.dtype) - return new_cov + new_cov = math_ops.matmul( + self._squared_inputs, + math_ops.square(outputs_grad), + transpose_a=True) + new_cov /= math_ops.cast(batch_size, new_cov.dtype) + return new_cov class ConvDiagonalFactor(DiagonalFactor): @@ -919,9 +873,9 @@ class ConvDiagonalFactor(DiagonalFactor): Args: inputs: Tensor of shape [batch_size, height, width, in_channels]. Input activations to this layer. - outputs_grads: Tensor of shape [batch_size, height, width, out_channels]. - Per-example gradients to the loss with respect to the layer's output - preactivations. + outputs_grads: List of Tensors, each of shape [batch_size, + height, width, out_channels], which are the gradients of the loss + with respect to the layer's outputs. One Tensor for each "source". filter_shape: Tuple of 4 ints: (kernel_height, kernel_width, in_channels, out_channels). Represents shape of kernel used in this layer. strides: The stride size in this layer (1-D Tensor of length 4). 
@@ -941,7 +895,7 @@ class ConvDiagonalFactor(DiagonalFactor): @property def _var_scope(self): - return "ff_convdiag/" + scope_string_from_name( + return "ff_convdiag_" + scope_string_from_params( (self._inputs,) + tuple(self._outputs_grads)) @property @@ -961,38 +915,32 @@ class ConvDiagonalFactor(DiagonalFactor): return self._outputs_grads[0].dtype def make_covariance_update_op(self, ema_decay): - with maybe_colocate_with(self._inputs): - filter_height, filter_width, _, _ = self._filter_shape + filter_height, filter_width, _, _ = self._filter_shape - # TODO(b/64144716): there is potential here for a big savings in terms - # of memory use. - patches = array_ops.extract_image_patches( - self._inputs, - ksizes=[1, filter_height, filter_width, 1], - strides=self._strides, - rates=[1, 1, 1, 1], - padding=self._padding) + # TODO(b/64144716): there is potential here for a big savings in terms + # of memory use. + patches = array_ops.extract_image_patches( + self._inputs, + ksizes=[1, filter_height, filter_width, 1], + strides=self._strides, + rates=[1, 1, 1, 1], + padding=self._padding) - if self._has_bias: - patches = append_homog(patches) + if self._has_bias: + patches = append_homog(patches) - self._patches = patches - - op = super(ConvDiagonalFactor, self).make_covariance_update_op(ema_decay) - - self._patches = None + self._patches = patches - return op + return super(ConvDiagonalFactor, self).make_covariance_update_op(ema_decay) def _compute_new_cov(self, idx=0): - with maybe_colocate_with(self._outputs_grads[idx]): - outputs_grad = self._outputs_grads[idx] - batch_size = array_ops.shape(self._patches)[0] + batch_size = array_ops.shape(self._patches)[0] + outputs_grad = self._outputs_grads[idx] - new_cov = self._convdiag_sum_of_squares(self._patches, outputs_grad) - new_cov /= math_ops.cast(batch_size, new_cov.dtype) + new_cov = self._convdiag_sum_of_squares(self._patches, outputs_grad) + new_cov /= math_ops.cast(batch_size, new_cov.dtype) - return new_cov + return 
new_cov def _convdiag_sum_of_squares(self, patches, outputs_grad): # This computes the sum of the squares of the per-training-case "gradients". @@ -1013,8 +961,9 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor): """Instantiate FullyConnectedKroneckerFactor. Args: - tensors: List of Tensors of shape [batch_size, n]. Represents either a - layer's inputs or its output's gradients. + tensors: List of Tensors, each of shape [batch_size, n], one for each + source. The Tensors are typically either a layer's inputs or its + output's gradients. has_bias: bool. If True, append '1' to each row. """ # The tensor argument is either a tensor of input activations or a tensor of @@ -1025,8 +974,8 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor): @property def _var_scope(self): - return "ff_fckron/" + scope_string_from_params( - [self._tensors, self._has_bias]) + return "ff_fckron_" + scope_string_from_params( + tuple(self._tensors) + (self._has_bias,)) @property def _cov_shape(self): @@ -1042,11 +991,10 @@ class FullyConnectedKroneckerFactor(InverseProvidingFactor): return self._tensors[0].dtype def _compute_new_cov(self, idx=0): - with maybe_colocate_with(self._tensors[idx]): - tensor = self._tensors[idx] - if self._has_bias: - tensor = append_homog(tensor) - return compute_cov(tensor) + tensor = self._tensors[idx] + if self._has_bias: + tensor = append_homog(tensor) + return compute_cov(tensor) class ConvInputKroneckerFactor(InverseProvidingFactor): @@ -1068,8 +1016,8 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): """Initializes ConvInputKroneckerFactor. Args: - inputs: Tensor of shape [batch_size, height, width, in_channels]. Inputs - to layer. + inputs: A Tensor of shape [batch_size, height, width, in_channels] + which is the inputs to the layer (before being processed into patches). filter_shape: 1-D Tensor of length 4. Contains [kernel_height, kernel_width, in_channels, out_channels]. strides: 1-D Tensor of length 4. 
Contains [batch_stride, height_stride, @@ -1086,7 +1034,7 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): @property def _var_scope(self): - return "ff_convinkron/" + scope_string_from_params([ + return "ff_convinkron_" + scope_string_from_params([ self._inputs, self._filter_shape, self._strides, self._padding, self._has_bias ]) @@ -1109,37 +1057,36 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): if idx != 0: raise ValueError("ConvInputKroneckerFactor only supports idx = 0") - with maybe_colocate_with(self._inputs): - filter_height, filter_width, in_channels, _ = self._filter_shape - - # TODO(b/64144716): there is potential here for a big savings in terms of - # memory use. - patches = array_ops.extract_image_patches( - self._inputs, - ksizes=[1, filter_height, filter_width, 1], - strides=self._strides, - rates=[1, 1, 1, 1], - padding=self._padding) - - flatten_size = (filter_height * filter_width * in_channels) - # patches_flat below is the matrix [[A_l]] from the KFC paper (tilde - # omitted over A for clarity). It has shape M|T| x J|Delta| (eq. 14), - # where M = minibatch size, |T| = number of spatial locations, - # |Delta| = number of spatial offsets, and J = number of input maps - # for convolutional layer l. - patches_flat = array_ops.reshape(patches, [-1, flatten_size]) - # We append a homogenous coordinate to patches_flat if the layer has - # bias parameters. This gives us [[A_l]]_H from the paper. - if self._has_bias: - patches_flat = append_homog(patches_flat) - # We call compute_cov without passing in a normalizer. compute_cov uses - # the first dimension of patches_flat i.e. M|T| as the normalizer by - # default. Hence we end up computing 1/M|T| * [[A_l]]^T [[A_l]], with - # shape J|Delta| x J|Delta|. This is related to hat{Omega}_l from - # the paper but has a different scale here for consistency with - # ConvOutputKroneckerFactor. - # (Tilde omitted over A for clarity.) 
- return compute_cov(patches_flat) + filter_height, filter_width, in_channels, _ = self._filter_shape + + # TODO(b/64144716): there is potential here for a big savings in terms of + # memory use. + patches = array_ops.extract_image_patches( + self._inputs, + ksizes=[1, filter_height, filter_width, 1], + strides=self._strides, + rates=[1, 1, 1, 1], + padding=self._padding) + + flatten_size = (filter_height * filter_width * in_channels) + # patches_flat below is the matrix [[A_l]] from the KFC paper (tilde + # omitted over A for clarity). It has shape M|T| x J|Delta| (eq. 14), + # where M = minibatch size, |T| = number of spatial locations, + # |Delta| = number of spatial offsets, and J = number of input maps + # for convolutional layer l. + patches_flat = array_ops.reshape(patches, [-1, flatten_size]) + # We append a homogenous coordinate to patches_flat if the layer has + # bias parameters. This gives us [[A_l]]_H from the paper. + if self._has_bias: + patches_flat = append_homog(patches_flat) + # We call compute_cov without passing in a normalizer. compute_cov uses + # the first dimension of patches_flat i.e. M|T| as the normalizer by + # default. Hence we end up computing 1/M|T| * [[A_l]]^T [[A_l]], with + # shape J|Delta| x J|Delta|. This is related to hat{Omega}_l from + # the paper but has a different scale here for consistency with + # ConvOutputKroneckerFactor. + # (Tilde omitted over A for clarity.) + return compute_cov(patches_flat) class ConvOutputKroneckerFactor(InverseProvidingFactor): @@ -1157,8 +1104,8 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): """Initializes ConvOutputKroneckerFactor. Args: - outputs_grads: list of Tensors. Each Tensor is of shape - [batch_size, height, width, out_channels]. + outputs_grads: List of Tensors, each of shape [batch_size, + height, width, out_channels]. One Tensor for each "source". 
""" self._out_channels = outputs_grads[0].shape.as_list()[3] self._outputs_grads = outputs_grads @@ -1166,7 +1113,7 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): @property def _var_scope(self): - return "ff_convoutkron/" + scope_string_from_params(self._outputs_grads) + return "ff_convoutkron_" + scope_string_from_params(self._outputs_grads) @property def _cov_shape(self): @@ -1182,22 +1129,22 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): return self._outputs_grads[0].dtype def _compute_new_cov(self, idx=0): - with maybe_colocate_with(self._outputs_grads[idx]): - # reshaped_tensor below is the matrix DS_l defined in the KFC paper - # (tilde omitted over S for clarity). It has shape M|T| x I, where - # M = minibatch size, |T| = number of spatial locations, and - # I = number of output maps for convolutional layer l. - reshaped_tensor = array_ops.reshape(self._outputs_grads[idx], - [-1, self._out_channels]) - # Following the reasoning in ConvInputKroneckerFactor._compute_new_cov, - # compute_cov here returns 1/M|T| * DS_l^T DS_l = hat{Gamma}_l - # as defined in the paper, with shape I x I. - # (Tilde omitted over S for clarity.) - return compute_cov(reshaped_tensor) + outputs_grad = self._outputs_grads[idx] + + # reshaped_tensor below is the matrix DS_l defined in the KFC paper + # (tilde omitted over S for clarity). It has shape M|T| x I, where + # M = minibatch size, |T| = number of spatial locations, and + # I = number of output maps for convolutional layer l. + reshaped_tensor = array_ops.reshape(outputs_grad, [-1, self._out_channels]) + # Following the reasoning in ConvInputKroneckerFactor._compute_new_cov, + # compute_cov here returns 1/M|T| * DS_l^T DS_l = hat{Gamma}_l + # as defined in the paper, with shape I x I. + # (Tilde omitted over S for clarity.) 
+ return compute_cov(reshaped_tensor) class FullyConnectedMultiKF(InverseProvidingFactor): - """Kronecker factor for a fully connected recurrent layer.""" + """Kronecker factor for a fully connected layer used multiple times.""" def __init__(self, tensor_lists, @@ -1205,25 +1152,32 @@ class FullyConnectedMultiKF(InverseProvidingFactor): """Constructs a new `FullyConnectedMultiKF`. Args: - tensor_lists: List of lists of Tensors of shape [batch_size, n]. + tensor_lists: 2D array (list of lists) of Tensors of shape + [batch_size, n]. Each of these tensors is usually a layer's inputs or + its output's gradients. The first dimension of the array is the source, + and the second is the use in the graph (which is sometimes a + "time-step"). has_bias: bool. If True, '1' is appended to each row. """ self._tensor_lists = tensor_lists self._has_bias = has_bias - self._batch_size = array_ops.shape(tensor_lists[0][0])[0] self._num_timesteps = len(tensor_lists[0]) self._tensors = [None] * len(tensor_lists) self._cov_dt1 = None + self._make_cov_dt1 = False self._option1quants_by_damping = {} self._option2quants_by_damping = {} + self._option1quants_registrations = set() + self._option2quants_registrations = set() super(FullyConnectedMultiKF, self).__init__() @property def _var_scope(self): - return "ff_fc_multi/" + scope_string_from_params(self._tensor_lists) + return "ff_fc_multi_" + scope_string_from_params( + tuple(nest.flatten(self._tensor_lists)) + (self._has_bias,)) @property def _num_sources(self): @@ -1240,43 +1194,40 @@ class FullyConnectedMultiKF(InverseProvidingFactor): if self._cov_dt1 is not None: new_cov_dt1_contribs = tuple(self._compute_new_cov_dt1(idx) for idx in range(self._num_sources)) + new_cov_dt1 = math_ops.add_n(new_cov_dt1_contribs) + op2 = moving_averages.assign_moving_average( + self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS) - with maybe_colocate_with(new_cov_dt1_contribs[0]): - new_cov_dt1 = math_ops.add_n(new_cov_dt1_contribs) - - op2 
= moving_averages.assign_moving_average( - self._cov_dt1, new_cov_dt1, ema_decay, zero_debias=ZERO_DEBIAS) - - # TODO(b/69112164): - # It's important that _cov and _cov_dt1 remain consistent with each - # other while the inverse ops are happening. How can we ensure this? - # We will need to add explicit synchronization for this to - # work with asynchronous training. - op = control_flow_ops.group(op, op2) + # TODO(b/69112164): + # It's important that _cov and _cov_dt1 remain consistent with each + # other while the inverse ops are happening. How can we ensure this? + # We will need to add explicit synchronization for this to + # work with asynchronous training. + op = control_flow_ops.group(op, op2) return op def _compute_new_cov(self, idx=0): - with maybe_colocate_with(self._tensor_lists[idx]): - tensor = array_ops.concat(self._tensor_lists[idx], 0) - if self._has_bias: - tensor = append_homog(tensor) - # We save these so they can be used by _compute_new_cov_dt1 - self._tensors[idx] = tensor - return compute_cov(tensor) - - def _compute_new_cov_dt1(self, idx=0): + # Concatenate across time/replications + tensor = array_ops.concat(self._tensor_lists[idx], 0) + if self._has_bias: + tensor = append_homog(tensor) + # We save these so they can be used by _compute_new_cov_dt1 + self._tensors[idx] = tensor + return compute_cov(tensor) + + def _compute_new_cov_dt1(self, idx=0): # pylint: disable=missing-docstring tensor = self._tensors[idx] - with maybe_colocate_with(tensor): - # Is there a more elegant way to do this computation? - tensor_present = tensor[:-self._batch_size, :] - tensor_future = tensor[self._batch_size:, :] - # We specify a normalizer for this computation to ensure a PSD Fisher - # block estimate. This is equivalent to padding with zeros, as was done - # in Section B.2 of the appendix. 
- normalizer = self._num_timesteps * self._batch_size - return compute_cov( - tensor_future, tensor_right=tensor_present, normalizer=normalizer) + batch_size = array_ops.shape(self._tensor_lists[idx][0])[0] + # Is there a more elegant way to do this computation? + tensor_present = tensor[:-batch_size, :] + tensor_future = tensor[batch_size:, :] + # We specify a normalizer for this computation to ensure a PSD Fisher + # block estimate. This is equivalent to padding with zeros, as was done + # in Section B.2 of the appendix. + normalizer = self._num_timesteps * batch_size + return compute_cov( + tensor_future, tensor_right=tensor_present, normalizer=normalizer) @property def _cov_shape(self): @@ -1288,23 +1239,25 @@ class FullyConnectedMultiKF(InverseProvidingFactor): size = self._tensor_lists[0][0].shape[1] + self._has_bias return [size] - def get_option1quants(self, damping): - return self._option1quants_by_damping[damping] + def get_option1quants(self, damping_func): + damping_id = graph_func_to_id(damping_func) + return self._option1quants_by_damping[damping_id] - def get_option2quants(self, damping): - return self._option2quants_by_damping[damping] + def get_option2quants(self, damping_func): + damping_id = graph_func_to_id(damping_func) + return self._option2quants_by_damping[damping_id] def get_cov_dt1(self): assert self._cov_dt1 is not None return self._cov_dt1 def register_cov_dt1(self): - """Create a variable representing temporal cross-covariance. + self._make_cov_dt1 = True - (This is technically the second moment, not covariance, since it's - not mean subtracted.) 
- """ - if self._cov_dt1 is None: + def instantiate_cov_variables(self): + super(FullyConnectedMultiKF, self).instantiate_cov_variables() + assert self._cov_dt1 is None + if self._make_cov_dt1: with variable_scope.variable_scope(self._var_scope): self._cov_dt1 = variable_scope.get_variable( "cov_dt1", @@ -1313,15 +1266,25 @@ class FullyConnectedMultiKF(InverseProvidingFactor): trainable=False, dtype=self._dtype) - def register_option1quants(self, damping): + def register_option1quants(self, damping_func): + damping_id = self._register_damping(damping_func) + if damping_id not in self._option1quants_registrations: + self._option1quants_registrations.add(damping_id) - self.register_cov_dt1() + def register_option2quants(self, damping_func): + damping_id = self._register_damping(damping_func) + if damping_id not in self._option2quants_registrations: + self._option2quants_registrations.add(damping_id) - if damping not in self._option1quants_by_damping: + def instantiate_inv_variables(self): + super(FullyConnectedMultiKF, self).instantiate_inv_variables() + + for damping_id in self._option1quants_registrations: + damping_func = self._damping_funcs_by_id[damping_id] + damping_string = graph_func_to_string(damping_func) # It's questionable as to whether we should initialize with stuff like # this at all. Ideally these values should never be used until they are # updated at least once. 
- damping_string = scalar_or_tensor_to_string(damping) with variable_scope.variable_scope(self._var_scope): Lmat = variable_scope.get_variable( # pylint: disable=invalid-name "Lmat_damp{}".format(damping_string), @@ -1336,17 +1299,15 @@ class FullyConnectedMultiKF(InverseProvidingFactor): trainable=False, dtype=self._dtype) - self._option1quants_by_damping[damping] = (Lmat, psi) - - def register_option2quants(self, damping): + assert damping_id not in self._option1quants_by_damping + self._option1quants_by_damping[damping_id] = (Lmat, psi) - self.register_cov_dt1() - - if damping not in self._option2quants_by_damping: + for damping_id in self._option2quants_registrations: + damping_func = self._damping_funcs_by_id[damping_id] + damping_string = graph_func_to_string(damping_func) # It's questionable as to whether we should initialize with stuff like # this at all. Ideally these values should never be used until they are # updated at least once. - damping_string = scalar_or_tensor_to_string(damping) with variable_scope.variable_scope(self._var_scope): Pmat = variable_scope.get_variable( # pylint: disable=invalid-name "Lmat_damp{}".format(damping_string), @@ -1367,14 +1328,15 @@ class FullyConnectedMultiKF(InverseProvidingFactor): trainable=False, dtype=self._dtype) - self._option2quants_by_damping[damping] = (Pmat, Kmat, mu) + assert damping_id not in self._option2quants_by_damping + self._option2quants_by_damping[damping_id] = (Pmat, Kmat, mu) def make_inverse_update_ops(self): """Create and return update ops corresponding to registered computations.""" # TODO(b/69918258): Add correctness tests for this method. # pylint: disable=invalid-name - ops = super(FullyConnectedMultiKF, self).make_inverse_update_ops() + ops = [] if (len(self._option1quants_by_damping) + len(self._option2quants_by_damping)): @@ -1395,8 +1357,10 @@ class FullyConnectedMultiKF(InverseProvidingFactor): # consistently, or are somehow read between or during the cov updates. 
# Can this possibly happen? Is there a way to prevent it? - for damping, (Lmat_var, - psi_var) in self._option1quants_by_damping.items(): + for damping_id, (Lmat_var, + psi_var) in self._option1quants_by_damping.items(): + + damping = self._damping_funcs_by_id[damping_id]() invsqrtC0 = math_ops.matmul( eigen_V * (eigen_e + damping)**(-0.5), eigen_V, transpose_b=True) @@ -1421,8 +1385,10 @@ class FullyConnectedMultiKF(InverseProvidingFactor): ops.append(Lmat_var.assign(Lmat)) ops.append(psi_var.assign(psi)) - for damping, (Pmat_var, Kmat_var, - mu_var) in self._option2quants_by_damping.items(): + for damping_id, (Pmat_var, Kmat_var, + mu_var) in self._option2quants_by_damping.items(): + + damping = self._damping_funcs_by_id[damping_id]() # compute C0^(-1/2) invsqrtC0 = math_ops.matmul( @@ -1463,6 +1429,8 @@ class FullyConnectedMultiKF(InverseProvidingFactor): ops.append(Kmat_var.assign(Kmat)) ops.append(mu_var.assign(mu)) + ops += super(FullyConnectedMultiKF, self).make_inverse_update_ops() return [control_flow_ops.group(*ops)] # pylint: enable=invalid-name + diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index ce9005b9ce..60894ed951 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -130,6 +130,8 @@ class LayerCollection(object): fisher_factors: an OrderedDict mapping tuples to FisherFactor instances. losses: a list of LossFunction objects. The loss to be optimized is their sum. + loss_colocation_ops: ops to colocate loss function evaluations with. These + will typically be the inputs to the losses. 
""" def __init__(self, @@ -148,14 +150,21 @@ class LayerCollection(object): self._default_convolution_2d_approximation = APPROX_KRONECKER_NAME self._default_fully_connected_multi_approximation = ( APPROX_KRONECKER_SERIES_2_NAME) + self.loss_colocation_ops = {} + self._vars_to_uses = defaultdict(lambda: 0) with variable_scope.variable_scope(None, default_name=name) as scope: self._var_scope = scope.name @property def losses(self): - """LossFunctions registered with this LayerCollection.""" - return list(self._loss_dict.values()) + """Tuple of LossFunction objects registered with this LayerCollection.""" + return nest.flatten(self.towers_by_loss) + + @property + def towers_by_loss(self): + """Tuple across losses of LossFunction objects registered to each tower.""" + return tuple(tuple(lst) for lst in self._loss_dict.values()) @property def registered_variables(self): @@ -290,23 +299,74 @@ class LayerCollection(object): self.fisher_blocks[layer_key] = fisher_block return fisher_block - def get_use_count_map(self): - """Returns a dict of variables to their number of registrations.""" - # TODO(b/70283403): Reimplement this in the old way, where each - # registration function would be responsible for incrementing the count. - # Also, this version has a bug: it won't do the right thing for generic - # registration for parameters that are shared. i.e. it won't set the use - # count to infinity. - vars_to_uses = defaultdict(int) - for key, block in six.iteritems(self.fisher_blocks): - n = ( - block.num_inputs()*block.num_registered_minibatches if isinstance( - block, (fb.FullyConnectedSeriesFB, fb.FullyConnectedMultiIndepFB)) - else block.num_registered_minibatches) - key = utils.ensure_sequence(key) - for k in key: - vars_to_uses[k] += n - return vars_to_uses + def register_loss_function(self, + loss, + colocation_op, + base_name, + name=None, + reuse=VARIABLE_SCOPE): + """Registers a LossFunction object. + + Args: + loss: The LossFunction object. 
+ colocation_op: The op to colocate the loss function's computations with. + base_name: The name to derive a new unique name from if the name argument + is None. + name: (OPTIONAL) str or None. Unique name for this loss function. If None, + a new name is generated. (Default: None) + reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. + If False, create a new FisherBlock. If VARIABLE_SCOPE, use + tf.get_variable_scope().reuse. + + Raises: + ValueError: If reuse == True and name == None. + ValueError: If reuse == True and seed != None. + KeyError: If reuse == True and no existing LossFunction with 'name' found. + KeyError: If reuse == False and existing LossFunction with 'name' found. + """ + + name = name or self._graph.unique_name(base_name) + + if reuse == VARIABLE_SCOPE: + reuse = variable_scope.get_variable_scope().reuse + + if reuse: + if name is None: + raise ValueError( + "If reuse is enabled, loss function's name must be set.") + + loss_list = self._loss_dict.get(name, None) + + if loss_list is None: + raise KeyError( + "Unable to find loss function named {}. Register a new loss " + "function with reuse=False.".format(name)) + else: + if name in self._loss_dict: + raise KeyError( + "Loss function named {} already exists. Set reuse=True to append " + "another minibatch/tower.".format(name)) + + loss_list = [] + self._loss_dict[name] = loss_list + + loss_list.append(loss) + self.loss_colocation_ops[loss] = colocation_op + + def _get_use_count_map(self): + """Returns a dict mapping variables to their number of registrations.""" + return self._vars_to_uses + + def _add_uses(self, params, uses): + """Register additional uses by params in the graph. + + Args: + params: Variable or tuple of Variables. Parameters for a layer. + uses: int or float. Number of additional uses for these parameters.
+ """ + params = params if isinstance(params, (tuple, list)) else (params,) + for var in params: + self._vars_to_uses[var] += uses def check_registration(self, variables): """Checks that all variable uses have been registered properly. @@ -324,7 +384,7 @@ class LayerCollection(object): # Note that overlapping parameters (i.e. those that share variables) will # be caught by layer_collection.LayerParametersDict during registration. - reg_use_map = self.get_use_count_map() + reg_use_map = self._get_use_count_map() error_messages = [] @@ -414,12 +474,27 @@ class LayerCollection(object): inputs_to_losses = nest.flatten(tuple(loss.inputs for loss in self.losses)) self._subgraph = utils.SubGraph(inputs_to_losses) + def eval_losses(self): + """Return evaluated losses (colocated with inputs to losses).""" + evals = [] + for loss in self.losses: + with ops.colocate_with(self.loss_colocation_ops[loss]): + evals.append(loss.evaluate()) + return evals + + def eval_losses_on_samples(self): + """Return losses evaluated on samples (colocated with inputs to losses).""" + evals = [] + for loss in self.losses: + with ops.colocate_with(self.loss_colocation_ops[loss]): + evals.append(loss.evaluate_on_sample()) + return evals + def total_loss(self): - return math_ops.add_n(tuple(loss.evaluate() for loss in self.losses)) + return math_ops.add_n(self.eval_losses()) def total_sampled_loss(self): - return math_ops.add_n( - tuple(loss.evaluate_on_sample() for loss in self.losses)) + return math_ops.add_n(self.eval_losses_on_samples()) def _get_linked_approx(self, params): """If params were linked, return their specified approximation.""" @@ -469,6 +544,8 @@ class LayerCollection(object): params, fb.EmbeddingKFACFB(self, vocab_size), reuse=reuse) block.register_additional_minibatch(inputs, outputs) + self._add_uses(params, 1) + def register_fully_connected(self, params, inputs, @@ -505,9 +582,12 @@ class LayerCollection(object): block_type = _FULLY_CONNECTED_APPROX_TO_BLOCK_TYPES[approx] 
has_bias = isinstance(params, (tuple, list)) - block = self.register_block(params, block_type(self, has_bias), reuse=reuse) + block = self.register_block(params, block_type(self, has_bias=has_bias), + reuse=reuse) block.register_additional_minibatch(inputs, outputs) + self._add_uses(params, 1) + def register_conv2d(self, params, strides, @@ -553,6 +633,8 @@ class LayerCollection(object): params, block_type(self, params, strides, padding), reuse=reuse) block.register_additional_minibatch(inputs, outputs) + self._add_uses(params, 1) + def register_generic(self, params, batch_size, @@ -586,8 +668,10 @@ class LayerCollection(object): block = self.register_block(params, block_type(self, params), reuse=reuse) block.register_additional_minibatch(batch_size) + self._add_uses(params, float("inf")) + def register_fully_connected_multi(self, params, inputs, outputs, - approx=None): + approx=None, reuse=VARIABLE_SCOPE): """Register fully connected layers with shared parameters. This can handle general fully-connected layers with shared parameters, but @@ -604,6 +688,9 @@ class LayerCollection(object): [batch_size, output_size]. Outputs produced by layer. In the case of RNNs, one Tensor per time step. approx: str. One of "kron_indep", "kron_series_1", or "kron_series_2". + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. Raises: ValueError: For improper value to 'approx'. 
@@ -621,11 +708,14 @@ class LayerCollection(object): raise ValueError("Bad value {} for approx.".format(approx)) block_type = _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES[approx] - # For now we don't support multiple minibatches for this type of layer, so - # we set reuse=False - self.register_block(params, - block_type(self, inputs, outputs, has_bias=has_bias), - reuse=False) + block = self.register_block(params, block_type(self, has_bias=has_bias), + reuse=reuse) + block.register_additional_minibatch(inputs, outputs) + self._add_uses(params, len(inputs)) + + # TODO(b/74108452): change the loss registration functions names to refer + # to "loss functions" instead of distributions. Following naming convention + # of the loss function classes themselves. def register_categorical_predictive_distribution(self, logits, @@ -648,50 +738,20 @@ class LayerCollection(object): reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. If VARIABLE_SCOPE, use tf.get_variable_scope().reuse. - - Raises: - ValueError: If reuse == True and name == None. - ValueError: If reuse == True and seed != None. - KeyError: If reuse == True and no existing LossFunction with 'name' found. - KeyError: If reuse == False and existing LossFunction with 'name' found. """ - name = name or self._graph.unique_name( - "register_categorical_predictive_distribution") - - if reuse == VARIABLE_SCOPE: - reuse = variable_scope.get_variable_scope().reuse - - if reuse: - if name is None: - raise ValueError( - "If reuse is enabled, loss function's name must be set.") - if seed is not None: - raise ValueError( - "Seed can only be specified at LossFunction instantiation.") - - loss = self._loss_dict.get(name, None) - - if loss is None: - raise KeyError( - "Unable to find loss function named {}. 
Create a new LossFunction " - "with reuse=False.".format(name)) - - loss.register_additional_minibatch(logits, targets=targets) - else: - if name in self._loss_dict: - raise KeyError( - "Loss function named {} already exists. Set reuse=True to append " - "another minibatch.".format(name)) - loss = lf.CategoricalLogitsNegativeLogProbLoss( - logits, targets=targets, seed=seed) - self._loss_dict[name] = loss + loss = lf.CategoricalLogitsNegativeLogProbLoss(logits, targets=targets, + seed=seed) + self.register_loss_function(loss, logits, + "categorical_predictive_distribution", + name=name, reuse=reuse) def register_normal_predictive_distribution(self, mean, var=0.5, seed=None, targets=None, - name=None): + name=None, + reuse=VARIABLE_SCOPE): """Registers a normal predictive distribution. Args: @@ -708,21 +768,22 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. Unique name for this loss function. If None, a new name is generated. (Default: None) + reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. + If False, create a new FisherBlock. If VARIABLE_SCOPE, use + tf.get_variable_scope().reuse. """ - name = name or self._graph.unique_name( - "register_normal_predictive_distribution") - if name in self._loss_dict: - raise NotImplementedError( - "Adding logits to an existing LossFunction not yet supported.") - loss = lf.NormalMeanNegativeLogProbLoss( - mean, var, targets=targets, seed=seed) - self._loss_dict[name] = loss + loss = lf.NormalMeanNegativeLogProbLoss(mean, var, targets=targets, + seed=seed) + self.register_loss_function(loss, mean, + "normal_predictive_distribution", + name=name, reuse=reuse) def register_multi_bernoulli_predictive_distribution(self, logits, seed=None, targets=None, - name=None): + name=None, + reuse=VARIABLE_SCOPE): """Registers a multi-Bernoulli predictive distribution. Args: @@ -735,15 +796,15 @@ class LayerCollection(object): (Default: None) name: (OPTIONAL) str or None. 
Unique name for this loss function. If None, a new name is generated. (Default: None) + reuse: (OPTIONAL) bool or str. If True, reuse an existing FisherBlock. + If False, create a new FisherBlock. If VARIABLE_SCOPE, use + tf.get_variable_scope().reuse. """ - name = name or self._graph.unique_name( - "register_multi_bernoulli_predictive_distribution") - if name in self._loss_dict: - raise NotImplementedError( - "Adding logits to an existing LossFunction not yet supported.") - loss = lf.MultiBernoulliNegativeLogProbLoss( - logits, targets=targets, seed=seed) - self._loss_dict[name] = loss + loss = lf.MultiBernoulliNegativeLogProbLoss(logits, targets=targets, + seed=seed) + self.register_loss_function(loss, logits, + "multi_bernoulli_predictive_distribution", + name=name, reuse=reuse) def make_or_get_factor(self, cls, args): """Insert 'cls(args)' into 'self.fisher_factors' if not already present. diff --git a/tensorflow/contrib/kfac/python/ops/loss_functions.py b/tensorflow/contrib/kfac/python/ops/loss_functions.py index cb3e698b9c..e7d4243fc3 100644 --- a/tensorflow/contrib/kfac/python/ops/loss_functions.py +++ b/tensorflow/contrib/kfac/python/ops/loss_functions.py @@ -57,30 +57,6 @@ class LossFunction(object): """The inputs to the loss function (excluding the targets).""" pass - @property - def input_minibatches(self): - """A `list` of inputs to the loss function, separated by minibatch. - - Typically there will be one minibatch per tower in a multi-tower setup. - Returns a list consisting of `self.inputs` by default; `LossFunction`s - supporting registering multiple minibatches should override this method. - - Returns: - A `list` of `Tensor`s representing - """ - return [self.inputs] - - @property - def num_registered_minibatches(self): - """Number of minibatches registered for this LossFunction. - - Typically equal to the number of towers in a multi-tower setup. - - Returns: - An `int` representing the number of registered minibatches. 
- """ - return len(self.input_minibatches) - def evaluate(self): """Evaluate the loss function on the targets.""" if self.targets is not None: @@ -474,7 +450,6 @@ class NormalMeanVarianceNegativeLogProbLoss(DistributionNegativeLogProbLoss): assert len(variance.shape) == 2, "Expect 2D variance tensor." self._mean = mean self._variance = variance - self._scale = math_ops.sqrt(variance) self._targets = targets super(NormalMeanVarianceNegativeLogProbLoss, self).__init__(seed=seed) @@ -484,7 +459,7 @@ class NormalMeanVarianceNegativeLogProbLoss(DistributionNegativeLogProbLoss): @property def dist(self): - return normal.Normal(loc=self._mean, scale=self._scale) + return normal.Normal(loc=self._mean, scale=math_ops.sqrt(self._variance)) @property def params(self): @@ -502,7 +477,7 @@ class NormalMeanVarianceNegativeLogProbLoss(DistributionNegativeLogProbLoss): @property def _fisher_mean_factor(self): - return 1. / self._scale + return 1. / math_ops.sqrt(self._variance) @property def _fisher_var(self): @@ -611,36 +586,13 @@ class CategoricalLogitsNegativeLogProbLoss(DistributionNegativeLogProbLoss, index in [0, output_size). seed: int or None. Default random seed when sampling. """ - self._logits_components = [] - self._targets_components = [] - self.register_additional_minibatch(logits, targets=targets) + self._logits = logits + self._targets = targets super(CategoricalLogitsNegativeLogProbLoss, self).__init__(seed=seed) - def register_additional_minibatch(self, logits, targets=None): - """Register an additiona minibatch's worth of parameters. - - Args: - logits: Tensor of shape [batch_size, output_size]. Parameters for - underlying distribution. - targets: None or Tensor of shape [batch_size, output_size]. Each row must - be a one-hot vector. 
- """ - self._logits_components.append(logits) - self._targets_components.append(targets) - - @property - def _logits(self): - return array_ops.concat(self._logits_components, axis=0) - - @property - def input_minibatches(self): - return self._logits_components - @property def targets(self): - if all(target is None for target in self._targets_components): - return None - return array_ops.concat(self._targets_components, axis=0) + return self._targets @property def dist(self): diff --git a/tensorflow/contrib/kfac/python/ops/optimizer.py b/tensorflow/contrib/kfac/python/ops/optimizer.py index 5d456bcb79..dee55cfa39 100644 --- a/tensorflow/contrib/kfac/python/ops/optimizer.py +++ b/tensorflow/contrib/kfac/python/ops/optimizer.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import warnings + # pylint disable=long-line from tensorflow.contrib.kfac.python.ops import curvature_matrix_vector_products as cmvp from tensorflow.contrib.kfac.python.ops import estimator as est @@ -50,6 +52,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): name="KFAC", estimation_mode="gradients", colocate_gradients_with_ops=True, + batch_size=None, cov_devices=None, inv_devices=None): """Initializes the KFAC optimizer with the given settings. @@ -91,12 +94,16 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): colocate_gradients_with_ops: Whether we should request gradients we compute in the estimator be colocated with their respective ops. (Default: True) + batch_size: The size of the mini-batch. Only needed when momentum_type + == 'qmodel' or when automatic adjustment is used. (Default: None) cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance computations will be placed on these devices in a round-robin fashion. - Can be None, which means that no devices are specified. + Can be None, which means that no devices are specified. 
Only used + with (soon-to-be-depcrecated "convenience" properties). inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion computations will be placed on these devices in a round-robin fashion. - Can be None, which means that no devices are specified. + Can be None, which means that no devices are specified. Only used + with (soon-to-be-depcrecated "convenience" properties). Raises: ValueError: If the momentum type is unsupported. @@ -110,6 +117,15 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): if variables is None: variables = tf_variables.trainable_variables() + # Parameters to be passed to the Fisher estimator: + self._variables = variables + self._cov_ema_decay = cov_ema_decay + self._layers = layer_collection + self._estimation_mode = estimation_mode + self._colocate_gradients_with_ops = colocate_gradients_with_ops + self._cov_devices = cov_devices + self._inv_devices = inv_devices + # The below paramaters are required only if damping needs to be adapated. # These parameters can be set by calling # set_damping_adaptation_params() explicitly. 
@@ -130,17 +146,6 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): self._q_model_change = None self._update_damping_op = None - self._layers = layer_collection - self._fisher_est = est.FisherEstimator( - lambda: self.damping, - variables, - cov_ema_decay, - layer_collection, - estimation_mode=estimation_mode, - colocate_gradients_with_ops=colocate_gradients_with_ops, - cov_devices=cov_devices, - inv_devices=inv_devices) - momentum_type = momentum_type.lower() legal_momentum_types = ["regular", "adam", "qmodel"] @@ -154,14 +159,21 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): raise ValueError("Momentum must be unspecified if using a momentum_type " "other than 'regular' or 'adam'.") + # Extra parameters of the optimizer self._momentum = momentum self._momentum_type = momentum_type self._norm_constraint = norm_constraint - - # this is a bit of a hack - # TODO(duckworthd): Handle this in a better way (e.g. pass it in?) - self._batch_size = array_ops.shape(layer_collection.losses[0].inputs)[0] - self._losses = layer_collection.losses + self._batch_size = batch_size + + with variable_scope.variable_scope(name): + self._fisher_est = est.FisherEstimator( + self._variables, + self._cov_ema_decay, + self.damping, + self._layers, + exps=(-1,), + estimation_mode=self._estimation_mode, + colocate_gradients_with_ops=self._colocate_gradients_with_ops) super(KfacOptimizer, self).__init__(learning_rate, name=name) @@ -178,6 +190,10 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): style rule described in Section 6.5 of "Optimizing Neural Networks with Kronecker-factored Approximate Curvature". + Note that this function creates Tensorflow variables which store a few + scalars and are accessed by the ops which update the damping (as part + of the training op returned by the minimize() method). + Args: is_chief: `Boolean`, `True` if the worker is chief. 
prev_train_batch: Training data used to minimize loss in the previous @@ -199,6 +215,7 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): """ if self._adapt_damping: raise ValueError("Damping adaptation parameters already set.") + with variable_scope.variable_scope(self.get_name()): self._adapt_damping = True self._is_chief = is_chief @@ -221,31 +238,37 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): @property def cov_update_thunks(self): - return self._fisher_est.cov_update_thunks + self._maybe_make_and_save_everything() + return self._cov_update_thunks @property def cov_update_ops(self): - return self._fisher_est.cov_update_ops + self._maybe_make_and_save_everything() + return self._cov_update_ops @property def cov_update_op(self): - return self._fisher_est.cov_update_op + self._maybe_make_and_save_everything() + return self._cov_update_op @property def inv_update_thunks(self): - return self._fisher_est.inv_update_thunks + self._maybe_make_and_save_everything() + return self._inv_update_thunks @property def inv_update_ops(self): - return self._fisher_est.inv_update_ops + self._maybe_make_and_save_everything() + return self._inv_update_ops @property def inv_update_op(self): - return self._fisher_est.inv_update_op + self._maybe_make_and_save_everything() + return self._inv_update_op @property def variables(self): - return self._fisher_est.variables + return self._variables @property def damping(self): @@ -258,25 +281,162 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): def damping_adaptation_interval(self): return self._damping_adaptation_interval + def _maybe_make_and_save_everything(self): + if not self._fisher_est.made_vars(): + warnings.warn("These convenience properties will be depcrecated soon. " + "Please use explicit op/thunk creation methods instead " + "(e.g. 
make_ops_and_vars_round_robin, etc).", + DeprecationWarning) + (self._cov_update_ops, self._cov_update_op, self._inv_update_ops, + self._inv_update_op, self._cov_update_thunks, + self._inv_update_thunks) = self.make_ops_and_vars_round_robin( + cov_devices=self._cov_devices, + inv_devices=self._inv_devices) + + def make_ops_and_vars(self): + """Make ops and vars with no specific device placement. + + See make_ops_and_vars_round_robin for details. + + Returns: + cov_update_ops: List of ops that compute the cov updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + cov_update_op: cov_update_ops grouped into a single op. + inv_update_ops: List of ops that compute the inv updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + cov_update_op: cov_update_ops grouped into a single op. + inv_update_op: inv_update_ops grouped into a single op. + """ + with variable_scope.variable_scope(self.get_name()): + return self._fisher_est.make_ops_and_vars() + + def make_ops_and_vars_round_robin(self, cov_devices=None, inv_devices=None): + """Make ops and vars with a round-robin device placement strategy. + + For each factor, all of that factor's cov variables and their associated + update ops will be placed on a particular device. A new device is chosen + for each factor by cycling through list of devices in the cov_devices + argument. If cov_devices is None then no explicit device placement occurs. + + An analogous strategy is followed for inverse update ops, with the list of + devices being given by the inv_devices argument. + + Inverse variables on the other hand are not placed on any specific device + (they will just use the current the device placement context, whatever + that happens to be). The idea is that the inverse variable belong where + they will be accessed most often, which is the device that actually applies + the preconditioner to the gradient. 
The user will be responsible for setting + the device context for this. + + Args: + cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + + Returns: + cov_update_ops: List of ops that compute the cov updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + cov_update_op: cov_update_ops grouped into a single op. + inv_update_ops: List of ops that compute the inv updates. Corresponds + one-to-one with the list of factors given by the "factors" property. + cov_update_op: cov_update_ops grouped into a single op. + inv_update_op: inv_update_ops grouped into a single op. + cov_update_thunks: Thunks that make the ops in cov_update_ops. + inv_update_thunks: Thunks that make the ops in inv_update_ops. + """ + with variable_scope.variable_scope(self.get_name()): + return self._fisher_est.make_ops_and_vars_round_robin( + cov_devices=cov_devices, inv_devices=inv_devices) + + def make_vars_and_create_op_thunks_round_robin(self, + cov_devices=None, + inv_devices=None): + """Make vars and create op thunks w/ a round-robin device placement strat. + + For each factor, all of that factor's cov variables and their associated + update ops will be placed on a particular device. A new device is chosen + for each factor by cycling through list of devices in the cov_devices + argument. If cov_devices is None then no explicit device placement occurs. + + An analogous strategy is followed for inverse update ops, with the list of + devices being given by the inv_devices argument. 
+ + Inverse variables on the other hand are not placed on any specific device + (they will just use the current the device placement context, whatever + that happens to be). The idea is that the inverse variable belong where + they will be accessed most often, which is the device that actually applies + the preconditioner to the gradient. The user will be responsible for setting + the device context for this. + + Args: + cov_devices: Iterable of device strings (e.g. '/gpu:0'). Covariance + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + inv_devices: Iterable of device strings (e.g. '/gpu:0'). Inversion + computations will be placed on these devices in a round-robin fashion. + Can be None, which means that no devices are specified. + Returns: + cov_update_thunks: List of cov update thunks. Corresponds one-to-one with + the list of factors given by the "factors" property. + inv_update_thunks: List of inv update thunks. Corresponds one-to-one with + the list of factors given by the "factors" property. + """ + scope = self.get_name() + "/" + self._fisher_est.name + return self._fisher_est.make_vars_and_create_op_thunks_round_robin( + scope=scope, cov_devices=cov_devices, inv_devices=inv_devices) + + def ops_and_vars_thunks(self): + """Create thunks that make the ops and vars on demand. + + This function returns 4 lists of thunks: cov_variable_thunks, + cov_update_thunks, inv_variable_thunks, and inv_update_thunks. + + The length of each list is the number of factors and the i-th element of + each list corresponds to the i-th factor (given by the "factors" property). + + Note that the execution of these thunks must happen in a certain + partial order. The i-th element of cov_variable_thunks must execute + before the i-th element of cov_update_thunks (and also the i-th element + of inv_update_thunks). 
Similarly, the i-th element of inv_variable_thunks + must execute before the i-th element of inv_update_thunks. + + TL;DR (oversimplified): Execute the thunks according to the order that + they are returned. + + Returns: + cov_variable_thunks: A list of thunks that make the cov variables. + cov_update_thunks: A list of thunks that make the cov update ops. + inv_variable_thunks: A list of thunks that make the inv variables. + inv_update_thunks: A list of thunks that make the inv update ops. + """ + scope = self.get_name() + "/" + self._fisher_est.name + return self._fisher_est.ops_and_vars_thunks(scope=scope) + def minimize(self, *args, **kwargs): - kwargs["var_list"] = kwargs.get("var_list") or self.variables - if set(kwargs["var_list"]) != set(self.variables): - raise ValueError("var_list doesn't match with set of Fisher-estimating " - "variables.") - if self._adapt_damping and self._is_chief: - global_step = kwargs.get("global_step", None) - if not global_step: - raise KeyError("global_step needs to be passed to optimizer.minimize " - "if damping parameter is adapted.") - update_damping_op = self._update_damping(self._prev_train_batch, - global_step) - with ops.control_dependencies([update_damping_op]): - loss = args[0] - loss_assign_op = state_ops.assign(self._prev_loss, loss) - train_op = super(KfacOptimizer, self).minimize(*args, **kwargs) - return control_flow_ops.group(loss_assign_op, train_op) - else: - return super(KfacOptimizer, self).minimize(*args, **kwargs) + # Should this variable scope encompass everything below? Or will the super- + # class make another copy of the same name scope? 
+ with variable_scope.variable_scope(self.get_name()): + kwargs["var_list"] = kwargs.get("var_list") or self.variables + if set(kwargs["var_list"]) != set(self.variables): + raise ValueError("var_list doesn't match with set of Fisher-estimating " + "variables.") + if self._adapt_damping and self._is_chief: + global_step = kwargs.get("global_step", None) + if not global_step: + raise KeyError("global_step needs to be passed to optimizer.minimize " + "if damping parameter is adapted.") + update_damping_op = self._update_damping(self._prev_train_batch, + global_step) + with ops.control_dependencies([update_damping_op]): + loss = args[0] + loss_assign_op = state_ops.assign(self._prev_loss, loss) + train_op = super(KfacOptimizer, self).minimize(*args, **kwargs) + return control_flow_ops.group(loss_assign_op, train_op) + else: + return super(KfacOptimizer, self).minimize(*args, **kwargs) def compute_gradients(self, *args, **kwargs): # args[1] could be our var_list @@ -301,6 +461,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): Returns: An `Operation` that applies the specified gradients. """ + self._maybe_make_and_save_everything() + # In Python 3, grads_and_vars can be a zip() object which can only be # iterated over once. By converting it to a list, we ensure that it can be # iterated over more than once. @@ -450,7 +612,8 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer): = qmodel(alpha*precon_grad + mu*prev_update) - L(theta). 
""" - cmvpc = cmvp.CurvatureMatrixVectorProductComputer(self._losses, variables) + cmvpc = cmvp.CurvatureMatrixVectorProductComputer(self._layers.losses, + variables) # compute the matrix-vector products with the transposed Fisher factor fft_precon_grads = cmvpc.multiply_fisher_factor_transpose(precon_grads) diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index 88e6fb20e8..5ce5338a9f 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -24,6 +24,7 @@ from tensorflow.contrib.tpu.python.ops import tpu_ops from tensorflow.contrib.tpu.python.tpu import tpu_function from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gradients_impl @@ -482,5 +483,76 @@ def matmul_diag_sparse(A_diag, B, name=None): # pylint: disable=invalid-name a = array_ops.reshape(a, list(a.shape) + [1] * (B.values.shape.ndims - 1)) return ops.IndexedSlices(a * B.values, B.indices, dense_shape=B.dense_shape) + +class PartitionedTensor(object): + """A Tensor partitioned across its 0-th dimension.""" + + def __init__(self, tensors): + """Initializes PartitionedTensor. + + Args: + tensors: List of Tensors. All Tensors must agree on shape (excepting + batch dimension) and dtype. + + Raises: + ValueError: If 'tensors' has length zero. + ValueError: if contents of 'tensors' don't agree on shape or dtype. + """ + if not tensors: + raise ValueError("tensors must be a list of 1+ Tensors.") + + dtype = tensors[0].dtype + if not all(tensor.dtype == dtype for tensor in tensors): + raise ValueError("all tensors must have dtype = %s." 
% dtype) + + shape = tensors[0].shape[1:] + if not all(tensor.shape[1:] == shape for tensor in tensors): + raise ValueError("All tensors must have shape = %s (excluding batch " + "dimension)." % shape) + + self.tensors = tensors + self._concats = {} # {device: Tensor} + + @property + def shape(self): + feature_shape = self.tensors[0].shape[1:] + batch_size = sum([tensor.shape[0] for tensor in self.tensors], + tensor_shape.Dimension(0)) + return tensor_shape.TensorShape([batch_size]).concatenate(feature_shape) + + def get_shape(self): + return self.shape + + @property + def dtype(self): + return self.tensors[0].dtype + + def devices(self): + return set(tensor.device for tensor in self.tensors) + + def __str__(self): + return "PartitionedTensor([%s, ...], dtype=%s, shape=%s)" % ( + self.tensors[0].name, self.dtype.name, tuple(self.shape.as_list())) + + def __hash__(self): + return hash(tuple(self.tensors)) + + def as_tensor(self, dtype=None, name=None, as_ref=False): + with ops.name_scope(name, "PartitionedTensor.as_tensor", self.tensors): + assert not as_ref + assert dtype in [None, self.dtype] + result = array_ops.concat(self.tensors, axis=0) + + # Cache 'result' if we haven't already cached a value for this device. + if result.device not in self._concats: + self._concats[result.device] = result + return self._concats[result.device] + + +ops.register_tensor_conversion_function( + PartitionedTensor, + lambda val, dtype, name, as_ref: val.as_tensor(dtype, name, as_ref)) + + # TODO(b/69623235): Add a function for finding tensors that share gradients # to eliminate redundant fisher factor computations. -- GitLab From 51fd9d70b8ef3c11b89e5009357cfbe3abb72473 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 05:12:20 -0800 Subject: [PATCH 596/884] Extract the iterated expression of a for loop into a variable to avoid repeated staging. 
PiperOrigin-RevId: 188316160 --- .../py2tf/converters/builtin_functions.py | 2 +- .../contrib/py2tf/converters/for_loops.py | 30 +++++++++++-------- .../py2tf/converters/for_loops_test.py | 23 ++++++++++++++ tensorflow/contrib/py2tf/utils/__init__.py | 2 ++ 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/tensorflow/contrib/py2tf/converters/builtin_functions.py b/tensorflow/contrib/py2tf/converters/builtin_functions.py index b5aa9756da..f1129ef153 100644 --- a/tensorflow/contrib/py2tf/converters/builtin_functions.py +++ b/tensorflow/contrib/py2tf/converters/builtin_functions.py @@ -51,7 +51,7 @@ class BuiltinFunctionTransformer(transformer.Base): def visit_Call(self, node): self.generic_visit(node) # TODO(mdan): This won't work if the function was hidden. - if isinstance(node.func, gast.Name) and node.func.id in ('len',): + if isinstance(node.func, gast.Name) and node.func.id in ('len', 'range'): return self._convert_builtin(node) # Print needs to be handled separately because it can be read as statement. if isinstance(node.func, gast.Name) and node.func.id == 'print': diff --git a/tensorflow/contrib/py2tf/converters/for_loops.py b/tensorflow/contrib/py2tf/converters/for_loops.py index 935dade0ed..4297c1cf2a 100644 --- a/tensorflow/contrib/py2tf/converters/for_loops.py +++ b/tensorflow/contrib/py2tf/converters/for_loops.py @@ -37,14 +37,18 @@ class ForLoopCanonicalizationTransformer(transformer.Base): def visit_For(self, node): self.generic_visit(node) body_scope = anno.getanno(node, NodeAnno.BODY_SCOPE) - + i_var = self.context.namer.new_symbol('i', body_scope.referenced) + n_var = self.context.namer.new_symbol('n', body_scope.referenced) + iterated_var = self.context.namer.new_symbol('iterated', + body_scope.referenced) + # TODO(mdan): Use TensorListFromTensor(loop_iter) here. 
if anno.hasanno(node, 'extra_cond'): template = """ i = 0 - n = len(loop_iter) + iterated = loop_iter + n = len(iterated) while i < n and extra_cond: - # TODO(mdan): Use TensorListFromTensor(loop_iter) here. - target = loop_iter[i] + target = iterated[i] body i += 1 """ @@ -53,17 +57,18 @@ class ForLoopCanonicalizationTransformer(transformer.Base): loop_iter=node.iter, target=node.target, body=node.body, - i=self.context.namer.new_symbol('i', body_scope.referenced), - n=self.context.namer.new_symbol('n', body_scope.referenced), + i=i_var, + n=n_var, + iterated=iterated_var, extra_cond=anno.getanno(node, 'extra_cond')) else: template = """ i = 0 - n = len(loop_iter) + iterated = loop_iter + n = len(iterated) while i < n: - # TODO(mdan): Use TensorListFromTensor(loop_iter) here. - target = loop_iter[i] - body # pylint:disable=pointless-statement + target = iterated[i] + body i += 1 """ repl = templates.replace( @@ -71,8 +76,9 @@ class ForLoopCanonicalizationTransformer(transformer.Base): loop_iter=node.iter, target=node.target, body=node.body, - i=self.context.namer.new_symbol('i', body_scope.referenced), - n=self.context.namer.new_symbol('n', body_scope.referenced)) + i=i_var, + n=n_var, + iterated=iterated_var) return repl def visit_Continue(self, node): diff --git a/tensorflow/contrib/py2tf/converters/for_loops_test.py b/tensorflow/contrib/py2tf/converters/for_loops_test.py index 70a367d3b5..b6e3e8c8d8 100644 --- a/tensorflow/contrib/py2tf/converters/for_loops_test.py +++ b/tensorflow/contrib/py2tf/converters/for_loops_test.py @@ -42,6 +42,29 @@ class ControlFlowTest(converter_test_base.TestCase): l = [] self.assertEqual(test_fn(l), result.test_fn(l)) + def test_for_with_iterated_expression(self): + + eval_count = [0] + + def count_evals(x): + eval_count[0] += 1 + return x + + def test_fn(n): + s = 0 + for e in count_evals(range(n)): + s += e + return s + + node = self.parse_and_analyze(test_fn, {'count_evals': count_evals}) + node = for_loops.transform(node, 
self.ctx) + + with self.compiled(node) as result: + result.count_evals = count_evals + self.assertEqual(test_fn(5), result.test_fn(5)) + # count_evals ran twice, once for test_fn and another for result.test_fn + self.assertEqual(eval_count[0], 2) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/py2tf/utils/__init__.py b/tensorflow/contrib/py2tf/utils/__init__.py index 997c815887..4fc0121efb 100644 --- a/tensorflow/contrib/py2tf/utils/__init__.py +++ b/tensorflow/contrib/py2tf/utils/__init__.py @@ -20,11 +20,13 @@ from __future__ import print_function from tensorflow.contrib.py2tf.utils.builtins import dynamic_builtin from tensorflow.contrib.py2tf.utils.builtins import dynamic_print +from tensorflow.contrib.py2tf.utils.builtins import dynamic_range from tensorflow.contrib.py2tf.utils.context_managers import control_dependency_on_returns from tensorflow.contrib.py2tf.utils.misc import alias_tensors from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_cond from tensorflow.contrib.py2tf.utils.multiple_dispatch import run_while from tensorflow.contrib.py2tf.utils.py_func import wrap_py_func +from tensorflow.contrib.py2tf.utils.tensor_list import dynamic_list_append from tensorflow.contrib.py2tf.utils.testing import fake_tf from tensorflow.contrib.py2tf.utils.type_check import is_tensor from tensorflow.contrib.py2tf.utils.type_hints import set_element_type -- GitLab From 6d44c84bb26cdc3a477688a631ef6613d70a32cf Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 06:37:55 -0800 Subject: [PATCH 597/884] update docker containers used for remote CPU/GPU builds and the corresponding script. 
PiperOrigin-RevId: 188324090 --- tensorflow/tools/ci_build/Dockerfile.rbe.cpu | 14 +++++ ...docker_build.sh => ci_rbe_docker_build.sh} | 60 +++---------------- .../install/install_pip_packages_remote.sh | 29 +++++++++ .../tools/ci_build/remote/Dockerfile.cpu | 27 --------- .../tools/ci_build/remote/Dockerfile.gpu | 27 --------- third_party/toolchains/gpus/cuda/BUILD | 2 +- 6 files changed, 53 insertions(+), 106 deletions(-) create mode 100644 tensorflow/tools/ci_build/Dockerfile.rbe.cpu rename tensorflow/tools/ci_build/{remote/remote_docker_build.sh => ci_rbe_docker_build.sh} (58%) create mode 100755 tensorflow/tools/ci_build/install/install_pip_packages_remote.sh delete mode 100644 tensorflow/tools/ci_build/remote/Dockerfile.cpu delete mode 100644 tensorflow/tools/ci_build/remote/Dockerfile.gpu diff --git a/tensorflow/tools/ci_build/Dockerfile.rbe.cpu b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu new file mode 100644 index 0000000000..6f0798b1af --- /dev/null +++ b/tensorflow/tools/ci_build/Dockerfile.rbe.cpu @@ -0,0 +1,14 @@ +FROM launcher.gcr.io/google/rbe-debian8:r322167 +LABEL maintainer="Yu Yi " + +# Copy install scripts +COPY install/*.sh /install/ + +# Setup envvars +ENV CC /usr/local/bin/clang +ENV CXX /usr/local/bin/clang++ +ENV AR /usr/bin/ar + +# Run pip install script for RBE Debian8 container. 
+RUN /install/install_pip_packages_remote.sh +RUN /install/install_pip_packages.sh diff --git a/tensorflow/tools/ci_build/remote/remote_docker_build.sh b/tensorflow/tools/ci_build/ci_rbe_docker_build.sh similarity index 58% rename from tensorflow/tools/ci_build/remote/remote_docker_build.sh rename to tensorflow/tools/ci_build/ci_rbe_docker_build.sh index e00a66aaba..cd811de6bd 100755 --- a/tensorflow/tools/ci_build/remote/remote_docker_build.sh +++ b/tensorflow/tools/ci_build/ci_rbe_docker_build.sh @@ -16,25 +16,19 @@ # Build TensorFlow Docker images for remote build # # Usage: -# remote_docker_build.sh -c # docker image for cpu build -# remote_docker_build.sh -g # docker image for gpu build - +# ci_rbe_docker_build.sh -c # docker image for cpu build +# ci_rbe_docker_build.sh -g # docker image for gpu build function main { - publish=true cpu_build=false gpu_build=false - publish=true + publish=false script_dir=$(dirname "$(readlink -f "$0")") cd $script_dir - trap cleanup_on_finish EXIT - set_script_flags $@ - build_base_image - build_tf_image if [ "$publish" = true ] ; then @@ -50,17 +44,14 @@ function set_script_flags { c) cpu_build=true ;; - f) - base_image_build_script=$OPTARG - ;; g) gpu_build=true ;; h) print_usage ;; - n) - publish=false + p) + publish=true ;; *) print_usage "ERROR: unknown option" @@ -76,7 +67,6 @@ function print_usage { echo "Usage: $(basename $0) -c | -g [options]" echo " -c build image for CPU build (base image debian8-clang)" echo " -g build image for GPU build (base image nvidia-clang)" - echo " -f the script which build the {debian8,nvidia}-clang base image" echo "[option] is one of" echo " -n not publish the locally-built image to GCR;" echo " the build process will publish image to GCR by default" @@ -87,54 +77,22 @@ function print_usage { exit 1 } - -# Build nvidia-cuba-clang base image for GPU image. 
-# For CPU the `clang-debian8` from Cloud Launcher will be used directly: -# https://console.cloud.google.com/launcher/details/google/clang-debian8?filter=category:developer-tools&q=clang -function build_base_image { - if [ "$gpu_build" = true ] ; then - base_image="nvidia-cuda" - # Run a 2-stage build for clang base image, see - # https://github.com/llvm-mirror/llvm/blob/master/docs/Docker.rst - $base_image_build_script \ - --source $base_image \ - --branch branches/google/stable \ - --docker-repository ${base_image}-clang --docker-tag "latest" \ - -p clang -i stage2-install-clang -i stage2-install-clang-headers \ - -- \ - -DLLVM_TARGETS_TO_BUILD=Native -DCMAKE_BUILD_TYPE=Release \ - -DBOOTSTRAP_CMAKE_BUILD_TYPE=Release \ - -DCLANG_ENABLE_BOOTSTRAP=ON \ - -DCLANG_BOOTSTRAP_TARGETS="install-clang;install-clang-headers" - fi -} - - function build_tf_image { if [ "$cpu_build" = true ] ; then - dockerfile="Dockerfile.cpu" - tf_image="tensorflow-remote" + dockerfile="Dockerfile.rbe.cpu" + tf_image="tensorflow-rbe-cpu" else - dockerfile="Dockerfile.gpu" - tf_image="tensorflow-remote-gpu" + dockerfile="Dockerfile.rbe.gpu" + tf_image="tensorflow-rbe-gpu" fi docker build -f $dockerfile -t $tf_image . } - function publish_tf_image { gcr_tf_image="gcr.io/tensorflow/${tf_image}" docker tag $tf_image $gcr_tf_image gcloud docker -- push $gcr_tf_image } - -function cleanup_on_finish { - cd $script_dir - rm -rf $llvm_docker_src - docker rmi -f ${base_image}-clang ${base_image}-clang-build -} - - main $@ diff --git a/tensorflow/tools/ci_build/install/install_pip_packages_remote.sh b/tensorflow/tools/ci_build/install/install_pip_packages_remote.sh new file mode 100755 index 0000000000..39a6d557d1 --- /dev/null +++ b/tensorflow/tools/ci_build/install/install_pip_packages_remote.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash +# Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== + +set -e + +if [ ! -f /usr/bin/x86_64-linux-gnu-gcc ]; then + ln -s /usr/local/bin/clang /usr/bin/x86_64-linux-gnu-gcc +fi + +pip2 install -U pip +pip3 install -U pip +pip2 install -U setuptools +pip3 install -U setuptools + +# The rest of the pip packages will be installed in +# `install_pip_packages.sh` diff --git a/tensorflow/tools/ci_build/remote/Dockerfile.cpu b/tensorflow/tools/ci_build/remote/Dockerfile.cpu deleted file mode 100644 index 7b01d8320d..0000000000 --- a/tensorflow/tools/ci_build/remote/Dockerfile.cpu +++ /dev/null @@ -1,27 +0,0 @@ -FROM launcher.gcr.io/google/clang-debian8:latest - -RUN apt-get update && apt-get --no-install-recommends install -y \ - binutils \ - binutils-gold \ - curl \ - libstdc++-4.9-dev \ - python \ - python-dev \ - python-numpy \ - python-pip \ - unzip \ - zip && \ - rm -rf /var/lib/apt/lists/* - -RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \ - python get-pip.py && \ - rm get-pip.py - -# Set up grpc -RUN pip install --upgrade enum34 futures mock numpy six backports.weakref portpicker && \ - pip install --pre 'protobuf>=3.0.0a3' && \ - pip install 'grpcio>=1.1.3' - -# TODO: Set up golang which is compatible with clang - -WORKDIR /botexec diff --git a/tensorflow/tools/ci_build/remote/Dockerfile.gpu b/tensorflow/tools/ci_build/remote/Dockerfile.gpu deleted file mode 100644 index 
47ffd44163..0000000000 --- a/tensorflow/tools/ci_build/remote/Dockerfile.gpu +++ /dev/null @@ -1,27 +0,0 @@ -FROM nvidia-cuda-clang:latest - -RUN apt-get update && apt-get --no-install-recommends install -y \ - binutils \ - binutils-gold \ - curl \ - libstdc++-4.9-dev \ - python \ - python-dev \ - python-numpy \ - python-pip \ - unzip \ - zip && \ - rm -rf /var/lib/apt/lists/* - -RUN curl -fSsL -O https://bootstrap.pypa.io/get-pip.py && \ - python get-pip.py && \ - rm get-pip.py - -# Set up grpc -RUN pip install --upgrade \ - enum34 futures astor gast mock numpy six \ - backports.weakref termcolor && \ - pip install --pre 'protobuf>=3.0.0a3' && \ - pip install 'grpcio>=1.1.3' - -WORKDIR /botexec diff --git a/third_party/toolchains/gpus/cuda/BUILD b/third_party/toolchains/gpus/cuda/BUILD index cfc6930851..4cb8380938 100644 --- a/third_party/toolchains/gpus/cuda/BUILD +++ b/third_party/toolchains/gpus/cuda/BUILD @@ -1272,7 +1272,7 @@ genrule( "cuda/lib/libcupti.so.9.0", ], cmd = """ -if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0.176" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0.282" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0.176" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0.176" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0.176" "$(@D)/cuda/lib/libcufft.so.9.0" && cp 
"/usr/lib/x86_64-linux-gnu/libcudnn.so.7.0.5" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0.176" "$(@D)/cuda/lib/libcupti.so.9.0" +if [ -d "$(@D)/extras" ]; then rm $(@D)/extras -drf; fi && if [ -d "$(@D)/include" ]; then rm $(@D)/include -drf; fi && if [ -d "$(@D)/lib" ]; then rm $(@D)/lib -drf; fi && if [ -d "$(@D)/nvvm" ]; then rm $(@D)/nvvm -drf; fi && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs/libcuda.so" "$(@D)/cuda/lib/libcuda.so" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart.so.9.0" "$(@D)/cuda/lib/libcudart.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudart_static.a" "$(@D)/cuda/lib/libcudart_static.a" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcublas.so.9.0" "$(@D)/cuda/lib/libcublas.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcusolver.so.9.0" "$(@D)/cuda/lib/libcusolver.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcurand.so.9.0" "$(@D)/cuda/lib/libcurand.so.9.0" && cp "/usr/local/cuda-9.0/targets/x86_64-linux/lib/libcufft.so.9.0" "$(@D)/cuda/lib/libcufft.so.9.0" && cp "/usr/lib/x86_64-linux-gnu/libcudnn.so.7" "$(@D)/cuda/lib/libcudnn.so.7" && cp "/usr/local/cuda-9.0/extras/CUPTI/lib64/libcupti.so.9.0" "$(@D)/cuda/lib/libcupti.so.9.0" """, ) -- GitLab From ae03359f6109f4d8b0ed22da45dbf9755c00cbbd Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 06:39:52 -0800 Subject: [PATCH 598/884] Enable test CompatibleUseLinearIndexWithReshape. This requires adding a special case to SourceIndexOfBitcast if the bitcast is a reshape. 
PiperOrigin-RevId: 188324197 --- tensorflow/compiler/xla/service/llvm_ir/ir_array.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index f7821adc74..d444c1d49d 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -213,6 +213,12 @@ IrArray::Index IrArray::Index::SourceIndexOfBitcast( const Shape& shape, const Shape& operand_shape, llvm::IRBuilder<>* builder) const { CHECK(LayoutUtil::HasLayout(shape) && LayoutUtil::HasLayout(operand_shape)); + // In case the bitcast is just a reshape, we can use SourceIndexOfReshape() + // instead. This will reuse linear() if possible, so we don't have to build a + // new 'linear_index'. + if (ShapeUtil::ReshapeIsBitcast(operand_shape, shape)) { + return SourceIndexOfReshape(shape, operand_shape, builder); + } // First linearize the index coming from the output of the bitcast. We want // the physical index of the element in the buffer. 
This is like Linearize, -- GitLab From 18ca16d73a0e8de47219820ac3c2dbe784861577 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 8 Mar 2018 07:07:58 -0800 Subject: [PATCH 599/884] Disable interleave_dataset_ops_test PiperOrigin-RevId: 188327338 --- tensorflow/contrib/data/python/kernel_tests/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/data/python/kernel_tests/BUILD b/tensorflow/contrib/data/python/kernel_tests/BUILD index 45a0be0ddd..7eaf88043f 100644 --- a/tensorflow/contrib/data/python/kernel_tests/BUILD +++ b/tensorflow/contrib/data/python/kernel_tests/BUILD @@ -168,10 +168,10 @@ py_test( srcs = ["interleave_dataset_op_test.py"], srcs_version = "PY2AND3", tags = [ - "no_cuda_on_cpu_tap", + "manual", "no_oss", "no_pip", - "nomsan", + "notap", ], deps = [ ":dataset_serialization_test", -- GitLab From 6a619489c60e60f85b1576e720c5b17d56f18c07 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Thu, 8 Mar 2018 08:02:35 -0800 Subject: [PATCH 600/884] Update the eager user guide to use object-based saving (and Model) PiperOrigin-RevId: 188332858 --- .../contrib/eager/python/g3doc/guide.md | 102 ++++++++---------- 1 file changed, 47 insertions(+), 55 deletions(-) diff --git a/tensorflow/contrib/eager/python/g3doc/guide.md b/tensorflow/contrib/eager/python/g3doc/guide.md index ebb05051f2..b73dc17e5f 100644 --- a/tensorflow/contrib/eager/python/g3doc/guide.md +++ b/tensorflow/contrib/eager/python/g3doc/guide.md @@ -574,49 +574,45 @@ repository](https://github.com/tensorflow/models/tree/master/official/mnist/mnis ### Checkpointing trained variables -TensorFlow Variables (`tfe.Variable`) provides a way to represent shared, -persistent state of your model. The `tfe.Saver` class (which is a thin wrapper -over the -[`tf.train.Saver`](https://www.tensorflow.org/api_docs/python/tf/train/Saver) -class) provides a means to save and restore variables to and from _checkpoints_. 
+TensorFlow Variables (`tfe.Variable`) provide a way to represent shared, +persistent state of your model. The `tfe.Checkpoint` class provides a means to +save and restore variables to and from _checkpoints_. For example: ```python # Create variables. -x = tfe.Variable(10., name='x') -y = tfe.Variable(5., name='y') +x = tfe.Variable(10.) +y = tfe.Variable(5.) -# Create a Saver. -saver = tfe.Saver([x, y]) +# Indicate that the variables should be saved as "x" and "y". +checkpoint = tfe.Checkpoint(x=x, y=y) # Assign new values to the variables and save. x.assign(2.) -saver.save('/tmp/ckpt') +checkpoint.save('/tmp/ckpt') # Change the variable after saving. x.assign(11.) assert 16. == (x + y).numpy() # 11 + 5 # Restore the values in the checkpoint. -saver.restore('/tmp/ckpt') +checkpoint.restore('/tmp/ckpt-1') assert 7. == (x + y).numpy() # 2 + 5 ``` -### `tfe.Network` +### `tf.keras.Model` You may often want to organize your models using classes, like the `MNISTModel` -class described above. We recommend inheriting from the `tfe.Network` class as -it provides conveniences like keeping track of all model variables and methods -to save and restore from checkpoints. +class described above. We recommend inheriting from the `tf.keras.Model` class +as it provides conveniences like keeping track of all model variables. -Sub-classes of `tfe.Network` may register `Layer`s (like classes in -[`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers), -or [Keras -layers](https://www.tensorflow.org/api_docs/python/tf/keras/layers)) -using a call to `self.track_layer()` and define the computation in an -implementation of `call()`. +Sub-classes of `tf.keras.Model` may register `Layer`s (like classes in +[`tf.layers`](https://www.tensorflow.org/api_docs/python/tf/layers), or [Keras +layers](https://www.tensorflow.org/api_docs/python/tf/keras/layers)) by +assigning them to attributes (`self.name = layer_object`) and define the +computation in an implementation of `call()`. 
Note that `tf.layers.Layer` objects (like `tf.layers.Dense`) create variables lazily, when the first input is encountered. @@ -624,12 +620,11 @@ lazily, when the first input is encountered. For example, consider the following two-layer neural network: ```python -class TwoLayerNet(tfe.Network): +class TwoLayerNet(tf.keras.Model): def __init__(self): super(TwoLayerNet, self).__init__() - self.layer1 = self.track_layer( - tf.layers.Dense(2, activation=tf.nn.relu, use_bias=False)) - self.layer2 = self.track_layer(tf.layers.Dense(3, use_bias=False)) + self.layer1 = tf.layers.Dense(2, activation=tf.nn.relu, use_bias=False) + self.layer2 = tf.layers.Dense(3, use_bias=False) def call(self, x): return self.layer2(self.layer1(x)) @@ -653,15 +648,16 @@ assert [1, 2] == net.variables[0].shape.as_list() # weights of layer1. assert [2, 3] == net.variables[1].shape.as_list() # weights of layer2. ``` -The `tfe.Network` class is itself a sub-class of `tf.layers.Layer`. This allows -instances of `tfe.Network` to be embedded in other networks. For example: +The `tf.keras.Model` class is itself a sub-class of `tf.layers.Layer`. This +allows instances of `tf.keras.Model` to be embedded in other models. For +example: ```python -class ThreeLayerNet(tfe.Network): +class ThreeLayerNet(tf.keras.Model): def __init__(self): super(ThreeLayerNet, self).__init__() - self.a = self.track_layer(TwoLayerNet()) - self.b = self.track_layer(tf.layers.Dense(4, use_bias=False)) + self.a = TwoLayerNet() + self.b = tf.layers.Dense(4, use_bias=False) def call(self, x): return self.b(self.a(x)) @@ -678,9 +674,8 @@ assert [3, 4] == net.variables[2].shape.as_list() See more examples in [`tensorflow/contrib/eager/python/examples`](https://www.tensorflow.org/code/tensorflow/contrib/eager/python/examples). -`tfe.Saver` in combination with `tfe.restore_variables_on_create` provides a -convenient way to save and load checkpoints without changing the program once -the checkpoint has been created. 
For example, we can set an objective for the +`tfe.Checkpoint` provides a convenient way to save and load training +checkpoints. Let's define something simple to train. We set an objective for the output of our network, choose an optimizer, and a location for the checkpoint: ```python @@ -691,30 +686,27 @@ checkpoint_prefix = os.path.join(checkpoint_directory, 'ckpt') net = ThreeLayerNet() ``` -Note that variables have not been created yet. We want them to be restored from -a checkpoint, if one exists, so we create them inside a -`tfe.restore_variables_on_create` context manager. Then our training loop is the -same whether starting training or resuming from a previous checkpoint: +We group them in a `tfe.Checkpoint` and request that it be restored. This +ensures that variables created by these objects are restored before their values +are used. Our training loop is the same whether starting training or resuming +from a previous checkpoint: ```python -with tfe.restore_variables_on_create( - tf.train.latest_checkpoint(checkpoint_directory)): - global_step = tf.train.get_or_create_global_step() - for _ in range(100): - loss_fn = lambda: tf.norm(net(inp) - objective) - optimizer.minimize(loss_fn, global_step=global_step) - if tf.equal(global_step % 20, 0): - print("Step %d, output %s" % (global_step.numpy(), - net(inp).numpy())) - all_variables = ( - net.variables - + optimizer.variables() - + [global_step]) - # Save the checkpoint. - tfe.Saver(all_variables).save(checkpoint_prefix, global_step=global_step) -``` - -The first time it runs, `Network` variables are initialized randomly. 
Then the +global_step = tf.train.get_or_create_global_step() +checkpoint = tfe.Checkpoint( + global_step=global_step, optimizer=optimizer, network=net) +checkpoint.restore(tf.train.latest_checkpoint(checkpoint_directory)) +for _ in range(100): + loss_fn = lambda: tf.norm(net(inp) - objective) + optimizer.minimize(loss_fn, global_step=global_step) + if tf.equal(global_step % 20, 0): + print("Step %d, output %s" % (global_step.numpy(), + net(inp).numpy())) + # Save the checkpoint. + checkpoint.save(checkpoint_prefix) +``` + +The first time it runs, `Model` variables are initialized randomly. Then the output is trained to match the objective we've set: ``` -- GitLab From 16a6666c1c1a3f4b288472c4f461b6418bda0170 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 08:23:31 -0800 Subject: [PATCH 601/884] Add init op and target node support to benchmark PiperOrigin-RevId: 188335233 --- tensorflow/tools/benchmark/benchmark_model.cc | 139 +++++++++++------- tensorflow/tools/benchmark/benchmark_model.h | 6 +- .../tools/benchmark/benchmark_model_test.cc | 4 +- 3 files changed, 93 insertions(+), 56 deletions(-) diff --git a/tensorflow/tools/benchmark/benchmark_model.cc b/tensorflow/tools/benchmark/benchmark_model.cc index ecab6f8769..15523028c7 100644 --- a/tensorflow/tools/benchmark/benchmark_model.cc +++ b/tensorflow/tools/benchmark/benchmark_model.cc @@ -48,33 +48,14 @@ limitations under the License. 
namespace tensorflow { namespace benchmark_model { -Status InitializeSession(int num_threads, const string& graph, - std::unique_ptr* session, - std::unique_ptr* graph_def) { - LOG(INFO) << "Loading TensorFlow."; +namespace { - tensorflow::SessionOptions options; - tensorflow::ConfigProto& config = options.config; - if (num_threads > 0) { - config.set_intra_op_parallelism_threads(num_threads); +Status InitializeVariables(Session* session, + const std::vector& init_ops) { + LOG(INFO) << "Initializing graph variables"; + for (const string& init_op : init_ops) { + TF_RETURN_IF_ERROR(session->Run({}, {}, {init_op}, nullptr)); } - LOG(INFO) << "Got config, " << config.device_count_size() << " devices"; - - session->reset(tensorflow::NewSession(options)); - graph_def->reset(new GraphDef()); - tensorflow::GraphDef tensorflow_graph; - Status s = ReadBinaryProto(Env::Default(), graph, graph_def->get()); - if (!s.ok()) { - LOG(ERROR) << "Could not create TensorFlow Graph: " << s; - return s; - } - - s = (*session)->Create(*(graph_def->get())); - if (!s.ok()) { - LOG(ERROR) << "Could not create TensorFlow Session: " << s; - return s; - } - return Status::OK(); } @@ -247,8 +228,56 @@ void RecordBenchmarkEntry(const string& output_prefix, TF_QCHECK_OK(node_reporter.Close()); } +void SleepSeconds(double sleep_seconds) { + if (sleep_seconds <= 0.0) { + return; + } +#ifdef PLATFORM_WINDOWS + Sleep(sleep_seconds * 1000); +#else + // Convert the inference_delay string into a timespec. 
+ timespec req; + req.tv_sec = static_cast(sleep_seconds); + req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000; + nanosleep(&req, nullptr); +#endif +} + +} // namespace + +Status InitializeSession(int num_threads, const string& graph, + std::unique_ptr* session, + std::unique_ptr* graph_def) { + LOG(INFO) << "Loading TensorFlow."; + + tensorflow::SessionOptions options; + tensorflow::ConfigProto& config = options.config; + if (num_threads > 0) { + config.set_intra_op_parallelism_threads(num_threads); + } + LOG(INFO) << "Got config, " << config.device_count_size() << " devices"; + + session->reset(tensorflow::NewSession(options)); + graph_def->reset(new GraphDef()); + tensorflow::GraphDef tensorflow_graph; + Status s = ReadBinaryProto(Env::Default(), graph, graph_def->get()); + if (!s.ok()) { + LOG(ERROR) << "Could not create TensorFlow Graph: " << s; + return s; + } + + s = (*session)->Create(*(graph_def->get())); + if (!s.ok()) { + LOG(ERROR) << "Could not create TensorFlow Session: " << s; + return s; + } + + return Status::OK(); +} + Status RunBenchmark(const std::vector& inputs, - const std::vector& outputs, Session* session, + const std::vector& outputs, + const std::vector& targets, Session* session, StatSummarizer* stats, int64* inference_time_us) { std::vector > input_tensors; CreateTensorsFromInputInfo(inputs, &input_tensors); @@ -264,8 +293,8 @@ Status RunBenchmark(const std::vector& inputs, RunMetadata run_metadata; const int64 start_time = Env::Default()->NowMicros(); - s = session->Run(run_options, input_tensors, outputs, {}, &output_tensors, - &run_metadata); + s = session->Run(run_options, input_tensors, outputs, targets, + &output_tensors, &run_metadata); const int64 end_time = Env::Default()->NowMicros(); *inference_time_us = end_time - start_time; @@ -283,24 +312,10 @@ Status RunBenchmark(const std::vector& inputs, return s; } -void SleepSeconds(double sleep_seconds) { - if (sleep_seconds <= 0.0) { - return; - } -#ifdef PLATFORM_WINDOWS - 
Sleep(sleep_seconds * 1000); -#else - // Convert the inference_delay string into a timespec. - timespec req; - req.tv_sec = static_cast(sleep_seconds); - req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000; - nanosleep(&req, nullptr); -#endif -} - Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s, const std::vector& inputs, - const std::vector& outputs, Session* session, + const std::vector& outputs, + const std::vector& targets, Session* session, StatSummarizer* stats, int64* total_time_us, int64* actual_num_runs) { *total_time_us = 0; @@ -315,7 +330,8 @@ Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s, const bool until_max_time = num_runs <= 0; for (int i = 0; until_max_time || i < num_runs; ++i) { int64 time; - Status run_status = RunBenchmark(inputs, outputs, session, stats, &time); + Status run_status = + RunBenchmark(inputs, outputs, targets, session, stats, &time); stat.UpdateStat(time); (*total_time_us) += time; ++(*actual_num_runs); @@ -345,11 +361,13 @@ Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s, int Main(int argc, char** argv) { string graph = "/data/local/tmp/tensorflow_inception_graph.pb"; + string init_ops_string = ""; string input_layer_string = "input:0"; string input_layer_shape_string = "1,224,224,3"; string input_layer_type_string = "float"; string input_layer_values_string = ""; string output_layer_string = "output:0"; + string target_layer_string = ""; int max_num_runs = 1000; string max_time = "10.0"; string inference_delay = "-1.0"; @@ -371,12 +389,14 @@ int Main(int argc, char** argv) { std::vector flag_list = { Flag("graph", &graph, "graph file name"), + Flag("init_ops", &init_ops_string, "init ops"), Flag("input_layer", &input_layer_string, "input layer names"), Flag("input_layer_shape", &input_layer_shape_string, "input layer shape"), Flag("input_layer_type", &input_layer_type_string, "input layer type"), Flag("input_layer_values", 
&input_layer_values_string, "values to initialize the inputs with"), Flag("output_layer", &output_layer_string, "output layer name"), + Flag("target_layer", &target_layer_string, "target layer name"), Flag("max_num_runs", &max_num_runs, "number of runs max"), Flag("max_time", &max_time, "length to run max"), Flag("inference_delay", &inference_delay, @@ -410,6 +430,7 @@ int Main(int argc, char** argv) { return -1; } + std::vector init_ops = str_util::Split(init_ops_string, ','); std::vector input_layers = str_util::Split(input_layer_string, ','); std::vector input_layer_shapes = str_util::Split(input_layer_shape_string, ':'); @@ -418,6 +439,7 @@ int Main(int argc, char** argv) { std::vector input_layer_values = str_util::Split(input_layer_values_string, ':'); std::vector output_layers = str_util::Split(output_layer_string, ','); + std::vector target_layers = str_util::Split(target_layer_string, ','); if ((input_layers.size() != input_layer_shapes.size()) || (input_layers.size() != input_layer_types.size())) { LOG(ERROR) << "There must be the same number of items in --input_layer," @@ -441,10 +463,12 @@ int Main(int argc, char** argv) { } LOG(INFO) << "Graph: [" << graph << "]"; + LOG(INFO) << "Init ops:" << init_ops_string; LOG(INFO) << "Input layers: [" << input_layer_string << "]"; LOG(INFO) << "Input shapes: [" << input_layer_shape_string << "]"; LOG(INFO) << "Input types: [" << input_layer_type_string << "]"; LOG(INFO) << "Output layers: [" << output_layer_string << "]"; + LOG(INFO) << "Target layers: [" << target_layer_string << "]"; LOG(INFO) << "Num runs: [" << max_num_runs << "]"; LOG(INFO) << "Inter-inference delay (seconds): [" << inference_delay << "]"; LOG(INFO) << "Inter-benchmark delay (seconds): [" << inter_benchmark_delay @@ -470,6 +494,16 @@ int Main(int argc, char** argv) { return -1; } + if (!init_ops.empty()) { + Status initialize_variables_status = + InitializeVariables(session.get(), init_ops); + if (!initialize_variables_status.ok()) { + 
LOG(ERROR) << "Graph variables initialization failed with " + << initialize_variables_status; + return -1; + } + } + StatSummarizerOptions stats_options; stats_options.show_run_order = show_run_order; stats_options.run_order_limit = run_order_limit; @@ -520,9 +554,10 @@ int Main(int argc, char** argv) { int64 warmup_time_us = 0; int64 num_warmup_runs = 0; if (warmup_runs > 0) { - Status warmup_time_status = TimeMultipleRuns( - inter_inference_sleep_seconds, warmup_runs, -1.0, inputs, output_layers, - session.get(), nullptr, &warmup_time_us, &num_warmup_runs); + Status warmup_time_status = + TimeMultipleRuns(inter_inference_sleep_seconds, warmup_runs, -1.0, + inputs, output_layers, target_layers, session.get(), + nullptr, &warmup_time_us, &num_warmup_runs); if (!warmup_time_status.ok()) { LOG(ERROR) << "Timing failed with " << warmup_time_status; return -1; @@ -536,8 +571,8 @@ int Main(int argc, char** argv) { int64 no_stat_num_runs = 0; Status no_stat_time_status = TimeMultipleRuns( inter_inference_sleep_seconds, max_num_runs, max_benchmark_time_seconds, - inputs, output_layers, session.get(), nullptr, &no_stat_time_us, - &no_stat_num_runs); + inputs, output_layers, target_layers, session.get(), nullptr, + &no_stat_time_us, &no_stat_num_runs); const double no_stat_wall_time = no_stat_time_us / 1000000.0; if (!no_stat_time_status.ok()) { LOG(ERROR) << "Timing failed with " << no_stat_time_status; @@ -551,8 +586,8 @@ int Main(int argc, char** argv) { int64 stat_num_runs = 0; Status stat_time_status = TimeMultipleRuns( inter_inference_sleep_seconds, max_num_runs, max_benchmark_time_seconds, - inputs, output_layers, session.get(), stats.get(), &stat_time_us, - &stat_num_runs); + inputs, output_layers, target_layers, session.get(), stats.get(), + &stat_time_us, &stat_num_runs); if (!stat_time_status.ok()) { LOG(ERROR) << "Timing failed with " << stat_time_status; return -1; diff --git a/tensorflow/tools/benchmark/benchmark_model.h 
b/tensorflow/tools/benchmark/benchmark_model.h index dff62c5b5d..dc5f008037 100644 --- a/tensorflow/tools/benchmark/benchmark_model.h +++ b/tensorflow/tools/benchmark/benchmark_model.h @@ -37,13 +37,15 @@ Status InitializeSession(int num_threads, const string& graph, // Does a single run of the model that's been loaded into the given session. Status RunBenchmark(const std::vector& inputs, - const std::vector& outputs, Session* session, + const std::vector& outputs, + const std::vector& targets, Session* session, StatSummarizer* stats, int64* inference_time_us); // Runs the model multiple time, keeping track of timing information. Status TimeMultipleRuns(double sleep_seconds, int num_runs, double max_time_s, const std::vector& inputs, - const std::vector& outputs, Session* session, + const std::vector& outputs, + const std::vector& targets, Session* session, StatSummarizer* stats, int64* total_time_us, int64* actual_num_runs); diff --git a/tensorflow/tools/benchmark/benchmark_model_test.cc b/tensorflow/tools/benchmark/benchmark_model_test.cc index bb4eb53520..16ab2ff66e 100644 --- a/tensorflow/tools/benchmark/benchmark_model_test.cc +++ b/tensorflow/tools/benchmark/benchmark_model_test.cc @@ -64,8 +64,8 @@ TEST(BenchmarkModelTest, InitializeAndRun) { int64 time; int64 num_runs = 0; TF_ASSERT_OK(benchmark_model::TimeMultipleRuns( - 0.0, 10, 0.0, {input}, {output_name}, session.get(), stats.get(), &time, - &num_runs)); + 0.0, 10, 0.0, {input}, {output_name}, {}, session.get(), stats.get(), + &time, &num_runs)); ASSERT_EQ(num_runs, 10); } -- GitLab From a47cd30d960b128e5ed405cb36e914aa36fe462a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 08:26:52 -0800 Subject: [PATCH 602/884] This creates a new helper, xla_launch_util, that contains the business logic of launching an XLA computation. 
Also changes the resource variable container from a std::vector to a std::map in preparation for backends where the resource variables aren't ordered densely at the end of the argument list. PiperOrigin-RevId: 188335574 --- tensorflow/compiler/jit/BUILD | 23 ++ tensorflow/compiler/jit/kernels/BUILD | 1 + .../compiler/jit/kernels/xla_launch_op.cc | 250 +---------------- .../compiler/jit/kernels/xla_launch_op.h | 8 - .../compiler/jit/xla_compilation_cache.cc | 23 +- .../compiler/jit/xla_compilation_cache.h | 4 +- tensorflow/compiler/jit/xla_launch_util.cc | 255 ++++++++++++++++++ tensorflow/compiler/jit/xla_launch_util.h | 116 ++++++++ 8 files changed, 418 insertions(+), 262 deletions(-) create mode 100644 tensorflow/compiler/jit/xla_launch_util.cc create mode 100644 tensorflow/compiler/jit/xla_launch_util.h diff --git a/tensorflow/compiler/jit/BUILD b/tensorflow/compiler/jit/BUILD index 955d12dc20..c4a2d4ab03 100644 --- a/tensorflow/compiler/jit/BUILD +++ b/tensorflow/compiler/jit/BUILD @@ -135,6 +135,7 @@ cc_library( deps = [ ":common", ":jit_compilation_passes", + ":xla_launch_util", "//tensorflow/compiler/jit/ops:xla_ops", "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:dump_graph", @@ -174,6 +175,28 @@ cc_library( visibility = [":friends"], ) +cc_library( + name = "xla_launch_util", + srcs = ["xla_launch_util.cc"], + hdrs = ["xla_launch_util.h"], + deps = [ + ":common", + ":xla_compilation_cache", + "//tensorflow/compiler/tf2xla:xla_compiler", + "//tensorflow/compiler/xla:status_macros", + "//tensorflow/compiler/xla:statusor", + "//tensorflow/compiler/xla/client:client_library", + "//tensorflow/compiler/xla/client:local_client", + "//tensorflow/core:core_cpu", + "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:framework", + "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/kernels:variable_ops", + ], +) + cc_library( name = "xla_compilation_cache", srcs = 
["xla_compilation_cache.cc"], diff --git a/tensorflow/compiler/jit/kernels/BUILD b/tensorflow/compiler/jit/kernels/BUILD index 9bea566331..616a7f8f15 100644 --- a/tensorflow/compiler/jit/kernels/BUILD +++ b/tensorflow/compiler/jit/kernels/BUILD @@ -14,6 +14,7 @@ cc_library( "//tensorflow/compiler/jit:common", "//tensorflow/compiler/jit:xla_compilation_cache", "//tensorflow/compiler/jit:xla_device", + "//tensorflow/compiler/jit:xla_launch_util", "//tensorflow/compiler/tf2xla:common", "//tensorflow/compiler/tf2xla:xla_compiler", "//tensorflow/compiler/xla:statusor", diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.cc b/tensorflow/compiler/jit/kernels/xla_launch_op.cc index 6353149e4a..cd7f8dd779 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.cc +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.cc @@ -17,6 +17,7 @@ limitations under the License. #include "tensorflow/compiler/jit/defs.h" #include "tensorflow/compiler/jit/xla_device.h" +#include "tensorflow/compiler/jit/xla_launch_util.h" #include "tensorflow/compiler/tf2xla/shape_util.h" #include "tensorflow/compiler/tf2xla/xla_compiler.h" #include "tensorflow/compiler/tf2xla/xla_op_registry.h" @@ -40,111 +41,6 @@ namespace gpu = perftools::gputools; namespace tensorflow { -// Adapter class that wraps a Tensorflow allocator as an XLA allocator. -// Assumes that the Tensorflow allocator permits asynchronous deallocation: -// see comment on `AllowsAsynchronousDeallocation()`. -class XlaAllocator : public xla::DeviceMemoryAllocator { - public: - XlaAllocator(const gpu::Platform* platform, OpKernelContext* op_context); - ~XlaAllocator() override; - xla::StatusOr Allocate(int device_ordinal, uint64 size, - bool retry_on_failure) override; - Status Deallocate(int device_ordinal, gpu::DeviceMemoryBase* mem) override; - - // Register an Tensor (input or resource variable) with the allocator. 
If - // the operation returns an alias to one of its inputs, then the allocator - // needs to be able to handle it. - Status RegisterArgument(const Tensor* t); - - // Makes 'tensor' a wrapper around the data buffer at 'ptr'. The buffer is - // interpreted as having data type 'dtype' and shape 'shape'. - Status MakeTensorFromBuffer(gpu::DeviceMemoryBase buffer, DataType dtype, - const TensorShape& shape, Tensor* tensor) const; - - // The Tensorflow BFC allocator used on GPU allows host-side deallocation - // before GPU execution takes place. Tensorflow uses the ordering of the main - // compute stream to enforce a happens-before relationship between a memory - // allocation and code that reuses the same memory. If Tensorflow adds - // support for multiple GPU streams or allocators with different ordering - // requirements, this code may need to change. - // (This attribute has no effect on CPU.) - bool AllowsAsynchronousDeallocation() const override { return true; } - - private: - OpKernelContext* const op_context_; - - // Map from pointer address to the owning Tensor; used by - // MakeTensorFromBuffer. Also used to automatically release Tensors when the - // allocator is freed. 
- std::unordered_map tensors_; -}; - -XlaAllocator::XlaAllocator(const gpu::Platform* platform, - OpKernelContext* op_context) - : xla::DeviceMemoryAllocator(platform), op_context_(op_context) {} - -XlaAllocator::~XlaAllocator() = default; - -xla::StatusOr XlaAllocator::Allocate( - int device_ordinal, uint64 size, bool retry_on_failure) { - AllocatorAttributes allocator_attrs; - allocator_attrs.set_on_host(false); - - AllocationAttributes allocation_attrs; - allocation_attrs.no_retry_on_failure = !retry_on_failure; - - Tensor t; - Status status = op_context_->allocate_temp( - DT_UINT8, TensorShape({static_cast(size)}), &t, allocator_attrs, - allocation_attrs); - if (!status.ok()) { - VLOG(2) << "Allocation failed " << size; - return status; - } - void* data = - reinterpret_cast(const_cast(t.tensor_data().data())); - tensors_[data] = t; - return gpu::DeviceMemoryBase(data, size); -} - -Status XlaAllocator::RegisterArgument(const Tensor* t) { - void* data = - reinterpret_cast(const_cast(t->tensor_data().data())); - tensors_[data] = *t; - return Status::OK(); -} - -Status XlaAllocator::Deallocate(int device_ordinal, - gpu::DeviceMemoryBase* mem) { - if (mem->opaque() != nullptr) { - if (tensors_.erase(mem->opaque()) == 0) { - return tensorflow::errors::InvalidArgument("Unknown tensor address"); - } - } - return Status::OK(); -} - -Status XlaAllocator::MakeTensorFromBuffer(gpu::DeviceMemoryBase buffer, - DataType dtype, - const TensorShape& shape, - Tensor* out_tensor) const { - void* ptr = const_cast(buffer.opaque()); - auto it = tensors_.find(ptr); - if (it == tensors_.end()) { - return errors::InvalidArgument("Unknown tensor address"); - } - const Tensor& tensor = it->second; - - int64 output_size = DataTypeSize(dtype) * shape.num_elements(); - if (tensor.TotalBytes() == output_size) { - out_tensor->UnsafeCopyFromInternal(tensor, dtype, shape); - } else { - Tensor slice = tensor.Slice(0, output_size); - out_tensor->UnsafeCopyFromInternal(slice, dtype, shape); - } - 
return Status::OK(); -} - XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx) : OpKernel(ctx), device_type_(ctx->device_type()) { const NameAttrList* func; @@ -196,23 +92,6 @@ Status XlaLocalLaunchOp::BuildCompilationCache(OpKernelContext* ctx, return Status::OK(); } -std::vector SnapshotResourceVariables(OpKernelContext* ctx, - int num_variables) { - std::vector snapshot(num_variables); - int first_variable = ctx->num_inputs() - num_variables; - for (int i = 0; i < num_variables; ++i) { - Var* variable = nullptr; - ResourceHandle handle = HandleFromInput(ctx, first_variable + i); - if (LookupResource(ctx, handle, &variable).ok()) { - tf_shared_lock lock(*variable->mu()); - snapshot[i].name = handle.name(); - snapshot[i].present = true; - snapshot[i].value = *variable->tensor(); - } - } - return snapshot; -} - void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { VLOG(1) << "XlaLocalLaunchOp::Compute " << Canonicalize(function_.name(), AttrSlice(&function_.attr())); @@ -244,7 +123,7 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { } } - std::vector variables = + std::map variables = SnapshotResourceVariables(ctx, num_resource_args_); xla::LocalClient* client = static_cast(cache->client()); @@ -269,43 +148,9 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { VLOG(1) << "Executing XLA Computation..."; - std::unique_ptr output; - // Build xla::ShapedBuffers that point directly to the Tensor buffers. - std::vector> arg_buffers; - arg_buffers.reserve(kernel->xla_input_shapes.size() + 1); - arg_buffers.resize(kernel->xla_input_shapes.size()); - std::vector arg_ptrs(arg_buffers.size()); - - const int first_variable_arg = ctx->num_inputs() - num_resource_args_; - // Pass remaining parameters. 
- const Tensor* t; - for (int i = 0; i < kernel->xla_input_shapes.size(); ++i) { - int arg_num = kernel->input_mapping[i]; - const xla::Shape& shape = kernel->xla_input_shapes[i]; - if (arg_num >= first_variable_arg) { - t = &(variables[arg_num - first_variable_arg].value); - } else { - t = &(ctx->input(arg_num)); - } - - gpu::DeviceMemoryBase dmem = gpu::DeviceMemoryBase( - const_cast(t->tensor_data().data()), t->tensor_data().size()); - - const xla::Shape on_device_shape = - client->backend().transfer_manager()->HostShapeToDeviceShape(shape); - CHECK(xla::ShapeUtil::Equal(shape, on_device_shape)) - << "On-device shape " - << xla::ShapeUtil::HumanStringWithLayout(on_device_shape) - << " not the same as on-host shape " - << xla::ShapeUtil::HumanStringWithLayout(shape); - arg_buffers[i] = xla::MakeUnique( - /*on_host_shape=*/shape, /*on_device_shape=*/shape, client->platform(), - client->default_device_ordinal()); - arg_buffers[i]->set_buffer(dmem, /*index=*/{}); - arg_ptrs[i] = arg_buffers[i].get(); - - OP_REQUIRES_OK(ctx, xla_allocator.RegisterArgument(t)); - } + XlaComputationLaunchContext launch_context(num_resource_args_, client, + &xla_allocator); + launch_context.PopulateInputs(ctx, kernel, variables); // Execute the computation. VLOG(2) << "Executing computation."; @@ -315,93 +160,14 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) { run_options.set_intra_op_thread_pool(&ctx->eigen_cpu_device()); Env* env = Env::Default(); auto start_time = env->NowMicros(); - auto run_result = executable->Run(arg_ptrs, run_options); + auto run_result = executable->Run(launch_context.arguments(), run_options); OP_REQUIRES(ctx, run_result.ok(), run_result.status()); - output = run_result.ConsumeValueOrDie()->release(); auto elapsed = env->NowMicros() - start_time; VLOG(2) << "Elapsed time: " << elapsed << "us"; - // Computation output should always be a tuple. 
- if (VLOG_IS_ON(2)) { - VLOG(2) << "Result tuple shape: " << output->on_host_shape().DebugString(); - } - CHECK_EQ(ctx->num_outputs(), kernel->outputs.size()); - - // Copy XLA results to the OpOutputList. - int output_num = 0; - for (int i = 0; i < ctx->num_outputs(); ++i) { - if (kernel->outputs[i].is_constant) { - // Output is a constant. - const Tensor& const_tensor = kernel->outputs[i].constant_value; - const size_t total_bytes = const_tensor.TotalBytes(); - if (stream && total_bytes > 0) { - // Copy host -> device. (Empty tensors don't have backing buffers.) - VLOG(1) << "Constant output tensor on device"; - Tensor* output_tensor; - TF_CHECK_OK( - ctx->allocate_output(i, const_tensor.shape(), &output_tensor)); - - const void* src_ptr = DMAHelper::base(&const_tensor); - void* dst_ptr = DMAHelper::base(output_tensor); - gpu::DeviceMemoryBase gpu_dst_ptr(dst_ptr, total_bytes); - stream->ThenMemcpy(&gpu_dst_ptr, src_ptr, total_bytes); - } else { - // No copy required. - ctx->set_output(i, const_tensor); - } - } else { - const TensorShape& shape = kernel->outputs[i].shape; - VLOG(2) << "Retval " << i << " shape " << shape.DebugString(); - - gpu::DeviceMemoryBase buffer = output->buffer({output_num}); - Tensor output_tensor; - // Looks up the owning Tensor by buffer address. - OP_REQUIRES_OK(ctx, xla_allocator.MakeTensorFromBuffer( - buffer, ctx->expected_output_dtype(i), shape, - &output_tensor)); - ctx->set_output(i, output_tensor); - ++output_num; - } - - if (VLOG_IS_ON(3)) { - VLOG(3) << ctx->mutable_output(i)->DebugString(); - } - } - - // Apply variable updates, if any. 
- VLOG(2) << "Applying variable updates"; - for (int i = 0; i < kernel->resource_updates.size(); ++i) { - const XlaCompiler::ResourceUpdate& write = kernel->resource_updates[i]; - OP_REQUIRES(ctx, - write.input_index >= 0 && write.input_index < ctx->num_inputs(), - errors::Internal("Invalid input index for variable write.")); - - gpu::DeviceMemoryBase buffer = output->buffer({output_num}); - - Var* variable = nullptr; - // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor, not - // a Tensor. - OP_REQUIRES_OK(ctx, LookupOrCreateResource( - ctx, HandleFromInput(ctx, write.input_index), - &variable, [this, ctx, &write](Var** ptr) { - *ptr = new Var(write.type); - return Status::OK(); - })); - - core::ScopedUnref s(variable); - - mutex_lock ml(*variable->mu()); - OP_REQUIRES(ctx, variable->tensor()->dtype() == write.type, - errors::Internal("Mismatched type in variable write")); - - // Looks up the owning Tensor by buffer address. - OP_REQUIRES_OK( - ctx, xla_allocator.MakeTensorFromBuffer(buffer, write.type, write.shape, - variable->tensor())); - ++output_num; - } - + launch_context.PopulateOutputs(ctx, kernel, + run_result.ConsumeValueOrDie()->release()); VLOG(1) << "Done"; } diff --git a/tensorflow/compiler/jit/kernels/xla_launch_op.h b/tensorflow/compiler/jit/kernels/xla_launch_op.h index 47fd912b12..c6cc0986af 100644 --- a/tensorflow/compiler/jit/kernels/xla_launch_op.h +++ b/tensorflow/compiler/jit/kernels/xla_launch_op.h @@ -26,14 +26,6 @@ limitations under the License. namespace tensorflow { -// Takes a snapshot of the values of resource variable arguments, which are -// the last `num_variables` arguments. We snapshot tensors that back -// resource variables since concurrent updates may modify the shape, and it is -// important that the shapes used for compilation match the true shapes of the -// buffers. 
-std::vector SnapshotResourceVariables(OpKernelContext* ctx, - int num_variables); - // XlaLocalLaunchOp is used to replace a region of the TensorFlow graph // which will be compiled and executed using XLA. The XlaLocalLaunchOp is // responsible for handling interactions with the TensorFlow executor. diff --git a/tensorflow/compiler/jit/xla_compilation_cache.cc b/tensorflow/compiler/jit/xla_compilation_cache.cc index 6d854a920e..8cc79a9bd0 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.cc +++ b/tensorflow/compiler/jit/xla_compilation_cache.cc @@ -93,7 +93,7 @@ uint64 XlaCompilationCache::Signature::Hash::operator()( Status XlaCompilationCache::BuildSignature( const NameAttrList& function, int num_constant_args, - const std::vector& variable_args, OpKernelContext* ctx, + const std::map& variable_args, OpKernelContext* ctx, Signature* signature) { signature->name = Canonicalize(function.name(), AttrSlice(&function.attr())); signature->arg_values.resize(num_constant_args); @@ -115,7 +115,8 @@ Status XlaCompilationCache::BuildSignature( } // For variable signatures, use the type and shape of the variable's // current value. - for (const OptionalTensor& variable : variable_args) { + for (auto& iterator : variable_args) { + const OptionalTensor& variable = iterator.second; TF_RET_CHECK(input_num < ctx->num_inputs()); if (variable.present) { signature->arg_types.emplace_back(variable.value.dtype(), @@ -133,7 +134,7 @@ namespace { // Builds a XlaCompiler::Argument vector from the arguments to the _XlaLaunch // op. The first `num_constant_args` arguments must be host-memory Tensors. Status BuildArguments(int num_constant_args, - const std::vector& variable_args, + const std::map& variable_args, OpKernelContext* ctx, std::vector* args) { args->resize(ctx->num_inputs()); @@ -175,17 +176,17 @@ Status BuildArguments(int num_constant_args, // Handles resource variables. 
TF_RET_CHECK(input_num + num_variable_args == ctx->num_inputs()); - for (int variable_id = 0; variable_id < num_variable_args; ++variable_id) { + for (auto& iterator : variable_args) { const Tensor& input = ctx->input(input_num); TF_RET_CHECK(input.dtype() == DT_RESOURCE); XlaCompiler::Argument& arg = (*args)[input_num]; - arg.name = variable_args[variable_id].name; + arg.name = iterator.second.name; arg.kind = XlaCompiler::Argument::kResource; arg.resource_kind = XlaResource::kVariable; - if (variable_args[variable_id].present) { - const Tensor& value = variable_args[variable_id].value; + if (iterator.second.present) { + const Tensor& value = iterator.second.value; arg.type = value.dtype(); arg.shape = value.shape(); arg.initialized = true; @@ -233,7 +234,7 @@ Status XlaCompilationCache::BuildExecutable( Status XlaCompilationCache::Compile( const XlaCompiler::Options& options, const NameAttrList& function, - int num_constant_args, const std::vector& variable_args, + int num_constant_args, const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompilationResult** compilation_result, xla::LocalExecutable** executable, @@ -250,10 +251,12 @@ Status XlaCompilationCache::Compile( << " present=" << ctx->has_input(i) << " shape=" << shape.DebugString(); } - for (const OptionalTensor& variable : variable_args) { + for (auto& iterator : variable_args) { + const OptionalTensor& variable = iterator.second; VLOG(2) << "variable present=" << variable.present << " type=" << DataTypeString(variable.value.dtype()) - << " shape=" << variable.value.shape().DebugString(); + << " shape=" << variable.value.shape().DebugString() + << " TF arg= " << iterator.first; } VLOG(2) << "num_outputs = " << ctx->num_outputs(); for (int i = 0; i < ctx->num_outputs(); i++) { diff --git a/tensorflow/compiler/jit/xla_compilation_cache.h b/tensorflow/compiler/jit/xla_compilation_cache.h index 0858020716..d506378314 100644 --- a/tensorflow/compiler/jit/xla_compilation_cache.h +++ 
b/tensorflow/compiler/jit/xla_compilation_cache.h @@ -63,7 +63,7 @@ class XlaCompilationCache : public ResourceBase { // outputs. Status Compile(const XlaCompiler::Options& options, const NameAttrList& function, int num_constant_args, - const std::vector& variable_args, + const std::map& variable_args, OpKernelContext* ctx, const XlaCompiler::CompilationResult** compilation_result, xla::LocalExecutable** executable, @@ -105,7 +105,7 @@ class XlaCompilationCache : public ResourceBase { // Builds the signature for a compilation. Status BuildSignature(const NameAttrList& function, int num_constant_args, - const std::vector& variable_args, + const std::map& variable_args, OpKernelContext* ctx, Signature* signature); // The value associated with a cache entry. diff --git a/tensorflow/compiler/jit/xla_launch_util.cc b/tensorflow/compiler/jit/xla_launch_util.cc new file mode 100644 index 0000000000..8322dd2e82 --- /dev/null +++ b/tensorflow/compiler/jit/xla_launch_util.cc @@ -0,0 +1,255 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/jit/xla_launch_util.h" + +#include "tensorflow/compiler/jit/defs.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/xla/client/client_library.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/compiler/xla/statusor.h" +#include "tensorflow/core/common_runtime/dma_helper.h" +#include "tensorflow/core/common_runtime/function.h" +#include "tensorflow/core/framework/allocator.h" +#include "tensorflow/core/framework/node_def_util.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/util/stream_executor_util.h" + +namespace gpu = perftools::gputools; + +namespace tensorflow { + +std::map SnapshotResourceVariables(OpKernelContext* ctx, + int num_variables) { + std::map snapshot; + int first_variable = ctx->num_inputs() - num_variables; + for (int i = 0; i < num_variables; ++i) { + Var* variable = nullptr; + ResourceHandle handle = HandleFromInput(ctx, first_variable + i); + OptionalTensor& tensor = snapshot[first_variable + i]; + if (LookupResource(ctx, handle, &variable).ok()) { + tf_shared_lock lock(*variable->mu()); + tensor.name = handle.name(); + tensor.present = true; + tensor.value = *variable->tensor(); + } + } + return snapshot; +} + +XlaAllocator::XlaAllocator(const gpu::Platform* platform, + OpKernelContext* op_context) + : xla::DeviceMemoryAllocator(platform), op_context_(op_context) {} + +XlaAllocator::~XlaAllocator() = default; + +xla::StatusOr XlaAllocator::Allocate( + int device_ordinal, uint64 size, bool retry_on_failure) { + AllocatorAttributes allocator_attrs; + allocator_attrs.set_on_host(false); + + AllocationAttributes allocation_attrs; + allocation_attrs.no_retry_on_failure = !retry_on_failure; + + Tensor t; 
+ Status status = op_context_->allocate_temp( + DT_UINT8, TensorShape({static_cast(size)}), &t, allocator_attrs, + allocation_attrs); + if (!status.ok()) { + VLOG(2) << "Allocation failed " << size; + return status; + } + void* data = + reinterpret_cast(const_cast(t.tensor_data().data())); + tensors_[data] = t; + return gpu::DeviceMemoryBase(data, size); +} + +Status XlaAllocator::RegisterArgument(const Tensor* t) { + void* data = + reinterpret_cast(const_cast(t->tensor_data().data())); + tensors_[data] = *t; + return Status::OK(); +} + +Status XlaAllocator::Deallocate(int device_ordinal, + gpu::DeviceMemoryBase* mem) { + if (mem->opaque() != nullptr) { + if (tensors_.erase(mem->opaque()) == 0) { + return tensorflow::errors::InvalidArgument("Unknown tensor address"); + } + } + return Status::OK(); +} + +Status XlaAllocator::MakeTensorFromBuffer(gpu::DeviceMemoryBase buffer, + DataType dtype, + const TensorShape& shape, + Tensor* out_tensor) const { + void* ptr = const_cast(buffer.opaque()); + auto it = tensors_.find(ptr); + if (it == tensors_.end()) { + return errors::InvalidArgument("Unknown tensor address"); + } + const Tensor& tensor = it->second; + + int64 output_size = DataTypeSize(dtype) * shape.num_elements(); + if (tensor.TotalBytes() == output_size) { + out_tensor->UnsafeCopyFromInternal(tensor, dtype, shape); + } else { + Tensor slice = tensor.Slice(0, output_size); + out_tensor->UnsafeCopyFromInternal(slice, dtype, shape); + } + return Status::OK(); +} + +XlaComputationLaunchContext::XlaComputationLaunchContext( + int64 num_resource_args, xla::LocalClient* client, + XlaAllocator* xla_allocator) + : num_resource_args_(num_resource_args), + client_(client), + xla_allocator_(xla_allocator) {} + +void XlaComputationLaunchContext::PopulateInputs( + OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, + const std::map& variables) { + // Build xla::ShapedBuffers that point directly to the Tensor buffers. 
+ arg_buffers_.reserve(kernel->xla_input_shapes.size() + 1); + arg_buffers_.resize(kernel->xla_input_shapes.size()); + arg_ptrs_ = std::vector(arg_buffers_.size()); + + // Pass remaining parameters. + const Tensor* t; + for (int i = 0; i < kernel->xla_input_shapes.size(); ++i) { + int arg_num = kernel->input_mapping[i]; + const xla::Shape& shape = kernel->xla_input_shapes[i]; + if (variables.count(arg_num)) { + t = &(variables.at(arg_num).value); + CHECK(t); + } else { + t = &(ctx->input(arg_num)); + } + + gpu::DeviceMemoryBase dmem = gpu::DeviceMemoryBase( + const_cast(t->tensor_data().data()), t->tensor_data().size()); + + const xla::Shape on_device_shape = + client_->backend().transfer_manager()->HostShapeToDeviceShape(shape); + CHECK(xla::ShapeUtil::Equal(shape, on_device_shape)) + << "On-device shape " + << xla::ShapeUtil::HumanStringWithLayout(on_device_shape) + << " not the same as on-host shape " + << xla::ShapeUtil::HumanStringWithLayout(shape); + arg_buffers_[i] = xla::MakeUnique( + /*on_host_shape=*/shape, /*on_device_shape=*/shape, client_->platform(), + client_->default_device_ordinal()); + arg_buffers_[i]->set_buffer(dmem, /*index=*/{}); + arg_ptrs_[i] = arg_buffers_[i].get(); + + OP_REQUIRES_OK(ctx, xla_allocator_->RegisterArgument(t)); + } +} + +void XlaComputationLaunchContext::PopulateOutputs( + OpKernelContext* ctx, const XlaCompiler::CompilationResult* kernel, + std::unique_ptr output) { + gpu::Stream* stream = + ctx->op_device_context() ? ctx->op_device_context()->stream() : nullptr; + + // Computation output should always be a tuple. + if (VLOG_IS_ON(2)) { + VLOG(2) << "Result tuple shape: " << output->on_host_shape().DebugString(); + } + CHECK_EQ(ctx->num_outputs(), kernel->outputs.size()); + + // Copy XLA results to the OpOutputList. + int output_num = 0; + for (int i = 0; i < ctx->num_outputs(); ++i) { + if (kernel->outputs[i].is_constant) { + // Output is a constant. 
+ const Tensor& const_tensor = kernel->outputs[i].constant_value; + const size_t total_bytes = const_tensor.TotalBytes(); + if (stream && total_bytes > 0) { + // Copy host -> device. (Empty tensors don't have backing buffers.) + VLOG(1) << "Constant output tensor on device"; + Tensor* output_tensor; + TF_CHECK_OK( + ctx->allocate_output(i, const_tensor.shape(), &output_tensor)); + + const void* src_ptr = DMAHelper::base(&const_tensor); + void* dst_ptr = DMAHelper::base(output_tensor); + gpu::DeviceMemoryBase gpu_dst_ptr(dst_ptr, total_bytes); + stream->ThenMemcpy(&gpu_dst_ptr, src_ptr, total_bytes); + } else { + // No copy required. + ctx->set_output(i, const_tensor); + } + } else { + const TensorShape& shape = kernel->outputs[i].shape; + VLOG(2) << "Retval " << i << " shape " << shape.DebugString(); + + gpu::DeviceMemoryBase buffer = output->buffer({output_num}); + Tensor output_tensor; + // Looks up the owning Tensor by buffer address. + OP_REQUIRES_OK(ctx, xla_allocator_->MakeTensorFromBuffer( + buffer, ctx->expected_output_dtype(i), shape, + &output_tensor)); + ctx->set_output(i, output_tensor); + ++output_num; + } + + if (VLOG_IS_ON(3)) { + VLOG(3) << ctx->mutable_output(i)->DebugString(); + } + } + + // Apply variable updates, if any. + VLOG(2) << "Applying variable updates"; + for (int i = 0; i < kernel->resource_updates.size(); ++i) { + const XlaCompiler::ResourceUpdate& write = kernel->resource_updates[i]; + OP_REQUIRES(ctx, + write.input_index >= 0 && write.input_index < ctx->num_inputs(), + errors::Internal("Invalid input index for variable write.")); + + gpu::DeviceMemoryBase buffer = output->buffer({output_num}); + + Var* variable = nullptr; + // TODO(b/35625933): tensorflow::Var should contain a PersistentTensor, + // not a Tensor. 
+ OP_REQUIRES_OK(ctx, LookupOrCreateResource( + ctx, HandleFromInput(ctx, write.input_index), + &variable, [this, ctx, &write](Var** ptr) { + *ptr = new Var(write.type); + return Status::OK(); + })); + + core::ScopedUnref s(variable); + + mutex_lock ml(*variable->mu()); + OP_REQUIRES(ctx, variable->tensor()->dtype() == write.type, + errors::Internal("Mismatched type in variable write")); + + // Looks up the owning Tensor by buffer address. + OP_REQUIRES_OK(ctx, + xla_allocator_->MakeTensorFromBuffer( + buffer, write.type, write.shape, variable->tensor())); + ++output_num; + } +} + +} // namespace tensorflow diff --git a/tensorflow/compiler/jit/xla_launch_util.h b/tensorflow/compiler/jit/xla_launch_util.h new file mode 100644 index 0000000000..9fd356fce5 --- /dev/null +++ b/tensorflow/compiler/jit/xla_launch_util.h @@ -0,0 +1,116 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Contains utilities for launching compiled XLA kernels for a KernelContext. 
+ +#ifndef TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_ +#define TENSORFLOW_COMPILER_JIT_XLA_LAUNCH_UTIL_H_ + +#include "tensorflow/compiler/jit/xla_compilation_cache.h" +#include "tensorflow/compiler/tf2xla/xla_compiler.h" +#include "tensorflow/compiler/xla/client/local_client.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/types.h" +#include "tensorflow/core/kernels/variable_ops.h" +#include "tensorflow/core/lib/core/status.h" + +namespace tensorflow { +class XlaAllocator; + +// Takes a snapshot of the values of resource variable arguments, which are +// the last `num_variables` arguments. We snapshot tensors that back +// resource variables since concurrent updates may modify the shape, and it is +// important that the shapes used for compilation match the true shapes of the +// buffers. +// +// Returns a map of TensorFlow argument index to resource variable. +std::map SnapshotResourceVariables(OpKernelContext* ctx, + int num_variables); + +// Adapter class that wraps a Tensorflow allocator as an XLA allocator. +// Assumes that the Tensorflow allocator permits asynchronous deallocation: +// see comment on `AllowsAsynchronousDeallocation()`. +class XlaAllocator : public xla::DeviceMemoryAllocator { + public: + XlaAllocator(const perftools::gputools::Platform* platform, + OpKernelContext* op_context); + ~XlaAllocator() override; + xla::StatusOr Allocate( + int device_ordinal, uint64 size, bool retry_on_failure) override; + Status Deallocate(int device_ordinal, + perftools::gputools::DeviceMemoryBase* mem) override; + + // Register an Tensor (input or resource variable) with the allocator. If + // the operation returns an alias to one of its inputs, then the allocator + // needs to be able to handle it. + Status RegisterArgument(const Tensor* t); + + // Makes 'tensor' a wrapper around the data buffer at 'ptr'. The buffer is + // interpreted as having data type 'dtype' and shape 'shape'. 
+ Status MakeTensorFromBuffer(perftools::gputools::DeviceMemoryBase buffer, + DataType dtype, const TensorShape& shape, + Tensor* out_tensor) const; + + // The Tensorflow BFC allocator used on GPU allows host-side deallocation + // before GPU execution takes place. Tensorflow uses the ordering of the main + // compute stream to enforce a happens-before relationship between a memory + // allocation and code that reuses the same memory. If Tensorflow adds + // support for multiple GPU streams or allocators with different ordering + // requirements, this code may need to change. + // (This attribute has no effect on CPU.) + bool AllowsAsynchronousDeallocation() const override { return true; } + + private: + OpKernelContext* const op_context_; + + // Map from pointer address to the owning Tensor; used by + // MakeTensorFromBuffer. Also used to automatically release Tensors when the + // allocator is freed. + std::unordered_map tensors_; +}; + +// Helper class to perform the marshalling of TensorFlow inputs and outputs to +// ShapedBuffers suitable for passing to an XLA computation. +class XlaComputationLaunchContext { + public: + XlaComputationLaunchContext(int64 num_resource_args, xla::LocalClient* client, + XlaAllocator* xla_allocator); + + // Add all inputs within `ctx` as XLA arguments (returned by arguments()). + // `variables` is a map from TensorFlow argument number to resource variable. + void PopulateInputs(OpKernelContext* ctx, + const XlaCompiler::CompilationResult* kernel, + const std::map& variables); + + // Given the XLA output in `output`, populate all outputs of `ctx`. + void PopulateOutputs(OpKernelContext* ctx, + const XlaCompiler::CompilationResult* kernel, + std::unique_ptr output); + + // Return the argument list. Only valid after PopulateInputs() has been + // called. 
+ const std::vector& arguments() const { return arg_ptrs_; } + + private: + int64 num_resource_args_; + xla::LocalClient* client_; + XlaAllocator* xla_allocator_; + std::vector> arg_buffers_; + std::vector arg_ptrs_; +}; + +} // namespace tensorflow + +#endif -- GitLab From 23384d7d8a60a36c68fbbdc509b22d385ea9a12c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 09:01:41 -0800 Subject: [PATCH 603/884] Fix feature fraction per tree. PiperOrigin-RevId: 188339438 --- .../boosted_trees/python/training/functions/gbdt_batch.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py index 233e21f1cf..85b909e4f2 100644 --- a/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py +++ b/tensorflow/contrib/boosted_trees/python/training/functions/gbdt_batch.py @@ -724,9 +724,9 @@ class GradientBoostedDecisionTreeModel(object): active_handlers_current_layer = ( active_handlers_current_layer < self._learner_config.feature_fraction_per_tree) - active_handlers = array_ops.stack(active_handlers_current_layer, - array_ops.ones( - [len(handlers)], dtype=dtypes.bool)) + active_handlers = array_ops.stack([ + active_handlers_current_layer, + array_ops.ones([len(handlers)], dtype=dtypes.bool)], axis=1) else: active_handlers = array_ops.ones([len(handlers), 2], dtype=dtypes.bool) -- GitLab From ada8d558c94b81a4414599501fb8b611f1dc1702 Mon Sep 17 00:00:00 2001 From: Yun Peng Date: Thu, 8 Mar 2018 11:16:38 +0100 Subject: [PATCH 604/884] Exclude kafka on Windows --- tensorflow/contrib/BUILD | 2 +- tensorflow/contrib/kafka/BUILD | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/BUILD b/tensorflow/contrib/BUILD index 17ab200b28..c2663c5e83 100644 --- a/tensorflow/contrib/BUILD +++ b/tensorflow/contrib/BUILD @@ -51,7 +51,6 @@ py_library( 
"//tensorflow/contrib/image:single_image_random_dot_stereograms_py", "//tensorflow/contrib/input_pipeline:input_pipeline_py", "//tensorflow/contrib/integrate:integrate_py", - "//tensorflow/contrib/kafka", "//tensorflow/contrib/keras", "//tensorflow/contrib/kernel_methods", "//tensorflow/contrib/kfac", @@ -112,6 +111,7 @@ py_library( ]) + if_not_windows([ "//tensorflow/contrib/ffmpeg:ffmpeg_ops_py", # unix dependency, need to fix code "//tensorflow/contrib/lite/python:lite", # unix dependency, need to fix code + "//tensorflow/contrib/kafka", # has some linking issue on opensssl. ]), ) diff --git a/tensorflow/contrib/kafka/BUILD b/tensorflow/contrib/kafka/BUILD index 14a62fb075..1c3974871c 100644 --- a/tensorflow/contrib/kafka/BUILD +++ b/tensorflow/contrib/kafka/BUILD @@ -115,6 +115,7 @@ tf_py_test( ], tags = [ "manual", + "no_windows", "notap", ], ) -- GitLab From 55cbd319ac0e4bf463c470d0effceac11ec4dfbc Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 8 Mar 2018 09:02:51 -0800 Subject: [PATCH 605/884] Optimize the destruction of CancellationManager and LocalRendezvousImpl. In the common case of clean termination, we can avoid performing several atomic operations and allocations. 
PiperOrigin-RevId: 188339594 --- tensorflow/core/framework/cancellation.cc | 6 +++++- tensorflow/core/framework/rendezvous.cc | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/framework/cancellation.cc b/tensorflow/core/framework/cancellation.cc index 9da4828bba..1258e40c93 100644 --- a/tensorflow/core/framework/cancellation.cc +++ b/tensorflow/core/framework/cancellation.cc @@ -89,6 +89,10 @@ bool CancellationManager::DeregisterCallback(CancellationToken token) { } } -CancellationManager::~CancellationManager() { StartCancel(); } +CancellationManager::~CancellationManager() { + if (!callbacks_.empty()) { + StartCancel(); + } +} } // end namespace tensorflow diff --git a/tensorflow/core/framework/rendezvous.cc b/tensorflow/core/framework/rendezvous.cc index 90756a4f2f..e84143f1b9 100644 --- a/tensorflow/core/framework/rendezvous.cc +++ b/tensorflow/core/framework/rendezvous.cc @@ -296,7 +296,9 @@ class LocalRendezvousImpl : public Rendezvous { Status status_ GUARDED_BY(mu_); ~LocalRendezvousImpl() override { - StartAbort(errors::Cancelled("LocalRendezvousImpl deleted")); + if (!table_.empty()) { + StartAbort(errors::Cancelled("LocalRendezvousImpl deleted")); + } } TF_DISALLOW_COPY_AND_ASSIGN(LocalRendezvousImpl); -- GitLab From 631a496f756a1a92c63dc8758d0471e38b930fc4 Mon Sep 17 00:00:00 2001 From: Sergio Guadarrama Date: Thu, 8 Mar 2018 09:32:53 -0800 Subject: [PATCH 606/884] Automated g4 rollback of changelist 188265500 PiperOrigin-RevId: 188343238 --- tensorflow/python/framework/tensor_shape.py | 81 ++----------------- .../python/framework/tensor_shape_test.py | 13 --- 2 files changed, 7 insertions(+), 87 deletions(-) diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index d2dad313f8..6f2ab8408e 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -156,7 +156,7 @@ class Dimension(object): ``` Args: - other: Another 
Dimension, or a value accepted by `as_dimension`. + other: Another Dimension. Returns: A Dimension whose value is the sum of `self` and `other`. @@ -167,17 +167,6 @@ class Dimension(object): else: return Dimension(self._value + other.value) - def __radd__(self, other): - """Returns the sum of `other` and `self`. - - Args: - other: Another Dimension, or a value accepted by `as_dimension`. - - Returns: - A Dimension whose value is the sum of `self` and `other`. - """ - return self + other - def __sub__(self, other): """Returns the subtraction of `other` from `self`. @@ -191,10 +180,10 @@ class Dimension(object): ``` Args: - other: Another Dimension, or a value accepted by `as_dimension`. + other: Another Dimension. Returns: - A Dimension whose value is the subtraction of `other` from `self`. + A Dimension whose value is the subtraction of sum of `other` from `self`. """ other = as_dimension(other) if self._value is None or other.value is None: @@ -202,21 +191,6 @@ class Dimension(object): else: return Dimension(self._value - other.value) - def __rsub__(self, other): - """Returns the subtraction of `self` from `other`. - - Args: - other: Another Dimension, or a value accepted by `as_dimension`. - - Returns: - A Dimension whose value is the subtraction of `self` from `other`. - """ - other = as_dimension(other) - if self._value is None or other.value is None: - return Dimension(None) - else: - return Dimension(other.value - self._value) - def __mul__(self, other): """Returns the product of `self` and `other`. @@ -230,7 +204,7 @@ class Dimension(object): ``` Args: - other: Another Dimension, or a value accepted by `as_dimension`. + other: Another Dimension. Returns: A Dimension whose value is the product of `self` and `other`. @@ -241,17 +215,6 @@ class Dimension(object): else: return Dimension(self._value * other.value) - def __rmul__(self, other): - """Returns the product of `self` and `other`. 
- - Args: - other: Another Dimension, or a value accepted by `as_dimension`. - - Returns: - A Dimension whose value is the product of `self` and `other`. - """ - return self * other - def __floordiv__(self, other): """Returns the quotient of `self` and `other` rounded down. @@ -265,7 +228,7 @@ class Dimension(object): ``` Args: - other: Another Dimension, or a value accepted by `as_dimension`. + other: Another `Dimension`. Returns: A `Dimension` whose value is the integer quotient of `self` and `other`. @@ -276,21 +239,6 @@ class Dimension(object): else: return Dimension(self._value // other.value) - def __rfloordiv__(self, other): - """Returns the quotient of `other` and `self` rounded down. - - Args: - other: Another Dimension, or a value accepted by `as_dimension`. - - Returns: - A `Dimension` whose value is the integer quotient of `self` and `other`. - """ - other = as_dimension(other) - if self._value is None or other.value is None: - return Dimension(None) - else: - return Dimension(other.value // self._value) - def __div__(self, other): """DEPRECATED: Use `__floordiv__` via `x // y` instead. @@ -308,7 +256,7 @@ class Dimension(object): return self // other def __mod__(self, other): - """Returns `self` modulo `other`. + """Returns `self` modulo `other. Dimension moduli are computed as follows: @@ -320,7 +268,7 @@ class Dimension(object): ``` Args: - other: Another Dimension, or a value accepted by `as_dimension`. + other: Another Dimension. Returns: A Dimension whose value is `self` modulo `other`. @@ -331,21 +279,6 @@ class Dimension(object): else: return Dimension(self._value % other.value) - def __rmod__(self, other): - """Returns `other` modulo `self`. - - Args: - other: Another Dimension, or a value accepted by `as_dimension`. - - Returns: - A Dimension whose value is `other` modulo `self`. 
- """ - other = as_dimension(other) - if self._value is None or other.value is None: - return Dimension(None) - else: - return Dimension(other.value % self._value) - def __lt__(self, other): """Returns True if `self` is known to be less than `other`. diff --git a/tensorflow/python/framework/tensor_shape_test.py b/tensorflow/python/framework/tensor_shape_test.py index 4cf0e9fcd6..fffd86c7a6 100644 --- a/tensorflow/python/framework/tensor_shape_test.py +++ b/tensorflow/python/framework/tensor_shape_test.py @@ -34,17 +34,12 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(tensor_shape.Dimension(15), dim + tensor_shape.Dimension(3)) self.assertEqual(tensor_shape.Dimension(15), dim + 3) - self.assertEqual(tensor_shape.Dimension(15), 3 + dim) - self.assertEqual(tensor_shape.Dimension(9), dim - 3) - self.assertEqual(tensor_shape.Dimension(1), 13 - dim) self.assertEqual(tensor_shape.Dimension(24), dim * tensor_shape.Dimension(2)) self.assertEqual(tensor_shape.Dimension(24), dim * 2) - self.assertEqual(tensor_shape.Dimension(24), 2 * dim) self.assertEqual( tensor_shape.Dimension(6), dim // tensor_shape.Dimension(2)) self.assertEqual(tensor_shape.Dimension(6), dim // 2) - self.assertEqual(tensor_shape.Dimension(0), 2 // dim) self.assertEqual(tensor_shape.Dimension(12), dim.merge_with(tensor_shape.Dimension(12))) self.assertEqual(tensor_shape.Dimension(12), dim.merge_with(12)) @@ -181,14 +176,6 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(str(tensor_shape.Dimension(7)), "7") self.assertEqual(str(tensor_shape.Dimension(None)), "?") - def testMod(self): - four = tensor_shape.Dimension(4) - nine = tensor_shape.Dimension(9) - self.assertEqual(nine % four, 1) - # test both __mod__ and __rmod__. 
- self.assertEqual(nine % 4, 1) - self.assertEqual(4 % nine, 4) - class ShapeTest(test_util.TensorFlowTestCase): -- GitLab From b315950540e80d4c67121ecabe4ed69c5f17fef8 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 8 Mar 2018 10:32:41 -0800 Subject: [PATCH 607/884] Also reverting ccedcb --- tensorflow/core/distributed_runtime/tensor_coding.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/distributed_runtime/tensor_coding.cc b/tensorflow/core/distributed_runtime/tensor_coding.cc index 34a4013547..fe2d1a1293 100644 --- a/tensorflow/core/distributed_runtime/tensor_coding.cc +++ b/tensorflow/core/distributed_runtime/tensor_coding.cc @@ -81,7 +81,7 @@ void TensorResponse::InitPartial(const RecvTensorResponse& response) { Status TensorResponse::ParseFrom(Source* source) { if (!on_host_) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited // Pre-parse into local storage, then delegate to device. 
if (!meta_.ParseFromCodedStream(&input) || !input.ConsumedEntireMessage()) { @@ -217,7 +217,7 @@ bool TensorResponse::ParseTensorSubmessage( bool TensorResponse::ParseFast(Source* source) { protobuf::io::CodedInputStream input(source->contents()); - input.SetTotalBytesLimit(INT_MAX); // Unlimited + input.SetTotalBytesLimit(INT_MAX, INT_MAX); // Unlimited while (true) { auto p = input.ReadTagWithCutoff(127); int tag = GetTagFieldNumber(p.first); -- GitLab From ca59422fe1f463ef89255b73b77cc08821b74b44 Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Thu, 8 Mar 2018 10:38:34 -0800 Subject: [PATCH 608/884] Disable flaky checkpoint_utils_test everywhere PiperOrigin-RevId: 188353354 --- tensorflow/python/BUILD | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 73b17e7e3c..c4f03906fb 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3951,8 +3951,10 @@ py_test( srcs = ["training/checkpoint_utils_test.py"], srcs_version = "PY2AND3", tags = [ - "no_cuda_on_cpu_tap", + "manual", + "no_oss", "no_windows", + "notap", ], deps = [ ":client", -- GitLab From 7a9419317f866349890a9f6633050c38e848aee4 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 8 Mar 2018 11:10:24 -0800 Subject: [PATCH 609/884] Update tensorrt import exception. 
--- tensorflow/contrib/tensorrt/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index d53a05827a..a07b297900 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -18,16 +18,17 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +from tensorflow.python.framework import errors + # pylint: disable=unused-import,wildcard-import,g-import-not-at-top try: from tensorflow.contrib.tensorrt.python import * -except Exception as e: +except errors.NotFoundError as e: no_trt_message = ( '**** Failed to initialize TensorRT. This is either because the TensorRT' ' installation path is not in LD_LIBRARY_PATH, or because you do not have' ' it installed. If not installed, please go to' ' https://developer.nvidia.com/tensorrt to download and install' ' TensorRT ****') - print(no_trt_message) - raise e + raise e(no_trt_message) # pylint: enable=unused-import,wildcard-import,g-import-not-at-top -- GitLab From 7912a4ac3d39df4ac589801bc638dbea8bdb9e6b Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 8 Mar 2018 11:14:50 -0800 Subject: [PATCH 610/884] Add "//tensorflow/python:errors" to deps --- tensorflow/contrib/tensorrt/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 87a33bb70a..906cc3f034 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -155,6 +155,7 @@ py_library( deps = [ ":trt_convert_py", ":trt_ops_py", + "//tensorflow/python:errors", ], ) -- GitLab From a6a0c0bf9486c11793b7dd0b4883a75ff3dcf3f3 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 8 Mar 2018 11:12:41 -0800 Subject: [PATCH 611/884] Updating Tensorflow Docs to add reference around measures to ensure forward compatibility around TF op attributes. PiperOrigin-RevId: 188359164 --- .../docs_src/programmers_guide/saved_model.md | 44 +++++++++++++++++-- .../programmers_guide/version_compat.md | 19 ++++++++ 2 files changed, 59 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/saved_model.md b/tensorflow/docs_src/programmers_guide/saved_model.md index b5f63a8e3b..1744494f72 100644 --- a/tensorflow/docs_src/programmers_guide/saved_model.md +++ b/tensorflow/docs_src/programmers_guide/saved_model.md @@ -256,18 +256,53 @@ with tf.Session(graph=tf.Graph()) as sess: builder.add_meta_graph_and_variables(sess, [tag_constants.TRAINING], signature_def_map=foo_signatures, - assets_collection=foo_assets) + assets_collection=foo_assets, + strip_default_attrs=True) ... # Add a second MetaGraphDef for inference. with tf.Session(graph=tf.Graph()) as sess: ... - builder.add_meta_graph([tag_constants.SERVING]) + builder.add_meta_graph([tag_constants.SERVING], strip_default_attrs=True) ... builder.save() ``` +
+#### Forward compatibility via `strip_default_attrs=True` -### Load a SavedModel in Python +Following the guidance below gives you forward compatibility only if the set of +Ops has not changed. + +The @{tf.saved_model.builder.SavedModelBuilder$`SavedModelBuilder`} class allows +users to control whether default-valued attributes must be stripped from the +@{$extend/tool_developers#nodes$`NodeDefs`} +while adding a meta graph to the SavedModel bundle. Both +@{tf.saved_model.builder.SavedModelBuilder.add_meta_graph_and_variables$`SavedModelBuilder.add_meta_graph_and_variables`} +and @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph$`SavedModelBuilder.add_meta_graph`} +methods accept a Boolean flag `strip_default_attrs` that controls this behavior. + +If `strip_default_attrs` is `False`, the exported @{tf.MetaGraphDef} will have +the default valued attributes in all its @{tf.NodeDef} instances. +This can break forward compatibility with a sequence of events such as the +following: + +* An existing Op (`Foo`) is updated to include a new attribute (`T`) with a + default (`bool`) at version 101. +* A model producer such as a "trainer binary" picks up this change (version 101) + to the `OpDef` and re-exports an existing model that uses Op `Foo`. +* A model consumer (such as [Tensorflow Serving](/serving)) running an older + binary (version 100) doesn't have attribute `T` for Op `Foo`, but tries to + import this model. The model consumer doesn't recognize attribute `T` in a + `NodeDef` that uses Op `Foo` and therefore fails to load the model. +* By setting `strip_default_attrs` to True, the model producers can strip away + any default valued attributes in the `NodeDefs`. This helps ensure that newly + added attributes with defaults don't cause older model consumers to fail + loading models regenerated with newer training binaries. + +See [compatibility guidance](https://www.tensorflow.org/programmers_guide/version_compat) +for more information. 
+ +### Loading a SavedModel in Python The Python version of the SavedModel @{tf.saved_model.loader$loader} @@ -458,7 +493,8 @@ To export your trained Estimator, call the `serving_input_receiver_fn`. ```py -estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn) +estimator.export_savedmodel(export_dir_base, serving_input_receiver_fn, + strip_default_attrs=True) ``` This method builds a new graph by first calling the diff --git a/tensorflow/docs_src/programmers_guide/version_compat.md b/tensorflow/docs_src/programmers_guide/version_compat.md index e6613cc69f..5412fba5d0 100644 --- a/tensorflow/docs_src/programmers_guide/version_compat.md +++ b/tensorflow/docs_src/programmers_guide/version_compat.md @@ -245,6 +245,25 @@ contains a main data version which is treated as either `producer` or `TF_CHECKPOINT_VERSION_MIN_CONSUMER`, and `TF_CHECKPOINT_VERSION_MIN_PRODUCER`. +### Add a new attribute with default to an existing Op + +Following the guidance below gives you forward compatibility only if the set of +Ops has not changed. + +1. If forward compatibility is desired, set `strip_default_attrs` to `True` + while exporting the model using either the + @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph_and_variables$`add_meta_graph_and_variables`} + and @{tf.saved_model.builder.SavedModelBuilder.add_meta_graph$`add_meta_graph`} + methods of the `SavedModelBuilder` class, or + @{tf.estimator.Estimator.export_savedmodel$`Estimator.export_savedmodel`} +2. This strips off the default valued attributes at the time of + producing/exporting the models; thereby making sure that the exported + @{tf.MetaGraphDef} does not contain the new Op-attribute when the default + value is used. +3. Having this control lets potentially old consumers aka serving binaries + (lagging behind training binaries) continue loading the models + thereby preventing interruptions in model serving. 
+ ### Evolving GraphDef versions This section explains how to use this versioning mechanism to make different -- GitLab From b1033e52142a0071b6a81969e1e387ea940f6cd6 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 8 Mar 2018 11:26:21 -0800 Subject: [PATCH 612/884] Update __init__.py --- tensorflow/contrib/tensorrt/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/tensorrt/__init__.py b/tensorflow/contrib/tensorrt/__init__.py index a07b297900..140ad48282 100644 --- a/tensorflow/contrib/tensorrt/__init__.py +++ b/tensorflow/contrib/tensorrt/__init__.py @@ -30,5 +30,6 @@ except errors.NotFoundError as e: ' it installed. If not installed, please go to' ' https://developer.nvidia.com/tensorrt to download and install' ' TensorRT ****') - raise e(no_trt_message) + print(no_trt_message) + raise e # pylint: enable=unused-import,wildcard-import,g-import-not-at-top -- GitLab From 05c31035abedb2983899c49d172ac0382b6eceb7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 11:38:46 -0800 Subject: [PATCH 613/884] [SE] Initial perftools::gputools::Platform initialization support Adds initialization methods to Platform. Some platforms require initialization. Those that do not have trivial implementations of these methods. PiperOrigin-RevId: 188363315 --- .../stream_executor/multi_platform_manager.cc | 86 +++++++++++++++---- .../stream_executor/multi_platform_manager.h | 63 ++++++++++---- tensorflow/stream_executor/platform.cc | 11 +++ tensorflow/stream_executor/platform.h | 18 +++- 4 files changed, 142 insertions(+), 36 deletions(-) diff --git a/tensorflow/stream_executor/multi_platform_manager.cc b/tensorflow/stream_executor/multi_platform_manager.cc index f23224ae77..f9f3737a06 100644 --- a/tensorflow/stream_executor/multi_platform_manager.cc +++ b/tensorflow/stream_executor/multi_platform_manager.cc @@ -23,11 +23,37 @@ limitations under the License. 
namespace perftools { namespace gputools { +/* static */ mutex MultiPlatformManager::platforms_mutex_{LINKER_INITIALIZED}; + +/* static */ port::StatusOr MultiPlatformManager::LookupByNameLocked( + const string& target) { + PlatformMap* platform_map = GetPlatformMap(); + auto it = platform_map->find(port::Lowercase(target)); + if (it == platform_map->end()) { + return port::Status( + port::error::NOT_FOUND, + "could not find registered platform with name: \"" + target + "\""); + } + return it->second; +} + +/* static */ port::StatusOr MultiPlatformManager::LookupByIdLocked( + const Platform::Id& id) { + PlatformIdMap* platform_map = GetPlatformByIdMap(); + auto it = platform_map->find(id); + if (it == platform_map->end()) { + return port::Status( + port::error::NOT_FOUND, + port::Printf("could not find registered platform with id: 0x%p", id)); + } + return it->second; +} + /* static */ port::Status MultiPlatformManager::RegisterPlatform( std::unique_ptr platform) { CHECK(platform != nullptr); string key = port::Lowercase(platform->Name()); - mutex_lock lock(GetPlatformsMutex()); + mutex_lock lock(platforms_mutex_); if (GetPlatformMap()->find(key) != GetPlatformMap()->end()) { return port::Status(port::error::INTERNAL, "platform is already registered with name: \"" + @@ -45,33 +71,63 @@ namespace gputools { /* static */ port::StatusOr MultiPlatformManager::PlatformWithName( const string& target) { - tf_shared_lock lock(GetPlatformsMutex()); - auto it = GetPlatformMap()->find(port::Lowercase(target)); + mutex_lock lock(platforms_mutex_); - if (it == GetPlatformMap()->end()) { - return port::Status( - port::error::NOT_FOUND, - "could not find registered platform with name: \"" + target + "\""); + SE_ASSIGN_OR_RETURN(Platform * platform, LookupByNameLocked(target)); + if (!platform->Initialized()) { + SE_RETURN_IF_ERROR(platform->Initialize({})); } - return it->second; + return platform; } /* static */ port::StatusOr MultiPlatformManager::PlatformWithId( const 
Platform::Id& id) { - tf_shared_lock lock(GetPlatformsMutex()); - auto it = GetPlatformByIdMap()->find(id); - if (it == GetPlatformByIdMap()->end()) { + mutex_lock lock(platforms_mutex_); + + SE_ASSIGN_OR_RETURN(Platform * platform, LookupByIdLocked(id)); + if (!platform->Initialized()) { + SE_RETURN_IF_ERROR(platform->Initialize({})); + } + + return platform; +} + +/* static */ port::StatusOr +MultiPlatformManager::InitializePlatformWithName( + const string& target, const std::map& options) { + mutex_lock lock(platforms_mutex_); + + SE_ASSIGN_OR_RETURN(Platform * platform, LookupByNameLocked(target)); + if (platform->Initialized()) { + return port::Status(port::error::FAILED_PRECONDITION, + "platform \"" + target + "\" is already initialized"); + } + + SE_RETURN_IF_ERROR(platform->Initialize(options)); + + return platform; +} + +/* static */ port::StatusOr +MultiPlatformManager::InitializePlatformWithId( + const Platform::Id& id, const std::map& options) { + mutex_lock lock(platforms_mutex_); + + SE_ASSIGN_OR_RETURN(Platform * platform, LookupByIdLocked(id)); + if (platform->Initialized()) { return port::Status( - port::error::NOT_FOUND, - port::Printf("could not find registered platform with id: 0x%p", id)); + port::error::FAILED_PRECONDITION, + port::Printf("platform with id 0x%p is already initialized", id)); } - return it->second; + SE_RETURN_IF_ERROR(platform->Initialize(options)); + + return platform; } /* static */ void MultiPlatformManager::ClearPlatformRegistry() { - mutex_lock lock(GetPlatformsMutex()); + mutex_lock lock(platforms_mutex_); GetPlatformMap()->clear(); GetPlatformByIdMap()->clear(); } diff --git a/tensorflow/stream_executor/multi_platform_manager.h b/tensorflow/stream_executor/multi_platform_manager.h index ea6155b482..438653ee20 100644 --- a/tensorflow/stream_executor/multi_platform_manager.h +++ b/tensorflow/stream_executor/multi_platform_manager.h @@ -67,13 +67,13 @@ limitations under the License. 
#include #include #include -#include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/lib/status.h" #include "tensorflow/stream_executor/lib/statusor.h" #include "tensorflow/stream_executor/platform.h" #include "tensorflow/stream_executor/platform/mutex.h" #include "tensorflow/stream_executor/platform/port.h" +#include "tensorflow/stream_executor/platform/thread_annotations.h" namespace perftools { namespace gputools { @@ -85,26 +85,43 @@ class MultiPlatformManager { // already registered. The associated listener, if not null, will be used to // trace events for ALL executors for that platform. // Takes ownership of listener. - static port::Status RegisterPlatform(std::unique_ptr platform); + static port::Status RegisterPlatform(std::unique_ptr platform) + LOCKS_EXCLUDED(platforms_mutex_); - // Retrieves the platform registered with the given platform name; e.g. - // "CUDA", "OpenCL", ... + // Retrieves the platform registered with the given platform name (e.g. + // "CUDA", "OpenCL", ...) or id (an opaque, comparable value provided by the + // Platform's Id() method). + // + // If the platform has not already been initialized, it will be initialized + // with a default set of parameters. // // If the requested platform is not registered, an error status is returned. // Ownership of the platform is NOT transferred to the caller -- // the MultiPlatformManager owns the platforms in a singleton-like fashion. - static port::StatusOr PlatformWithName(const string& target); - - // Retrieves the platform registered with the given platform ID, which - // is an opaque (but comparable) value. + static port::StatusOr PlatformWithName(const string& target) + LOCKS_EXCLUDED(platforms_mutex_); + static port::StatusOr PlatformWithId(const Platform::Id& id) + LOCKS_EXCLUDED(platforms_mutex_); + + // Retrieves the platform registered with the given platform name (e.g. + // "CUDA", "OpenCL", ...) 
or id (an opaque, comparable value provided by the + // Platform's Id() method). + // + // The platform will be initialized with the given options. If the platform + // was already initialized, an error will be returned. // // If the requested platform is not registered, an error status is returned. // Ownership of the platform is NOT transferred to the caller -- // the MultiPlatformManager owns the platforms in a singleton-like fashion. - static port::StatusOr PlatformWithId(const Platform::Id& id); + static port::StatusOr InitializePlatformWithName( + const string& target, const std::map& options) + LOCKS_EXCLUDED(platforms_mutex_); + static port::StatusOr InitializePlatformWithId( + const Platform::Id& id, const std::map& options) + LOCKS_EXCLUDED(platforms_mutex_); // Clears the set of registered platforms, primarily used for testing. - static void ClearPlatformRegistry(); + static void ClearPlatformRegistry() LOCKS_EXCLUDED(platforms_mutex_); // Although the MultiPlatformManager "owns" its platforms, it holds them as // undecorated pointers to prevent races during program exit (between this @@ -122,17 +139,16 @@ class MultiPlatformManager { // Provides access to the available set of platforms under a lock. static port::Status WithPlatforms( - std::function callback) { - mutex_lock lock(GetPlatformsMutex()); + std::function callback) + LOCKS_EXCLUDED(platforms_mutex_) { + mutex_lock lock(platforms_mutex_); return callback(GetPlatformMap()); } private: - // mutex that guards the platform map. - static mutex& GetPlatformsMutex() { - static mutex* platforms_mutex = new mutex; - return *platforms_mutex; - } + using PlatformIdMap = std::map; + + static mutex platforms_mutex_; // TODO(b/22689637): Clean up these two maps; make sure they coexist nicely. // TODO(b/22689637): Move this (whatever the final/"official" map is) to @@ -147,12 +163,21 @@ class MultiPlatformManager { // Holds a Platform::Id-to-object mapping. 
// Unlike platforms_ above, this map does not own its contents. - static std::map* GetPlatformByIdMap() { - using PlatformIdMap = std::map; + static PlatformIdMap* GetPlatformByIdMap() { static PlatformIdMap* instance = new PlatformIdMap; return instance; } + // Looks up the platform object with the given name. Assumes the Platforms + // mutex is held. + static port::StatusOr LookupByNameLocked(const string& target) + EXCLUSIVE_LOCKS_REQUIRED(platforms_mutex_); + + // Looks up the platform object with the given id. Assumes the Platforms + // mutex is held. + static port::StatusOr LookupByIdLocked(const Platform::Id& id) + EXCLUSIVE_LOCKS_REQUIRED(platforms_mutex_); + SE_DISALLOW_COPY_AND_ASSIGN(MultiPlatformManager); }; diff --git a/tensorflow/stream_executor/platform.cc b/tensorflow/stream_executor/platform.cc index 93f08d06da..4cdc22bd16 100644 --- a/tensorflow/stream_executor/platform.cc +++ b/tensorflow/stream_executor/platform.cc @@ -85,6 +85,17 @@ StreamExecutorConfig::StreamExecutorConfig(int ordinal_in) Platform::~Platform() {} +bool Platform::Initialized() const { return true; } + +port::Status Platform::Initialize( + const std::map &platform_options) { + if (!platform_options.empty()) { + return port::Status(port::error::UNIMPLEMENTED, + "this platform does not support custom initialization"); + } + return port::Status::OK(); +} + port::Status Platform::ForceExecutorShutdown() { return port::Status(port::error::UNIMPLEMENTED, "executor shutdown is not supported on this platform"); diff --git a/tensorflow/stream_executor/platform.h b/tensorflow/stream_executor/platform.h index f0a0e60e02..54f8aa86c2 100644 --- a/tensorflow/stream_executor/platform.h +++ b/tensorflow/stream_executor/platform.h @@ -111,6 +111,9 @@ class Platform { // Returns a key uniquely identifying this platform. virtual Id id() const = 0; + // Name of this platform. + virtual const string& Name() const = 0; + // Returns the number of devices accessible on this platform. 
// // Note that, though these devices are visible, if there is only one userspace @@ -118,8 +121,17 @@ class Platform { // device, a call to ExecutorForDevice may return an error status. virtual int VisibleDeviceCount() const = 0; - // Name of this platform. - virtual const string& Name() const = 0; + // Returns true iff the platform has been initialized. + virtual bool Initialized() const; + + // Initializes the platform with a custom set of options. The platform must be + // initialized before obtaining StreamExecutor objects. The interpretation of + // the platform_options argument is implementation specific. This method may + // return an error if unrecognized options are provided. If using + // MultiPlatformManager, this method will be called automatically by + // InitializePlatformWithId/InitializePlatformWithName. + virtual port::Status Initialize( + const std::map& platform_options); // Returns a device with the given ordinal on this platform with a default // plugin configuration or, if none can be found with the given ordinal or @@ -156,6 +168,8 @@ class Platform { // This is only useful on platforms which bind a device to a single process // that has obtained the device context. May return UNIMPLEMENTED on platforms // that have no reason to destroy device contexts. + // + // The platform must be reinitialized after this is called. virtual port::Status ForceExecutorShutdown(); // Registers a TraceListener to listen to all StreamExecutors for this -- GitLab From 52ed0eed35d782fbf13fbfbfd6a1e755c56a5f80 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 11:40:19 -0800 Subject: [PATCH 614/884] This strengthens several checks of error codes returned by libcurl. In all of the cases that are changed by this CL, a failure indicates a software bug, not a runtime condition that should be handled and continued beyond. Continuing to execute only promotes silently-ignored bugs. 
I also removed the useless call which attempts to set the HTTP protocol to HTTP/2, because this call always fails. I opened b/74351157 to track the possible feature of adding support for HTTP/2. Also simplified the code around constructing the error string when returning actual Status objects, by moving code into a lambda. PiperOrigin-RevId: 188363531 --- .../core/platform/cloud/curl_http_request.cc | 240 ++++++------------ .../core/platform/cloud/curl_http_request.h | 9 - 2 files changed, 83 insertions(+), 166 deletions(-) diff --git a/tensorflow/core/platform/cloud/curl_http_request.cc b/tensorflow/core/platform/cloud/curl_http_request.cc index c0d6e49af9..1ac6a7531b 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.cc +++ b/tensorflow/core/platform/cloud/curl_http_request.cc @@ -25,6 +25,8 @@ limitations under the License. #include "tensorflow/core/platform/types.h" #include "tensorflow/core/public/version.h" +#define CHECK_CURL_OK(expr) CHECK_EQ(expr, CURLE_OK) + namespace tensorflow { namespace { @@ -130,37 +132,21 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env) // default in //third_party:curl.BUILD and can be customized via an // environment variable. - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput), - "Setting verbose output"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt( - curl_, CURLOPT_USERAGENT, - strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str()), - "Setting user agent"); + CHECK_CURL_OK( + libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput)); + CHECK_CURL_OK(libcurl_->curl_easy_setopt( + curl_, CURLOPT_USERAGENT, + strings::StrCat("TensorFlow/", TF_VERSION_STRING).c_str())); // Do not use signals for timeouts - does not work in multi-threaded programs. 
- TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L), - "Disabling signals"); - // We don't log an error here because HTTP/2 support may not be built into - // cURL, and we'd spam the logs. - // - // TODO(jhseu): Enable HTTP/2. - CURLcodeToStatus(libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTP_VERSION, - CURL_HTTP_VERSION_2_0)) - .IgnoreError(); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_NOSIGNAL, 1L)); + + // TODO(b/74351157): Enable HTTP/2. // Set up the progress meter. - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL), - "Disabling progress meter"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this), - "Setting custom pointer to the progress callback"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION, - &CurlHttpRequest::ProgressCallback), - "Setting the progress callback"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL)); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this)); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION, + &CurlHttpRequest::ProgressCallback)); // If response buffer is not set, libcurl will print results to stdout, // so we always set it. 
@@ -193,17 +179,13 @@ void CurlHttpRequest::SetUri(const string& uri) { CheckNotSent(); is_uri_set_ = true; uri_ = uri; - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str()), - "Setting URL"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_URL, uri.c_str())); } void CurlHttpRequest::SetRange(uint64 start, uint64 end) { CheckNotSent(); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_RANGE, - strings::StrCat(start, "-", end).c_str()), - "Setting range"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt( + curl_, CURLOPT_RANGE, strings::StrCat(start, "-", end).c_str())); } void CurlHttpRequest::AddHeader(const string& name, const string& value) { @@ -239,9 +221,8 @@ void CurlHttpRequest::SetDeleteRequest() { CheckMethodNotSet(); is_method_set_ = true; method_ = RequestMethod::kDelete; - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE"), - "Setting delete request"); + CHECK_CURL_OK( + libcurl_->curl_easy_setopt(curl_, CURLOPT_CUSTOMREQUEST, "DELETE")); } Status CurlHttpRequest::SetPutFromFile(const string& body_filepath, @@ -264,12 +245,9 @@ Status CurlHttpRequest::SetPutFromFile(const string& body_filepath, curl_headers_ = libcurl_->curl_slist_append( curl_headers_, strings::StrCat("Content-Length: ", size).c_str()); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting PUT request"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(put_body_)), - "Setting read data"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1)); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(put_body_))); // Using the default CURLOPT_READFUNCTION, which is doing an fread() on the // FILE * userdata set with CURLOPT_READDATA. 
return Status::OK(); @@ -280,18 +258,13 @@ void CurlHttpRequest::SetPutEmptyBody() { CheckMethodNotSet(); is_method_set_ = true; method_ = RequestMethod::kPut; - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1), "Setting put request"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_PUT, 1)); AddHeader("Content-Length", "0"); AddHeader("Transfer-Encoding", "identity"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)), - "Setting read data"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback), - "Setting read callback"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this))); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback)); } void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) { @@ -301,17 +274,11 @@ void CurlHttpRequest::SetPostFromBuffer(const char* buffer, size_t size) { method_ = RequestMethod::kPost; curl_headers_ = libcurl_->curl_slist_append( curl_headers_, strings::StrCat("Content-Length: ", size).c_str()); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), - "Setting POST request"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)), - "Setting read data"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback), - "Setting read callback"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1)); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this))); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback)); post_body_buffer_ = StringPiece(buffer, size); } @@ -320,19 
+287,13 @@ void CurlHttpRequest::SetPostEmptyBody() { CheckMethodNotSet(); is_method_set_ = true; method_ = RequestMethod::kPost; - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1), - "Setting POST request"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_POST, 1)); AddHeader("Content-Length", "0"); AddHeader("Transfer-Encoding", "identity"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, - reinterpret_cast(this)), - "Setting read data"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, - &CurlHttpRequest::ReadCallback), - "Setting read callback"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READDATA, + reinterpret_cast(this))); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_READFUNCTION, + &CurlHttpRequest::ReadCallback)); } void CurlHttpRequest::SetResultBuffer(std::vector* out_buffer) { @@ -342,14 +303,10 @@ void CurlHttpRequest::SetResultBuffer(std::vector* out_buffer) { out_buffer->clear(); response_buffer_ = out_buffer; - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, - reinterpret_cast(this)), - "Setting write data"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, - &CurlHttpRequest::WriteCallback), - "Setting write callback"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, + reinterpret_cast(this))); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, + &CurlHttpRequest::WriteCallback)); } void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) { @@ -357,15 +314,10 @@ void CurlHttpRequest::SetResultBufferDirect(char* buffer, size_t size) { CheckNotSent(); direct_response_ = DirectResponseState{buffer, size, 0}; - - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, - reinterpret_cast(this)), - "Setting write 
data"); - TF_CURL_LOG_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEFUNCTION, - &CurlHttpRequest::WriteCallbackDirect), - "Setting write callback"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_WRITEDATA, + reinterpret_cast(this))); + CHECK_CURL_OK(libcurl_->curl_easy_setopt( + curl_, CURLOPT_WRITEFUNCTION, &CurlHttpRequest::WriteCallbackDirect)); } bool CurlHttpRequest::IsDirectResponse() const { @@ -462,24 +414,6 @@ size_t CurlHttpRequest::HeaderCallback(const void* ptr, size_t size, return size * nmemb; } -// This is pulled out as a separate function so that it's only computed when -// an error occurs. -string response_to_error_message(uint64 response_code, StringPiece response, - size_t response_to_error_limit, - CURLcode curl_result, - StringPiece error_buffer) { - string error_message = strings::StrCat( - "Error executing an HTTP request (HTTP response code ", response_code, - ", error code ", curl_result, ", error message '", error_buffer, "')"); - if (!response.empty()) { - return strings::StrCat( - error_message, ", response '", - response.substr(0, std::min(response.size(), response_to_error_limit)), - "'"); - } - return error_message; -} - Status CurlHttpRequest::Send() { CheckNotSent(); CHECK(is_uri_set_) << "URI has not been set."; @@ -487,36 +421,26 @@ Status CurlHttpRequest::Send() { is_sent_ = true; if (curl_headers_) { - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_), - "Setting HTTP header"); + CHECK_CURL_OK( + libcurl_->curl_easy_setopt(curl_, CURLOPT_HTTPHEADER, curl_headers_)); } if (resolve_list_) { - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_RESOLVE, resolve_list_), - "Setting custom resolves"); + CHECK_CURL_OK( + libcurl_->curl_easy_setopt(curl_, CURLOPT_RESOLVE, resolve_list_)); } - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERDATA, - reinterpret_cast(this)), 
- "Setting header data"); - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION, - &CurlHttpRequest::HeaderCallback), - "Setting header function"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERDATA, + reinterpret_cast(this))); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_HEADERFUNCTION, + &CurlHttpRequest::HeaderCallback)); - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, request_timeout_secs_), - "Setting request timeout"); - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT, - connect_timeout_secs_), - "Setting connection timeout"); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_TIMEOUT, + request_timeout_secs_)); + CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_CONNECTTIMEOUT, + connect_timeout_secs_)); char error_buffer[CURL_ERROR_SIZE] = {0}; - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer), - "Setting error buffer"); + CHECK_CURL_OK( + libcurl_->curl_easy_setopt(curl_, CURLOPT_ERRORBUFFER, error_buffer)); if (stats_ != nullptr) { stats_->RecordRequest(this, uri_, method_); @@ -526,15 +450,27 @@ Status CurlHttpRequest::Send() { TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( curl_result, "Performing request. 
Detailed error: ", error_buffer); + auto get_error_message = [this, curl_result, &error_buffer]() -> string { + StringPiece response = GetResponse(); + string error_message = strings::StrCat( + "Error executing an HTTP request (HTTP response code ", response_code_, + ", error code ", curl_result, ", error message '", error_buffer, "')"); + if (!response.empty()) { + return strings::StrCat( + error_message, ", response '", + response.substr(0, + std::min(response.size(), response_to_error_limit_)), + "'"); + } + return error_message; + }; + double written_size = 0; - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_getinfo(curl_, CURLINFO_SIZE_DOWNLOAD, &written_size), - "Fetching written size"); + CHECK_CURL_OK(libcurl_->curl_easy_getinfo(curl_, CURLINFO_SIZE_DOWNLOAD, + &written_size)); - TF_CURL_RETURN_WITH_CONTEXT_IF_ERROR( - libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, - &response_code_), - "Fetching response code"); + CHECK_CURL_OK(libcurl_->curl_easy_getinfo(curl_, CURLINFO_RESPONSE_CODE, + &response_code_)); Status result; switch (response_code_) { @@ -558,25 +494,19 @@ Status CurlHttpRequest::Send() { // INVALID_ARGUMENT indicates a problem with how the request is constructed. case 400: // Bad Request case 411: // Length Required - result = errors::InvalidArgument(response_to_error_message( - response_code_, GetResponse(), response_to_error_limit_, curl_result, - error_buffer)); + result = errors::InvalidArgument(get_error_message()); break; // PERMISSION_DENIED indicates an authentication or an authorization issue. case 401: // Unauthorized case 403: // Forbidden - result = errors::PermissionDenied(response_to_error_message( - response_code_, GetResponse(), response_to_error_limit_, curl_result, - error_buffer)); + result = errors::PermissionDenied(get_error_message()); break; // NOT_FOUND indicates that the requested resource does not exist. 
case 404: // Not found case 410: // Gone - result = errors::NotFound(response_to_error_message( - response_code_, GetResponse(), response_to_error_limit_, curl_result, - error_buffer)); + result = errors::NotFound(get_error_message()); break; // FAILED_PRECONDITION indicates that the request failed because some @@ -588,9 +518,7 @@ Status CurlHttpRequest::Send() { case 307: // Temporary Redirect case 412: // Precondition Failed case 413: // Payload Too Large - result = errors::FailedPrecondition(response_to_error_message( - response_code_, GetResponse(), response_to_error_limit_, curl_result, - error_buffer)); + result = errors::FailedPrecondition(get_error_message()); break; // UNAVAILABLE indicates a problem that can go away if the request @@ -606,9 +534,7 @@ Status CurlHttpRequest::Send() { case 502: // Bad Gateway case 503: // Service Unavailable default: // All other HTTP response codes also should be retried. - result = errors::Unavailable(response_to_error_message( - response_code_, GetResponse(), response_to_error_limit_, curl_result, - error_buffer)); + result = errors::Unavailable(get_error_message()); break; } if (!result.ok()) { diff --git a/tensorflow/core/platform/cloud/curl_http_request.h b/tensorflow/core/platform/cloud/curl_http_request.h index 2a9be81f28..e658948ab9 100644 --- a/tensorflow/core/platform/cloud/curl_http_request.h +++ b/tensorflow/core/platform/cloud/curl_http_request.h @@ -276,15 +276,6 @@ Status CURLcodeToStatus(CURLcode code); } \ } while (0) -#define TF_CURL_LOG_WITH_CONTEXT_IF_ERROR(_code, ...) 
\ - do { \ - if (_code != CURLE_OK) { \ - ::tensorflow::Status _status = ::tensorflow::CURLcodeToStatus(_code); \ - ::tensorflow::errors::AppendToMessage(&_status, __VA_ARGS__); \ - LOG(ERROR) << "curl error: " << _status.error_message(); \ - } \ - } while (0) - } // namespace tensorflow #endif // TENSORFLOW_CORE_PLATFORM_CLOUD_CURL_HTTP_REQUEST_H_ -- GitLab From 214ad0978641a946c25b334c4a33ecd1793b4d70 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 8 Mar 2018 11:49:19 -0800 Subject: [PATCH 615/884] Add some simple HLO creation utilities to auto-infer result shapes I need something like this for my Gather HLO->HLO lowering pass. PiperOrigin-RevId: 188365102 --- tensorflow/compiler/xla/service/BUILD | 14 +++- .../xla/service/algebraic_simplifier.cc | 84 ++++++++----------- .../xla/service/batchnorm_expander.cc | 1 - tensorflow/compiler/xla/service/gpu/BUILD | 1 + .../compiler/xla/service/gpu/pad_insertion.cc | 30 ++----- .../xla/service/hlo_creation_utils.cc | 67 +++++++++++++++ .../compiler/xla/service/hlo_creation_utils.h | 56 +++++++++++++ 7 files changed, 175 insertions(+), 78 deletions(-) create mode 100644 tensorflow/compiler/xla/service/hlo_creation_utils.cc create mode 100644 tensorflow/compiler/xla/service/hlo_creation_utils.h diff --git a/tensorflow/compiler/xla/service/BUILD b/tensorflow/compiler/xla/service/BUILD index a0f0635e52..438f3c829f 100644 --- a/tensorflow/compiler/xla/service/BUILD +++ b/tensorflow/compiler/xla/service/BUILD @@ -1166,6 +1166,17 @@ tf_cc_test( ], ) +cc_library( + name = "hlo_creation_utils", + srcs = ["hlo_creation_utils.cc"], + hdrs = ["hlo_creation_utils.h"], + deps = [ + ":hlo", + ":shape_inference", + "//tensorflow/compiler/xla:statusor", + ], +) + cc_library( name = "batchnorm_expander", srcs = ["batchnorm_expander.cc"], @@ -1174,7 +1185,6 @@ cc_library( ":hlo", ":hlo_pass", ":hlo_query", - ":shape_inference", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", 
"//tensorflow/compiler/xla:status_macros", @@ -1213,9 +1223,9 @@ cc_library( hdrs = ["algebraic_simplifier.h"], deps = [ ":hlo", + ":hlo_creation_utils", ":hlo_pass", ":hlo_query", - ":shape_inference", "//tensorflow/compiler/xla:literal_util", "//tensorflow/compiler/xla:shape_util", "//tensorflow/compiler/xla:status_macros", diff --git a/tensorflow/compiler/xla/service/algebraic_simplifier.cc b/tensorflow/compiler/xla/service/algebraic_simplifier.cc index ecaa474336..be7aa307d2 100644 --- a/tensorflow/compiler/xla/service/algebraic_simplifier.cc +++ b/tensorflow/compiler/xla/service/algebraic_simplifier.cc @@ -26,10 +26,10 @@ limitations under the License. #include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h" #include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_creation_utils.h" #include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_query.h" -#include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" @@ -383,13 +383,9 @@ Status AlgebraicSimplifierVisitor::HandleAdd(HloInstruction* add) { !lhs->operand(0)->IsConstant() && lhs->operand(1)->IsConstant()) { auto* c1 = lhs->mutable_operand(1); auto* c2 = rhs; - TF_ASSIGN_OR_RETURN( - Shape sum_of_constants_shape, - ShapeInference::InferBinaryOpShape(HloOpcode::kAdd, c1, c2)); - auto* sum_of_constants = - computation_->AddInstruction(HloInstruction::CreateBinary( - sum_of_constants_shape, HloOpcode::kAdd, c1, c2)); + TF_ASSIGN_OR_RETURN(auto* sum_of_constants, + CreateBinaryHlo(HloOpcode::kAdd, c1, c2)); return ReplaceWithNewInstruction( add, HloInstruction::CreateBinary(add->shape(), HloOpcode::kAdd, lhs->mutable_operand(0), @@ -641,31 +637,24 @@ 
Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { if (lhs->opcode() == HloOpcode::kDivide && rhs->opcode() == HloOpcode::kDivide) { TF_ASSIGN_OR_RETURN( - const Shape a_times_d_shape, - ShapeInference::InferBinaryOpShape(HloOpcode::kMultiply, - lhs->operand(0), rhs->operand(1))); - auto a_times_d = computation_->AddInstruction(HloInstruction::CreateBinary( - a_times_d_shape, HloOpcode::kMultiply, lhs->mutable_operand(0), - rhs->mutable_operand(1))); + auto a_times_d, + CreateBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(0), + rhs->mutable_operand(1))); TF_ASSIGN_OR_RETURN( - const Shape b_times_c_shape, - ShapeInference::InferBinaryOpShape(HloOpcode::kMultiply, - lhs->operand(1), rhs->operand(0))); - auto b_times_c = computation_->AddInstruction(HloInstruction::CreateBinary( - b_times_c_shape, HloOpcode::kMultiply, lhs->mutable_operand(1), - rhs->mutable_operand(0))); - return ReplaceWithNewInstruction( - divide, HloInstruction::CreateBinary( - divide->shape(), HloOpcode::kDivide, a_times_d, b_times_c)); + auto b_times_c, + CreateBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(1), + rhs->mutable_operand(0))); + TF_ASSIGN_OR_RETURN(auto new_divide, CreateBinaryHlo(HloOpcode::kDivide, + a_times_d, b_times_c)); + + return ReplaceInstruction(divide, new_divide); } // (A / B) / C => A / (B * C) if (lhs->opcode() == HloOpcode::kDivide) { - TF_ASSIGN_OR_RETURN(const Shape b_times_c_shape, - ShapeInference::InferBinaryOpShape( - HloOpcode::kMultiply, lhs->operand(1), rhs)); - auto b_times_c = computation_->AddInstruction(HloInstruction::CreateBinary( - b_times_c_shape, HloOpcode::kMultiply, lhs->mutable_operand(1), rhs)); + TF_ASSIGN_OR_RETURN( + auto b_times_c, + CreateBinaryHlo(HloOpcode::kMultiply, lhs->mutable_operand(1), rhs)); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, @@ -674,11 +663,9 @@ Status AlgebraicSimplifierVisitor::HandleDivide(HloInstruction* divide) { // A 
/ (B / C) => (A*C) / B if (rhs->opcode() == HloOpcode::kDivide) { - TF_ASSIGN_OR_RETURN(const Shape a_times_c_shape, - ShapeInference::InferBinaryOpShape( - HloOpcode::kMultiply, lhs, rhs->operand(1))); - auto a_times_c = computation_->AddInstruction(HloInstruction::CreateBinary( - a_times_c_shape, HloOpcode::kMultiply, lhs, rhs->mutable_operand(1))); + TF_ASSIGN_OR_RETURN( + auto a_times_c, + CreateBinaryHlo(HloOpcode::kMultiply, lhs, rhs->mutable_operand(1))); return ReplaceWithNewInstruction( divide, HloInstruction::CreateBinary(divide->shape(), HloOpcode::kDivide, @@ -1311,17 +1298,14 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { padding_dimension->set_edge_padding_high(0); } } - TF_ASSIGN_OR_RETURN(Shape nonzero_pad_shape, - ShapeInference::InferPadShape(pad->operand(0)->shape(), - pad->operand(1)->shape(), - nonzero_padding)); + + TF_ASSIGN_OR_RETURN(HloInstruction * nonzero_pad, + CreatePadHlo(pad->mutable_operand(0), + pad->mutable_operand(1), nonzero_padding)); // Copy the layout from the original pad instructions. The new pad and the // slice instruction should all have the same layout. - TF_RETURN_IF_ERROR( - LayoutUtil::CopyLayoutBetweenShapes(pad->shape(), &nonzero_pad_shape)); - HloInstruction* nonzero_pad = computation_->AddInstruction( - HloInstruction::CreatePad(nonzero_pad_shape, pad->mutable_operand(0), - pad->mutable_operand(1), nonzero_padding)); + TF_RETURN_IF_ERROR(LayoutUtil::CopyLayoutBetweenShapes( + pad->shape(), nonzero_pad->mutable_shape())); // Second, construct the slice instruction to perform the negative padding. 
std::vector start_indices; @@ -1334,7 +1318,7 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { if (padding_dimension.edge_padding_low() < 0) { start = -1 * padding_dimension.edge_padding_low(); } - int64 end = nonzero_pad_shape.dimensions(i); + int64 end = nonzero_pad->shape().dimensions(i); if (padding_dimension.edge_padding_high() < 0) { end += padding_dimension.edge_padding_high(); } @@ -1343,16 +1327,14 @@ Status AlgebraicSimplifierVisitor::HandlePad(HloInstruction* pad) { strides.push_back(1); } - // Verify that the slice shape matches the pad shape. TF_ASSIGN_OR_RETURN( - Shape inferred_slice_shape, - ShapeInference::InferSliceShape(nonzero_pad_shape, start_indices, - end_indices, strides)); - TF_RET_CHECK(ShapeUtil::Compatible(inferred_slice_shape, pad->shape())); - - std::unique_ptr slice = HloInstruction::CreateSlice( - pad->shape(), nonzero_pad, start_indices, end_indices, strides); - return ReplaceWithNewInstruction(pad, std::move(slice)); + HloInstruction * slice, + CreateSliceHlo(nonzero_pad, start_indices, end_indices, strides)); + + // Verify that the slice shape matches the pad shape. + TF_RET_CHECK(ShapeUtil::Compatible(slice->shape(), pad->shape())); + + return ReplaceInstruction(pad, slice); } return Status::OK(); diff --git a/tensorflow/compiler/xla/service/batchnorm_expander.cc b/tensorflow/compiler/xla/service/batchnorm_expander.cc index 84c9db3293..38086bd7e1 100644 --- a/tensorflow/compiler/xla/service/batchnorm_expander.cc +++ b/tensorflow/compiler/xla/service/batchnorm_expander.cc @@ -30,7 +30,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/hlo_instruction.h" #include "tensorflow/compiler/xla/service/hlo_opcode.h" #include "tensorflow/compiler/xla/service/hlo_query.h" -#include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/types.h" diff --git a/tensorflow/compiler/xla/service/gpu/BUILD b/tensorflow/compiler/xla/service/gpu/BUILD index a1ea5884a4..cdaa14bbb9 100644 --- a/tensorflow/compiler/xla/service/gpu/BUILD +++ b/tensorflow/compiler/xla/service/gpu/BUILD @@ -455,6 +455,7 @@ cc_library( "//tensorflow/compiler/xla:util", "//tensorflow/compiler/xla:window_util", "//tensorflow/compiler/xla:xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_creation_utils", "//tensorflow/compiler/xla/service:hlo_pass", "//tensorflow/compiler/xla/service:shape_inference", ], diff --git a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc index 25846dc6cd..fa405b9329 100644 --- a/tensorflow/compiler/xla/service/gpu/pad_insertion.cc +++ b/tensorflow/compiler/xla/service/gpu/pad_insertion.cc @@ -17,6 +17,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/literal_util.h" #include "tensorflow/compiler/xla/service/gpu/ir_emission_utils.h" +#include "tensorflow/compiler/xla/service/hlo_creation_utils.h" #include "tensorflow/compiler/xla/service/shape_inference.h" #include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/window_util.h" @@ -68,13 +69,7 @@ HloInstruction* MaybePaddedAndSlicedInput( HloInstruction* padding = computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(element_type)))); - input = computation->AddInstruction(HloInstruction::CreatePad( - ShapeInference::InferPadShape( - /*operand_shape=*/input->shape(), - /*padding_value_shape=*/ShapeUtil::MakeShape(element_type, {}), - padding_config) - .ConsumeValueOrDie(), - input, padding, padding_config)); + input = CreatePadHlo(input, padding, padding_config).ValueOrDie(); } if (window_util::HasNegativePadding(conv_window)) { @@ -97,11 +92,8 @@ HloInstruction* MaybePaddedAndSlicedInput( std::max(0LL, -conv_window.dimensions(i).padding_high()); } - input = computation->AddInstruction(HloInstruction::CreateSlice( - ShapeInference::InferSliceShape(input->shape(), start_indices, - limit_indices, strides) - .ConsumeValueOrDie(), - input, start_indices, limit_indices, strides)); + input = CreateSliceHlo(input, start_indices, limit_indices, strides) + .ValueOrDie(); } return input; @@ -134,13 +126,7 @@ HloInstruction* MaybePaddedKernel(const Window& conv_window, HloInstruction* padding = computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(element_type)))); - return computation->AddInstruction(HloInstruction::CreatePad( - ShapeInference::InferPadShape( - /*operand_shape=*/kernel->shape(), - /*padding_value_shape=*/ShapeUtil::MakeShape(element_type, {}), - padding_config) - .ConsumeValueOrDie(), - kernel, padding, padding_config)); + return CreatePadHlo(kernel, padding, padding_config).ValueOrDie(); } } // namespace @@ -252,11 +238,7 @@ bool 
PadInsertion::CanonicalizeBackwardFilterConvolution( computation->AddInstruction(HloInstruction::CreateConstant( MakeUnique(Literal::Zero(input->shape().element_type())))); HloInstruction* padded_input = - computation->AddInstruction(HloInstruction::CreatePad( - ShapeInference::InferPadShape(input->shape(), padding->shape(), - input_padding_config) - .ConsumeValueOrDie(), - input, padding, input_padding_config)); + CreatePadHlo(input, padding, input_padding_config).ValueOrDie(); // The shape of the backward_conv CustomCall is a tuple (conv_result, // scratch_buffer). Extract out the shape of conv_result. diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.cc b/tensorflow/compiler/xla/service/hlo_creation_utils.cc new file mode 100644 index 0000000000..63d2646d5f --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.cc @@ -0,0 +1,67 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_creation_utils.h" +#include "tensorflow/compiler/xla/service/shape_inference.h" + +namespace xla { +StatusOr CreateBinaryHlo(HloOpcode opcode, HloInstruction* lhs, + HloInstruction* rhs) { + HloComputation* computation = lhs->parent(); + CHECK_EQ(computation, rhs->parent()); + TF_ASSIGN_OR_RETURN(Shape binary_op_shape, + ShapeInference::InferBinaryOpShape(opcode, lhs, rhs)); + return computation->AddInstruction( + HloInstruction::CreateBinary(binary_op_shape, opcode, lhs, rhs)); +} + +StatusOr CreatePadHlo(HloInstruction* operand, + HloInstruction* padding_value, + const PaddingConfig& padding_config) { + HloComputation* computation = operand->parent(); + CHECK_EQ(computation, padding_value->parent()); + TF_ASSIGN_OR_RETURN( + Shape pad_shape, + ShapeInference::InferPadShape(operand->shape(), padding_value->shape(), + padding_config)); + return computation->AddInstruction(HloInstruction::CreatePad( + pad_shape, operand, padding_value, padding_config)); +} + +StatusOr CreateSliceHlo( + HloInstruction* operand, tensorflow::gtl::ArraySlice start_indices, + tensorflow::gtl::ArraySlice limit_indices, + tensorflow::gtl::ArraySlice strides) { + HloComputation* computation = operand->parent(); + TF_ASSIGN_OR_RETURN(Shape slice_shape, ShapeInference::InferSliceShape( + operand->shape(), start_indices, + limit_indices, strides)); + return computation->AddInstruction(HloInstruction::CreateSlice( + slice_shape, operand, start_indices, limit_indices, strides)); +} + +StatusOr CreateConvolveHlo( + HloInstruction* lhs, HloInstruction* rhs, const Window& window, + const ConvolutionDimensionNumbers& dimension_numbers) { + HloComputation* computation = lhs->parent(); + CHECK_EQ(computation, rhs->parent()); + TF_ASSIGN_OR_RETURN(Shape convolve_shape, ShapeInference::InferConvolveShape( + lhs->shape(), rhs->shape(), + window, dimension_numbers)); + return 
computation->AddInstruction(HloInstruction::CreateConvolve( + convolve_shape, lhs, rhs, window, dimension_numbers)); +} + +} // namespace xla diff --git a/tensorflow/compiler/xla/service/hlo_creation_utils.h b/tensorflow/compiler/xla/service/hlo_creation_utils.h new file mode 100644 index 0000000000..0cd633111a --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_creation_utils.h @@ -0,0 +1,56 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CREATION_UTILS_H_ +#define TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CREATION_UTILS_H_ + +#include "tensorflow/compiler/xla/service/hlo_computation.h" +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/statusor.h" + +namespace xla { + +// Some lightweight utilities intended to make HLO instruction creation more +// ergonomic. We don't have a complete set of helpers yet -- I expect we'll +// expand this interface as needed on an ad-hoc basis. + +// Creates a binary HLO instruction and adds it to the computation containing +// `lhs` and `rhs` (`lhs` and `rhs` must be in the same computation). 
+StatusOr CreateBinaryHlo(HloOpcode opcode, HloInstruction* lhs, + HloInstruction* rhs); + +// Creates a pad HLO instruction and adds it to the computation containing +// `operand` and `padding_value` (`operand` and `padding_value` must be in the +// same computation). +StatusOr CreatePadHlo(HloInstruction* operand, + HloInstruction* padding_value, + const PaddingConfig& padding_config); + +// Creates a slice HLO instruction and adds it to the computation containing +// `operand`. +StatusOr CreateSliceHlo( + HloInstruction* operand, tensorflow::gtl::ArraySlice start_indices, + tensorflow::gtl::ArraySlice limit_indices, + tensorflow::gtl::ArraySlice strides); + +// Creates a convolution HLO instruction and adds it to the computation +// containing `lhs` and `rhs` (`lhs` and `rhs` must be in the same computation). +StatusOr CreateConvolveHlo( + HloInstruction* lhs, HloInstruction* rhs, const Window& window, + const ConvolutionDimensionNumbers& dimension_numbers); + +} // namespace xla + +#endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_CREATION_UTILS_H_ -- GitLab From 6e3a43f4b7a1288c878b5daff274f1229256fbe8 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 8 Mar 2018 11:56:29 -0800 Subject: [PATCH 616/884] TFLite: Delegate Buffer Handle interface (take 2) PiperOrigin-RevId: 188366045 --- tensorflow/contrib/lite/BUILD | 22 +++ tensorflow/contrib/lite/context.c | 7 +- tensorflow/contrib/lite/context.h | 64 +++++++- tensorflow/contrib/lite/interpreter.cc | 154 +++++++++++++++---- tensorflow/contrib/lite/interpreter.h | 45 +++++- tensorflow/contrib/lite/interpreter_test.cc | 160 ++++++++++++++++---- tensorflow/contrib/lite/util.cc | 27 ++++ tensorflow/contrib/lite/util.h | 34 +++++ tensorflow/contrib/lite/util_test.cc | 50 ++++++ 9 files changed, 492 insertions(+), 71 deletions(-) create mode 100644 tensorflow/contrib/lite/util.cc create mode 100644 tensorflow/contrib/lite/util.h create mode 100644 tensorflow/contrib/lite/util_test.cc diff --git 
a/tensorflow/contrib/lite/BUILD b/tensorflow/contrib/lite/BUILD index 44c4a7e2ca..5cfbb544b7 100644 --- a/tensorflow/contrib/lite/BUILD +++ b/tensorflow/contrib/lite/BUILD @@ -132,6 +132,7 @@ cc_library( ":memory_planner", ":schema_fbs_version", ":simple_memory_arena", + ":util", "//tensorflow/contrib/lite/kernels:gemm_support", "//tensorflow/contrib/lite/nnapi:nnapi_lib", "//tensorflow/contrib/lite/schema:schema_fbs", @@ -232,6 +233,27 @@ cc_test( ], ) +cc_library( + name = "util", + srcs = ["util.cc"], + hdrs = ["util.h"], + deps = [ + ":context", + ], +) + +cc_test( + name = "util_test", + size = "small", + srcs = ["util_test.cc"], + deps = [ + ":context", + ":util", + "//tensorflow/contrib/lite/testing:util", + "@com_google_googletest//:gtest", + ], +) + # Test the serialization of a model with optional tensors. # Model tests diff --git a/tensorflow/contrib/lite/context.c b/tensorflow/contrib/lite/context.c index c09e838c5c..620de5d678 100644 --- a/tensorflow/contrib/lite/context.c +++ b/tensorflow/contrib/lite/context.c @@ -17,9 +17,14 @@ limitations under the License. #include #include +int TfLiteIntArrayGetSizeInBytes(int size) { + static TfLiteIntArray dummy; + return sizeof(dummy) + sizeof(dummy.data[0]) * size; +} + TfLiteIntArray* TfLiteIntArrayCreate(int size) { TfLiteIntArray* ret = - (TfLiteIntArray*)malloc(sizeof(*ret) + sizeof(ret->data[0]) * size); + (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size)); ret->size = size; return ret; } diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index ed7f4515fa..c6521e2fbf 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -29,6 +29,7 @@ limitations under the License. #ifndef TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ #define TENSORFLOW_CONTRIB_LITE_CONTEXT_H_ +#include #include #include @@ -40,6 +41,7 @@ typedef enum { kTfLiteOk = 0, kTfLiteError = 1 } TfLiteStatus; // Forward declare so GetNode can use this is in Context. 
typedef struct _TfLiteRegistration TfLiteRegistration; +typedef struct _TfLiteDelegate TfLiteDelegate; #define kOptionalTensor (-1) @@ -57,6 +59,10 @@ typedef struct { #endif } TfLiteIntArray; +// Given the size (number of elements) in a TfLiteIntArray, calculate its size +// in bytes. +int TfLiteIntArrayGetSizeInBytes(int size); + // Create a array of a given `size` (uninitialized entries). // This returns a pointer, that you must free using TfLiteIntArrayFree(). TfLiteIntArray* TfLiteIntArrayCreate(int size); @@ -162,6 +168,11 @@ typedef enum { kTfLiteDynamic, } TfLiteAllocationType; +// The delegates should use zero or positive integers to represent handles. +// -1 is reserved from unallocated status. +typedef int TfLiteBufferHandle; +const TfLiteBufferHandle kTfLiteNullBufferHandle = -1; + // An tensor in the interpreter system which is a wrapper around a buffer of // data including a dimensionality (or NULL if not currently defined). typedef struct { @@ -194,6 +205,22 @@ typedef struct { // Null-terminated name of this tensor. const char* name; + + // The delegate which knows how to handle `buffer_handle`. + // WARNING: This is an experimental interface that is subject to change. + TfLiteDelegate* delegate; + + // An integer buffer handle that can be handled by `delegate`. + // The value is valid only when delegate is not null. + // WARNING: This is an experimental interface that is subject to change. + TfLiteBufferHandle buffer_handle; + + // If the delegate uses its own buffer (e.g. GPU memory), the delegate is + // responsible to set data_is_stale to true. + // `delegate->CopyFromBufferHandle` can be called to copy the data from + // delegate buffer. + // WARNING: This is an // experimental interface that is subject to change. + bool data_is_stale; } TfLiteTensor; // Free memory of tensor `t`; @@ -234,6 +261,11 @@ typedef struct { // WARNING: This is an experimental interface that is subject to change. 
const void* custom_initial_data; int custom_initial_data_size; + + // The pointer to the delegate. This is non-null only when the node is + // created by calling `interpreter.ModifyGraphWithDelegate`. + // WARNING: This is an experimental interface that is subject to change. + TfLiteDelegate* delegate; } TfLiteNode; typedef struct TfLiteContext { @@ -287,7 +319,7 @@ typedef struct TfLiteContext { // does not take ownership of `nodes_to_replace`. TfLiteStatus (*ReplaceSubgraphsWithDelegateKernels)( struct TfLiteContext*, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace); + const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); // TODO(ahentz): we should create a more general mechanism for this sort of // library-global objects. @@ -338,19 +370,45 @@ typedef struct _TfLiteRegistration { } TfLiteRegistration; // WARNING: This is an experimental interface that is subject to change. -typedef struct { +typedef struct _TfLiteDelegate { // Data that delegate needs to identify itself. This data is owned by the // delegate. The delegate is owned in the user code, so the delegate is // responsible for doing this when it is destroyed. void* data_; + // Invoked by ModifyGraphWithDelegate. This prepare is called, giving the // delegate a view of the current graph through TfLiteContext*. It typically // will look at the nodes and call ReplaceSubgraphsWithDelegateKernels() // to ask the TensorFlow lite runtime to create macro-nodes to represent // delegated subgraphs of the original graph. - TfLiteStatus (*Prepare)(TfLiteContext* context, void* data); + TfLiteStatus (*Prepare)(TfLiteContext* context, TfLiteDelegate* delegate); + + // Copy the data from delegate buffer handle to raw memory. + // This can be null if the delegate doesn't use its own buffer. + TfLiteStatus (*CopyFromBufferHandle)(TfLiteDelegate* delegate, + TfLiteBufferHandle buffer_handle, + void* data, int size); + + // Copy the data from raw memory to delegate buffer handle. 
+ // This can be null if the delegate doesn't use its own buffer. + TfLiteStatus (*CopyToBufferHandle)(TfLiteDelegate* delegate, + TfLiteBufferHandle buffer_handle, + void* data, int size); + + // Free the Delegate Buffer Handle. Note: This only frees the handle, but + // this doesn't release the underlying resource (e.g. textures). The + // resources are either owned by application layer or the delegate. + // This can be null if the delegate doesn't use its own buffer. + void (*FreeBufferHandle)(TfLiteDelegate* delegate, + TfLiteBufferHandle* handle); } TfLiteDelegate; +// WARNING: This is an experimental interface that is subject to change. +typedef struct { + TfLiteDelegate* delegate; + TfLiteIntArray* nodes_to_replace; +} TfLiteDelegateParams; + #ifdef __cplusplus } // extern "C" #endif // __cplusplus diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 0f5e17f0de..8fd1085544 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/contrib/lite/memory_planner.h" #include "tensorflow/contrib/lite/nnapi_delegate.h" #include "tensorflow/contrib/lite/schema/schema_generated.h" +#include "tensorflow/contrib/lite/util.h" namespace tflite { @@ -96,19 +97,57 @@ Interpreter::~Interpreter() { } for (int i = 0; i < context_.tensors_size; i++) { - TfLiteTensorFree(&context_.tensors[i]); + TfLiteTensor* tensor = &context_.tensors[i]; + if (tensor->buffer_handle != kTfLiteNullBufferHandle) { + tensor->delegate->FreeBufferHandle(tensor->delegate, + &tensor->buffer_handle); + } + TfLiteTensorFree(tensor); } } TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( TfLiteContext* context, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace) { + const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate) { return static_cast(context->impl_) - ->ReplaceSubgraphsWithDelegateKernels(registration, nodes_to_replace); + ->ReplaceSubgraphsWithDelegateKernels(registration, nodes_to_replace, + delegate); +} + +namespace { + +// This function allocates a continuous memory space that contains a +// TfLiteDelegateParams followed by a TfLiteIntArray. The pointer will be +// deallocated by C `free` function later. 
+TfLiteDelegateParams* CreateDelegateParams( + TfLiteDelegate* delegate, const std::vector& nodes_to_replace) { + int nodes_to_replace_size_in_bytes = + TfLiteIntArrayGetSizeInBytes(nodes_to_replace.size()); + void* allocation = + malloc(sizeof(TfLiteDelegateParams) + nodes_to_replace_size_in_bytes); + TfLiteDelegateParams* params = + reinterpret_cast(allocation); + TfLiteIntArray* nodes_to_replace_arr = reinterpret_cast( + static_cast(allocation) + sizeof(TfLiteDelegateParams)); + + nodes_to_replace_arr->size = nodes_to_replace.size(); + for (int i = 0; i < nodes_to_replace.size(); ++i) { + nodes_to_replace_arr->data[i] = nodes_to_replace[i]; + } + + params->delegate = delegate; + params->nodes_to_replace = nodes_to_replace_arr; + return params; } +} // Anonymous namespace + TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( - TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace) { + TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, + TfLiteDelegate* delegate) { + // Annotate the registration as DELEGATE op. + registration.builtin_code = BuiltinOperator_DELEGATE; + // Annotate the registration as DELEGATE op. registration.builtin_code = BuiltinOperator_DELEGATE; @@ -120,30 +159,38 @@ TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( execution_plan_.clear(); for (auto& subgraph : subgraphs) { - // Turn subgraph.nodes into a TfLiteIntArray compatible data structure. - // TODO(aselle): Avoid this copy by constructing subgraph.nodes that way - // in the first place - subgraph.nodes.insert(subgraph.nodes.begin(), - static_cast(subgraph.nodes.size())); // Subgraphs calimed by the delegate should have a "macro" op created, the // other subgraphs (kTfNonPartition) just have their nodes added back to // the execution plan. 
switch (subgraph.type) { case Subgraph::kTfNonPartition: - for (auto it = subgraph.nodes.begin() + 1; it != subgraph.nodes.end(); + for (auto it = subgraph.nodes.begin(); it != subgraph.nodes.end(); ++it) { execution_plan_.push_back(*it); } break; case Subgraph::kTfPartition: { - void* builtin_data = nullptr; int node_index; - // Create a node that represents computation of this subgraph. - AddNodeWithParameters( - subgraph.input_tensors, subgraph.output_tensors, - reinterpret_cast(subgraph.nodes.data()), - subgraph.nodes.size() * sizeof(subgraph.nodes[0]), builtin_data, - ®istration, &node_index); + + TfLiteDelegateParams* params = + CreateDelegateParams(delegate, subgraph.nodes); + AddNodeWithParameters(subgraph.input_tensors, subgraph.output_tensors, + nullptr, 0, params, ®istration, &node_index); + + // Initialize the output tensors's delegate-related fields. + for (int tensor_index : subgraph.output_tensors) { + TfLiteTensor* tensor = &tensors_[tensor_index]; + TF_LITE_ENSURE_EQ(&context_, tensor->delegate, nullptr); + TF_LITE_ENSURE_EQ(&context_, tensor->buffer_handle, + kTfLiteNullBufferHandle); + // buffer_handle will be filled in delegate's `Prepare` + // function. + tensor->delegate = delegate; + } + + // Associate the node with the delegate. 
+ TfLiteNode* node = &nodes_and_registration_[node_index].first; + node->delegate = delegate; } break; case Subgraph::kTfUnexplored: return kTfLiteError; @@ -233,14 +280,6 @@ TfLiteStatus Interpreter::BytesRequired(TfLiteType type, const int* dims, return kTfLiteOk; } -namespace { -TfLiteIntArray* convertVectorToTfLiteIntArray(const std::vector& x) { - TfLiteIntArray* lite = TfLiteIntArrayCreate(x.size()); - for (size_t i = 0; i < x.size(); i++) lite->data[i] = x[i]; - return lite; -} -} // namespace - TfLiteStatus Interpreter::AllocateTensors() { next_execution_plan_index_to_prepare_ = 0; if (memory_planner_) { @@ -275,7 +314,6 @@ TfLiteStatus Interpreter::AddNodeWithParameters( int new_node_index = nodes_and_registration_.size(); if (node_index) *node_index = new_node_index; nodes_and_registration_.resize(nodes_and_registration_.size() + 1); - auto& node_and_reg = nodes_and_registration_.back(); TfLiteNode& node = node_and_reg.first; if (node.inputs) TfLiteIntArrayFree(node.inputs); @@ -285,8 +323,8 @@ TfLiteStatus Interpreter::AddNodeWithParameters( // NOTE, here we are not using move semantics yet, since our internal // representation isn't std::vector, but in the future we would like to avoid // copies, so we want the interface to take r-value references now. - node.inputs = convertVectorToTfLiteIntArray(inputs); - node.outputs = convertVectorToTfLiteIntArray(outputs); + node.inputs = ConvertVectorToTfLiteIntArray(inputs); + node.outputs = ConvertVectorToTfLiteIntArray(outputs); node.temporaries = TfLiteIntArrayCreate(0); if (init_data) { node.user_data = OpInit(*registration, init_data, init_data_size); @@ -299,6 +337,7 @@ TfLiteStatus Interpreter::AddNodeWithParameters( node.builtin_data = builtin_data_deleter.release(); // TODO(ycling): Filling `custom_initial_data` and `custom_initial_data_size` // properly for nodes generated by ReplaceSubgraphsWithDelegateKernels. 
+ if (registration->builtin_code == BuiltinOperator_CUSTOM) { // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer // `Operator` table is passed in. @@ -309,6 +348,7 @@ TfLiteStatus Interpreter::AddNodeWithParameters( node.custom_initial_data_size = 0; } + node.delegate = nullptr; node_and_reg.second = *registration; execution_plan_.push_back(new_node_index); return kTfLiteOk; @@ -322,7 +362,7 @@ TfLiteStatus Interpreter::ResizeInputTensor(int tensor_index, TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); invokable_ = false; - TfLiteIntArray* dims_lite = convertVectorToTfLiteIntArray(dims); + TfLiteIntArray* dims_lite = ConvertVectorToTfLiteIntArray(dims); return ResizeTensorImpl(&context_.tensors[tensor_index], dims_lite); } @@ -424,11 +464,29 @@ TfLiteStatus Interpreter::Invoke() { TfLiteNode& node = nodes_and_registration_[node_index].first; const TfLiteRegistration& registration = nodes_and_registration_[node_index].second; + + // TODO(ycling): This is an extra loop through inputs to check if the data + // need to be copied from Delegate buffer to raw memory, which is often not + // needed. We may want to cache this in prepare to know if this needs to be + // done for a node or not. 
+ for (int i = 0; i < node.inputs->size; ++i) { + int tensor_index = node.inputs->data[i]; + if (tensor_index == kOptionalTensor) { + continue; + } + TfLiteTensor* tensor = &tensors_[tensor_index]; + if (tensor->delegate && tensor->delegate != node.delegate && + tensor->data_is_stale) { + EnsureTensorDataIsReadable(tensor_index); + } + } + EnsureTensorsVectorCapacity(); if (OpInvoke(registration, &node) == kTfLiteError) { status = kTfLiteError; } } + return status; } @@ -464,6 +522,7 @@ TfLiteStatus Interpreter::AddTensors(int tensors_to_add, tensors_.resize(tensors_.size() + tensors_to_add); for (int i = base_index; i < tensors_.size(); i++) { memset(&tensors_[i], 0, sizeof(tensors_[i])); + tensors_[i].buffer_handle = kTfLiteNullBufferHandle; } context_.tensors = tensors_.data(); context_.tensors_size = tensors_.size(); @@ -511,7 +570,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes); } invokable_ = false; - TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), quantization, const_cast(buffer), bytes, kTfLiteMmapRo, allocation, &context_.tensors[tensor_index]); return kTfLiteOk; @@ -536,7 +595,7 @@ TfLiteStatus Interpreter::SetTensorParametersReadWrite( TF_LITE_ENSURE_OK(&context_, BytesRequired(type, dims.data(), dims.size(), &required_bytes)); } - TfLiteTensorReset(type, name, convertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), quantization, /*buffer=*/nullptr, required_bytes, type == kTfLiteString ? 
kTfLiteDynamic : kTfLiteArenaRw, @@ -613,7 +672,7 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { ReplaceSubgraphsWithDelegateKernels; context_.GetExecutionPlan = GetExecutionPlan; - TfLiteStatus status = delegate->Prepare(&context_, delegate->data_); + TfLiteStatus status = delegate->Prepare(&context_, delegate); // Remove additional context info. context_.GetNodeAndRegistration = nullptr; context_.ReplaceSubgraphsWithDelegateKernels = nullptr; @@ -621,4 +680,35 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { return status; } +TfLiteStatus Interpreter::SetBufferHandle(int tensor_index, + TfLiteBufferHandle buffer_handle, + TfLiteDelegate* delegate) { + TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); + TfLiteTensor* tensor = &tensors_[tensor_index]; + + TF_LITE_ENSURE(&context_, + tensor->delegate == nullptr || tensor->delegate == delegate); + tensor->delegate = delegate; + if (tensor->buffer_handle != kTfLiteNullBufferHandle) { + TF_LITE_ENSURE(&context_, tensor->delegate->FreeBufferHandle != nullptr); + tensor->delegate->FreeBufferHandle(tensor->delegate, + &tensor->buffer_handle); + } + tensor->buffer_handle = buffer_handle; + + return kTfLiteOk; +} + +TfLiteStatus Interpreter::GetBufferHandle(int tensor_index, + TfLiteBufferHandle* buffer_handle, + TfLiteDelegate** delegate) { + TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); + TfLiteTensor* tensor = &tensors_[tensor_index]; + + *delegate = tensor->delegate; + *buffer_handle = tensor->buffer_handle; + + return kTfLiteOk; +} + } // namespace tflite diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index 04c19644a0..f2d4a05164 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -265,6 +265,46 @@ class Interpreter { void set_model(const Model* model) { model_ = const_cast(model); } Model* model() const { return model_; } + // Ensure the data in 
`tensor.data` is readable. In case delegate is used, + // it might require to copy the data from delegate buffer to raw memory. + TfLiteStatus EnsureTensorDataIsReadable(int tensor_index) { + TF_LITE_ENSURE(&context_, tensor_index < tensors_size()); + TfLiteTensor* tensor = &tensors_[tensor_index]; + if (tensor->data_is_stale) { + TF_LITE_ENSURE(&context_, tensor->delegate != nullptr); + TF_LITE_ENSURE(&context_, + tensor->buffer_handle != kTfLiteNullBufferHandle); + // This can be null if the delegate doesn't use its own buffer. + TF_LITE_ENSURE(&context_, + tensor->delegate->CopyFromBufferHandle != nullptr); + tensor->delegate->CopyFromBufferHandle(tensor->delegate, + tensor->buffer_handle, + tensor->data.raw, tensor->bytes); + tensor->data_is_stale = false; + } + return kTfLiteOk; + } + + // Set the delegate buffer handle to a tensor. It can be called in the + // following cases: + // 1. Set the buffer handle to a tensor that's not being written by a + // delegate. For example, feeding an OpenGL texture as the input of the + // inference graph. + // 2. Set the buffer handle to a tensor that uses the same delegate. + // For example, set an OpenGL texture as the output of inference, while + // the node which produces output is an OpenGL delegate node. + // WARNING: This is an experimental API and subject to change. + TfLiteStatus SetBufferHandle(int tensor_index, + TfLiteBufferHandle buffer_handle, + TfLiteDelegate* delegate); + + // Get the delegate buffer handle, and the delegate which can process the + // buffer handle. + // WARNING: This is an experimental API and subject to change. + TfLiteStatus GetBufferHandle(int tensor_index, + TfLiteBufferHandle* buffer_handle, + TfLiteDelegate** delegate); + // The default capacity of `tensors_` vector. 
static constexpr int kTensorsReservedCapacity = 128; // The capacity headroom of `tensors_` vector before calling ops' @@ -355,14 +395,15 @@ class Interpreter { // Entry point for C API ReplaceSubgraphsWithDelegateKernels static TfLiteStatus ReplaceSubgraphsWithDelegateKernels( TfLiteContext* context, TfLiteRegistration registration, - const TfLiteIntArray* nodes_to_replace); + const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); // Update the execution graph to replace some of the nodes with stub // nodes. Specifically any node index that has `nodes[index]==1` will be // slated for replacement with a delegate kernel specified by registration. // WARNING: This is an experimental interface that is subject to change. TfLiteStatus ReplaceSubgraphsWithDelegateKernels( - TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace); + TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, + TfLiteDelegate* delegate); // WARNING: This is an experimental interface that is subject to change. // Gets the internal pointer to a TensorFlow lite node by node_index. 
diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 2e6727b323..2586c15287 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -763,26 +763,38 @@ TfLiteRegistration AddOpRegistration() { } class TestDelegate : public ::testing::Test { - public: - TestDelegate() { - interpreter_.AddTensors(5); - interpreter_.SetInputs({0, 1}); - interpreter_.SetOutputs({3, 4}); + protected: + void SetUp() override { + interpreter_.reset(new Interpreter); + interpreter_->AddTensors(5); + interpreter_->SetInputs({0, 1}); + interpreter_->SetOutputs({3, 4}); TfLiteQuantizationParams quant; - interpreter_.SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, - quant); - interpreter_.SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, - quant); - interpreter_.SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, - quant); - interpreter_.SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, - quant); + interpreter_->SetTensorParametersReadWrite(0, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(1, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(2, kTfLiteFloat32, "", {3}, + quant); + interpreter_->SetTensorParametersReadWrite(3, kTfLiteFloat32, "", {3}, + quant); TfLiteRegistration reg = AddOpRegistration(); - interpreter_.AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, ®); - interpreter_.AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, ®); - interpreter_.AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, ®); + interpreter_->AddNodeWithParameters({0, 0}, {2}, nullptr, 0, nullptr, ®); + interpreter_->AddNodeWithParameters({1, 1}, {3}, nullptr, 0, nullptr, ®); + interpreter_->AddNodeWithParameters({2, 1}, {4}, nullptr, 0, nullptr, ®); } + void TearDown() override { + // Interpreter relies on delegate_ to free the resources properly. 
Thus + // the life cycle of delegate must be longer than interpreter. + interpreter_.reset(); + delegate_.reset(); + } + + TfLiteBufferHandle last_allocated_handle_ = kTfLiteNullBufferHandle; + + TfLiteBufferHandle AllocateBufferHandle() { return ++last_allocated_handle_; } + protected: class SimpleDelegate { public: @@ -791,8 +803,8 @@ class TestDelegate : public ::testing::Test { // value-copyable and compatible with TfLite. explicit SimpleDelegate(const std::vector& nodes) : nodes_(nodes) { delegate_.Prepare = [](TfLiteContext* context, - void* data) -> TfLiteStatus { - auto* simple = reinterpret_cast(data); + TfLiteDelegate* delegate) -> TfLiteStatus { + auto* simple = reinterpret_cast(delegate->data_); TfLiteIntArray* nodes_to_separate = TfLiteIntArrayCreate(simple->nodes_.size()); // Mark nodes that we want in TfLiteIntArray* structure. @@ -823,10 +835,26 @@ class TestDelegate : public ::testing::Test { } context->ReplaceSubgraphsWithDelegateKernels( - context, FakeFusedRegistration(), nodes_to_separate); + context, FakeFusedRegistration(), nodes_to_separate, delegate); TfLiteIntArrayFree(nodes_to_separate); return kTfLiteOk; }; + delegate_.CopyToBufferHandle = [](TfLiteDelegate* delegate, + TfLiteBufferHandle buffer_handle, + void* data, int size) -> TfLiteStatus { + // TODO(ycling): Implement tests to test buffer copying logic. + return kTfLiteOk; + }; + delegate_.CopyFromBufferHandle = + [](TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle, + void* data, int size) -> TfLiteStatus { + // TODO(ycling): Implement tests to test buffer copying logic. + return kTfLiteOk; + }; + delegate_.FreeBufferHandle = [](TfLiteDelegate* delegate, + TfLiteBufferHandle* handle) { + *handle = kTfLiteNullBufferHandle; + }; // Store type-punned data SimpleDelegate structure. 
delegate_.data_ = reinterpret_cast(this); } @@ -843,36 +871,102 @@ class TestDelegate : public ::testing::Test { std::vector nodes_; TfLiteDelegate delegate_; }; - Interpreter interpreter_; + std::unique_ptr interpreter_; + std::unique_ptr delegate_; }; TEST_F(TestDelegate, BasicDelegate) { - interpreter_.Invoke(); - SimpleDelegate simple({0, 1, 2}); - interpreter_.ModifyGraphWithDelegate(simple.get_tf_lite_delegate()); + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); - ASSERT_EQ(interpreter_.execution_plan().size(), 1); - int node = interpreter_.execution_plan()[0]; - const auto* node_and_reg = interpreter_.node_and_registration(node); + ASSERT_EQ(interpreter_->execution_plan().size(), 1); + int node = interpreter_->execution_plan()[0]; + const auto* node_and_reg = interpreter_->node_and_registration(node); ASSERT_EQ(node_and_reg->second.custom_name, SimpleDelegate::FakeFusedRegistration().custom_name); } TEST_F(TestDelegate, ComplexDeligate) { - interpreter_.Invoke(); - SimpleDelegate simple({1, 2}); - interpreter_.ModifyGraphWithDelegate(simple.get_tf_lite_delegate()); + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({1, 2})); + interpreter_->ModifyGraphWithDelegate(delegate_->get_tf_lite_delegate()); - ASSERT_EQ(interpreter_.execution_plan().size(), 2); + ASSERT_EQ(interpreter_->execution_plan().size(), 2); // 0th should be a non-delegated original op - ASSERT_EQ(interpreter_.execution_plan()[0], 0); + ASSERT_EQ(interpreter_->execution_plan()[0], 0); // 1st should be a new macro op (3) which didn't exist) - ASSERT_EQ(interpreter_.execution_plan()[1], 3); - const auto* node_and_reg = interpreter_.node_and_registration(3); + ASSERT_EQ(interpreter_->execution_plan()[1], 3); + const auto* node_and_reg = interpreter_->node_and_registration(3); ASSERT_EQ(node_and_reg->second.custom_name, 
SimpleDelegate::FakeFusedRegistration().custom_name); } +TEST_F(TestDelegate, SetBufferHandleToInput) { + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); + interpreter_->ModifyGraphWithDelegate(delegate); + + constexpr int kOutputTensorIndex = 0; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + ASSERT_EQ(tensor->delegate, nullptr); + ASSERT_EQ(tensor->buffer_handle, kTfLiteNullBufferHandle); + + TfLiteBufferHandle handle = AllocateBufferHandle(); + TfLiteStatus status = + interpreter_->SetBufferHandle(kOutputTensorIndex, handle, delegate); + ASSERT_EQ(status, kTfLiteOk); + EXPECT_EQ(tensor->delegate, delegate); + EXPECT_EQ(tensor->buffer_handle, handle); +} + +TEST_F(TestDelegate, SetBufferHandleToOutput) { + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); + interpreter_->ModifyGraphWithDelegate(delegate); + + constexpr int kOutputTensorIndex = 3; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + // Before setting the buffer handle, the tensor's `delegate` is already set + // because it will be written by the delegate. 
+ ASSERT_EQ(tensor->delegate, delegate); + ASSERT_EQ(tensor->buffer_handle, kTfLiteNullBufferHandle); + + TfLiteBufferHandle handle = AllocateBufferHandle(); + TfLiteStatus status = + interpreter_->SetBufferHandle(kOutputTensorIndex, handle, delegate); + ASSERT_EQ(status, kTfLiteOk); + EXPECT_EQ(tensor->delegate, delegate); + EXPECT_EQ(tensor->buffer_handle, handle); +} + +TEST_F(TestDelegate, SetInvalidHandleToTensor) { + interpreter_->Invoke(); + delegate_ = std::unique_ptr(new SimpleDelegate({0, 1, 2})); + TfLiteDelegate* delegate = delegate_->get_tf_lite_delegate(); + interpreter_->ModifyGraphWithDelegate(delegate); + + SimpleDelegate another_simple_delegate({0, 1, 2}); + + constexpr int kOutputTensorIndex = 3; + TfLiteTensor* tensor = interpreter_->tensor(kOutputTensorIndex); + // Before setting the buffer handle, the tensor's `delegate` is already set + // because it will be written by the delegate. + ASSERT_EQ(tensor->delegate, delegate); + ASSERT_EQ(tensor->buffer_handle, kTfLiteNullBufferHandle); + + TfLiteBufferHandle handle = AllocateBufferHandle(); + TfLiteStatus status = interpreter_->SetBufferHandle( + kOutputTensorIndex, handle, + another_simple_delegate.get_tf_lite_delegate()); + // Setting a buffer handle to a tensor with another delegate will fail. + ASSERT_EQ(status, kTfLiteError); + EXPECT_EQ(tensor->delegate, delegate); + EXPECT_EQ(tensor->buffer_handle, kTfLiteNullBufferHandle); +} + } // namespace } // namespace tflite diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc new file mode 100644 index 0000000000..b2c7e6c7a6 --- /dev/null +++ b/tensorflow/contrib/lite/util.cc @@ -0,0 +1,27 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include "tensorflow/contrib/lite/util.h" + +namespace tflite { + +TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input) { + TfLiteIntArray* output = TfLiteIntArrayCreate(input.size()); + for (size_t i = 0; i < input.size(); i++) { + output->data[i] = input[i]; + } + return output; +} + +} // namespace tflite diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h new file mode 100644 index 0000000000..50e4fb839e --- /dev/null +++ b/tensorflow/contrib/lite/util.h @@ -0,0 +1,34 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// This file provides general C++ utility functions in TFLite. +// For example: Converting between `TfLiteIntArray`, `std::vector` and +// Flatbuffer vectors. These functions can't live in `context.h` since it's pure +// C. 
+ +#ifndef TENSORFLOW_CONTRIB_LITE_UTIL_H_ +#define TENSORFLOW_CONTRIB_LITE_UTIL_H_ + +#include +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { + +// Converts a `std::vector` to a `TfLiteIntArray`. +TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input); + +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_UTIL_H_ diff --git a/tensorflow/contrib/lite/util_test.cc b/tensorflow/contrib/lite/util_test.cc new file mode 100644 index 0000000000..04579c53aa --- /dev/null +++ b/tensorflow/contrib/lite/util_test.cc @@ -0,0 +1,50 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include +#include +#include + +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/util.h" + +namespace tflite { +namespace { + +TEST(ConvertVectorToTfLiteIntArray, TestWithVector) { + std::vector input = {1, 2}; + TfLiteIntArray* output = ConvertVectorToTfLiteIntArray(input); + ASSERT_NE(output, nullptr); + EXPECT_EQ(output->size, 2); + EXPECT_EQ(output->data[0], 1); + EXPECT_EQ(output->data[1], 2); + TfLiteIntArrayFree(output); +} + +TEST(ConvertVectorToTfLiteIntArray, TestWithEmptyVector) { + std::vector input; + TfLiteIntArray* output = ConvertVectorToTfLiteIntArray(input); + ASSERT_NE(output, nullptr); + EXPECT_EQ(output->size, 0); + TfLiteIntArrayFree(output); +} + +} // namespace +} // namespace tflite + +int main(int argc, char** argv) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} -- GitLab From 543454b282bbcffd63d1348204662dbfed82fb86 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 11:57:36 -0800 Subject: [PATCH 617/884] Expose a version of model_fn for contrib Estimators. Make the body of get_timestamped_export_dir an Estimator util. PiperOrigin-RevId: 188366199 --- .../python/learn/estimators/estimator.py | 22 +++++++- tensorflow/python/estimator/export/export.py | 30 +---------- tensorflow/python/estimator/util.py | 50 +++++++++++++++++++ 3 files changed, 72 insertions(+), 30 deletions(-) diff --git a/tensorflow/contrib/learn/python/learn/estimators/estimator.py b/tensorflow/contrib/learn/python/learn/estimators/estimator.py index 5262e04e16..d8ccb1e7dc 100644 --- a/tensorflow/contrib/learn/python/learn/estimators/estimator.py +++ b/tensorflow/contrib/learn/python/learn/estimators/estimator.py @@ -470,6 +470,20 @@ class BaseEstimator(sklearn.BaseEstimator, evaluable.Evaluable, # TODO(wicke): make RunConfig immutable, and then return it without a copy. 
return copy.deepcopy(self._config) + @property + def model_fn(self): + """Returns the model_fn which is bound to self.params. + + Returns: + The model_fn with the following signature: + `def model_fn(features, labels, mode, config)` + """ + + def public_model_fn(features, labels, mode, config): + return self._call_model_fn(features, labels, mode, config=config) + + return public_model_fn + @deprecated_args(SCIKIT_DECOUPLE_DATE, SCIKIT_DECOUPLE_INSTRUCTIONS, ('x', None), ('y', None), ('batch_size', None)) def fit(self, @@ -1179,7 +1193,7 @@ class Estimator(BaseEstimator): self._feature_engineering_fn = ( feature_engineering_fn or _identity_feature_engineering_fn) - def _call_model_fn(self, features, labels, mode, metrics=None): + def _call_model_fn(self, features, labels, mode, metrics=None, config=None): """Calls model function with support of 2, 3 or 4 arguments. Args: @@ -1187,6 +1201,7 @@ class Estimator(BaseEstimator): labels: labels dict. mode: ModeKeys metrics: Dict of metrics. + config: RunConfig. Returns: A `ModelFnOps` object. 
If model_fn returns a tuple, wraps them up in a @@ -1203,7 +1218,10 @@ class Estimator(BaseEstimator): if 'params' in model_fn_args: kwargs['params'] = self.params if 'config' in model_fn_args: - kwargs['config'] = self.config + if config: + kwargs['config'] = config + else: + kwargs['config'] = self.config if 'model_dir' in model_fn_args: kwargs['model_dir'] = self.model_dir model_fn_results = self._model_fn(features, labels, **kwargs) diff --git a/tensorflow/python/estimator/export/export.py b/tensorflow/python/estimator/export/export.py index f240e11478..9206a4964b 100644 --- a/tensorflow/python/estimator/export/export.py +++ b/tensorflow/python/estimator/export/export.py @@ -21,17 +21,16 @@ from __future__ import print_function import collections import os -import time import six +from tensorflow.python.estimator import util from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import array_ops from tensorflow.python.ops import parsing_ops -from tensorflow.python.platform import gfile from tensorflow.python.platform import tf_logging as logging from tensorflow.python.saved_model import signature_constants from tensorflow.python.saved_model import signature_def_utils @@ -329,13 +328,6 @@ def _log_signature_report(signature_def_map, excluded_signatures): logging.warn('Export includes no default signature!') -# When we create a timestamped directory, there is a small chance that the -# directory already exists because another worker is also writing exports. -# In this case we just wait one second to get a new timestamp and try again. -# If this fails several times in a row, then something is seriously wrong. -MAX_DIRECTORY_CREATION_ATTEMPTS = 10 - - def get_timestamped_export_dir(export_dir_base): """Builds a path to a new subdirectory within the base directory. 
@@ -354,25 +346,7 @@ def get_timestamped_export_dir(export_dir_base): RuntimeError: if repeated attempts fail to obtain a unique timestamped directory name. """ - attempts = 0 - while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS: - export_timestamp = int(time.time()) - - export_dir = os.path.join( - compat.as_bytes(export_dir_base), - compat.as_bytes(str(export_timestamp))) - if not gfile.Exists(export_dir): - # Collisions are still possible (though extremely unlikely): this - # directory is not actually created yet, but it will be almost - # instantly on return from this function. - return export_dir - time.sleep(1) - attempts += 1 - logging.warn( - 'Export directory {} already exists; retrying (attempt {}/{})'.format( - export_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS)) - raise RuntimeError('Failed to obtain a unique export directory name after ' - '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS)) + return util.get_timestamped_dir(export_dir_base) def get_temp_export_dir(timestamped_export_dir): diff --git a/tensorflow/python/estimator/util.py b/tensorflow/python/estimator/util.py index 3ce8eea84b..bb4bdd3fdf 100644 --- a/tensorflow/python/estimator/util.py +++ b/tensorflow/python/estimator/util.py @@ -20,7 +20,12 @@ from __future__ import division from __future__ import print_function import functools +import os +import time +from tensorflow.python.platform import gfile +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.util import compat from tensorflow.python.util import tf_decorator from tensorflow.python.util import tf_inspect @@ -56,3 +61,48 @@ def fn_args(fn): if _is_bounded_method(fn): args.remove('self') return tuple(args) + + +# When we create a timestamped directory, there is a small chance that the +# directory already exists because another process is also creating these +# directories. In this case we just wait one second to get a new timestamp and +# try again. 
If this fails several times in a row, then something is seriously +# wrong. +MAX_DIRECTORY_CREATION_ATTEMPTS = 10 + + +def get_timestamped_dir(dir_base): + """Builds a path to a new subdirectory within the base directory. + + The subdirectory will be named using the current time. + This guarantees monotonically increasing directory numbers even across + multiple runs of the pipeline. + The timestamp used is the number of seconds since epoch UTC. + + Args: + dir_base: A string containing a directory to create the subdirectory under. + + Returns: + The full path of the new subdirectory (which is not actually created yet). + + Raises: + RuntimeError: if repeated attempts fail to obtain a unique timestamped + directory name. + """ + attempts = 0 + while attempts < MAX_DIRECTORY_CREATION_ATTEMPTS: + timestamp = int(time.time()) + + result_dir = os.path.join( + compat.as_bytes(dir_base), compat.as_bytes(str(timestamp))) + if not gfile.Exists(result_dir): + # Collisions are still possible (though extremely unlikely): this + # directory is not actually created yet, but it will be almost + # instantly on return from this function. + return result_dir + time.sleep(1) + attempts += 1 + logging.warn('Directory {} already exists; retrying (attempt {}/{})'.format( + result_dir, attempts, MAX_DIRECTORY_CREATION_ATTEMPTS)) + raise RuntimeError('Failed to obtain a unique export directory name after ' + '{} attempts.'.format(MAX_DIRECTORY_CREATION_ATTEMPTS)) -- GitLab From 25b970971a58f9e49008eecfce113b7d342dbec2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 12:01:23 -0800 Subject: [PATCH 618/884] Decoupling hash and protobuf. This simplifies the dependency chain a little bit, as we don't have to link protobuf libraries just to use basic hash functions. The case for deterministic serialization of protocol buffers is very specific and can be handled by a new header file. 
PiperOrigin-RevId: 188366713 --- tensorflow/c/c_api_function_test.cc | 1 + tensorflow/compiler/aot/BUILD | 1 + tensorflow/compiler/aot/compile.cc | 1 + tensorflow/compiler/xla/service/executable.cc | 1 + tensorflow/core/BUILD | 1 + tensorflow/core/framework/attr_value_util.cc | 1 + tensorflow/core/framework/op_def_util.cc | 1 + tensorflow/core/lib/hash/hash.cc | 11 ------- tensorflow/core/lib/hash/hash.h | 10 ------ .../core/lib/strings/proto_serialization.cc | 33 +++++++++++++++++++ .../core/lib/strings/proto_serialization.h | 33 +++++++++++++++++++ 11 files changed, 73 insertions(+), 21 deletions(-) create mode 100644 tensorflow/core/lib/strings/proto_serialization.cc create mode 100644 tensorflow/core/lib/strings/proto_serialization.h diff --git a/tensorflow/c/c_api_function_test.cc b/tensorflow/c/c_api_function_test.cc index 7ca50119ea..610274696f 100644 --- a/tensorflow/c/c_api_function_test.cc +++ b/tensorflow/c/c_api_function_test.cc @@ -20,6 +20,7 @@ limitations under the License. #include "tensorflow/core/framework/op_def.pb.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/logging.h" diff --git a/tensorflow/compiler/aot/BUILD b/tensorflow/compiler/aot/BUILD index 0900e87eba..ffa2d08829 100644 --- a/tensorflow/compiler/aot/BUILD +++ b/tensorflow/compiler/aot/BUILD @@ -72,6 +72,7 @@ cc_library( "//tensorflow/core:core_cpu_internal", "//tensorflow/core:framework_internal", "//tensorflow/core:lib", + "//tensorflow/core:lib_internal", "//tensorflow/core:protos_all_cc", ], ) diff --git a/tensorflow/compiler/aot/compile.cc b/tensorflow/compiler/aot/compile.cc index c87f2b75df..7c83387881 100644 --- a/tensorflow/compiler/aot/compile.cc +++ b/tensorflow/compiler/aot/compile.cc @@ -32,6 +32,7 @@ limitations under the License. 
#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/types.h" diff --git a/tensorflow/compiler/xla/service/executable.cc b/tensorflow/compiler/xla/service/executable.cc index 90481c7a88..be92b1629a 100644 --- a/tensorflow/compiler/xla/service/executable.cc +++ b/tensorflow/compiler/xla/service/executable.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/core/lib/hash/hash.h" #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/platform/env.h" diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 8d556193d7..491f83e4fc 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -1518,6 +1518,7 @@ LIB_INTERNAL_PUBLIC_HEADERS = tf_additional_lib_hdrs() + [ "lib/strings/base64.h", "lib/strings/ordered_code.h", "lib/strings/proto_text_util.h", + "lib/strings/proto_serialization.h", "lib/strings/scanner.h", "lib/wav/wav_io.h", "platform/demangle.h", diff --git a/tensorflow/core/framework/attr_value_util.cc b/tensorflow/core/framework/attr_value_util.cc index a1c39d2a7a..ebb56d525e 100644 --- a/tensorflow/core/framework/attr_value_util.cc +++ b/tensorflow/core/framework/attr_value_util.cc @@ -26,6 +26,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/platform/protobuf.h" diff --git a/tensorflow/core/framework/op_def_util.cc b/tensorflow/core/framework/op_def_util.cc index 2d035ab90d..c80802aad3 100644 --- a/tensorflow/core/framework/op_def_util.cc +++ b/tensorflow/core/framework/op_def_util.cc @@ -26,6 +26,7 @@ limitations under the License. #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/hash/hash.h" +#include "tensorflow/core/lib/strings/proto_serialization.h" #include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/mutex.h" diff --git a/tensorflow/core/lib/hash/hash.cc b/tensorflow/core/lib/hash/hash.cc index ed9b4df37a..dc9d300d00 100644 --- a/tensorflow/core/lib/hash/hash.cc +++ b/tensorflow/core/lib/hash/hash.cc @@ -126,15 +126,4 @@ uint64 Hash64(const char* data, size_t n, uint64 seed) { return h; } -bool SerializeToStringDeterministic(const protobuf::MessageLite& msg, - string* result) { - const size_t size = msg.ByteSizeLong(); - *result = string(size, '\0'); - protobuf::io::ArrayOutputStream array_stream(&(*result)[0], size); - protobuf::io::CodedOutputStream output_stream(&array_stream); - output_stream.SetSerializationDeterministic(true); - msg.SerializeWithCachedSizes(&output_stream); - return !output_stream.HadError() && size == output_stream.ByteCount(); -} - } // namespace tensorflow diff --git a/tensorflow/core/lib/hash/hash.h b/tensorflow/core/lib/hash/hash.h index 4d312ab7e8..b90c6514f2 100644 --- a/tensorflow/core/lib/hash/hash.h +++ b/tensorflow/core/lib/hash/hash.h @@ -24,7 +24,6 @@ limitations under the License. 
#include #include "tensorflow/core/lib/core/stringpiece.h" -#include "tensorflow/core/platform/protobuf.h" #include "tensorflow/core/platform/types.h" namespace tensorflow { @@ -92,15 +91,6 @@ struct hash> { } }; -// Wrapper around protocol buffer serialization that requests deterministic -// serialization, in particular for Map fields, which serialize in a random -// order by default. Returns true on success. -// Serialization is guaranteed to be deterministic for a given binary only. -// See the following for more details: -// https://github.com/google/protobuf/blob/a1bb147e96b6f74db6cdf3c3fcb00492472dbbfa/src/google/protobuf/io/coded_stream.h#L834 -bool SerializeToStringDeterministic(const protobuf::MessageLite& msg, - string* result); - } // namespace tensorflow #endif // TENSORFLOW_LIB_HASH_HASH_H_ diff --git a/tensorflow/core/lib/strings/proto_serialization.cc b/tensorflow/core/lib/strings/proto_serialization.cc new file mode 100644 index 0000000000..5c1fbda215 --- /dev/null +++ b/tensorflow/core/lib/strings/proto_serialization.cc @@ -0,0 +1,33 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/core/lib/strings/proto_serialization.h" + +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { + +bool SerializeToStringDeterministic(const protobuf::MessageLite& msg, + string* result) { + DCHECK_LE(msg.ByteSizeLong(), static_cast(INT_MAX)); + const int size = static_cast(msg.ByteSizeLong()); + *result = string(size, '\0'); + protobuf::io::ArrayOutputStream array_stream(&(*result)[0], size); + protobuf::io::CodedOutputStream output_stream(&array_stream); + output_stream.SetSerializationDeterministic(true); + msg.SerializeWithCachedSizes(&output_stream); + return !output_stream.HadError() && size == output_stream.ByteCount(); +} + +} // namespace tensorflow diff --git a/tensorflow/core/lib/strings/proto_serialization.h b/tensorflow/core/lib/strings/proto_serialization.h new file mode 100644 index 0000000000..6664928e28 --- /dev/null +++ b/tensorflow/core/lib/strings/proto_serialization.h @@ -0,0 +1,33 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CORE_LIB_STRINGS_PROTO_SERIALIZATION_H_ +#define TENSORFLOW_CORE_LIB_STRINGS_PROTO_SERIALIZATION_H_ + +#include "tensorflow/core/platform/protobuf.h" + +namespace tensorflow { + +// Wrapper around protocol buffer serialization that requests deterministic +// serialization, in particular for Map fields, which serialize in a random +// order by default. Returns true on success. +// Serialization is guaranteed to be deterministic for a given binary only. +// See the following for more details: +// https://github.com/google/protobuf/blob/a1bb147e96b6f74db6cdf3c3fcb00492472dbbfa/src/google/protobuf/io/coded_stream.h#L834 +bool SerializeToStringDeterministic(const protobuf::MessageLite& msg, + string* result); + +} // namespace tensorflow + +#endif // TENSORFLOW_CORE_LIB_STRINGS_PROTO_SERIALIZATION_H_ -- GitLab From 8691e3cb6ffd9e30907df5d4cb4e6878a4c2371b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 12:08:23 -0800 Subject: [PATCH 619/884] Fix a typo in gcs_smoke_test test script. PiperOrigin-RevId: 188367883 --- .../tools/integration_tests/gcs_smoke_test/test_wrapper.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh b/tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh index ef29dee346..d4b6524a81 100755 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh +++ b/tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh @@ -1,5 +1,5 @@ -# This is a python2 only test. #!/bin/bash +# This is a python2 only test. # Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -18,4 +18,4 @@ /usr/local/bin/pip install --user tf-nightly # Test Tensorflow interaction with GCS. 
-python tensorflow/tools/integration_test/gcs_smoke_test/gcs_smoke.py "$@" +python tensorflow/tools/integration_tests/gcs_smoke_test/gcs_smoke.py "$@" -- GitLab From 6accccdc007b2bbef392176c923a3de6ffa3be6c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 12:22:56 -0800 Subject: [PATCH 620/884] Allow setting control dependencies on TensorArrays. Is this the right way to do it? PiperOrigin-RevId: 188370600 --- tensorflow/contrib/py2tf/utils/context_managers.py | 7 +++++++ tensorflow/contrib/py2tf/utils/context_managers_test.py | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/tensorflow/contrib/py2tf/utils/context_managers.py b/tensorflow/contrib/py2tf/utils/context_managers.py index 38d9e11fe9..3d150a9581 100644 --- a/tensorflow/contrib/py2tf/utils/context_managers.py +++ b/tensorflow/contrib/py2tf/utils/context_managers.py @@ -21,6 +21,7 @@ from __future__ import print_function import contextlib from tensorflow.python.framework import ops +from tensorflow.python.ops import tensor_array_ops def control_dependency_on_returns(return_value): @@ -34,9 +35,15 @@ def control_dependency_on_returns(return_value): Returns: A context manager. """ + def control_dependency_handle(t): + if isinstance(t, tensor_array_ops.TensorArray): + return t.flow + return t + if return_value is None: return contextlib.contextmanager(lambda: (yield))() # TODO(mdan): Filter to tensor objects. 
if not isinstance(return_value, (list, tuple)): return_value = (return_value,) + return_value = tuple(control_dependency_handle(t) for t in return_value) return ops.control_dependencies(return_value) diff --git a/tensorflow/contrib/py2tf/utils/context_managers_test.py b/tensorflow/contrib/py2tf/utils/context_managers_test.py index 633ba93540..404f6e44e5 100644 --- a/tensorflow/contrib/py2tf/utils/context_managers_test.py +++ b/tensorflow/contrib/py2tf/utils/context_managers_test.py @@ -20,6 +20,8 @@ from __future__ import print_function from tensorflow.contrib.py2tf.utils import context_managers from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import tensor_array_ops from tensorflow.python.platform import test @@ -32,6 +34,9 @@ class ContextManagersTest(test.TestCase): with context_managers.control_dependency_on_returns( constant_op.constant(1)): pass + with context_managers.control_dependency_on_returns( + tensor_array_ops.TensorArray(dtypes.int32, size=1)): + pass with context_managers.control_dependency_on_returns( [constant_op.constant(1), constant_op.constant(2)]): -- GitLab From f0633ecbf6cc720c763e85b9ae907049401603a9 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 12:32:01 -0800 Subject: [PATCH 621/884] Make benchmark_model work. 
PiperOrigin-RevId: 188372156 --- tensorflow/contrib/lite/tools/BUILD | 10 +- .../contrib/lite/tools/benchmark_model.cc | 441 ++++++++++++++++-- 2 files changed, 419 insertions(+), 32 deletions(-) diff --git a/tensorflow/contrib/lite/tools/BUILD b/tensorflow/contrib/lite/tools/BUILD index 54df724f79..b5abbc0712 100644 --- a/tensorflow/contrib/lite/tools/BUILD +++ b/tensorflow/contrib/lite/tools/BUILD @@ -46,7 +46,15 @@ tf_cc_binary( "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite:string_util", "//tensorflow/contrib/lite/kernels:builtin_ops", - ], + ] + select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + ], + "//conditions:default": [ + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + ], + }), ) cc_library( diff --git a/tensorflow/contrib/lite/tools/benchmark_model.cc b/tensorflow/contrib/lite/tools/benchmark_model.cc index 6ae3ab5729..93c80e0f5e 100644 --- a/tensorflow/contrib/lite/tools/benchmark_model.cc +++ b/tensorflow/contrib/lite/tools/benchmark_model.cc @@ -1,4 +1,4 @@ -/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -25,36 +25,89 @@ limitations under the License. 
#include "tensorflow/contrib/lite/model.h" #include "tensorflow/contrib/lite/string_util.h" #include "tensorflow/contrib/lite/tools/mutable_op_resolver.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/init_main.h" +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/util/command_line_flags.h" #ifdef TFLITE_CUSTOM_OPS_HEADER void RegisterSelectedOps(::tflite::MutableOpResolver* resolver); #endif -#define LOG(x) std::cerr +namespace tflite { -#define CHECK(x) \ - if (!(x)) { \ - LOG(ERROR) << #x << "failed"; \ - exit(1); \ +using ::tensorflow::Env; +using ::tensorflow::str_util::Split; +using ::tensorflow::str_util::SplitAndParseAsFloats; +using ::tensorflow::str_util::SplitAndParseAsInts; + +struct InputLayerInfo { + string name; + TfLiteType data_type; + std::vector shape; + // Note that initialization_values is currently unused. + std::vector initialization_values; +}; + +template +void FillRandomValue(T* ptr, const std::vector& sizes, + const std::function& random_func) { + int num_elements = 1; + for (int dim : sizes) { + num_elements *= dim; + } + for (int i = 0; i < num_elements; ++i) { + *ptr++ = random_func(); } +} -namespace tensorflow { -namespace benchmark_tflite_model { +void FillRandomString(tflite::DynamicBuffer* buffer, + const std::vector& sizes, + const std::function& random_func) { + int num_elements = 1; + for (int dim : sizes) { + num_elements *= dim; + } + for (int i = 0; i < num_elements; ++i) { + auto str = random_func(); + buffer->AddString(str.data(), str.length()); + } +} -std::unique_ptr model; -std::unique_ptr interpreter; +TfLiteType TfLiteTypeFromString(const string& input_layer_type) { + if (input_layer_type == "string") + return kTfLiteString; + else if (input_layer_type == "float") + return kTfLiteFloat32; + else if (input_layer_type == "uint8") + return kTfLiteUInt8; + else if (input_layer_type == "int32") + return kTfLiteInt32; 
+ else if (input_layer_type == "int64") + return kTfLiteInt64; + else + return kTfLiteNoType; +} -void InitImpl(const std::string& graph, const std::vector& sizes, - const std::string& input_layer_type, int num_threads) { - CHECK(graph.c_str()); +std::vector ShapeFromTfLiteTensor(TfLiteTensor* t) { + std::vector result; + result.reserve(t->dims->size); + for (int i = 0; i < t->dims->size; ++i) { + result.push_back(t->dims->data[i]); + } + CHECK(!result.empty()) << "Found no shapes in model"; + return result; +} - model = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); +bool CreateInterpreter(const string& graph, + std::unique_ptr* model, + std::unique_ptr* interpreter) { + *model = tflite::FlatBufferModel::BuildFromFile(graph.c_str()); if (!model) { - LOG(FATAL) << "Failed to mmap model " << graph; + std::cerr << "Failed to load model " << graph << std::endl; + return false; } - LOG(INFO) << "Loaded model " << graph; - model->error_reporter(); - LOG(INFO) << "resolved reporter"; #ifdef TFLITE_CUSTOM_OPS_HEADER tflite::MutableOpResolver resolver; @@ -63,34 +116,360 @@ void InitImpl(const std::string& graph, const std::vector& sizes, tflite::ops::builtin::BuiltinOpResolver resolver; #endif - tflite::InterpreterBuilder(*model, resolver)(&interpreter); - if (!interpreter) { - LOG(FATAL) << "Failed to construct interpreter"; + tflite::InterpreterBuilder(*(model->get()), resolver)(interpreter); + if (!(*interpreter)) { + std::cerr << "Failed to construct interpreter" << std::endl; + return false; } + return true; +} + +bool PrepareInterpreter(const std::vector inputs, + int num_threads, bool use_nnapi, + Interpreter* interpreter) { if (num_threads != -1) { interpreter->SetNumThreads(num_threads); } - int input = interpreter->inputs()[0]; + interpreter->UseNNAPI(use_nnapi); - if (input_layer_type != "string") { - interpreter->ResizeInputTensor(input, sizes); + // Check that all names and types match + for (const InputLayerInfo& input : inputs) { + for (int i : 
interpreter->inputs()) { + TfLiteTensor* t = interpreter->tensor(i); + CHECK_EQ(t->name, input.name) + << "Tensor # " << i << " is named " << t->name + << " but flags call it " << input.name; + CHECK_EQ(t->type, input.data_type) + << "Could not match the type of input tensor " << t->name; + } + } + + // Resize all non-string tensors. + for (const InputLayerInfo& input : inputs) { + for (int i : interpreter->inputs()) { + TfLiteTensor* t = interpreter->tensor(i); + if (t->type != kTfLiteString) { + interpreter->ResizeInputTensor(i, input.shape); + } + } } if (interpreter->AllocateTensors() != kTfLiteOk) { - LOG(FATAL) << "Failed to allocate tensors!"; + std::cerr << "Failed to allocate tensors!" << std::endl; + return false; + } + + // Set the values of the input tensors. + for (int i : interpreter->inputs()) { + TfLiteTensor* t = interpreter->tensor(i); + std::vector sizes = ShapeFromTfLiteTensor(t); + + // TODO(ahentz): below we ignore the O-th dimension (number of batches). + if (t->type == kTfLiteFloat32) { + FillRandomValue( + interpreter->typed_tensor(i), + std::vector(sizes.begin() + 1, sizes.end()), + []() { return static_cast(rand()) / RAND_MAX - 0.5f; }); + } else if (t->type == kTfLiteUInt8) { + FillRandomValue( + interpreter->typed_tensor(i), + std::vector(sizes.begin() + 1, sizes.end()), + []() { return static_cast(rand()) % 255; }); + } else if (t->type == kTfLiteString) { + tflite::DynamicBuffer buffer; + FillRandomString(&buffer, sizes, []() { + return "we're have some friends over saturday to hang out in the yard"; + }); + buffer.WriteToTensor(interpreter->tensor(i)); + } else { + std::cerr << "Don't know how to populate tensor " << t->name + << " of type " << t->type << std::endl; + return false; + } + } + return true; +} + +bool PopulateInputLayerInfo(const string& names_string, + const string& shapes_string, + const string& types_string, + const string& values_string, + std::vector* info) { + std::vector names = Split(names_string, ','); + 
std::vector shapes = Split(shapes_string, ':'); + std::vector types = Split(types_string, ','); + std::vector values = Split(values_string, ':'); + + if (names.size() != shapes.size()) { + LOG(ERROR) << "The number of items in" + << " --input_layer_shape (" << shapes_string << ", with " + << shapes.size() << " items)" + << " must match the number of items in" + << " --input_layer (" << names_string << ", with " + << names.size() << " items)." + << " For example --input_layer=input1,input2" + << " --input_layer_shape=1,224,224,4:1,20"; + return false; + } + if (names.size() != types.size()) { + LOG(ERROR) << "The number of items in" + << " --input_layer_type (" << types_string << ", with " + << types.size() << " items)" + << " must match the number of items in" + << " --input_layer (" << names_string << ", with " + << names.size() << " items)." + << " For example --input_layer=input1,input2" + << " --input_layer_type=float,int"; + return false; + } + + for (int i = 0; i < names.size(); ++i) { + info->push_back(InputLayerInfo()); + InputLayerInfo& input = info->back(); + + input.name = names[i]; + + input.data_type = TfLiteTypeFromString(types[i]); + CHECK(input.data_type != kTfLiteNoType) + << types[i] << " was an invalid type"; + + CHECK(SplitAndParseAsInts(shapes[i], ',', &input.shape)) + << "Incorrect size string specified: " << shapes[i]; + for (int dim : input.shape) { + if (dim == -1) { + LOG(ERROR) << "Any unknown sizes in the shapes (-1's) must be replaced" + << " with the size you want to benchmark with."; + return false; + } + } + + if (i < values.size()) { + CHECK(SplitAndParseAsFloats(values[i], ',', &input.initialization_values)) + << "Incorrect initialization values string specified: " << values[i]; + } + } + + return true; +} + +bool RunBenchmark(Interpreter* interpreter, int64_t* inference_time_us) { + const int64_t start_time = Env::Default()->NowMicros(); + + if (interpreter->Invoke() != kTfLiteOk) { + std::cerr << "Failed to invoke!"; + return 
false; } + + const int64_t end_time = Env::Default()->NowMicros(); + *inference_time_us = end_time - start_time; + return true; +} + +class Latencies { + public: + void AddMeasurement(int64_t time_us) { + max_ = std::max(time_us, max_); + min_ = std::min(time_us, min_); + ++count_; + sum_ += time_us; + squared_sum_ += static_cast(time_us) * time_us; + } + + double avg() const { + if (count_ == 0) return std::numeric_limits::quiet_NaN(); + return static_cast(sum_) / count_; + } + + int64_t std_deviation() const { + if (count_ == 0 || min_ == max_) return 0; + return sqrt(squared_sum_ / count_ - avg() * avg()); + } + + void OutputToStream(std::ostream* stream) const { + *stream << "count=" << count_; + if (count_ == 0) return; + *stream << " min=" << min_ << " max=" << max_; + *stream << " avg=" << avg() << " std=" << std_deviation(); + } + + private: + int64_t count_ = 0; + int64_t min_ = std::numeric_limits::max(); + int64_t max_ = std::numeric_limits::min(); + int64_t sum_ = 0; + double squared_sum_ = 0; +}; + +bool TimeMultipleRuns(Interpreter* interpreter, double sleep_seconds, + int num_runs, int64* total_time_us) { + // Convert the run_delay string into a timespec. + timespec req; + req.tv_sec = static_cast(sleep_seconds); + req.tv_nsec = (sleep_seconds - req.tv_sec) * 1000000000; + + *total_time_us = 0; + + std::cout << "Running benchmark for " << num_runs + << " iterations: " << std::endl; + + Latencies latencies; + for (int i = 0; i < num_runs; ++i) { + int64_t time_us; + bool run_status = RunBenchmark(interpreter, &time_us); + latencies.AddMeasurement(time_us); + *total_time_us += time_us; + if (!run_status) { + std::cout << "Failed on run " << i << std::endl; + return false; + } + + // If requested, sleep between runs for an arbitrary amount of time. + // This can be helpful to determine the effect of mobile processor + // scaling and thermal throttling. 
+ if (sleep_seconds > 0.0) { +#ifdef PLATFORM_WINDOWS + Sleep(sleep_seconds * 1000); +#else + nanosleep(&req, nullptr); +#endif + } + } + latencies.OutputToStream(&std::cout); + std::cout << std::endl; + + return true; } int Main(int argc, char** argv) { - InitImpl("", {}, "", 1); + using tensorflow::Flag; + using tensorflow::Flags; + + string graph; // e.g.: /data/local/tmp/tfl_inception-v1_model.fb + string input_layer_string; // e.g.: input + string input_layer_shape_string; // e.g.: 1,224,224,3 + string input_layer_type_string; // e.g.: float + string input_layer_values_string; + string output_layer_string; // e.g.: output + int num_runs = 50; + string run_delay = "-1.0"; + int num_threads = -1; + string benchmark_name = ""; + string output_prefix = ""; + int warmup_runs = 1; + bool use_nnapi = false; + + std::vector flag_list = { + Flag("graph", &graph, "graph file name"), + // All the following flags are optional, but can be used in order + // to benchmark different input shapes. + Flag("input_layer", &input_layer_string, "input layer names"), + Flag("input_layer_shape", &input_layer_shape_string, "input layer shape"), + Flag("input_layer_type", &input_layer_type_string, "input layer type"), + Flag("input_layer_values", &input_layer_values_string, + "values to initialize the inputs with"), + Flag("output_layer", &output_layer_string, "output layer name"), + Flag("num_runs", &num_runs, "number of runs"), + Flag("run_delay", &run_delay, "delay between runs in seconds"), + Flag("num_threads", &num_threads, "number of threads"), + Flag("benchmark_name", &benchmark_name, "benchmark name"), + Flag("output_prefix", &output_prefix, "benchmark output prefix"), + Flag("warmup_runs", &warmup_runs, "how many runs to initialize model"), + Flag("use_nnapi", &use_nnapi, "use nnapi api"), + }; + string usage = Flags::Usage(argv[0], flag_list); + const bool parse_result = Flags::Parse(&argc, argv, flag_list); + tensorflow::port::InitMain(argv[0], &argc, &argv); + + if 
(!parse_result) { + std::cerr << usage << std::endl; + return -1; + } + + std::cout << "Graph: [" << graph << "]" << std::endl; + if (!input_layer_string.empty()) { + std::cout << "Input layers: [" << input_layer_string << "]" << std::endl; + std::cout << "Input shapes: [" << input_layer_shape_string << "]" + << std::endl; + std::cout << "Input types: [" << input_layer_type_string << "]" + << std::endl; + } + if (!output_layer_string.empty()) { + std::cout << "Output layers: [" << output_layer_string << "]" << std::endl; + } + std::cout << "Num runs: [" << num_runs << "]" << std::endl; + std::cout << "Inter-run delay (seconds): [" << run_delay << "]" << std::endl; + std::cout << "Num threads: [" << num_threads << "]" << std::endl; + if (!benchmark_name.empty()) { + std::cout << "Benchmark name: [" << benchmark_name << "]" << std::endl; + std::cout << "Output prefix: [" << output_prefix << "]" << std::endl; + } + std::cout << "Warmup runs: [" << warmup_runs << "]" << std::endl; + std::cout << "Use nnapi : [" << use_nnapi << "]" << std::endl; + + if (graph.empty()) { + std::cout + << "Please specify the name of your TF Lite input file with --graph" + << std::endl; + return -1; + } + + std::vector inputs; + if (!PopulateInputLayerInfo(input_layer_string, input_layer_shape_string, + input_layer_type_string, + input_layer_values_string, &inputs)) { + return -1; + } + + int64 initialization_start_us = Env::Default()->NowMicros(); + + std::unique_ptr model; + std::unique_ptr interpreter; + if (!CreateInterpreter(graph, &model, &interpreter)) { + return -1; + } + if (!PrepareInterpreter(inputs, num_threads, use_nnapi, interpreter.get())) { + return -1; + } + + int64 initialization_end_us = Env::Default()->NowMicros(); + + const double initialization_time_s = + (initialization_end_us - initialization_start_us) / 1000000.0f; + std::cout << "Initialized session in " << initialization_time_s << "s" + << std::endl; + + const double sleep_seconds = std::strtod(run_delay.c_str(), 
nullptr); + + // If requested, run through the graph first to preinitialize everything + // before the benchmarking runs. + int64 warmup_time_us = 0; + if (warmup_runs > 0) { + if (!TimeMultipleRuns(interpreter.get(), sleep_seconds, warmup_runs, + &warmup_time_us)) { + std::cerr << "Warmup failed" << std::endl; + return -1; + } + } + + // Capture overall inference time without stat logging overhead. This is the + // timing data that can be compared to other libraries. + int64 no_stat_time_us = 0; + if (!TimeMultipleRuns(interpreter.get(), sleep_seconds, num_runs, + &no_stat_time_us)) { + std::cerr << "Timing failed." << std::endl; + return -1; + } + + std::cout << "Average inference timings in us: " << no_stat_time_us / num_runs + << " , Warmup: " + << (warmup_runs > 0 ? warmup_time_us / warmup_runs : 0) << ", " + << std::endl; + return 0; } -} // namespace benchmark_tflite_model -} // namespace tensorflow +} // namespace tflite -int main(int argc, char** argv) { - return tensorflow::benchmark_tflite_model::Main(argc, argv); -} +int main(int argc, char** argv) { return ::tflite::Main(argc, argv); } -- GitLab From 601c57a83763810df7904375ba684f775d2bd13e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 13:06:20 -0800 Subject: [PATCH 622/884] Add support for literals to QN. Fix the AST logic and tests. Add some extra checks.
PiperOrigin-RevId: 188376050 --- tensorflow/contrib/py2tf/pyct/qual_names.py | 88 ++++++++++++++++--- .../contrib/py2tf/pyct/qual_names_test.py | 37 +++++--- 2 files changed, 99 insertions(+), 26 deletions(-) diff --git a/tensorflow/contrib/py2tf/pyct/qual_names.py b/tensorflow/contrib/py2tf/pyct/qual_names.py index 2ffda03868..6bcbaeb2ae 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names.py +++ b/tensorflow/contrib/py2tf/pyct/qual_names.py @@ -25,11 +25,38 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections + import gast from tensorflow.contrib.py2tf.pyct import anno +class Symbol(collections.namedtuple('Symbol', ['name'])): + """Represents a Python symbol.""" + + +class StringLiteral(collections.namedtuple('StringLiteral', ['value'])): + """Represents a Python string literal.""" + + def __str__(self): + return '\'%s\'' % self.value + + def __repr__(self): + return str(self) + + +class NumberLiteral(collections.namedtuple('NumberLiteral', ['value'])): + """Represents a Python numeric literal.""" + + def __str__(self): + return '%s' % self.value + + def __repr__(self): + return str(self) + + +# TODO(mdan): Use subclasses to remove the has_attr has_subscript booleans. class QN(object): """Represents a qualified name.""" @@ -39,26 +66,37 @@ class QN(object): 'both: attr={}, subscript={}.'.format(attr, subscript)) self._has_attr = False self._has_subscript = False + if attr is not None: if not isinstance(base, QN): - raise ValueError('For attribute QNs, base must be a QN.') + raise ValueError( + 'for attribute QNs, base must be a QN; got instead "%s"' % base) + if not isinstance(attr, str): + raise ValueError('attr may only be a string; got instead "%s"' % attr) self._parent = base # TODO(mdan): Get rid of the tuple - it can only have 1 or 2 elements now. 
self.qn = (base, attr) self._has_attr = True + elif subscript is not None: if not isinstance(base, QN): raise ValueError('For subscript QNs, base must be a QN.') self._parent = base self.qn = (base, subscript) self._has_subscript = True + else: - if not isinstance(base, str): - raise ValueError('For simple QNs, base must be a string.') + if not isinstance(base, (str, StringLiteral, NumberLiteral)): + # TODO(mdan): Require Symbol instead of string. + raise ValueError( + 'For simple QNs, base must be a string or a Literal object.') assert '.' not in base and '[' not in base and ']' not in base self._parent = None self.qn = (base,) + def is_symbol(self): + return isinstance(self.qn[0], str) + def is_composite(self): return len(self.qn) > 1 @@ -108,10 +146,21 @@ class QN(object): def ast(self): # The caller must adjust the context appropriately. if self.has_subscript(): - return gast.Subscript(self.parent.ast(), str(self.qn[-1]), None) + return gast.Subscript(self.parent.ast(), gast.Index(self.qn[-1].ast()), + None) if self.has_attr(): return gast.Attribute(self.parent.ast(), self.qn[-1], None) - return gast.Name(self.qn[0], None, None) + + base = self.qn[0] + if isinstance(base, str): + return gast.Name(base, None, None) + elif isinstance(base, StringLiteral): + return gast.Str(base.value) + elif isinstance(base, NumberLiteral): + return gast.Num(base.value) + else: + assert False, ('the constructor should prevent types other than ' + 'str, StringLiteral and NumberLiteral') class QnResolver(gast.NodeTransformer): @@ -120,25 +169,36 @@ class QnResolver(gast.NodeTransformer): Note: Not using NodeAnnos to avoid circular dependencies. 
""" + def visit_Call(self, node): + node = self.generic_visit(node) + # This helps treat the following cases uniformly: + # a = b[i] + # a = b()[i] + anno.copyanno(node.func, node, anno.Basic.QN) + return node + def visit_Name(self, node): - self.generic_visit(node) + node = self.generic_visit(node) anno.setanno(node, anno.Basic.QN, QN(node.id)) return node def visit_Attribute(self, node): - self.generic_visit(node) + node = self.generic_visit(node) anno.setanno(node, anno.Basic.QN, QN(anno.getanno(node.value, anno.Basic.QN), attr=node.attr)) return node def visit_Subscript(self, node): - if not isinstance(node.slice, gast.Index): - raise NotImplementedError('range and multi-dimensional indexing are not' - ' yet supported') - self.generic_visit(node) - if isinstance(node.slice.value, gast.Num) or isinstance( - node.slice.value, gast.Str): - raise NotImplementedError('constant subscripts are not yet supported') + node = self.generic_visit(node) + s = node.slice + if not isinstance(s, gast.Index): + # TODO(mdan): Support range and multi-dimensional indices. + # Continuing silently because some demos use these. 
+ return node + if isinstance(s.value, gast.Num): + subscript = QN(NumberLiteral(s.value.n)) + elif isinstance(s.value, gast.Str): + subscript = QN(StringLiteral(s.value.s)) else: subscript = anno.getanno(node.slice.value, anno.Basic.QN) anno.setanno(node, anno.Basic.QN, diff --git a/tensorflow/contrib/py2tf/pyct/qual_names_test.py b/tensorflow/contrib/py2tf/pyct/qual_names_test.py index 9eaaaf9d4c..f2cd8e98f0 100644 --- a/tensorflow/contrib/py2tf/pyct/qual_names_test.py +++ b/tensorflow/contrib/py2tf/pyct/qual_names_test.py @@ -22,6 +22,7 @@ import textwrap from tensorflow.contrib.py2tf.pyct import anno from tensorflow.contrib.py2tf.pyct import parser +from tensorflow.contrib.py2tf.pyct import qual_names from tensorflow.contrib.py2tf.pyct.qual_names import QN from tensorflow.contrib.py2tf.pyct.qual_names import resolve from tensorflow.python.platform import test @@ -56,7 +57,7 @@ class QNTest(test.TestCase): self.assertEqual(str(a_sub_b), 'a[b]') self.assertEqual(a_sub_b.ssf(), 'a_sub_b') self.assertEqual(a_sub_b.ast().value.id, 'a') - self.assertEqual(a_sub_b.ast().slice, 'b') + self.assertEqual(a_sub_b.ast().slice.value.id, 'b') self.assertTrue(a_sub_b.is_composite()) self.assertTrue(a_sub_b.has_subscript()) self.assertEqual(a_sub_b.parent.qn, ('a',)) @@ -73,8 +74,9 @@ class QNTest(test.TestCase): self.assertEqual(str(a_sub_b_sub_c), 'a[b[c]]') self.assertEqual(a_sub_b_sub_c.ssf(), 'a_sub_b_sub_c') self.assertEqual(a_sub_b_sub_c.ast().value.id, 'a') - self.assertEqual(a_sub_b_sub_c.ast().slice, 'b[c]') - self.assertEqual(b_sub_c.ast().slice, 'c') + self.assertEqual(a_sub_b_sub_c.ast().slice.value.value.id, 'b') + self.assertEqual(a_sub_b_sub_c.ast().slice.value.slice.value.id, 'c') + self.assertEqual(b_sub_c.ast().slice.value.id, 'c') self.assertEqual(a_sub_b_sub_c.parent.qn, ('a',)) with self.assertRaises(ValueError): QN('a', 'b') @@ -112,13 +114,13 @@ class QNTest(test.TestCase): b_sub_c = QN(b, subscript=c) a_sub_b_sub_c = QN(a, subscript=b_sub_c) - b_dot_c = 
QN(b, attr=c) + b_dot_c = QN(b, attr='c') a_sub__b_dot_c = QN(a, subscript=b_dot_c) a_sub_b = QN(a, subscript=b) - a_sub_b__dot_c = QN(a_sub_b, attr=c) + a_sub_b__dot_c = QN(a_sub_b, attr='c') - a_dot_b = QN(a, attr=b) + a_dot_b = QN(a, attr='b') a_dot_b_sub_c = QN(a_dot_b, subscript=c) self.assertEqual(str(a_sub_b_sub_c), 'a[b[c]]') @@ -126,14 +128,14 @@ class QNTest(test.TestCase): self.assertEqual(str(a_sub_b__dot_c), 'a[b].c') self.assertEqual(str(a_dot_b_sub_c), 'a.b[c]') - self.assertFalse(a_sub_b_sub_c == a_sub__b_dot_c) - self.assertFalse(a_sub_b_sub_c == a_sub_b__dot_c) - self.assertFalse(a_sub_b_sub_c == a_dot_b_sub_c) + self.assertNotEqual(a_sub_b_sub_c, a_sub__b_dot_c) + self.assertNotEqual(a_sub_b_sub_c, a_sub_b__dot_c) + self.assertNotEqual(a_sub_b_sub_c, a_dot_b_sub_c) - self.assertFalse(a_sub__b_dot_c == a_sub_b__dot_c) - self.assertFalse(a_sub__b_dot_c == a_dot_b_sub_c) + self.assertNotEqual(a_sub__b_dot_c, a_sub_b__dot_c) + self.assertNotEqual(a_sub__b_dot_c, a_dot_b_sub_c) - self.assertFalse(a_sub_b__dot_c == a_dot_b_sub_c) + self.assertNotEqual(a_sub_b__dot_c, a_dot_b_sub_c) def test_hashable(self): d = {QN('a'): 'a', QN('b'): 'b'} @@ -141,6 +143,17 @@ class QNTest(test.TestCase): self.assertEqual(d[QN('b')], 'b') self.assertTrue(QN('c') not in d) + def test_literals(self): + a = QN('a') + a_sub_str_b = QN(a, subscript=QN(qual_names.StringLiteral('b'))) + a_sub_b = QN(a, subscript=QN('b')) + + self.assertNotEqual(a_sub_str_b, a_sub_b) + self.assertNotEqual(hash(a_sub_str_b), hash(a_sub_b)) + + a_sub_three = QN(a, subscript=QN(qual_names.NumberLiteral(3))) + self.assertEqual(a_sub_three.ast().slice.value.n, 3) + class QNResolverTest(test.TestCase): -- GitLab From d6f3a547af2060974a1397052809a1a7f1e2d311 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Thu, 8 Mar 2018 13:29:44 -0800 Subject: [PATCH 623/884] disabling tsan test that is currently failing PiperOrigin-RevId: 188378908 --- tensorflow/contrib/eager/python/BUILD | 1 + 1 file 
changed, 1 insertion(+) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index fcb14bedc4..2f9bc68aaa 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ b/tensorflow/contrib/eager/python/BUILD @@ -266,6 +266,7 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], + tags = ["notsan"], ) filegroup( -- GitLab From ebf554ff77bc46bfdd9b424bc44b62f803100b33 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 13:36:46 -0800 Subject: [PATCH 624/884] Make adaptive SDCA the default. PiperOrigin-RevId: 188380039 --- .../python/kernel_tests/sdca_ops_test.py | 10 +++++----- .../base_api/api_def_SdcaOptimizer.pbtxt | 5 ++++- tensorflow/core/kernels/sdca_internal.cc | 2 +- tensorflow/core/kernels/sdca_internal.h | 18 +++++++++--------- tensorflow/core/kernels/sdca_ops.cc | 10 +++++----- .../tools/api/golden/tensorflow.train.pbtxt | 2 +- 6 files changed, 25 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py index 70f777f08b..cfe62fac43 100644 --- a/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py +++ b/tensorflow/contrib/linear_optimizer/python/kernel_tests/sdca_ops_test.py @@ -270,14 +270,14 @@ class SdcaWithLogisticLossTest(SdcaModelTest): train_op = lr.minimize() - def Minimize(): + def minimize(): with self._single_threaded_test_session(): for _ in range(_MAX_ITERATIONS): - train_op.run() + train_op.run() # pylint: disable=cell-var-from-loop threads = [] for _ in range(num_loss_partitions): - threads.append(threading.Thread(target=Minimize)) + threads.append(threading.Thread(target=minimize)) threads[-1].start() for t in threads: @@ -395,7 +395,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): predicted_labels = get_binary_predictions_for_logistic(predictions) self.assertAllClose([0, 1, 1, 1], 
predicted_labels.eval()) self.assertAllClose( - 0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) + 0.0, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2) def testFractionalExampleLabel(self): # Setup test data with 1 positive, and 1 mostly-negative example. @@ -407,7 +407,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest): make_example_proto({ 'age': [1], 'gender': [1] - }, 1), + }, 0.9), ] example_weights = [1.0, 1.0] for num_shards in _SHARD_NUMBERS: diff --git a/tensorflow/core/api_def/base_api/api_def_SdcaOptimizer.pbtxt b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizer.pbtxt index b0b58ac00e..9da0e124eb 100644 --- a/tensorflow/core/api_def/base_api/api_def_SdcaOptimizer.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_SdcaOptimizer.pbtxt @@ -97,8 +97,11 @@ END } attr { name: "adaptative" + default_value { + b: True + } description: <::Matrix example_state_data, diff --git a/tensorflow/core/kernels/sdca_internal.h b/tensorflow/core/kernels/sdca_internal.h index 45915693ac..1665b1210e 100644 --- a/tensorflow/core/kernels/sdca_internal.h +++ b/tensorflow/core/kernels/sdca_internal.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_KERNELS_SDCA_INTERNAL_H_ -#define TENSORFLOW_KERNELS_SDCA_INTERNAL_H_ +#ifndef TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_ +#define TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_ #define EIGEN_USE_THREADS @@ -75,7 +75,7 @@ struct ExampleStatistics { class Regularizations { public: - Regularizations(){}; + Regularizations() {} // Initialize() must be called immediately after construction. 
Status Initialize(OpKernelConstruction* const context) { @@ -199,7 +199,7 @@ class FeatureWeightsDenseStorage { FeatureWeightsDenseStorage(const TTypes::Matrix nominals, TTypes::Matrix deltas) : nominals_(nominals), deltas_(deltas) { - CHECK(deltas.rank() > 1); + CHECK_GT(deltas.rank(), 1); } // Check if a feature index is with-in the bounds. @@ -322,15 +322,15 @@ class Examples { return examples_.at(example_index); } - int sampled_index(const int id, const bool adaptative) const { - if (adaptative) return sampled_index_[id]; + int sampled_index(const int id, const bool adaptive) const { + if (adaptive) return sampled_index_[id]; return id; } // Adaptive SDCA in the current implementation only works for // binary classification, where the input argument for num_weight_vectors // is 1. - Status SampleAdaptativeProbabilities( + Status SampleAdaptiveProbabilities( const int num_loss_partitions, const Regularizations& regularization, const ModelWeights& model_weights, const TTypes::Matrix example_state_data, @@ -378,7 +378,7 @@ class Examples { // All examples in the batch. std::vector examples_; - // Adaptative sampling variables + // Adaptive sampling variables. 
std::vector probabilities_; std::vector sampled_index_; std::vector sampled_count_; @@ -391,4 +391,4 @@ class Examples { } // namespace sdca } // namespace tensorflow -#endif // TENSORFLOW_KERNELS_SDCA_INTERNAL_H_ +#endif // TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_ diff --git a/tensorflow/core/kernels/sdca_ops.cc b/tensorflow/core/kernels/sdca_ops.cc index dbe0177dda..5b63057f3f 100644 --- a/tensorflow/core/kernels/sdca_ops.cc +++ b/tensorflow/core/kernels/sdca_ops.cc @@ -80,7 +80,7 @@ struct ComputeOptions { context, false, errors::InvalidArgument("Unsupported loss type: ", loss_type)); } - OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptative)); + OP_REQUIRES_OK(context, context->GetAttr("adaptative", &adaptive)); OP_REQUIRES_OK( context, context->GetAttr("num_sparse_features", &num_sparse_features)); OP_REQUIRES_OK(context, context->GetAttr("num_sparse_features_with_values", @@ -113,7 +113,7 @@ struct ComputeOptions { int num_dense_features = 0; int num_inner_iterations = 0; int num_loss_partitions = 0; - bool adaptative = false; + bool adaptive = true; Regularizations regularizations; }; @@ -147,9 +147,9 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) { OP_REQUIRES_OK(context, context->set_output("out_example_state_data", mutable_example_state_data_t)); - if (options.adaptative) { + if (options.adaptive) { OP_REQUIRES_OK(context, - examples.SampleAdaptativeProbabilities( + examples.SampleAdaptiveProbabilities( options.num_loss_partitions, options.regularizations, model_weights, example_state_data, options.loss_updater, /*num_weight_vectors =*/1)); @@ -163,7 +163,7 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) { // num_examples which is an int. 
for (int id = static_cast(begin); id < end; ++id) { const int64 example_index = - examples.sampled_index(++atomic_index, options.adaptative); + examples.sampled_index(++atomic_index, options.adaptive); const Example& example = examples.example(example_index); const float dual = example_state_data(example_index, 0); const float example_weight = example.example_weight(); diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index e49c719a33..3b06aafa9f 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -402,7 +402,7 @@ tf_module { } member_method { name: "sdca_optimizer" - argspec: "args=[\'sparse_example_indices\', \'sparse_feature_indices\', \'sparse_feature_values\', \'dense_features\', \'example_weights\', \'example_labels\', \'sparse_indices\', \'sparse_weights\', \'dense_weights\', \'example_state_data\', \'loss_type\', \'l1\', \'l2\', \'num_loss_partitions\', \'num_inner_iterations\', \'adaptative\', \'name\'], varargs=None, keywords=None, defaults=[\'False\', \'None\'], " + argspec: "args=[\'sparse_example_indices\', \'sparse_feature_indices\', \'sparse_feature_values\', \'dense_features\', \'example_weights\', \'example_labels\', \'sparse_indices\', \'sparse_weights\', \'dense_weights\', \'example_state_data\', \'loss_type\', \'l1\', \'l2\', \'num_loss_partitions\', \'num_inner_iterations\', \'adaptative\', \'name\'], varargs=None, keywords=None, defaults=[\'True\', \'None\'], " } member_method { name: "sdca_shrink_l1" -- GitLab From 8c9a9b371d619ba35f7eae598a2eea045659738a Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Thu, 8 Mar 2018 13:39:54 -0800 Subject: [PATCH 625/884] [TF Lite] Provide a fast path for Interpreter::SetTensorParametersReadOnly. The fast path kicks in if shape matches tensor.dims and type matches tensor.type. 
In this case, the interpreter is not invalidated and AllocateTensors need not be called after a call to SetTensorParametersReadOnly. PiperOrigin-RevId: 188380596 --- tensorflow/contrib/lite/context.c | 8 ++++++-- tensorflow/contrib/lite/context.h | 3 +++ tensorflow/contrib/lite/interpreter.cc | 20 ++++++++++++++++---- tensorflow/contrib/lite/util.cc | 10 ++++++++++ tensorflow/contrib/lite/util.h | 4 ++++ 5 files changed, 39 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/context.c b/tensorflow/contrib/lite/context.c index 620de5d678..5c6f5e72a4 100644 --- a/tensorflow/contrib/lite/context.c +++ b/tensorflow/contrib/lite/context.c @@ -60,12 +60,16 @@ TfLiteIntArray* TfLiteIntArrayCopy(TfLiteIntArray* src) { void TfLiteIntArrayFree(TfLiteIntArray* a) { free(a); } -void TfLiteTensorFree(TfLiteTensor* t) { +void TfLiteTensorDataFree(TfLiteTensor* t) { if (t->allocation_type == kTfLiteDynamic && t->data.raw) { free(t->data.raw); } - if (t->dims) TfLiteIntArrayFree(t->dims); t->data.raw = NULL; +} + +void TfLiteTensorFree(TfLiteTensor* t) { + TfLiteTensorDataFree(t); + if (t->dims) TfLiteIntArrayFree(t->dims); t->dims = NULL; } diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index c6521e2fbf..23946dd26e 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -223,6 +223,9 @@ typedef struct { bool data_is_stale; } TfLiteTensor; +// Free data memory of tensor `t`; +void TfLiteTensorDataFree(TfLiteTensor* t); + // Free memory of tensor `t`; void TfLiteTensorFree(TfLiteTensor* t); diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 8fd1085544..2834dc49f9 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -569,10 +569,22 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( &required_bytes)); TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes); } - invokable_ = false; - 
TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), - quantization, const_cast(buffer), bytes, - kTfLiteMmapRo, allocation, &context_.tensors[tensor_index]); + + TfLiteTensor& tensor = context_.tensors[tensor_index]; + if (type == tensor.type && EqualVectorAndTfLiteIntArray(tensor.dims, dims)) { + // Fast path which does not invalidate the invokable property. + TfLiteTensorDataFree(&tensor); + tensor.data.raw = const_cast(buffer); + if (!tensor.dims) tensor.dims = ConvertVectorToTfLiteIntArray(dims); + tensor.params = quantization; + tensor.allocation_type = kTfLiteMmapRo; + tensor.allocation = allocation; + } else { + invokable_ = false; + TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), + quantization, const_cast(buffer), bytes, + kTfLiteMmapRo, allocation, &tensor); + } return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc index b2c7e6c7a6..b7f31e2731 100644 --- a/tensorflow/contrib/lite/util.cc +++ b/tensorflow/contrib/lite/util.cc @@ -24,4 +24,14 @@ TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input) { return output; } +bool EqualVectorAndTfLiteIntArray(const TfLiteIntArray* a, + const std::vector& b) { + if (!a) return false; + if (a->size != b.size()) return false; + for (int i = 0; i < a->size; ++i) { + if (a->data[i] != b[i]) return false; + } + return true; +} + } // namespace tflite diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h index 50e4fb839e..f505d82a11 100644 --- a/tensorflow/contrib/lite/util.h +++ b/tensorflow/contrib/lite/util.h @@ -29,6 +29,10 @@ namespace tflite { // Converts a `std::vector` to a `TfLiteIntArray`. TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input); +// Checks whether a `TfLiteIntArray` and `std::vector` have matching elements. 
+bool EqualVectorAndTfLiteIntArray(const TfLiteIntArray* a, + const std::vector& b); + } // namespace tflite #endif // TENSORFLOW_CONTRIB_LITE_UTIL_H_ -- GitLab From fb7df94916b24a45c138babcf24f431af4b0dbd8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 13:49:31 -0800 Subject: [PATCH 626/884] Add basic TensorArray support. This is temporary, and will be replaced with lists (although we can keep the TAs if necessary). PiperOrigin-RevId: 188382727 --- tensorflow/contrib/py2tf/converters/BUILD | 11 ++ .../py2tf/converters/converter_test_base.py | 11 +- tensorflow/contrib/py2tf/converters/lists.py | 103 ++++++++++++++++++ .../contrib/py2tf/converters/lists_test.py | 52 +++++++++ tensorflow/contrib/py2tf/impl/conversion.py | 5 + .../py2tf/pyct/static_analysis/annos.py | 10 +- 6 files changed, 189 insertions(+), 3 deletions(-) create mode 100644 tensorflow/contrib/py2tf/converters/lists.py create mode 100644 tensorflow/contrib/py2tf/converters/lists_test.py diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index fa7718c93e..c85ad9200e 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -26,6 +26,7 @@ py_library( "decorators.py", "for_loops.py", "list_comprehension.py", + "lists.py", "logical_expressions.py", "name_scopes.py", "side_effect_guards.py", @@ -156,6 +157,16 @@ py_test( ], ) +py_test( + name = "lists_test", + srcs = ["lists_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":test_lib", + "//tensorflow/python:client_testlib", + ], +) + py_test( name = "logical_expressions_test", srcs = ["logical_expressions_test.py"], diff --git a/tensorflow/contrib/py2tf/converters/converter_test_base.py b/tensorflow/contrib/py2tf/converters/converter_test_base.py index 1f98d8469c..8c08c5492a 100644 --- a/tensorflow/contrib/py2tf/converters/converter_test_base.py +++ b/tensorflow/contrib/py2tf/converters/converter_test_base.py @@ -88,7 +88,13 
@@ class TestCase(test.TestCase): def make_fake_mod(self, name, *symbols): fake_mod = imp.new_module(name) for s in symbols: - setattr(fake_mod, s.__name__, s) + if hasattr(s, '__name__'): + setattr(fake_mod, s.__name__, s) + elif hasattr(s, 'name'): + # This is a bit of a hack, but works for things like tf.int32 + setattr(fake_mod, s.name, s) + else: + raise ValueError('can not attach %s - what should be its name?' % s) return fake_mod def attach_namespace(self, module, **ns): @@ -112,7 +118,8 @@ class TestCase(test.TestCase): arg_values=None, arg_types=arg_types, owner_type=owner_type, - recursive=recursive) + recursive=recursive, + type_annotation_func=utils.set_element_type) node = qual_names.resolve(node) node = activity.resolve(node, ctx) node = live_values.resolve(node, ctx, {}) diff --git a/tensorflow/contrib/py2tf/converters/lists.py b/tensorflow/contrib/py2tf/converters/lists.py new file mode 100644 index 0000000000..06e1dad8f4 --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/lists.py @@ -0,0 +1,103 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Converter for list operations. + +This includes converting Python lists to TensorArray/TensorList. +""" + +# TODO(mdan): Elaborate the logic here. +# TODO(mdan): Does it even make sense to attempt to try to use TAs? 
+# The current rule (always convert to TensorArray) is naive and insufficient. +# In general, a better mechanism could look like: +# * convert to TensorList by default +# * leave as Python list if the user explicitly forbids it +# * convert to TensorArray only when complete write once behavior can be +# guaranteed (e.g. list comprehensions) + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import gast + +from tensorflow.contrib.py2tf.pyct import anno +from tensorflow.contrib.py2tf.pyct import templates +from tensorflow.contrib.py2tf.pyct import transformer +from tensorflow.python.framework import dtypes + + +class ListTransformer(transformer.Base): + """Converts lists and related operations to their TF counterpart.""" + + def _empty_list(self, node): + if not anno.hasanno(node, 'element_type'): + raise NotImplementedError( + 'type inference for empty lists is not yet supported; ' + 'use utils.set_element_type(, ) to continue') + dtype = anno.getanno(node, 'element_type') + if not isinstance(dtype, dtypes.DType): + # TODO(mdan): Allow non-TF dtypes? + # That would be consistent with the dynamic dispatch pattern, but + # we must make sure that doesn't become confusing. + raise NotImplementedError('element type "%s" not yet supported' % dtype) + + dtype_name = dtype.name + # TODO(mdan): Does it ever make sense not to use tensor lists? 
+ template = """ + tf.TensorArray(tf.dtype_name, size=0, dynamic_size=True) + """ + return templates.replace_as_expression(template, dtype_name=dtype_name) + + def _pre_populated_list(self, node): + raise NotImplementedError() + + def visit_Expr(self, node): + node = self.generic_visit(node) + if isinstance(node.value, gast.Call): + call_node = node.value + qn = anno.getanno(call_node.func, anno.Basic.QN) + + if qn.qn[-1] == 'append' and (len(call_node.args) == 1): + template = """ + target = py2tf_utils.dynamic_list_append(target, element) + """ + node = templates.replace( + template, + target=qn.parent.ast(), + element=call_node.args[0]) + return node + + def visit_Assign(self, node): + node = self.generic_visit(node) + + # Only convert lists when they are assigned to a variable, e.g.: + # l = [] + # TODO(mdan): This rule should be improved. + if len(node.targets) != 1: + return node + if not isinstance(node.value, gast.List): + return node + if not isinstance(node.value.ctx, gast.Load): + return node + + if node.value.elts: + node.value = self._pre_populated_list(node.value) + else: + node.value = self._empty_list(node.value) + return node + + +def transform(node, context): + return ListTransformer(context).visit(node) diff --git a/tensorflow/contrib/py2tf/converters/lists_test.py b/tensorflow/contrib/py2tf/converters/lists_test.py new file mode 100644 index 0000000000..671a1cc7b1 --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/lists_test.py @@ -0,0 +1,52 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for lists module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf import utils +from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.py2tf.converters import lists +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import tensor_array_ops +from tensorflow.python.platform import test + + +class ListTest(converter_test_base.TestCase): + + def test_empty_annotated_list(self): + + def test_fn(): + l = [] + utils.set_element_type(l, dtypes.int32) + l.append(1) + return l + + node = self.parse_and_analyze(test_fn, {'dtypes': dtypes, 'utils': utils}) + node = lists.transform(node, self.ctx) + + with self.compiled(node, tensor_array_ops.TensorArray, + dtypes.int32) as result: + # TODO(mdan): Attach these additional modules automatically. 
+ result.utils = utils + result.dtypes = dtypes + with self.test_session() as sess: + self.assertEqual(test_fn(), sess.run(result.test_fn().stack())) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index 96e7b1a53e..8a3cf9cd0a 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -29,6 +29,7 @@ from tensorflow.contrib.py2tf.converters import continue_statements from tensorflow.contrib.py2tf.converters import control_flow from tensorflow.contrib.py2tf.converters import decorators from tensorflow.contrib.py2tf.converters import for_loops +from tensorflow.contrib.py2tf.converters import lists from tensorflow.contrib.py2tf.converters import logical_expressions from tensorflow.contrib.py2tf.converters import name_scopes from tensorflow.contrib.py2tf.converters import side_effect_guards @@ -299,6 +300,9 @@ def node_to_graph(node, ctx, nocompile_decorators): node = _static_analysis_pass(node, ctx) + # TODO(mdan): Clean this up. + # Some intermediate analyses are not required, and some comments got orphaned. + # Past this point, line numbers are no longer accurate so we ignore the # source. # TODO(mdan): Is it feasible to reconstruct intermediate source code? @@ -317,6 +321,7 @@ def node_to_graph(node, ctx, nocompile_decorators): node = single_return.transform(node, ctx) node = _static_analysis_pass(node, ctx) + node = lists.transform(node, ctx) node = for_loops.transform(node, ctx) # for_loops may insert new global references. 
node = builtin_functions.transform(node, ctx) diff --git a/tensorflow/contrib/py2tf/pyct/static_analysis/annos.py b/tensorflow/contrib/py2tf/pyct/static_analysis/annos.py index 2d8e494423..5254b83ca7 100644 --- a/tensorflow/contrib/py2tf/pyct/static_analysis/annos.py +++ b/tensorflow/contrib/py2tf/pyct/static_analysis/annos.py @@ -34,13 +34,14 @@ class NodeAnno(NoValue): """ # Symbols - + # These flags are boolean. IS_LOCAL = 'Symbol is local to the function scope being analized.' IS_PARAM = 'Symbol is a parameter to the function being analized.' IS_MODIFIED_SINCE_ENTRY = ( 'Symbol has been explicitly replaced in the current function scope.') # Scopes + # Scopes are represented by objects of type activity.Scope. ARGS_SCOPE = 'The scope for the argument list of a function call.' BODY_SCOPE = ( 'The scope for the main body of a statement (True branch for if ' @@ -48,3 +49,10 @@ class NodeAnno(NoValue): ORELSE_SCOPE = ( 'The scope for the orelse body of a statement (False branch for if ' 'statements, orelse body for loops).') + + # Type and Value annotations + # Type annotations are represented by objects of type type_info.Type. 
+ STATIC_INFO = ( + 'The type or value information that should be asserted about the entity ' + 'referenced by the symbol holding this annotation, irrespective of the ' + 'execution context.') -- GitLab From 96bf8aee5bc74bd505c4d57afab1f0067a4247eb Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Thu, 8 Mar 2018 13:53:19 -0800 Subject: [PATCH 627/884] disbaling timeout in guitar PiperOrigin-RevId: 188383577 --- tensorflow/contrib/nccl/BUILD | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/contrib/nccl/BUILD b/tensorflow/contrib/nccl/BUILD index 5ac96007df..94d01efee1 100644 --- a/tensorflow/contrib/nccl/BUILD +++ b/tensorflow/contrib/nccl/BUILD @@ -52,6 +52,7 @@ tf_cuda_cc_test( "manual", "multi_gpu", "no_oss", + "noguitar", "notap", ], deps = @@ -136,6 +137,7 @@ cuda_py_test( "manual", "multi_gpu", "no_oss", + "noguitar", "notap", ], ) -- GitLab From 07a0c1536fa792844c54686379249c21576c4c81 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Thu, 8 Mar 2018 14:05:39 -0800 Subject: [PATCH 628/884] disabling failing tsan test PiperOrigin-RevId: 188385868 --- tensorflow/compiler/tests/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 782bf82d41..4143aa1f80 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -537,6 +537,7 @@ tf_xla_py_test( size = "medium", srcs = ["spacetobatch_op_test.py"], shard_count = 3, + tags = ["notsan"], deps = [ ":xla_test", "//tensorflow/python:array_ops", -- GitLab From 2dd2f9d04037b7c9b137e5ce3638506e1f013e13 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Thu, 8 Mar 2018 14:29:45 -0800 Subject: [PATCH 629/884] Add document for TPUEstimate.predict, including limitations and example. 
PiperOrigin-RevId: 188390287 --- .../contrib/tpu/python/tpu/tpu_estimator.py | 143 ++++++++++++++++-- 1 file changed, 133 insertions(+), 10 deletions(-) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index 33251f2412..d918b0f198 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -1517,14 +1517,20 @@ class TPUEstimator(estimator_lib.Estimator): size when calling the `input_fn` and `model_fn`. Users should specify global batch size in constructor, and then get the batch size for each shard in `input_fn` and `model_fn` by `params['batch_size']`. - For training, `model_fn` gets per-core batch size; `input_fn` may get - per-core or per-host batch size depending on - `per_host_input_for_training` in `TPUConfig`. - For evaluation, `model_fn` gets per-core batch size and `input_fn` get - per-host batch size. + + - For training, `model_fn` gets per-core batch size; `input_fn` may get + per-core or per-host batch size depending on `per_host_input_for_training` + in `TPUConfig` (See docstring for TPUConfig for details). + + - For evaluation and prediction, `model_fn` gets per-core batch size and + `input_fn` get per-host batch size. + + Evaluation + ========== `model_fn` should return `TPUEstimatorSpec`, which expects the `eval_metrics` for TPU evaluation. + `TPUEstimatorSpec.eval_metrics` is a tuple of `metric_fn` and `tensors`, where `tensors` could be a list of `Tensor`s or dict of names to `Tensor`s. (See `TPUEstimatorSpec` for details). `metric_fn` takes the `tensors` and returns @@ -1536,12 +1542,17 @@ class TPUEstimator(estimator_lib.Estimator): `train_batch_size` or `eval_batch_size` unmodified as `params['batch_size']`. Current limitations: + -------------------- + + 1. TPU evaluation only works on a single host (one TPU worker). - 1. TPU evaluation only works on single host. - 2. 
`input_fn` for evaluation should not throw OutOfRange error for all - evaluation steps and all batches should have the same size. + 2. `input_fn` for evaluation should **NOT** raise an end-of-input exception + (`OutOfRangeError` or `StopIteration`). And all evaluation steps and all + batches should have the same size. Example (MNIST): + ---------------- + ``` # The metric Fn which runs on CPU. def metric_fn(labels, logits): @@ -1577,8 +1588,120 @@ class TPUEstimator(estimator_lib.Estimator): })) ``` - Predict support on TPU is not yet implemented. So, `predict` and - `export_savedmodel` are executed on CPU, even if `use_tpu` is true. + Prediction + ========== + + Prediction on TPU is an experimental feature to support large batch inference. + It is not designed for latency-critical system. In addition, due to some + usability issues, for prediction with small dataset, CPU `.predict`, i.e., + creating a new `TPUEstimator` instance with `use_tpu=False`, might be more + convenient. + + Note: In contrast to TPU training/evaluation, the `input_fn` for prediction + *should* raise an end-of-input exception (`OutOfRangeError` or + `StopIteration`), which serves as the stopping signal to `TPUEstimator`. To be + precise, the ops created by `input_fn` produce one batch of the data. + The `predict()` API processes one batch at a time. When reaching the end of + the data source, an end-of-input exception should be raised by one of these + operations. The user usually does not need to do this manually. As long as the + dataset is not repeated forever, the `tf.data` API will raise an end-of-input + exception automatically after the last batch has been produced. + + Note: Estimator.predict returns a Python generator. Please consume all the + data from the generator so that TPUEstimator can shutdown the TPU system + properly for user. + + Current limitations: + -------------------- + 1. TPU prediction only works on a single host (one TPU worker). + + 2. 
`input_fn` must return a `Dataset` instance rather than `features`. In + fact, .train() and .evaluate() also support Dataset as return value. + + 3. Each batch returned by `Dataset`'s iterator must have the *same static* + shape. This means two things: + - batch_size cannot be `None` + - the final batch must be padded by user to a full batch. + + Example (MNIST): + ---------------- + ``` + height = 32 + width = 32 + total_examples = 100 + + def predict_input_fn(params): + batch_size = params['batch_size'] + + images = tf.random_uniform( + [total_examples, height, width, 3], minval=-1, maxval=1) + + dataset = tf.data.Dataset.from_tensor_slices(images) + dataset = dataset.batch(batch_size) + dataset = dataset.map(lambda images: {'image': images}) + + def pad(tensor, missing_count): + # Pads out the batch dimension to the complete batch_size. + rank = len(tensor.shape) + assert rank > 0 + padding = tf.stack([[0, missing_count]] + [[0, 0]] * (rank - 1)) + padded_shape = (batch_size,) + tuple(tensor.shape[1:]) + padded_tensor = tf.pad(tensor, padding) + padded_tensor.set_shape(padded_shape) + return padded_tensor + + def pad_batch_if_incomplete(batch_features): + # Pads out the batch dimension for all features. 
+ real_batch_size = tf.shape(batch_features["image"])[0] + + missing_count = tf.constant(batch_size, tf.int32) - real_batch_size + + padded_features = { + key: pad(tensor, missing_count) + for key, tensor in batch_features.iteritems() + } + padding_mask = tf.concat( + [ + tf.zeros((real_batch_size, 1), dtype=tf.int32), + tf.ones((missing_count, 1), dtype=tf.int32) + ], + axis=0) + padding_mask.set_shape((batch_size, 1)) + padded_features["is_padding"] = padding_mask + return padded_features + + dataset = dataset.map(pad_batch_if_incomplete) + + return dataset + + def model_fn(features, labels, params, mode): + # Generate predictions, called 'output', from features['image'] + + if mode == tf.estimator.ModeKeys.PREDICT: + return tf.contrib.tpu.TPUEstimatorSpec( + mode=mode, + predictions={ + 'predictions': output, + 'is_padding': features['is_padding'] + }) + + tpu_est = TPUEstimator( + model_fn=model_fn, + ..., + predict_batch_size=16) + + # Fully consume the generator so that TPUEstimator can shutdown the TPU + # system. + for item in tpu_est.predict(input_fn=input_fn): + # Filter out item if the `is_padding` is 1. + # Process the 'predictions' + ``` + + Exporting + ========= + + Exporting `SavedModel` support on TPU is not yet implemented. So, + `export_savedmodel` is executed on CPU, even if `use_tpu` is true. """ def __init__(self, -- GitLab From 04d33df3058a9e172659cb6ba9e5bc8f1412ec42 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 14:42:12 -0800 Subject: [PATCH 630/884] Add/AddN optimizer/rewriter Collapse a sub-graph of Add/AddN operations of fully specified and identical shapes to a single AddN operation. 
PiperOrigin-RevId: 188392302 --- .../optimizers/arithmetic_optimizer.cc | 397 +++++++++++++++++- .../optimizers/arithmetic_optimizer.h | 29 +- .../optimizers/arithmetic_optimizer_test.cc | 195 ++++++++- 3 files changed, 613 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 709a434e40..3cf42fde41 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -214,7 +214,12 @@ PartialTensorShape GetInputShape(const string& input, const NodeMap& node_map) { int output_pos; string node_name = ParseNodeName(input, &output_pos); const NodeDef* input_node = node_map.GetNode(node_name); - return input_node->attr().at(kOutputShapesAttr).list().shape(output_pos); + auto attr = input_node->attr(); + if (attr.find(kOutputShapesAttr) == attr.end()) { + return PartialTensorShape(); // unknown shape + } else { + return attr.at(kOutputShapesAttr).list().shape(output_pos); + } } bool ShapesEqual(const string& input_x, const string& input_y, @@ -292,6 +297,359 @@ NodeDef* GetTailOfValuePreservingChain( is_value_preserving_non_branching); } +// Context passed to each arithmetic optimizer stage. Optimizer stage is +// responsible for updating the node map for all added or deleted nodes, to keep +// it consistent with optimized graph. +struct ArithmeticOptimizerContext { + ArithmeticOptimizerContext( + const std::unordered_set* nodes_to_preserve, + GraphDef* optimized_graph, NodeMap* node_map, + SetVector* nodes_to_simplify) + : nodes_to_preserve(nodes_to_preserve), + optimized_graph(optimized_graph), + node_map(node_map), + nodes_to_simplify(nodes_to_simplify) {} + + const std::unordered_set* nodes_to_preserve; + GraphDef* optimized_graph; + NodeMap* node_map; + SetVector* nodes_to_simplify; +}; + +// Base class for single arithmetic optimization: e.g. 
Bitcast optimization, +// AddOps optimization, etc... +class ArithmeticOptimizerStage { + public: + explicit ArithmeticOptimizerStage(ArithmeticOptimizerContext ctx) + : ctx_(ctx) {} + virtual ~ArithmeticOptimizerStage() = default; + + // Check if we should try to simplify node. Returning true doesn't + // guarantee that node will be simplified. + // + // Should implement just a basic sanity check, without any expensive graph + // traversals. + virtual bool IsSupported(const NodeDef* node) const = 0; + + // Try to simplify the given node. If successfully simplified a given node, + // return a name of a new simplified version using output parameter. + // + // Consumers of an old node's outputs will be automatically re-wired to + // consume outputs of a new simplified node. + // + // Return error status only if some precondition is failed, or got an + // incorrect graph. In every other case return Status:OK(), even if didn't + // simplify anything. + // + // A simplified node will be always considered for further optimization and + // will be automatically added to the optimization queue. If a simplified node + // has the same name as original node it has to be explicitly added to the + // optimization queue for second pass. + virtual Status TrySimplify(const NodeDef* node, + string* simplified_node_name) = 0; + + protected: + // Simplification graph rewrite can create additional nodes that are inputs + // to final simplified node, they can be also added to the arithmetic + // optimizer queue for further optimization. + void AddToOptimizationQueue(NodeDef* node) { + ctx_.nodes_to_simplify->PushBack(node); + } + + // Get a node by input name from a node map. Return a error if node was not + // found. 
+ Status GetInputNode(const string& input, NodeDef** node) const { + string node_name = NodeName(input); + NodeDef* node_by_name = ctx_.node_map->GetNode(node_name); + if (node_by_name == nullptr) { + return errors::FailedPrecondition("Node ", node_name, + " doesn't exists in a node map"); + } + *node = node_by_name; + return Status::OK(); + } + + // Get input shape from a node map. If node doesn't exists return unknown + // shape. + PartialTensorShape GetInputShape(const string& input) const { + int position; + string node_name = ParseNodeName(input, &position); + NodeDef* node; + Status node_status = GetInputNode(node_name, &node); + if (!node_status.ok()) { + return PartialTensorShape(); // unknown shape + } + auto attr = node->attr(); + if (attr.find(kOutputShapesAttr) == attr.end()) { + return PartialTensorShape(); // unknown shape + } else { + return attr.at(kOutputShapesAttr).list().shape(position); + } + } + + ArithmeticOptimizerContext ctx_; +}; + +// Rewrite a tree of Add/AddN with a single AddN operation, consuming all the +// original inputs of absorbed nodes. +// +// All nodes in a Add/AddN subgraph must have fully specified and identical +// shape. All nodes must have the same device placement. +// +// Example: +// AddN_1 +// / | \ +// Add_1 z Add_2 -> AddN(z, y, z, w, q, e) +// / \ / \ +// x y w Add_3 +// / \ +// q e +class AddOpsRewriteStage : public ArithmeticOptimizerStage { + public: + explicit AddOpsRewriteStage(ArithmeticOptimizerContext ctx) + : ArithmeticOptimizerStage(ctx), rewritten_nodes_() {} + + ~AddOpsRewriteStage() override = default; + + // Check if a node can become a root of AddOpsGroup + bool IsSupported(const NodeDef* node) const override { + // check basic preconditions + if (!IsRewritable(node)) { + return false; + } + // and must have fully defined shape + // TODO(ezhulenev): support partially defined shapes, when we can prove that + // unknown dimensions in the rewritten subgraph are the same. 
+ PartialTensorShape shape = GetInputShape(node->name()); + if (!shape.IsFullyDefined()) { + return false; + } + // and must have inputs of fully defined shape identical to the output + // TODO(ezhulenev): relax this condition to support equal unknown dimensions + return HasAllInputsOfIdenticalShape(*node, shape); + } + + Status TrySimplify(const NodeDef* node, + string* simplified_node_name) override { + CHECK(IsSupported(node)) + << "Node " << node->name() + << " is not supported by add ops group optimizer step"; + AddOpsGroup group; + TF_RETURN_IF_ERROR(CreateAddOpsGroup(node, &group)); + + if (!group.absorbed_nodes.empty()) { + *simplified_node_name = RewriteAddOpsGroup(group); + } + + return Status::OK(); + } + + private: + // Holds together an add ops subgraph that we want to rewrite together. + // + // For the graph above the AddOpsGroup will be: + // root_node: AddN_1 + // absorbed_nodes: [Add_1, Add_2] + // input_nodes: [x, y, z, w, q, e] + struct AddOpsGroup { + const NodeDef* root_node; + PartialTensorShape root_shape; + // Add/AddN operations below the root level that were absorbed by this group + std::vector absorbed_nodes; + // Inputs of absorbed nodes that will be forwarded to rewritten AddN node + std::vector inputs; + }; + + // Check if all inputs are fully defined and identical to expected shape + bool HasAllInputsOfIdenticalShape(const NodeDef& node, + const PartialTensorShape& shape) const { + const AddOpsRewriteStage* self = this; + return std::all_of(node.input().begin(), node.input().end(), + [self, &shape](const string& input) { + auto input_shape = self->GetInputShape(input); + return input_shape.IsFullyDefined() && + input_shape.IsIdenticalTo(shape); + }); + } + + // TODO(ezhulenev): use GraphRewriter? + bool IsDrivenByControlDependency(const NodeDef& node) const { + return std::any_of(node.input().begin(), node.input().end(), + IsControlInput); + } + + // TODO(ezhulenev): use GraphRewriter? 
+ bool DrivesControlDependency(const NodeDef& node) const { + int position; + for (const NodeDef* output : ctx_.node_map->GetOutputs(node.name())) { + for (int i = 0; i < output->input_size(); ++i) { + auto input = output->input(i); + string name = ParseNodeName(input, &position); + if (name == node.name() && /*control input*/ position < 0) { + return true; + } + } + } + return false; + } + + // Check if a node can be absorbed by current AddOpsGroup + bool IsAbsorbableByAddOpsGroup(const string& name, const AddOpsGroup& group) { + NodeDef* node; + Status node_status = GetInputNode(name, &node); + if (!node_status.ok()) { + return false; + } + + PartialTensorShape shape = GetInputShape(name); + CHECK(shape.IsIdenticalTo(group.root_shape)) + << "Cannot absorb a node of incompatible shape"; + + // check basic preconditions + if (!IsRewritable(node)) { + return false; + } + // with a single output consumer (presumably if we reach this node from + // previously absorbed or a root node, it means that this node is not used + // as an input to any other op, outside of the group) + if (ctx_.node_map->GetOutputs(node->name()).size() != 1) { + return false; + } + // must be on the same device as a root node + if (node->device() != group.root_node->device()) { + return false; + } + // All input shapes must be fully defined and equal to the node shape + return HasAllInputsOfIdenticalShape(*node, shape); + } + + // Node requirements both for a root node and an absorbed node + bool IsRewritable(const NodeDef* node) const { + // only Add or AddN can be a root node + // TODO(ezhulenev): check if AccumulateNV2 can be supported too + if (!IsAdd(*node) && !IsAddN(*node)) { + return false; + } + // it must not be in a preserve set + if (ctx_.nodes_to_preserve->find(node->name()) != + ctx_.nodes_to_preserve->end()) { + return false; + } + // it must not be a node created or absorbed by previous iteration + if (rewritten_nodes_.find(node->name()) != rewritten_nodes_.end()) { + return 
false; + } + // should not drive or be driven by control dependency + // TODO(ezhulenev): relax this condition for root node + return !(IsDrivenByControlDependency(*node) || + DrivesControlDependency(*node)); + } + + // Create an AddOpsGroup with a root in a given node + Status CreateAddOpsGroup(const NodeDef* root_node, AddOpsGroup* group) { + group->root_node = root_node; + group->root_shape = GetInputShape(root_node->name()); + + group->absorbed_nodes.reserve(root_node->input_size()); + for (int i = 0; i < root_node->input_size(); ++i) { + TF_RETURN_IF_ERROR(AbsorbInputByAddOpsGroup(root_node->input(i), group)); + } + + return Status::OK(); + } + + Status AbsorbInputByAddOpsGroup(const string& input, AddOpsGroup* group) { + NodeDef* node; + TF_RETURN_IF_ERROR(GetInputNode(input, &node)); + + if (IsAbsorbableByAddOpsGroup(input, *group)) { + group->absorbed_nodes.push_back(node); + for (int i = 0; i < node->input_size(); ++i) { + TF_RETURN_IF_ERROR(AbsorbInputByAddOpsGroup(node->input(i), group)); + } + } else { + // If node can't be absorbed, add it to AddOpsGroup input + group->inputs.push_back(input); + } + return Status::OK(); + } + + const std::pair ParseNodeScopeAndName(const string& name) { + auto pos = name.find_last_of("/"); + if (pos == string::npos) { + return {"", name}; + } else { + return {name.substr(0, pos), name.substr(pos + 1)}; + } + } + + // New node for AddOpsGroup is added to the same scope as a root_node. All + // absorbed nodes are stripped of their scope, and only names are used in a + // new node name. 
+ // + // Example: AddOpsGroup(root="a/b/c/Add_2", absorbed=["d/Add_1", "e/Add"]) + // node_name="a/b/c/AddOpsGroup_Add_2_Add_1_Add + string AddOpsGroupName(const AddOpsGroup& group) { + CHECK_NOTNULL(group.root_node); + string node_name; + + auto root_node = ParseNodeScopeAndName(group.root_node->name()); + auto root_scope = root_node.first; + auto root_name = root_node.second; + if (!root_scope.empty()) { + strings::StrAppend(&node_name, root_scope, "/"); + } + + strings::StrAppend(&node_name, kArithmeticOptimizer, "/", "AddOpsGroup_", + root_name); + for (const NodeDef* absorbed : group.absorbed_nodes) { + auto absorbed_node = ParseNodeScopeAndName(absorbed->name()); + strings::StrAppend(&node_name, "_", absorbed_node.second); + } + return node_name; + } + + // Create a new node for a AddOpsGroup and return it's name. + string RewriteAddOpsGroup(const AddOpsGroup& group) { + CHECK_GT(group.absorbed_nodes.size(), 0) + << "AddOpsGroup must have non empty absorbed nodes"; + + // name for a new node constructed from AddOpsGroup + string node_name = AddOpsGroupName(group); + + // copy attributes from a root node + DataType dtype = group.root_node->attr().at("T").type(); + + // add new node + NodeDef* added_node = ctx_.optimized_graph->add_node(); + added_node->set_name(node_name); + added_node->set_op("AddN"); + added_node->set_device(group.root_node->device()); + (*added_node->mutable_attr())["T"].set_type(dtype); + (*added_node->mutable_attr())["N"].set_i(group.inputs.size()); + + ctx_.node_map->AddNode(node_name, added_node); + for (string input : group.inputs) { + ctx_.node_map->AddOutput(input, node_name); + added_node->add_input(std::move(input)); + } + + VLOG(1) << "Absorbed " << group.absorbed_nodes.size() + << " Add/AddN nodes from the graph"; + + // keep track of nodes that were created or absorbed as a part of rewrite + rewritten_nodes_.insert(node_name); + for (const NodeDef* absorbed : group.absorbed_nodes) { + rewritten_nodes_.insert(absorbed->name()); 
+ } + + return node_name; + } + + // keep nodes that were added or absorbed as a part of AddOpsGroup rewrite + std::unordered_set rewritten_nodes_; +}; + } // namespace class UniqueNodes { @@ -516,6 +874,8 @@ void ArithmeticOptimizer::AddFrameControlDeps( } } +// TODO(ezhulenev): extract each individual simplify rewrite into separate +// ArithmeticOptimizerStage string ArithmeticOptimizer::TrySimplifyAndReplaceUses( const NodeDef* node, SetVector* nodes_to_simplify) { // Remove involutions applied twice. @@ -1025,14 +1385,46 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { for (int i = 0; i < optimized_graph_->node_size(); ++i) { nodes_to_simplify.PushBack(optimized_graph_->mutable_node(i)); } + + ArithmeticOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_, + node_map_.get(), &nodes_to_simplify); + + std::vector> stages; + + // Add/AddN tree rewrites + if (options_.enable_add_to_addn_combining) { + stages.push_back( + std::unique_ptr(new AddOpsRewriteStage(ctx))); + } + + VLOG(1) << "Simplify arithmetic ops using " << stages.size() + << " arithmetic optimization stages"; + while (!nodes_to_simplify.Empty()) { const NodeDef* node = nodes_to_simplify.PopBack(); - const string simplified_tensor = + + // TODO(ezhulenev): move all rewrites into separate stages + string simplified_tensor = TrySimplifyAndReplaceUses(node, &nodes_to_simplify); + + // if it was not simplified try to run it through all configured stages + if (simplified_tensor.empty()) { + for (auto& stage : stages) { + if (stage->IsSupported(node)) { + TF_RETURN_IF_ERROR(stage->TrySimplify(node, &simplified_tensor)); + if (!simplified_tensor.empty()) { + break; + } + } + } + } + + // if it's still empty go to the next Node if (simplified_tensor.empty()) { continue; } + // re-wire consumers of an old node to the new one if (NodeName(simplified_tensor) != node->name()) { // Always consider simplified_tensor for further optimizations. 
NodeDef* simplified_node = node_map_->GetNode(simplified_tensor); @@ -1087,6 +1479,7 @@ Status ArithmeticOptimizer::Optimize(Cluster* /*cluster*/, // Shapes are only needed in aggressive mode. graph_properties_.reset(new GraphProperties(item)); TF_RETURN_IF_ERROR(graph_properties_->InferStatically(false)); + // TODO(ezhulenev): Use GraphProperties to lookup tensor shapes directly TF_RETURN_IF_ERROR(graph_properties_->AnnotateOutputShapes(optimized_graph_)); // Perform the optimizations. diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index afd538db40..9cff8ca9d0 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -32,9 +32,14 @@ constexpr char kArithmeticOptimizer[] = "ArithmeticOptimizer"; // run a model. class ArithmeticOptimizer : public GraphOptimizer { public: - ArithmeticOptimizer() : opt_level_(RewriterConfig::ON) {} + ArithmeticOptimizer() + : opt_level_(RewriterConfig::ON), + options_(ArithmeticOptimizerOptions::Default(RewriterConfig::ON)) {} + explicit ArithmeticOptimizer(RewriterConfig::Toggle opt_level) - : opt_level_(opt_level) {} + : opt_level_(opt_level), + options_(ArithmeticOptimizerOptions::Default(opt_level)) {} + ~ArithmeticOptimizer() override {} string name() const override { return "arithmetic_optimizer"; }; @@ -46,6 +51,21 @@ class ArithmeticOptimizer : public GraphOptimizer { const GraphDef& optimized_graph, double result) override; private: + friend class ArithmeticOptimizerTest; + + // Granular control for arithmetic optimizer stages + struct ArithmeticOptimizerOptions { + // rewrite a tree of Add/AddN ops with a single AddN + bool enable_add_to_addn_combining; + + // Choose which arithmetic optimizer stages will be enabled for a given + // optimization level by default. 
+ static ArithmeticOptimizerOptions Default( + RewriterConfig::Toggle opt_level) { + return {/*enable_add_to_addn_combining*/ true}; + } + }; + // Returns true is a node with given name and the optimizer prefix already // exists. string OptimizedNodeName(const NodeDef& node, StringPiece suffix) const; @@ -97,13 +117,14 @@ class ArithmeticOptimizer : public GraphOptimizer { SetVector* nodes_to_simplify); RewriterConfig::Toggle opt_level_; + ArithmeticOptimizerOptions options_; - bool fetch_nodes_known_; + bool fetch_nodes_known_ = false; std::unordered_set nodes_to_preserve_; std::unique_ptr node_map_; FrameMap frame_map_; std::unique_ptr graph_properties_; - GraphDef* optimized_graph_; // Not owned. + GraphDef* optimized_graph_ = nullptr; // Not owned. }; } // end namespace grappler diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 2a82b25058..a56351c18a 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -26,6 +26,7 @@ limitations under the License. namespace tensorflow { namespace grappler { + namespace { string OptimizedName(const string& name) { @@ -46,8 +47,32 @@ void VerifyGraphsMatch(const GraphDef& original_graph, } } } +} // namespace -class ArithmeticOptimizerTest : public ::testing::Test {}; +class ArithmeticOptimizerTest : public ::testing::Test { + protected: + // Optimize a graph using ArithmeticOptimizer and prune all the nodes that no + // longer have any output consumers. + void OptimizeAndPrune(ArithmeticOptimizer* optimizer, GrapplerItem* item, + GraphDef* output) { + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + item->graph.Swap(output); + TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output)); + } + + // TODO(ezhulenev): Make private. 
After migration to stages each test + // should explicitly enable required optimization for tests isolation + void DisableAllStages(ArithmeticOptimizer* optimizer) { + ArithmeticOptimizer::ArithmeticOptimizerOptions options{ + /*enable_add_to_addn_combining*/ false}; + optimizer->options_ = options; + } + + void EnableAddToAddNCombining(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.enable_add_to_addn_combining = true; + } +}; TEST_F(ArithmeticOptimizerTest, NoOp) { // This trivial graph is so basic there's nothing to optimize. @@ -350,7 +375,10 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { for (int i = 0; i < item.graph.node_size(); ++i) { item.graph.mutable_node(i)->set_device(devices[i]); } + ArithmeticOptimizer optimizer; + DisableAllStages(&optimizer); + GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); @@ -1164,6 +1192,169 @@ TEST_F(ArithmeticOptimizerTest, RemoveRedundantCast) { [](const NodeDef& node) { return node.op() == "Cast"; })); } -} // namespace +TEST_F(ArithmeticOptimizerTest, AddOpsRewriteCollapseAddsOfIdenticalShape) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + tensorflow::Scope sx = s.NewSubScope("x"); + tensorflow::Scope sy = s.NewSubScope("y"); + + auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {2, 2}, DT_FLOAT); + auto c = ops::Variable(s.WithOpName("c"), {2, 2}, DT_FLOAT); + auto add_ab = ops::Add(sx.WithOpName("Add_ab"), a, b); + auto add_abc = ops::Add(sy.WithOpName("Add_abc"), add_ab, c); + + auto outputs = ops::Identity(s.WithOpName("outputs"), add_abc); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableAddToAddNCombining(&optimizer); + + OptimizeAndPrune(&optimizer, &item, &output); + + // We expect the following rewrite(s) to occur: + // + // + + // / \ + 
// + c --> AddN(a, b, c) + // / \ + // a b + EXPECT_EQ(5, output.node_size()); + + NodeMap node_map(&output); + + // check add tree was replaced with AddN + const NodeDef* collapsed_add = CHECK_NOTNULL( + node_map.GetNode("y/ArithmeticOptimizer/AddOpsGroup_Add_abc_Add_ab")); + + EXPECT_EQ("AddN", collapsed_add->op()); + EXPECT_EQ(3, collapsed_add->input_size()); + EXPECT_EQ("a", collapsed_add->input(0)); + EXPECT_EQ("b", collapsed_add->input(1)); + EXPECT_EQ("c", collapsed_add->input(2)); + + // check output was re-wired to new node + const NodeDef* updated_outputs = CHECK_NOTNULL(node_map.GetNode("outputs")); + + EXPECT_EQ(collapsed_add->name(), updated_outputs->input(0)); +} + +TEST_F(ArithmeticOptimizerTest, AddOpsRewriteMultiplePasses) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {2, 2}, DT_FLOAT); + auto c = ops::Variable(s.WithOpName("c"), {2, 2}, DT_FLOAT); + auto add_ab = ops::Add(s.WithOpName("Add_ab"), a, b); + auto add_abc = ops::Add(s.WithOpName("Add_abc"), add_ab, c); + + auto x = ops::Variable(s.WithOpName("x"), {2, 2}, DT_FLOAT); + auto y = ops::Variable(s.WithOpName("y"), {2, 2}, DT_FLOAT); + auto z = ops::Variable(s.WithOpName("z"), {2, 2}, DT_FLOAT); + auto add_xy = ops::Add(s.WithOpName("Add_xy"), x, y); + auto add_xyz = ops::Add(s.WithOpName("Add_xyz"), add_xy, z); + + auto mul = ops::Multiply(s.WithOpName("Mul"), add_abc, add_xyz); + auto outputs = ops::Identity(s.WithOpName("outputs"), mul); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableAddToAddNCombining(&optimizer); + + OptimizeAndPrune(&optimizer, &item, &output); + + // We expect the following rewrite(s) to occur: + // + // * + // / \ + // + + * + // / \ / \ / \ + // + c x + --> AddN(a, b, c) AddN(x, y, z)) + // / \ / \ + // a b y z + EXPECT_EQ(10, 
output.node_size()); + + NodeMap node_map(&output); + + // check left Add subtree replaced with AddN + const NodeDef* collapsed_left = CHECK_NOTNULL( + node_map.GetNode("ArithmeticOptimizer/AddOpsGroup_Add_abc_Add_ab")); + + EXPECT_EQ("AddN", collapsed_left->op()); + EXPECT_EQ(3, collapsed_left->input_size()); + EXPECT_EQ("a", collapsed_left->input(0)); + EXPECT_EQ("b", collapsed_left->input(1)); + EXPECT_EQ("c", collapsed_left->input(2)); + + // check right Add subtree replaced with AddN + const NodeDef* collapsed_right = CHECK_NOTNULL( + node_map.GetNode("ArithmeticOptimizer/AddOpsGroup_Add_xyz_Add_xy")); + + EXPECT_EQ("AddN", collapsed_right->op()); + EXPECT_EQ(3, collapsed_right->input_size()); + EXPECT_EQ("x", collapsed_right->input(0)); + EXPECT_EQ("y", collapsed_right->input(1)); + EXPECT_EQ("z", collapsed_right->input(2)); + + // check that Mul inputs re-wired to new Nodes + const NodeDef* updated_mul = CHECK_NOTNULL(node_map.GetNode("Mul")); + + EXPECT_EQ("Mul", updated_mul->op()); + EXPECT_EQ(2, updated_mul->input_size()); + EXPECT_EQ(collapsed_left->name(), updated_mul->input(0)); + EXPECT_EQ(collapsed_right->name(), updated_mul->input(1)); +} + +TEST_F(ArithmeticOptimizerTest, AddOpsRewriteAddInputThroughMultiplePaths) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {2, 2}, DT_FLOAT); + auto c = ops::Variable(s.WithOpName("c"), {2, 2}, DT_FLOAT); + auto add_ab = ops::Add(s.WithOpName("Add_ab"), a, b); + auto add_bc = ops::Add(s.WithOpName("Add_bc"), b, c); + auto add_all = ops::Add(s.WithOpName("Add_all"), add_ab, add_bc); + auto outputs = ops::Identity(s.WithOpName("outputs"), add_all); + + GrapplerItem item; + item.fetch = {"outputs"}; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + + GraphDef output; + ArithmeticOptimizer optimizer; + EnableAddToAddNCombining(&optimizer); + + OptimizeAndPrune(&optimizer, &item, &output); + + // We 
expect the following rewrite(s) to occur: + // + // + + // / \ + // + + --> AddN(a, b, b, c) + // / \ / \ ^ + // a b c b added twice! + EXPECT_EQ(5, output.node_size()); + + NodeMap node_map(&output); + + // check Add tree replaced with AddN + const NodeDef* collapsed_add = CHECK_NOTNULL(node_map.GetNode( + "ArithmeticOptimizer/AddOpsGroup_Add_all_Add_ab_Add_bc")); + + EXPECT_EQ("AddN", collapsed_add->op()); + EXPECT_EQ(4, collapsed_add->input_size()); + EXPECT_EQ("a", collapsed_add->input(0)); + EXPECT_EQ("b", collapsed_add->input(1)); + EXPECT_EQ("b", collapsed_add->input(2)); + EXPECT_EQ("c", collapsed_add->input(3)); +} + } // namespace grappler } // namespace tensorflow -- GitLab From b592a8295aac0fdfffc2aa55695924e53e90bba7 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 14:56:02 -0800 Subject: [PATCH 631/884] Add SSIM and PSNR functions to tf.image. Add the following functions: - tf.image.psnr() - tf.image.ssim() - tf.image.ssim_multiscale() - tf.image.sobel_edges() - tf.image.image_gradients() Add test images in tensorflow/core/lib/ssim/testdata, .../psnr/testdata. Fulfills request for SSIM, fixes #15370. 
PiperOrigin-RevId: 188394631 --- tensorflow/core/BUILD | 7 + tensorflow/core/lib/psnr/testdata/cat_q20.jpg | Bin 0 -> 1965 bytes tensorflow/core/lib/psnr/testdata/cat_q72.jpg | Bin 0 -> 2584 bytes tensorflow/core/lib/psnr/testdata/cat_q95.jpg | Bin 0 -> 4032 bytes .../core/lib/ssim/testdata/checkerboard1.png | Bin 0 -> 773 bytes .../core/lib/ssim/testdata/checkerboard2.png | Bin 0 -> 3121 bytes .../core/lib/ssim/testdata/checkerboard3.png | Bin 0 -> 4546 bytes tensorflow/python/BUILD | 3 + tensorflow/python/ops/image_ops.py | 5 + tensorflow/python/ops/image_ops_impl.py | 490 ++++++++++++++++++ tensorflow/python/ops/image_ops_test.py | 418 +++++++++++++++ .../tools/api/golden/tensorflow.image.pbtxt | 20 + 12 files changed, 943 insertions(+) create mode 100644 tensorflow/core/lib/psnr/testdata/cat_q20.jpg create mode 100644 tensorflow/core/lib/psnr/testdata/cat_q72.jpg create mode 100644 tensorflow/core/lib/psnr/testdata/cat_q95.jpg create mode 100644 tensorflow/core/lib/ssim/testdata/checkerboard1.png create mode 100644 tensorflow/core/lib/ssim/testdata/checkerboard2.png create mode 100644 tensorflow/core/lib/ssim/testdata/checkerboard3.png diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 491f83e4fc..0fbe4eba6e 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3652,6 +3652,13 @@ filegroup( "lib/gif/testdata/optimized.gif", # BMP data "lib/bmp/testdata/lena.bmp", + # SSIM, PSNR data + "lib/ssim/testdata/checkerboard1.png", + "lib/ssim/testdata/checkerboard2.png", + "lib/ssim/testdata/checkerboard3.png", + "lib/psnr/testdata/cat_q20.jpg", + "lib/psnr/testdata/cat_q72.jpg", + "lib/psnr/testdata/cat_q95.jpg", ], visibility = ["//visibility:public"], ) diff --git a/tensorflow/core/lib/psnr/testdata/cat_q20.jpg b/tensorflow/core/lib/psnr/testdata/cat_q20.jpg new file mode 100644 index 0000000000000000000000000000000000000000..d7b882a7a7b17ca6f77876d6f534c41c3c62a11a GIT binary patch literal 1965 zcmex=o+2Ft9NQF)}kSGBAL# 
z6eBB`4r5?okcP737#J8dplX;H7#Iv0nHYE&7#PwR7#K3!S-|Rx7$Cp{Y7UHc_w)@= zFw(QoGcsUcWnh@V&cMRJz+hlxWWcZh$~H7GU|axk!Te4iLi` z{y${)$S5f(u+rDhE7nU)$xlkvOU}>LuShJ=H`FuGXRxuaC`e4sPAySLN=?tqvsHh; zd%wCuW{RzPxT&v!Z-H}aMy5wqQEG6NUr2IQcCuxPy1gBjO@&oOZb5EpNuokUZcbjY zRfVlO*l?@7Vk?lazPcTkO;JjkRgjAtR7p`vnyot2E#>9qdgaD?`9p6CpE3aMjt~8u0A3twKzYg6s+Fd2&xiU5Uw&3n@VIs zxJrb=(7enNJI}o245-0qGH@Ni`DrEPiAAY)?)mxYIjK<9a8bBAP~r?rEh^5;&$9!u z6`&jiBR!bgLBdc`M5wssBo=1`WLCf=V0^fKLp=kir7$U&Qn=mD`8oMT!3BxQsdk=u zC8TzE!;`cf7a~)n z=B3!G7lD%e|Jw}C3>q?W%5st#%8JUm+8PEnDULSg<~G$4v7RXt8)i=HZ|Lb-u;tm| z1*>kY?dm!4|MabQU;qFAKl|YS|NnpefA;tP50D{@+PbZVVxozi2Ti-uXMWw|Tu zT)9xYn_!RHra92?z=) z7%Dpczs118$jHFR%Jk~BW=D?Clf3y`{uKRO5^%A{>BH27c`}owR#bSOO7UG0Gp(%k z!L`zU&&rG8|Bo<;2%`9mfq@|bY!)*kqdlV_9tBKr1p?SjU}RvkX9AfZ19CSb12ZUi zpqNoW&`?pqF%WDOBLkzo;>HSf*A0tbX*xw7))M2=2z2MzJI&g%>x}f`C^xsGIZmd` z55iL%uKPI2SbtOaawK$Guf&?bx4Keu4;`_uN!PeB6Bg`XdjvX@GuJ(t`!45xJ@2%? z4@ExN)_Qntb8`K7aMqpwM;K%T8JL)ukR8OpC?Kd{plIj-b`i+)0<*M5Y`Jt6^(8(G zys{@)X0QIq|3?_41R0o66N8`uqksZ~B3K_NOav0{6zn(2uoDTE{(po)L4c7FWEa9R zMg~DeLq~zY#KOjf8^O9j{uijKnmo6dx%bueO$Xnm8clQXnte&`o^HtOD?JidxVd;_ z|(~-DW>F>PU(gzTI~D--H8?R|uX?S6$|}bpGnJ^Itz$ z7_N~0aH65&;EcY`;=gAadb-<#m!F-_$nZH$GV!!6=SCyp;5~D^%AyKW_DSd&?-q)` OWxY|3TiCMx|4jg}s(c&( literal 0 HcmV?d00001 diff --git a/tensorflow/core/lib/psnr/testdata/cat_q72.jpg b/tensorflow/core/lib/psnr/testdata/cat_q72.jpg new file mode 100644 index 0000000000000000000000000000000000000000..2b5dd75ac9e391a92f29aebfcad0fd2079bc6029 GIT binary patch literal 2584 zcmex=o+2Ft9NQF)}kSGBAL# z6eBB`4r5?okcP737#J8dplX;H7#Iv0nHYE&7#PwR7#K3!S-|Rx7$Cp{Y7UHc_w)@= zFw(QoGcsUcWnh@V&cMRJz+hlxWWcZh$~H7GU|axk!Te4iLi` z{y${)$S5f(u+rDhE7nU)$xlkvOU}>LuShJ=H`FuGXRxuaC`e4sPAySLN=?tqvsHh; zd%wCuW{RzPxT&v!Z-H}aMy5wqQEG6NUr2IQcCuxPy1gBjO@&oOZb5EpNuokUZcbjY zRfVlO*l?@7Vk?lazPcTkO;JjkRgjAtR7p`vnyot2E#>9qdgaD?`9p6CpE3aMjt~8u0A3twKzYg6s+Fd2&xiU5Uw&3n@VIs zxJrb=(7enNJI}o245-0qGH@Ni`DrEPiAAY)?)mxYIjK<9a8bBAP~r?rEh^5;&$9!u 
z6`&jiBR!bgLBdc`M5wssBo=1`WLCf=V0^fKLp=kir7$U&Qn=mD`8oMT!3BxQsdk=u zC8TzE!;`cf7a~)n z=B3!G7lD%e|Jw}C44iE29PDhI9PAuiT%6pzV*I>3JiO8(q5@(HvPwz{vhwmO8YVg_ zYKH3a^18OVhNk9LR#wW|_Ad4o&L);t<{(2DxwyD^d3dGx`K8QN2oobyhvTS#lT0GQEkiDsUE&2+a1{^eAe}o z3yz6uDYf@RyzY+V(Z#pNte_5A-424w*TCMG5(Miyq6+ZmY{ zm{|mc6a|Epl#N6TjsM?b;9z8AU}R-_%J@FcYMNTt{K7Togl_h!ZeMijUAVe(r?_sT z`{|X(;@Fp$IeMKve_C$u>=oa1%Zk@-xoU3p)##U7@|G1Q>#nS-^DUFsW+}X!Fu%K4 z(>$2(-=d>z$@6r!bvzI|@XODp%~e(6yL_QzC*utE#SbqSJxQK*()gCzBOkNNbu2++zW@ISgS-GE z#G7z`GBOA%8af6FBo;PKgc{FiuUHxDR&Yl&y5C28vT)P>i6OU)4$e83TIc$mDY#Fe zbmE+K%j_2MuCbo>(AJIBxybTW?v6B%<56c`nbiukS0yFQu5R#MxTALGgA0OMpL~SA z%~x4d$mA`{UikIRn;CpUr|!+?PM@>f_6Un>;H;ahRZC}hm-}pB|CI5#P)Rhi=jHDX z)7jrOGZgQhZau8Sxo>H`(wod;qn*tms=tL^KPlaDOkx3lvC4W=%Y*H(WX}LfZ2}2X zsu-Q(XC7?booVTuJ>Tl=%~>aAI4(4Qn11R15e6AS24-;VAce1>fTE$JLL%7bpcoeL zTUFQbVPTT1(UeazccheB{vNt4QgM9kbT4re)f*umr*r~cWiS3P#4bMa4 zk{MhWz>*szE)xp_CpK;b+r`MhXfKfHu+wPb&OKPg=L)6cPh^Vtpi&Iw->vyNIV z;X0+&eq`^J8=lwP#eRoNrtn{QZQ=3Z$9IuCt_`b_ZXIur5YrLedBeb-t zwBz5zd%T*oJxijpuTlTCkpjcCt5e@Sw^A(K+VWuI{!N!Vxz@17u~Zz5e*?LKiaPHJ0}^4)~tBcyuCK@5Id{h!fD<&{5Nu_ zXFmShDAE&ZJ3B|CVA(@US%#^~?yomD)Sg*3jr-o;*`m`Va!rE{Px*3s&%qlt;*UT6 z?QB2P(wltKH)jR!e+H$fEmLfkn}s!~w{4hpXu>t8FJAWqUY_V_DO-7A=CAYrZvp^N C++Fej literal 0 HcmV?d00001 diff --git a/tensorflow/core/lib/psnr/testdata/cat_q95.jpg b/tensorflow/core/lib/psnr/testdata/cat_q95.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7fa3c3157fbfa4f02bc5feb726e46b9a33cc2f2f GIT binary patch literal 4032 zcmex=o+2Ft9NQF)}kSGBAL# z6eBB`4r5?okcP737#J8dplX;H7#Iv0nHYE&7#PwR7#K3!S-|Rx7$Cp{Y7UHc_w)@= zFw(QoGcsUcWnh@V&cMRJz+hlxWWcZh$~H7GU|axk!Te4iLi` z{y${)$S5f(u+rDhE7nU)$xlkvOU}>LuShJ=H`FuGXRxuaC`e4sPAySLN=?tqvsHh; zd%wCuW{RzPxT&v!Z-H}aMy5wqQEG6NUr2IQcCuxPy1gBjO@&oOZb5EpNuokUZcbjY zRfVlO*l?@7Vk?lazPcTkO;JjkRgjAtR7p`vnyot2E#>9qdgaD?`9p6CpE3aMjt~8u0A3twKzYg6s+Fd2&xiU5Uw&3n@VIs zxJrb=(7enNJI}o245-0qGH@Ni`DrEPiAAY)?)mxYIjK<9a8bBAP~r?rEh^5;&$9!u 
z6`&jiBR!bgLBdc`M5wssBo=1`WLCf=V0^fKLp=kir7$U&Qn=mD`8oMT!3BxQsdk=u zC8TzE!;`cf7a~)n z=B3!G7lD%e|Jw}C3`~q503w+{fQ1=Eva+x+v#_zUv4Jr=CkGolCp#M(2R8>N7YML% zaPx3;@qjoWLl{ANn3!0ZnOV8m+1NoUNW=dRF$i)nI54;}GYT>=2{JMZGX6ipAkDzQ z#0)C6pgEs`ft`bem5rH!2_(sgF3HTo$i&LP#{T~n0}nGJ1Cs!=00RT#?hp1ZPwtvu zo_l>!=o=erd2Kr{`&s9&v1Pr^-NJ15=+xI`=cCR%nU=WDPSv+UYV~4XjTn>9H)Q6{ zee1CxC&?hb5Fw? z+F^VWifh7G-H7>ibuZ^3!yCsu*`BEAMn4Rl`qZnDmrbuX+B9s7LdEf_>tDhHpfGBKdlpDH@Ui4Cr7%~ zv@+=JR&yTTeQw7p+|(@XV^7S!-V+y~aV5fGVezWx z-?Dek*?9Kd?3ar+_)DfZxrpv{EIaL;r1kyvx6t!q0s6*wy_=*S^S@jyy!zd&Enl_? z&oC2O=b99Tk$ts;*-Df*}#dNt8O>Ut^a?7L0J$Sd`wKtph^v# zei)gU1%+4?1q_9ij2r_K!2!t3$Y{?P^iyd`#IGZJ`Ig+j+BZ`%Xn$(i(u>Jmi+pZM z%UD|#T8f6 z=wc)tmp1Ferii z#mvCS!VLBkh+t$AWEN5sW>GS96fklEdy5I=t^TbOkMGpj(Y@5#3j?C$uJVEUS3uBDShitv66LW8uW9Gtx8sgM@lIeT62b zR2fNfJSj^$zx~AIU&6Dc7j6=Isgd(mASK>M?BwdKSup*TXbycm3c4M-R#L( z`OLShY=T)_@?7r^pZb+gU+a^6uxnz$DKpcXDMyd3pMFR>s!^e^&S2ZVy>VP;ex($+ zua$rNL3dfhQ4?fvC=~G9af4tU7HKv05!P};N-Rm~}+^YqJ z+_JCvyP3oK_rAX>l6iK;n%go`dfALG*0Y1vHE-5 z++BJJm)$=LbO)X62-@{Ku;2DaQK?Dy9(VIsYu-8iGue1h(O3WBx@$|P#lEk7Q8Ibe z$778hw|3<7xb|P4xGv))&kMG1DgR!Tq{YwBo$@~FvZK-W$ehe;mb(0x4um;G8~i@^ z^>Cm5-SX7uF)`lR@1wT*w_C?lVSLw?MA0r9^KCM$xHt~I!CHN@eitK9} zbDgTz8v$QGuT%11jk;Z`CAeowj<#dWuD0T=kGapXK{-r-5tPLkS>XjVBZHu#p<`lT zVdKPw2QO~?0F5+8d&RJpf`Z4d5_q-R?(Uj)mj9T|sY5nfj;!%sJ@;MPyoBy$ccM<` z7X?;2b~9#GT(*ia6#x9Js6uwG@`aw;+$np7R_ym~&402waBhG0ydCOXKW5C?CBJ0N zT?XdsyjR4QK0JH+^IdS?O9aX7(KG!6C<5*eN_5WSXvtx0<`Y%tvbvk5;KGZW0JQM#-_`olQ^>+*d zXFC1S2}<2wpQ3X1z{lD5zJwITUOB*3FrDdhrmI1#(x(;i0w3>}t*!VvZI#tNahFSX z!YroC^eZLVw`vBt+|04tseedI-gf1CU&|NocUZeKCqMgh{LO`mSE}rxkD}E!taWhs zCG@1<${`%dF1)td? 
zJP(Un|CqLJ!ktMhY5(5nO}}9F!M!rBbL)B`bsJA<<(#?{#%X_@_X%yf*=Kg)X|ey& zrT4rKCZE0$z1Lj8vbyK@K9wlvB|W-*32aUaU$;ePxE{%RGC#GYSs`V{f#z!7jX9Y@ z56(TFI(y2zX~O@HFvx(s0BV^q!U}Lm@(L6v1Uny8#0hZ8eLKBBp;w_d$KsRo5wY2p z^*M%L^WPaST{c5SU2o5x_`H*UZ&*a!vA^B&{Tn-T1oOGO2kNu_BZ8c+A?%_ zm;^66G$TwlIdTKP6Z`vf77rYByyVmOzL$!c$glL%KK{9|S;?QGS(T4ppJQcG^jyw0 z`@Zp>sX{_4FaOE^Y|7BKczXKLi<6a-G!rLozb5zX(?pkDHr4N~TkUr?Zk2pG{aJp+ zYj>67UQAZ*$4nX(3>J#sd~I?3#~tCn235~~r4)MpSupEw;a<^=U%uF;6gVx+Tj^CZ zBd~|*#7n1kt<`hh?A@}(bM4zJdv=6~s?FZVneJjYH~D=^%HP8`uO4qrpTtotd!_lQ&-t)f4(My%>7xUKB_1}f~g39Y>TvX)DXJ?zI zJ||_Pdw<@Q^7r)>C08^=Y(6iJxY1~vJVP+m$vowE`N!_lfBu-5%`~^%^ZEeG#H9}M zy`L+l6&>!|YqS4%XWo6m)w|cutYlfF9~0AV=yv@1@_#!nX$bcy`^|k>Z{7IF@BYD{ z`FAvLTiMC0W`6h>r;oZ&;JVUHL&+3ILqa{ zbLBWA@A3PXp-Huj+&=$`vOJBVS&+OcPzkj;d)LS2mxc;?j@4DB&zRiF9vu57;=g&X?{AR!KZ2aDzPbd7p zus%K|a@ysWU%sjL8-A;;w~;fu{YU>jDL#VvU1t6AryZ}K^sT!6XX4q(;e1Ji0|TZN zrn2w)ZJC>YKGz(7otu|aSGecivvbc*TALV>7{(m4M2^4Cw0vZ>bMY&)`jpR4rZ!6u z89hiYdi>SQ-R!^3{f||%rDw;>tlz$KbLA&K-yV`<{piQ4sJ!*&|24SD(5(&v^B|Tj}A4Z*?!OlmBP*sYvc0(@&;^b=;H1^{4mWKRDHU z!;>8aPWxiw_k4c!c>3$9Pkx&;k@nnLoMgHb(y5= zqnGx+OJB#PTvuQI^zX;me%mSSo0nu?-@fqpBiD*r{?=?=hJ=qBcK!czz0Fv3?&~up zWgq_Tms_@0u4cbh@713bl{e=6UgU7>meIWr!fWChva4--j?PRMe^;4&Gv4(2w<_gn z+upLr%jw@WxS!88L3jO?!|rGGeyrnexV(C<`MycZ!|vv@M7&?Jeb0jZ*F5*F%-(d4 zA>h}uw-R&nAOFZ_4G?fG+W&!BbE96@Dwl6|g$=HH4Lr}=&6H+FHXZyvO^9hjhyd%{ zei`3uk{9}&INchj1&CyDx{^bkQoEJF(BL7t!Snt^`R3FH$vJukj?PA(-!4;jn8A3n-0#cAPmfCPA7Nw&TE|xT+UC`*-?9l0d@imN zi(=oJFCMFQE7Y&}_lq6kD$Kz$`95}T*Rz(`Y~43;Lh;M!7b!b}#NW zu4<3xMppBt{@{=M74dRk#-=@HwIA26uguAj(HM?nbz1Q}=pUTjA2yfw(7f)?c^gfbfyIZiooZt|ju+>=~|dy`$9m}W^hoQ=8Q(Oe#% z(a3Y<+20Fn#;12Ct(#QL9p<*KbMimE@}+x^)(1AcJ={FK?)%B9hb!+t^JNI@H_!Wb z^p*4dqLjn$93%hc&f8o0KF;-xPw4$wd+XCzebQy^yeM~mNzp2W*!x@4Ud)%>`)cw2 z_uJj}*?vefyE<#;u1{)rW@IKcl+C>VW}h0zmN5RfZy(k3;;ekHCq7~+t3E7$|L2W& zUnZ=pu5Xi-nX~%ezsxP?eu~X%n;Se=-{yPl>g9XZFH)$g{`z`{ZS1vs)2){5+i7Ay#4tha)lZzMhhKW2#S+nbTx`#wygYTEg 
z&67W~o!u&7pm33A>%uboqLkH}4Vj(MC7*i|893c$O;p%iY&qfNNe}<~Z6>Fm)LX5e!|mkLbtbLtI{(@F z4vWUbEPkkb-70x_JD6k9 z-thkP{6!`EdJgII{tC|tn0(G$e?r*X52qHyzAd}q;Gg*O$_LeV-II2h1>1kxdwl)V zQ|9O0^zN{2hj ze{!naa^lUOLrLq_-n)~%zVFt*;3I)LA+yEb7M1e$hr0E{5zxG$@$mK1EyrY>a5%Dvu}Nr zvVq3@nqS|3I=-}amDzCE@Bh8+cEvY-hAaDSI{$xW{er}6uh(zMtCl~fv-|s|zOvWz z!*1`Lv-kMc;&)fxF~9%u@Xnicmi28e*~{Z37nHu(Jy9*9aP8H$1a2Kk=7% zZDISSEHwFY?8861jHI}{PYP`=UeM`sZD;7!UY00(E>#iMD5t;$I@oQU~)SBpR>C@YclfPTte!0`ASKJ^c#aQ>X{}26BN8TN~H~am+bKlbL z@06C5+3N1U@4?E%>L=2DIor0FTmQRTePf>K``VJ^U0=Tim#_8T{$lrJp|@{qs-PF&g=1N~p)O`BrN&SJ+Q~{Tn zJSTTwOY_ac>b<)`o0Jvo1!{oUKIFXVdfS68ksH!0I&+x+y~_5TH{pGM!U z=DK*V{`;@+wR_+HR&7bVvQ7P+g~Yba->*w|%Y^v9QMEE()x2$UxXqs{m(Jar^u3pX zA>iNEP6y`j<;V8#eSiM?r5m*$pGyCfo_{}f-@7|I_;0=Xa7BCm&eGB?Z~mXD_-4Mp z>cyhib6;#8FYWO@UGwB(($}WnTdi;GoVc3(d*#`m)*eaaA$1>DE>(NgbF5EBVS+*C z?{8B&Q;z3exo6Haqft|`!CS)B6u?CdZsuz%!YFfq}u()z4*}Q$iB}6Zrf+ literal 0 HcmV?d00001 diff --git a/tensorflow/core/lib/ssim/testdata/checkerboard3.png b/tensorflow/core/lib/ssim/testdata/checkerboard3.png new file mode 100644 index 0000000000000000000000000000000000000000..95fa3bbb3ee42673b2b7e52cf84e704f249fa26b GIT binary patch literal 4546 zcmeAS@N?(olHy`uVBq!ia0y~yVEDkmz_5jbiGhJZ^`F^(1_lPs0*}aI1_nI^5N5n8 zBPY$kAXwt*;uunK>+M|sj-1<}$M)yHuU)lj_3zTV?|!q$zToC(_3dzMW)|*caW(Xt zJA>zpOT`cMAMHLof=W(oc1;s`c-%yorH(WlOHfk$;J|({fo;B<^Qwh7n^-ud-Uwy>VmcdEOT#P-S41%fWb;s zEb7qBr*Eb{FMQ85&2guQn8gCy4V&Npcpt9!Ca&$|%UPVJON8V8o-=-5)7G!W^|0h@ zlH{w$-IvAd9`vu%^ZS%A>9T2Xu2=f=>H76AfBs(GebQ+E!|Ok%J&D%i_*c)luz4K^ z6|lzEJC31DY`Zsi`~ALDm*~oA%q^Pvw z-Dy&n)@(3yW?j{I|HJDVZ_A&%@(*6uJ9Br6=mb`V$U4PHff>P%{~kOd*u3}xll!#0 zpNfv1ZhNF^a^%de=C{X!w(sOUX{fTg^KsscbWw%$pRFnHv*yl^`}5nr;$3v0(&r^- zE$e>I-x6skJx5GxN|x8^)7AB#*w)D{;EQu&W|@6#@z?Zw$-9-8Ju5$Jka+G3U(IEi zlkb-?F?Px<|8`UV&vOe?wQ%F8M<1U(VU~aTkxln!WO@4QKVR%GY~ClT*d#1B_i^?9 zmTu9mzi+0yxWV?jeFvsg)}_Y(+seLNy0dP+h=9O`RTZHTA2)dJRA`*D z_Vn&~hm&|0U(e!L@L|P=-uspR6aAW>Y|p)O*GgVv^U0k`EIbETI1C)}zTc=lsA97_ 
zAWCZ1ZDsaW&DZlD{`&X#p_HlhJr4d@uZpV*j8`{%ck<2%y?wTWrFeHdr%q6AP1!=5 z8xM9V-@R!izoU8jp}%q^=I>`I83nZFOvq=St*)=VRJQI>*~JwBTW2XU-Tb`p@PRqp z+LIZt7Pk92c&=&7TJT`2>;0!z@*T;$7ic@ZARl-AHebVk-$>l0;a=cP=YzX7YB^Np znHJ95%6X@)S7v&&M|=K>-Kk6BiaUik0!r5P?T$`o&^TM$fA9Y<&XsnYGCy@WqyrSg zHg?UMXd<{Mz*l&J?DS@bW*%KfhDq5zVi}zbJEz~$-N>XMIPI*_lj)i)36iHAvhTgF zSup4BorBNUdq~^bwes}s{QLJoj!xt&Eq^s>8Rty~Q`F|=S^Ti;5Y)IJv)R<;ypJLa z@2s`*69p8u_sJ`9eK=teqyMbcwXfb`Ny|;Pnq?dBoMg_ge7bk~K2cY`FnPUCv+dTM zW|i43;o#zQPiFti#q;On&r+@8ZDPNmGgqhV@ca5n-Lc!AL`Zs^xc%wRcg5U?*E8Il zPVU(A>~{UvH=iz6o~#Sk(a1DASNFxdF2(F@Pu*HJj@1$KlK!=G&r=HDE@rlUN_F@- z>n$8+Cc^%W$qROdoj=s$96R;ol)G~8IW7cgM$eJ^*kN#dt+Ruh*t@k8x6SiUVB(xw z()hXk6!Vj78EzSyT0a%+ozZXn`L&wUg3z;Eg}V;e7VTmvmeolh&s%JcBaF45sXz`)yZdokn02F4(>(`hGjm$J@S z;uCV%CG=u-_rB&}KBlU(fh;lio4&ELS?uAkj59i*-Wt39=QsNY`?@=lXA3bfeb;a9 z+spQ?ls~-E;>Cxkw!cEJPg-5h_pfrAT8(>H_!E?`PM{&7abDx-GPFs=(*Wj_vz@_KW@*yzwlVG_2H8z2fO8~{ub?=Hm}68 zXh&Mpp>HWwAI}%;bt-!kRJ7nn{%<9=1s~HYi#FSOHCLAT1>NHP9BCzf%w>DKbrWaq zF<~98PkdQ*$wwH4pP8Lb_DZ!VxlyF4b6RFnZw1G7bFKj2)C)%~mPR^f9}RvHW0-o* z{YFQ~U%UJ4?;b2I_^&a2!mOoPGoMMg`vrOVX0<)LrP$b}6(TWF=*h1$i48%ahCEjd zXX|=+7%UQ)_D8Btz-Y>mIbB6<8S|Xf7kVi5DBNxBqqOMSvs{E}dwlPG|MOyZH|560~S6{&Fl%!SDa+wja)0xdN$^{Q`(k8SG-oS z9pAZYN|fhX8`+yLW;C-*Fi>$8o3#I8>XipEK_@l{rWWtJt?=}qag%GI%(2Z^j@9a& z@XZL@BKjsNT4%b}_Ra}PY?oU}k_Ip4t4|Gxg?2DQQKytr z8^X;icF1m#obXpU;!pSS{LgDO6q@@l)~&1iY8dq~Yt5$n?!xEtzZ{MK|Lf;gwe9nC z1uP17&3L!}>-zmghV%F{KXq?Pa*?N;rQP>Y%KPwB|}ub29*Tzh=8`QNLj`H%m0mXe(<@b=^Vg{&e4D(0*2o-k|7yP4zo zq-?|GD}672EZ}Zb(aPDY+wX3~o7en6v61s<&aNHn*@fBT7fv*goocoG?4K5~9tqza zV(S?8e|*9)VS(WE+n*|09$Vcn5D0Me!-V7{qJd5J2`3fJ>C=_TqR%b%=|z$RrO+u z*Mu{3j9ZsI*>`UHl)Ep_Y=2~@vYUD0J~p4cyGnVU8YTiqLo)(gA6HDwZJ&6v@ZRbM z!N|$w<`bQz1tY{5M6WMAJ*DfZt7~u>?@1-a&3)oGw@FOCZFN6r-|xx#zr#cV&gi9g z|Jn9Ed0WFzUJdaHhOw`M-ySqhyPD5p$}sDy(eJ0?`ywnC$Hz?!aJGB&_~}j49o`vv zGo2JTEsKBN{G9vjfq1%7z~r>qe=gnM`){Ui-n_fU9$T$+>Yt|XGvM{Ex|hm7<^8YA z;WgS^DVKj$OuhGkJ7V31CtE$e6ONTEw$HP^&nDiIb$a=6bNT9r+xw=QXWjg~c~Q%r z%=p=PELK}RcnD;f0qid@R-QT 
zy$+F?z0Q&0jDg9F*L=NgDH^J`1EgmE5sUiWZXmLt;`s8(eEO}ub$6A%X*-$Bn4~%L zzSm@fgFULzCzsql`&+XxN4&(9LzaB}@mvX`g-VEqqk_z0#|a*9_Z#b5wxzUQ=aGDz zt+vNIkwK5to}Tnyz*<_)bEqk7(W%9tULJk?-Ki6z4ib1i-f$N#3GfDp_=HB zeNy+IqQYS>uM;P~-g9#D;A9oan7384!^3O?Bh#$w?y31V891&^c=BCPB*8#&yXIC+ z$-haeEuj&tlP0`NxS|qJ@F=EG{@@`d_t$67?vT0lL}QClnz`9a!Ng}XCkHvt+HyEz zLi)N`6PDDeH=TE@=P7KrJj~>EZ0XXbt34<6gE*Slq%Uo-xv#e@ZQjkQh@%%ZEp<04 zzmny+TX%WqqkX?Uci!0hd#(Jwoe|HH)@h$Dl(7Fg?R@WML*J9RT^5m7&%N1E_wKKb z_i;5dCbr9mT`NBuzpu2=Z%$4Zb(B23wXUGT;LMp#$u|CLr%t|nI{w#hd3(#9Yt9zd znN8H%w)}Jd|HfSZnSXlqB7TXrs0dhGx>&WCd eax+{~y)(_iY^6-qjxTpB44RuF zVSJ0Hs8@ORls4~Z+igytbJ?36wPvJEsZ4tiYIr8dIx4q+TJq&5(;CGc6Sw6CAGA>T zXW-(cIaM0YcsLpD-~B!Dm1Tw`o7Uy^ zCj9S3+R_a6-~1||a^b?6NsMVH*Ov-Sl%6tyg)49SjpcWaY949kg{7rT z;g4hA_$S|bx;I@W?i$~6nZEUT?VIH$`14!MxyEE6a%Xo;s@aT2Y7tygZQpFP;b)kLN5%r#c=^KZXOfH0w5lz!dAdIG z<^t87e|b4g7B1UkA(fsgbLGyu8B1yTSfUj=(uucv-Y3uZ$Ix7Wqq<_wQt>h_xSRyCvO&h*_rTU%~kojr}61) zt;!4=IKpmbeSUFyQ}u#msmK7QdoRD%ud}(7?<_R2W#fzD{5w6a>+U9mOby@j)4ML^ zsPE+4Pl`J&x6Xe1>hSy@vuD4J^t<|(v2lLH{~7tNjI1?hUOxB#_pnX)u34Y+`Ds_( z*6;hCR;ntx|IM-!OTO-?dUtvKx^iu`N?jAxiP~%9_g3aD{rd0Q)8ex%tzWs$?~S^) z`tT!*SYwwpc^|IM-&)ydoAnL@^(GZrB0FzI@RwkDCltWW~}S|SCG*jcD8bEekY|ZE#wa7KlV3j XYft!36v<>@U|{fc^>bP0l+XkKWFcvn literal 0 HcmV?d00001 diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index c4f03906fb..e0559f865d 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -1847,13 +1847,16 @@ py_library( ":control_flow_ops", ":framework", ":framework_for_generated_wrappers", + ":gradients", ":image_ops_gen", ":math_ops", + ":nn", ":nn_ops_gen", ":random_ops", ":string_ops", ":util", ":variables", + "//third_party/py/numpy", ], ) diff --git a/tensorflow/python/ops/image_ops.py b/tensorflow/python/ops/image_ops.py index ae52d32fea..68be9ccdd6 100644 --- a/tensorflow/python/ops/image_ops.py +++ b/tensorflow/python/ops/image_ops.py @@ -69,6 +69,11 @@ See the @{$python/image} guide. 
@@non_max_suppression @@sample_distorted_bounding_box @@total_variation +@@psnr +@@ssim +@@ssim_multiscale +@@image_gradients +@@sobel_edges """ from __future__ import absolute_import from __future__ import division diff --git a/tensorflow/python/ops/image_ops_impl.py b/tensorflow/python/ops/image_ops_impl.py index ca8806a095..1088135b46 100644 --- a/tensorflow/python/ops/image_ops_impl.py +++ b/tensorflow/python/ops/image_ops_impl.py @@ -18,6 +18,8 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -29,6 +31,8 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_image_ops from tensorflow.python.ops import gen_nn_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import string_ops from tensorflow.python.ops import variables @@ -1890,3 +1894,489 @@ def yuv_to_rgb(images): _yuv_to_rgb_kernel, dtype=images.dtype, name='kernel') ndims = images.get_shape().ndims return math_ops.tensordot(images, kernel, axes=[[ndims - 1], [0]]) + + +def _verify_compatible_image_shapes(img1, img2): + """Checks if two image tensors are compatible for applying SSIM or PSNR. + + This function checks if two sets of images have ranks at least 3, and if the + last three dimensions match. + + Args: + img1: Tensor containing the first image batch. + img2: Tensor containing the second image batch. + + Returns: + A tuple containing: the first tensor shape, the second tensor shape, and a + list of control_flow_ops.Assert() ops implementing the checks. + + Raises: + ValueError: When static shape check fails. 
+ """ + shape1 = img1.get_shape().with_rank_at_least(3) + shape2 = img2.get_shape().with_rank_at_least(3) + shape1[-3:].assert_is_compatible_with(shape2[-3:]) + + if shape1.ndims is not None and shape2.ndims is not None: + for dim1, dim2 in zip(reversed(shape1[:-3]), reversed(shape2[:-3])): + if not (dim1 == 1 or dim2 == 1 or dim1.is_compatible_with(dim2)): + raise ValueError( + 'Two images are not compatible: %s and %s' % (shape1, shape2)) + + # Now assign shape tensors. + shape1, shape2 = array_ops.shape_n([img1, img2]) + + # TODO(sjhwang): Check if shape1[:-3] and shape2[:-3] are broadcastable. + checks = [] + checks.append(control_flow_ops.Assert( + math_ops.greater_equal(array_ops.size(shape1), 3), + [shape1, shape2], summarize=10)) + checks.append(control_flow_ops.Assert( + math_ops.reduce_all(math_ops.equal(shape1[-3:], shape2[-3:])), + [shape1, shape2], summarize=10)) + return shape1, shape2, checks + + +@tf_export('image.psnr') +def psnr(a, b, max_val, name=None): + """Returns the Peak Signal-to-Noise Ratio between a and b. + + This is intended to be used on signals (or images). Produces a PSNR value for + each image in batch. + + The last three dimensions of input are expected to be [height, width, depth]. + + Example: + + ```python + # Read images from file. + im1 = tf.decode_png('path/to/im1.png') + im2 = tf.decode_png('path/to/im2.png') + # Compute PSNR over tf.uint8 Tensors. + psnr1 = tf.image.psnr(im1, im2, max_val=255) + + # Compute PSNR over tf.float32 Tensors. + im1 = tf.image.convert_image_dtype(im1, tf.float32) + im2 = tf.image.convert_image_dtype(im2, tf.float32) + psnr2 = tf.image.psnr(im1, im2, max_val=1.0) + # psnr1 and psnr2 both have type tf.float32 and are almost equal. + ``` + + Arguments: + a: First set of images. + b: Second set of images. + max_val: The dynamic range of the images (i.e., the difference between the + maximum and the minimum allowed values). + name: Namespace to embed the computation in.
+ + Returns: + The scalar PSNR between a and b. The returned tensor has type `tf.float32` + and shape [batch_size]. + """ + with ops.name_scope(name, 'PSNR', [a, b]): + # Need to convert the images to float32. Scale max_val accordingly so that + # PSNR is computed correctly. + max_val = math_ops.cast(max_val, a.dtype) + max_val = convert_image_dtype(max_val, dtypes.float32) + a = convert_image_dtype(a, dtypes.float32) + b = convert_image_dtype(b, dtypes.float32) + mse = math_ops.reduce_mean(math_ops.squared_difference(a, b), [-3, -2, -1]) + psnr_val = math_ops.subtract( + 20 * math_ops.log(max_val) / math_ops.log(10.0), + np.float32(10 / np.log(10)) * math_ops.log(mse), + name='psnr') + + _, _, checks = _verify_compatible_image_shapes(a, b) + with ops.control_dependencies(checks): + return array_ops.identity(psnr_val) + +_SSIM_K1 = 0.01 +_SSIM_K2 = 0.03 + + +def _ssim_helper(x, y, reducer, max_val, compensation=1.0): + r"""Helper function for computing SSIM. + + SSIM estimates covariances with weighted sums. The default parameters + use a biased estimate of the covariance: + Suppose `reducer` is a weighted sum, then the mean estimators are + \mu_x = \sum_i w_i x_i, + \mu_y = \sum_i w_i y_i, + where w_i's are the weighted-sum weights, and covariance estimator is + cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) + with assumption \sum_i w_i = 1. This covariance estimator is biased, since + E[cov_{xy}] = (1 - \sum_i w_i ^ 2) Cov(X, Y). + For SSIM measure with unbiased covariance estimators, pass as `compensation` + argument (1 - \sum_i w_i ^ 2). + + Arguments: + x: First set of images. + y: Second set of images. + reducer: Function that computes 'local' averages from set of images. + For non-convolutional version, this is usually tf.reduce_mean(x, [1, 2]), + and for convolutional version, this is usually tf.nn.avg_pool or + tf.nn.conv2d with weighted-sum kernel.
+ max_val: The dynamic range (i.e., the difference between the maximum + possible allowed value and the minimum allowed value). + compensation: Compensation factor. See above. + + Returns: + A pair containing the luminance measure, and the contrast-structure measure. + """ + c1 = (_SSIM_K1 * max_val) ** 2 + c2 = (_SSIM_K2 * max_val) ** 2 + + # SSIM luminance measure is + # (2 * mu_x * mu_y + c1) / (mu_x ** 2 + mu_y ** 2 + c1). + mean0 = reducer(x) + mean1 = reducer(y) + num0 = mean0 * mean1 * 2.0 + den0 = math_ops.square(mean0) + math_ops.square(mean1) + luminance = (num0 + c1) / (den0 + c1) + + # SSIM contrast-structure measure is + # (2 * cov_{xy} + c2) / (cov_{xx} + cov_{yy} + c2). + # Note that `reducer` is a weighted sum with weight w_k, \sum_i w_i = 1, then + # cov_{xy} = \sum_i w_i (x_i - \mu_x) (y_i - \mu_y) + # = \sum_i w_i x_i y_i - (\sum_i w_i x_i) (\sum_j w_j y_j). + num1 = reducer(x * y) * 2.0 + den1 = reducer(math_ops.square(x) + math_ops.square(y)) + c2 *= compensation + cs = (num1 - num0 + c2) / (den1 - den0 + c2) + + # SSIM score is the product of the luminance and contrast-structure measures. + return luminance, cs + + +def _fspecial_gauss(size, sigma): + """Function to mimic the 'fspecial' gaussian MATLAB function.""" + size = ops.convert_to_tensor(size, dtypes.int32) + sigma = ops.convert_to_tensor(sigma) + + coords = math_ops.cast(math_ops.range(size), sigma.dtype) + coords -= math_ops.cast(size - 1, sigma.dtype) / 2.0 + + g = math_ops.square(coords) + g *= -0.5 / math_ops.square(sigma) + + g = array_ops.reshape(g, shape=[1, -1]) + array_ops.reshape(g, shape=[-1, 1]) + g = array_ops.reshape(g, shape=[1, -1]) # For tf.nn.softmax(). + g = nn_ops.softmax(g) + return array_ops.reshape(g, shape=[size, size, 1, 1]) + + +def _ssim_per_channel(img1, img2, max_val=1.0): + """Computes SSIM index between img1 and img2 per color channel. + + This function matches the standard SSIM implementation from: + Wang, Z., Bovik, A. C., Sheikh, H. 
R., & Simoncelli, E. P. (2004). Image + quality assessment: from error visibility to structural similarity. IEEE + transactions on image processing. + + Details: + - 11x11 Gaussian filter of width 1.5 is used. + - k1 = 0.01, k2 = 0.03 as in the original paper. + + Args: + img1: First image batch. + img2: Second image batch. + max_val: The dynamic range of the images (i.e., the difference between the + maximum and the minimum allowed values). + + Returns: + A pair of tensors containing channel-wise SSIM and contrast-structure + values. The shape is [..., channels]. + """ + filter_size = constant_op.constant(11, dtype=dtypes.int32) + filter_sigma = constant_op.constant(1.5, dtype=img1.dtype) + + shape1, shape2 = array_ops.shape_n([img1, img2]) + checks = [ + control_flow_ops.Assert(math_ops.reduce_all(math_ops.greater_equal( + shape1[-3:-1], filter_size)), [shape1, filter_size], summarize=8), + control_flow_ops.Assert(math_ops.reduce_all(math_ops.greater_equal( + shape2[-3:-1], filter_size)), [shape2, filter_size], summarize=8)] + + # Enforce the check to run before computation. + with ops.control_dependencies(checks): + img1 = array_ops.identity(img1) + + # TODO(sjhwang): Try to cache kernels and compensation factor. + kernel = _fspecial_gauss(filter_size, filter_sigma) + kernel = array_ops.tile(kernel, multiples=[1, 1, shape1[-1], 1]) + + # The correct compensation factor is `1.0 - tf.reduce_sum(tf.square(kernel))`, + # but to match MATLAB implementation of MS-SSIM, we use 1.0 instead. + compensation = 1.0 + + # TODO(sjhwang): Try FFT. + # TODO(sjhwang): Gaussian kernel is separable in space. Consider applying + # 1-by-n and n-by-1 Gaussian filters instead of an n-by-n filter.
+ def reducer(x): + shape = array_ops.shape(x) + x = array_ops.reshape(x, shape=array_ops.concat([[-1], shape[-3:]], 0)) + y = nn.depthwise_conv2d(x, kernel, strides=[1, 1, 1, 1], padding='VALID') + return array_ops.reshape(y, array_ops.concat([shape[:-3], + array_ops.shape(y)[1:]], 0)) + + luminance, cs = _ssim_helper(img1, img2, reducer, max_val, compensation) + + # Average over the second and the third from the last: height, width. + axes = constant_op.constant([-3, -2], dtype=dtypes.int32) + ssim_val = math_ops.reduce_mean(luminance * cs, axes) + cs = math_ops.reduce_mean(cs, axes) + return ssim_val, cs + + +@tf_export('image.ssim') +def ssim(img1, img2, max_val): + """Computes SSIM index between img1 and img2. + + This function is based on the standard SSIM implementation from: + Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P. (2004). Image + quality assessment: from error visibility to structural similarity. IEEE + transactions on image processing. + + Note: The true SSIM is only defined on grayscale. This function does not + perform any colorspace transform. (If input is already YUV, then it will + compute YUV SSIM average.) + + Details: + - 11x11 Gaussian filter of width 1.5 is used. + - k1 = 0.01, k2 = 0.03 as in the original paper. + + The image sizes must be at least 11x11 because of the filter size. + + Example: + + ```python + # Read images from file. + im1 = tf.decode_png('path/to/im1.png') + im2 = tf.decode_png('path/to/im2.png') + # Compute SSIM over tf.uint8 Tensors. + ssim1 = tf.image.ssim(im1, im2, max_val=255) + + # Compute SSIM over tf.float32 Tensors. + im1 = tf.image.convert_image_dtype(im1, tf.float32) + im2 = tf.image.convert_image_dtype(im2, tf.float32) + ssim2 = tf.image.ssim(im1, im2, max_val=1.0) + # ssim1 and ssim2 both have type tf.float32 and are almost equal. + ``` + + Args: + img1: First image batch. + img2: Second image batch. 
+ max_val: The dynamic range of the images (i.e., the difference between the + maximum and the minimum allowed values). + + Returns: + A tensor containing an SSIM value for each image in batch. Returned SSIM + values are in range (-1, 1], when pixel values are non-negative. Returns + a tensor with shape: broadcast(img1.shape[:-3], img2.shape[:-3]). + """ + _, _, checks = _verify_compatible_image_shapes(img1, img2) + with ops.control_dependencies(checks): + img1 = array_ops.identity(img1) + + # Need to convert the images to float32. Scale max_val accordingly so that + # SSIM is computed correctly. + max_val = math_ops.cast(max_val, img1.dtype) + max_val = convert_image_dtype(max_val, dtypes.float32) + img1 = convert_image_dtype(img1, dtypes.float32) + img2 = convert_image_dtype(img2, dtypes.float32) + ssim_per_channel, _ = _ssim_per_channel(img1, img2, max_val) + # Compute average over color channels. + return math_ops.reduce_mean(ssim_per_channel, [-1]) + + +# Default values obtained by Wang et al. +_MSSSIM_WEIGHTS = (0.0448, 0.2856, 0.3001, 0.2363, 0.1333) + + +@tf_export('image.ssim_multiscale') +def ssim_multiscale(img1, img2, max_val, power_factors=_MSSSIM_WEIGHTS): + """Computes the MS-SSIM between img1 and img2. + + This function assumes that `img1` and `img2` are image batches, i.e. the last + three dimensions are [height, width, channels]. + + Note: The true SSIM is only defined on grayscale. This function does not + perform any colorspace transform. (If input is already YUV, then it will + compute YUV SSIM average.) + + Original paper: Wang, Zhou, Eero P. Simoncelli, and Alan C. Bovik. "Multiscale + structural similarity for image quality assessment." Signals, Systems and + Computers, 2004. + + Arguments: + img1: First image batch. + img2: Second image batch. Must have the same rank as img1. + max_val: The dynamic range of the images (i.e., the difference between the + maximum and the minimum allowed values).
+ power_factors: Iterable of weights for each of the scales. The number of + scales used is the length of the list. Index 0 is the unscaled + resolution's weight and each increasing scale corresponds to the image + being downsampled by 2. Defaults to (0.0448, 0.2856, 0.3001, 0.2363, + 0.1333), which are the values obtained in the original paper. + + Returns: + A tensor containing an MS-SSIM value for each image in batch. The values + are in range [0, 1]. Returns a tensor with shape: + broadcast(img1.shape[:-3], img2.shape[:-3]). + """ + # Shape checking. + shape1 = img1.get_shape().with_rank_at_least(3) + shape2 = img2.get_shape().with_rank_at_least(3) + shape1[-3:].merge_with(shape2[-3:]) + + with ops.name_scope(None, 'MS-SSIM', [img1, img2]): + shape1, shape2, checks = _verify_compatible_image_shapes(img1, img2) + with ops.control_dependencies(checks): + img1 = array_ops.identity(img1) + + # Need to convert the images to float32. Scale max_val accordingly so that + # SSIM is computed correctly. + max_val = math_ops.cast(max_val, img1.dtype) + max_val = convert_image_dtype(max_val, dtypes.float32) + img1 = convert_image_dtype(img1, dtypes.float32) + img2 = convert_image_dtype(img2, dtypes.float32) + + imgs = [img1, img2] + shapes = [shape1, shape2] + + # img1 and img2 are assumed to be a (multi-dimensional) batch of + # 3-dimensional images (height, width, channels). `heads` contain the batch + # dimensions, and `tails` contain the image dimensions. 
+ heads = [s[:-3] for s in shapes] + tails = [s[-3:] for s in shapes] + + divisor = [1, 2, 2, 1] + divisor_tensor = constant_op.constant(divisor[1:], dtype=dtypes.int32) + + def do_pad(images, remainder): + padding = array_ops.expand_dims(remainder, -1) + padding = array_ops.pad(padding, [[1, 0], [1, 0]]) + return [array_ops.pad(x, padding, mode='SYMMETRIC') for x in images] + + mcs = [] + for k in range(len(power_factors)): + with ops.name_scope(None, 'Scale%d' % k, imgs): + if k > 0: + # Avg pool takes rank 4 tensors. Flatten leading dimensions. + flat_imgs = [ + array_ops.reshape(x, array_ops.concat([[-1], t], 0)) + for x, t in zip(imgs, tails) + ] + + remainder = tails[0] % divisor_tensor + need_padding = math_ops.reduce_any(math_ops.not_equal(remainder, 0)) + # pylint: disable=cell-var-from-loop + padded = control_flow_ops.cond(need_padding, + lambda: do_pad(flat_imgs, remainder), + lambda: flat_imgs) + # pylint: enable=cell-var-from-loop + + downscaled = [nn_ops.avg_pool(x, ksize=divisor, strides=divisor, + padding='VALID') + for x in padded] + tails = [x[1:] for x in array_ops.shape_n(downscaled)] + imgs = [ + array_ops.reshape(x, array_ops.concat([h, t], 0)) + for x, h, t in zip(downscaled, heads, tails) + ] + + # Overwrite previous ssim value since we only need the last one. + ssim_per_channel, cs = _ssim_per_channel(*imgs, max_val=max_val) + mcs.append(nn_ops.relu(cs)) + + # Remove the cs score for the last scale. In the MS-SSIM calculation, + # we use the l(p) at the highest scale. l(p) * cs(p) is ssim(p). + mcs.pop() # Remove the cs score for the last scale. + mcs_and_ssim = array_ops.stack(mcs + [nn_ops.relu(ssim_per_channel)], + axis=-1) + # Take weighted geometric mean across the scale axis. + ms_ssim = math_ops.reduce_prod(math_ops.pow(mcs_and_ssim, power_factors), + [-1]) + + return math_ops.reduce_mean(ms_ssim, [-1]) # Avg over color channels. 
+ + +@tf_export('image.image_gradients') +def image_gradients(image): + """Returns image gradients (dy, dx) for each color channel. + + Both output tensors have the same shape as the input: [batch_size, h, w, + d]. The gradient values are organized so that [I(x+1, y) - I(x, y)] is in + location (x, y). That means that dy will always have zeros in the last row, + and dx will always have zeros in the last column. + + Arguments: + image: Tensor with shape [batch_size, h, w, d]. + + Returns: + Pair of tensors (dy, dx) holding the vertical and horizontal image + gradients (1-step finite difference). + + Raises: + ValueError: If `image` is not a 4D tensor. + """ + if image.get_shape().ndims != 4: + raise ValueError('image_gradients expects a 4D tensor ' + '[batch_size, h, w, d], not %s.', image.get_shape()) + image_shape = array_ops.shape(image) + batch_size, height, width, depth = array_ops.unstack(image_shape) + dy = image[:, 1:, :, :] - image[:, :-1, :, :] + dx = image[:, :, 1:, :] - image[:, :, :-1, :] + + # Return tensors with same size as original image by concatenating + # zeros. Place the gradient [I(x+1,y) - I(x,y)] on the base pixel (x, y). + shape = array_ops.stack([batch_size, 1, width, depth]) + dy = array_ops.concat([dy, array_ops.zeros(shape, image.dtype)], 1) + dy = array_ops.reshape(dy, image_shape) + + shape = array_ops.stack([batch_size, height, 1, depth]) + dx = array_ops.concat([dx, array_ops.zeros(shape, image.dtype)], 2) + dx = array_ops.reshape(dx, image_shape) + + return dy, dx + + +@tf_export('image.sobel_edges') +def sobel_edges(image): + """Returns a tensor holding Sobel edge maps. + + Arguments: + image: Image tensor with shape [batch_size, h, w, d] and type float32 or + float64. The image(s) must be 2x2 or larger. + + Returns: + Tensor holding edge maps for each channel. 
Returns a tensor with shape + [batch_size, h, w, d, 2] where the last two dimensions hold [[dy[0], dx[0]], + [dy[1], dx[1]], ..., [dy[d-1], dx[d-1]]] calculated using the Sobel filter. + """ + # Define vertical and horizontal Sobel filters. + static_image_shape = image.get_shape() + image_shape = array_ops.shape(image) + kernels = [[[-1, -2, -1], [0, 0, 0], [1, 2, 1]], + [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]] + num_kernels = len(kernels) + kernels = np.transpose(np.asarray(kernels), (1, 2, 0)) + kernels = np.expand_dims(kernels, -2) + kernels_tf = constant_op.constant(kernels, dtype=image.dtype) + + kernels_tf = array_ops.tile(kernels_tf, [1, 1, image_shape[-1], 1], + name='sobel_filters') + + # Use depth-wise convolution to calculate edge maps per channel. + pad_sizes = [[0, 0], [1, 1], [1, 1], [0, 0]] + padded = array_ops.pad(image, pad_sizes, mode='REFLECT') + + # Output tensor has shape [batch_size, h, w, d * num_kernels]. + strides = [1, 1, 1, 1] + output = nn.depthwise_conv2d(padded, kernels_tf, strides, 'VALID') + + # Reshape to [batch_size, h, w, d, num_kernels]. 
+ shape = array_ops.concat([image_shape, [num_kernels]], 0) + output = array_ops.reshape(output, shape=shape) + output.set_shape(static_image_shape.concatenate([num_kernels])) + return output diff --git a/tensorflow/python/ops/image_ops_test.py b/tensorflow/python/ops/image_ops_test.py index b67e7cc558..b99aac5be5 100644 --- a/tensorflow/python/ops/image_ops_test.py +++ b/tensorflow/python/ops/image_ops_test.py @@ -20,6 +20,7 @@ from __future__ import print_function import colorsys import functools +import itertools import math import os import time @@ -37,7 +38,9 @@ from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import gen_image_ops +from tensorflow.python.ops import gradients from tensorflow.python.ops import image_ops +from tensorflow.python.ops import image_ops_impl from tensorflow.python.ops import io_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops import random_ops @@ -3328,5 +3331,420 @@ class NonMaxSuppressionTest(test_util.TensorFlowTestCase): image_ops.non_max_suppression(boxes, scores, 3, [[0.5]]) +class VerifyCompatibleImageShapesTest(test_util.TensorFlowTestCase): + """Tests utility function used by ssim() and psnr().""" + + def testWrongDims(self): + img = array_ops.placeholder(dtype=dtypes.float32) + img_np = np.array((2, 2)) + + with self.test_session(use_gpu=True) as sess: + _, _, checks = image_ops_impl._verify_compatible_image_shapes(img, img) + with self.assertRaises(errors.InvalidArgumentError): + sess.run(checks, {img: img_np}) + + def testShapeMismatch(self): + img1 = array_ops.placeholder(dtype=dtypes.float32) + img2 = array_ops.placeholder(dtype=dtypes.float32) + + img1_np = np.array([1, 2, 2, 1]) + img2_np = np.array([1, 3, 3, 1]) + + with self.test_session(use_gpu=True) as sess: + _, _, checks = image_ops_impl._verify_compatible_image_shapes(img1, img2) + with 
self.assertRaises(errors.InvalidArgumentError): + sess.run(checks, {img1: img1_np, img2: img2_np}) + + +class PSNRTest(test_util.TensorFlowTestCase): + """Tests for PSNR.""" + + def _LoadTestImage(self, sess, filename): + content = io_ops.read_file(os.path.join( + "tensorflow/core/lib/psnr/testdata", filename)) + im = image_ops.decode_jpeg(content, dct_method="INTEGER_ACCURATE") + im = image_ops.convert_image_dtype(im, dtypes.float32) + im, = sess.run([im]) + return np.expand_dims(im, axis=0) + + def _LoadTestImages(self): + with self.test_session(use_gpu=True) as sess: + q20 = self._LoadTestImage(sess, "cat_q20.jpg") + q72 = self._LoadTestImage(sess, "cat_q72.jpg") + q95 = self._LoadTestImage(sess, "cat_q95.jpg") + return q20, q72, q95 + + def _PSNR_NumPy(self, orig, target, max_value): + """Numpy implementation of PSNR.""" + mse = ((orig - target) ** 2).mean(axis=(-3, -2, -1)) + return 20 * np.log10(max_value) - 10 * np.log10(mse) + + def _RandomImage(self, shape, max_val): + """Returns an image or image batch with given shape.""" + return np.random.rand(*shape).astype(np.float32) * max_val + + def testPSNRSingleImage(self): + image1 = self._RandomImage((8, 8, 1), 1) + image2 = self._RandomImage((8, 8, 1), 1) + psnr = self._PSNR_NumPy(image1, image2, 1) + + with self.test_session(use_gpu=True): + tf_image1 = constant_op.constant(image1, shape=image1.shape, + dtype=dtypes.float32) + tf_image2 = constant_op.constant(image2, shape=image2.shape, + dtype=dtypes.float32) + tf_psnr = image_ops.psnr(tf_image1, tf_image2, 1.0, "psnr").eval() + self.assertAllClose(psnr, tf_psnr, atol=0.001) + + def testPSNRMultiImage(self): + image1 = self._RandomImage((10, 8, 8, 1), 1) + image2 = self._RandomImage((10, 8, 8, 1), 1) + psnr = self._PSNR_NumPy(image1, image2, 1) + + with self.test_session(use_gpu=True): + tf_image1 = constant_op.constant(image1, shape=image1.shape, + dtype=dtypes.float32) + tf_image2 = constant_op.constant(image2, shape=image2.shape, + dtype=dtypes.float32) 
+ tf_psnr = image_ops.psnr(tf_image1, tf_image2, 1, "psnr").eval() + self.assertAllClose(psnr, tf_psnr, atol=0.001) + + def testGoldenPSNR(self): + q20, q72, q95 = self._LoadTestImages() + + # Verify NumPy implementation first. + # Golden values are generated using GNU Octave's psnr() function. + psnr1 = self._PSNR_NumPy(q20, q72, 1) + self.assertNear(30.321, psnr1, 0.001, msg="q20.dtype=" + str(q20.dtype)) + psnr2 = self._PSNR_NumPy(q20, q95, 1) + self.assertNear(29.994, psnr2, 0.001) + psnr3 = self._PSNR_NumPy(q72, q95, 1) + self.assertNear(35.302, psnr3, 0.001) + + # Test TensorFlow implementation. + with self.test_session(use_gpu=True): + tf_q20 = constant_op.constant(q20, shape=q20.shape, dtype=dtypes.float32) + tf_q72 = constant_op.constant(q72, shape=q72.shape, dtype=dtypes.float32) + tf_q95 = constant_op.constant(q95, shape=q95.shape, dtype=dtypes.float32) + tf_psnr1 = image_ops.psnr(tf_q20, tf_q72, 1, "psnr1").eval() + tf_psnr2 = image_ops.psnr(tf_q20, tf_q95, 1, "psnr2").eval() + tf_psnr3 = image_ops.psnr(tf_q72, tf_q95, 1, "psnr3").eval() + self.assertAllClose(psnr1, tf_psnr1, atol=0.001) + self.assertAllClose(psnr2, tf_psnr2, atol=0.001) + self.assertAllClose(psnr3, tf_psnr3, atol=0.001) + + def testInfinity(self): + q20, _, _ = self._LoadTestImages() + psnr = self._PSNR_NumPy(q20, q20, 1) + with self.test_session(use_gpu=True): + tf_q20 = constant_op.constant(q20, shape=q20.shape, dtype=dtypes.float32) + tf_psnr = image_ops.psnr(tf_q20, tf_q20, 1, "psnr").eval() + self.assertAllClose(psnr, tf_psnr, atol=0.001) + + def testInt(self): + img1 = self._RandomImage((10, 8, 8, 1), 255) + img2 = self._RandomImage((10, 8, 8, 1), 255) + img1 = constant_op.constant(img1, dtypes.uint8) + img2 = constant_op.constant(img2, dtypes.uint8) + psnr_uint8 = image_ops.psnr(img1, img2, 255) + img1 = image_ops.convert_image_dtype(img1, dtypes.float32) + img2 = image_ops.convert_image_dtype(img2, dtypes.float32) + psnr_float32 = image_ops.psnr(img1, img2, 1.0) + with 
self.test_session(use_gpu=True): + self.assertAllClose(psnr_uint8.eval(), psnr_float32.eval(), atol=0.001) + + +class SSIMTest(test_util.TensorFlowTestCase): + """Tests for SSIM.""" + + _filenames = ["checkerboard1.png", + "checkerboard2.png", + "checkerboard3.png",] + + _ssim = np.asarray([[1.000000, 0.230880, 0.231153], + [0.230880, 1.000000, 0.996828], + [0.231153, 0.996828, 1.000000]]) + + def _LoadTestImage(self, sess, filename): + content = io_ops.read_file(os.path.join( + "tensorflow/core/lib/ssim/testdata", filename)) + im = image_ops.decode_png(content) + im = image_ops.convert_image_dtype(im, dtypes.float32) + im, = sess.run([im]) + return np.expand_dims(im, axis=0) + + def _LoadTestImages(self): + with self.test_session(use_gpu=True) as sess: + return [self._LoadTestImage(sess, f) for f in self._filenames] + + def _RandomImage(self, shape, max_val): + """Returns an image or image batch with given shape.""" + return np.random.rand(*shape).astype(np.float32) * max_val + + def testAgainstMatlab(self): + """Tests against values produced by Matlab.""" + img = self._LoadTestImages() + expected = self._ssim[np.triu_indices(3)] + + ph = [array_ops.placeholder(dtype=dtypes.float32) for _ in range(2)] + ssim = image_ops.ssim(*ph, max_val=1.0) + with self.test_session(use_gpu=True): + scores = [ssim.eval(dict(zip(ph, t))) + for t in itertools.combinations_with_replacement(img, 2)] + self.assertAllClose(expected, np.squeeze(scores), atol=1e-4) + + def testBatch(self): + img = self._LoadTestImages() + expected = self._ssim[np.triu_indices(3, k=1)] + + img1, img2 = zip(*itertools.combinations(img, 2)) + img1 = np.concatenate(img1) + img2 = np.concatenate(img2) + + ssim = image_ops.ssim(constant_op.constant(img1), + constant_op.constant(img2), 1.0) + with self.test_session(use_gpu=True): + self.assertAllClose(expected, ssim.eval(), atol=1e-4) + + def testBroadcast(self): + img = self._LoadTestImages()[:2] + expected = self._ssim[:2, :2] + + img = 
constant_op.constant(np.concatenate(img)) + img1 = array_ops.expand_dims(img, axis=0) # batch dims: 1, 2. + img2 = array_ops.expand_dims(img, axis=1) # batch dims: 2, 1. + + ssim = image_ops.ssim(img1, img2, 1.0) + with self.test_session(use_gpu=True): + self.assertAllClose(expected, ssim.eval(), atol=1e-4) + + def testNegative(self): + """Tests against negative SSIM index.""" + step = np.expand_dims(np.arange(0, 256, 16, dtype=np.uint8), axis=0) + img1 = np.tile(step, (16, 1)) + img2 = np.fliplr(img1) + + img1 = img1.reshape((1, 16, 16, 1)) + img2 = img2.reshape((1, 16, 16, 1)) + + ssim = image_ops.ssim(constant_op.constant(img1), + constant_op.constant(img2), 255) + with self.test_session(use_gpu=True): + self.assertLess(ssim.eval(), 0) + + def testInt(self): + img1 = self._RandomImage((1, 16, 16, 3), 255) + img2 = self._RandomImage((1, 16, 16, 3), 255) + img1 = constant_op.constant(img1, dtypes.uint8) + img2 = constant_op.constant(img2, dtypes.uint8) + ssim_uint8 = image_ops.ssim(img1, img2, 255) + img1 = image_ops.convert_image_dtype(img1, dtypes.float32) + img2 = image_ops.convert_image_dtype(img2, dtypes.float32) + ssim_float32 = image_ops.ssim(img1, img2, 1.0) + with self.test_session(use_gpu=True): + self.assertAllClose(ssim_uint8.eval(), ssim_float32.eval(), atol=0.001) + + +class MultiscaleSSIMTest(test_util.TensorFlowTestCase): + """Tests for MS-SSIM.""" + + _filenames = ["checkerboard1.png", + "checkerboard2.png", + "checkerboard3.png",] + + _msssim = np.asarray([[1.000000, 0.091016, 0.091025], + [0.091016, 1.000000, 0.999567], + [0.091025, 0.999567, 1.000000]]) + + def _LoadTestImage(self, sess, filename): + content = io_ops.read_file(os.path.join( + "tensorflow/core/lib/ssim/testdata", filename)) + im = image_ops.decode_png(content) + im = image_ops.convert_image_dtype(im, dtypes.float32) + im, = sess.run([im]) + return np.expand_dims(im, axis=0) + + def _LoadTestImages(self): + with self.test_session(use_gpu=True) as sess: + return 
[self._LoadTestImage(sess, f) for f in self._filenames] + + def _RandomImage(self, shape, max_val): + """Returns an image or image batch with given shape.""" + return np.random.rand(*shape).astype(np.float32) * max_val + + def testAgainstMatlab(self): + """Tests against MS-SSIM computed with Matlab implementation. + + For color images, MS-SSIM scores are averaged over color channels. + """ + img = self._LoadTestImages() + expected = self._msssim[np.triu_indices(3)] + + ph = [array_ops.placeholder(dtype=dtypes.float32) for _ in range(2)] + msssim = image_ops.ssim_multiscale(*ph, max_val=1.0) + with self.test_session(use_gpu=True): + scores = [msssim.eval(dict(zip(ph, t))) + for t in itertools.combinations_with_replacement(img, 2)] + + self.assertAllClose(expected, np.squeeze(scores), atol=1e-4) + + def testUnweightedIsDifferentiable(self): + img = self._LoadTestImages() + ph = [array_ops.placeholder(dtype=dtypes.float32) for _ in range(2)] + scalar = constant_op.constant(1.0, dtype=dtypes.float32) + scaled_ph = [x * scalar for x in ph] + msssim = image_ops.ssim_multiscale(*scaled_ph, max_val=1.0, + power_factors=(1, 1, 1, 1, 1)) + grads = gradients.gradients(msssim, scalar) + with self.test_session(use_gpu=True) as sess: + np_grads = sess.run(grads, feed_dict={ph[0]: img[0], ph[1]: img[1]}) + self.assertTrue(np.isfinite(np_grads).all()) + + def testBatch(self): + """Tests MS-SSIM computed in batch.""" + img = self._LoadTestImages() + expected = self._msssim[np.triu_indices(3, k=1)] + + img1, img2 = zip(*itertools.combinations(img, 2)) + img1 = np.concatenate(img1) + img2 = np.concatenate(img2) + + msssim = image_ops.ssim_multiscale(constant_op.constant(img1), + constant_op.constant(img2), 1.0) + with self.test_session(use_gpu=True): + self.assertAllClose(expected, msssim.eval(), 1e-4) + + def testBroadcast(self): + """Tests MS-SSIM broadcasting.""" + img = self._LoadTestImages()[:2] + expected = self._msssim[:2, :2] + + img = 
constant_op.constant(np.concatenate(img)) + img1 = array_ops.expand_dims(img, axis=0) # batch dims: 1, 2. + img2 = array_ops.expand_dims(img, axis=1) # batch dims: 2, 1. + + score_tensor = image_ops.ssim_multiscale(img1, img2, 1.0) + with self.test_session(use_gpu=True): + self.assertAllClose(expected, score_tensor.eval(), 1e-4) + + def testRange(self): + """Tests against low MS-SSIM score. + + MS-SSIM is a geometric mean of SSIM and CS scores of various scales. + If any of the values is negative so that the geometric mean is not + well-defined, then treat the MS-SSIM score as zero. + """ + with self.test_session(use_gpu=True) as sess: + img1 = self._LoadTestImage(sess, "checkerboard1.png") + img2 = self._LoadTestImage(sess, "checkerboard3.png") + images = [img1, img2, np.zeros_like(img1), + np.full_like(img1, fill_value=255)] + + images = [ops.convert_to_tensor(x, dtype=dtypes.float32) for x in images] + msssim_ops = [image_ops.ssim_multiscale(x, y, 1.0) + for x, y in itertools.combinations(images, 2)] + msssim = sess.run(msssim_ops) + msssim = np.squeeze(msssim) + + self.assertTrue(np.all(msssim >= 0.0)) + self.assertTrue(np.all(msssim <= 1.0)) + + def testInt(self): + img1 = self._RandomImage((1, 180, 240, 3), 255) + img2 = self._RandomImage((1, 180, 240, 3), 255) + img1 = constant_op.constant(img1, dtypes.uint8) + img2 = constant_op.constant(img2, dtypes.uint8) + ssim_uint8 = image_ops.ssim_multiscale(img1, img2, 255) + img1 = image_ops.convert_image_dtype(img1, dtypes.float32) + img2 = image_ops.convert_image_dtype(img2, dtypes.float32) + ssim_float32 = image_ops.ssim_multiscale(img1, img2, 1.0) + with self.test_session(use_gpu=True): + self.assertAllClose(ssim_uint8.eval(), ssim_float32.eval(), atol=0.001) + + +class ImageGradientsTest(test_util.TensorFlowTestCase): + + def testImageGradients(self): + shape = [1, 2, 4, 1] + img = constant_op.constant([[1, 3, 4, 2], [8, 7, 5, 6]]) + img = array_ops.reshape(img, shape) + + expected_dy = np.reshape([[7, 4, 1, 4],
[0, 0, 0, 0]], shape) + expected_dx = np.reshape([[2, 1, -2, 0], [-1, -2, 1, 0]], shape) + + dy, dx = image_ops.image_gradients(img) + with self.test_session(): + actual_dy = dy.eval() + actual_dx = dx.eval() + self.assertAllClose(expected_dy, actual_dy) + self.assertAllClose(expected_dx, actual_dx) + + def testImageGradientsMultiChannelBatch(self): + batch = [[[[1, 2], [2, 5], [3, 3]], + [[8, 4], [5, 1], [9, 8]]], + [[[5, 3], [7, 9], [1, 6]], + [[1, 2], [6, 3], [6, 3]]]] + + expected_dy = [[[[7, 2], [3, -4], [6, 5]], + [[0, 0], [0, 0], [0, 0]]], + [[[-4, -1], [-1, -6], [5, -3]], + [[0, 0], [0, 0], [0, 0]]]] + + expected_dx = [[[[1, 3], [1, -2], [0, 0]], + [[-3, -3], [4, 7], [0, 0]]], + [[[2, 6], [-6, -3], [0, 0]], + [[5, 1], [0, 0], [0, 0]]]] + + batch = constant_op.constant(batch) + assert batch.get_shape().as_list() == [2, 2, 3, 2] + dy, dx = image_ops.image_gradients(batch) + with self.test_session(use_gpu=True): + actual_dy = dy.eval() + actual_dx = dx.eval() + self.assertAllClose(expected_dy, actual_dy) + self.assertAllClose(expected_dx, actual_dx) + + def testImageGradientsBadShape(self): + # [2 x 4] image but missing batch and depth dimensions. 
+ img = constant_op.constant([[1, 3, 4, 2], [8, 7, 5, 6]]) + with self.assertRaises(ValueError): + image_ops.image_gradients(img) + + +class SobelEdgesTest(test_util.TensorFlowTestCase): + + def testSobelEdges1x2x3x1(self): + img = constant_op.constant([[1, 3, 6], [4, 1, 5]], + dtype=dtypes.float32, shape=[1, 2, 3, 1]) + expected = np.reshape([[[0, 0], [0, 12], [0, 0]], + [[0, 0], [0, 12], [0, 0]]], [1, 2, 3, 1, 2]) + sobel = image_ops.sobel_edges(img) + with self.test_session(use_gpu=True): + actual_sobel = sobel.eval() + self.assertAllClose(expected, actual_sobel) + + def testSobelEdges5x3x4x2(self): + batch_size = 5 + plane = np.reshape([[1, 3, 6, 2], [4, 1, 5, 7], [2, 5, 1, 4]], + [1, 3, 4, 1]) + two_channel = np.concatenate([plane, plane], axis=3) + batch = np.concatenate([two_channel] * batch_size, axis=0) + img = constant_op.constant(batch, dtype=dtypes.float32, + shape=[batch_size, 3, 4, 2]) + + expected_plane = np.reshape([[[0, 0], [0, 12], [0, 10], [0, 0]], + [[6, 0], [0, 6], [-6, 10], [-6, 0]], + [[0, 0], [0, 0], [0, 10], [0, 0]]], + [1, 3, 4, 1, 2]) + expected_two_channel = np.concatenate( + [expected_plane, expected_plane], axis=3) + expected_batch = np.concatenate([expected_two_channel] * batch_size, axis=0) + + sobel = image_ops.sobel_edges(img) + with self.test_session(use_gpu=True): + actual_sobel = sobel.eval() + self.assertAllClose(expected_batch, actual_sobel) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/tools/api/golden/tensorflow.image.pbtxt b/tensorflow/tools/api/golden/tensorflow.image.pbtxt index bda1c2bf85..3fc64dae88 100644 --- a/tensorflow/tools/api/golden/tensorflow.image.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.image.pbtxt @@ -100,6 +100,10 @@ tf_module { name: "hsv_to_rgb" argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " } + member_method { + name: "image_gradients" + argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None" + } member_method 
{ name: "is_jpeg" argspec: "args=[\'contents\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -116,6 +120,10 @@ tf_module { name: "per_image_standardization" argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None" } + member_method { + name: "psnr" + argspec: "args=[\'a\', \'b\', \'max_val\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " + } member_method { name: "random_brightness" argspec: "args=[\'image\', \'max_delta\', \'seed\'], varargs=None, keywords=None, defaults=[\'None\'], " @@ -188,6 +196,18 @@ tf_module { name: "sample_distorted_bounding_box" argspec: "args=[\'image_size\', \'bounding_boxes\', \'seed\', \'seed2\', \'min_object_covered\', \'aspect_ratio_range\', \'area_range\', \'max_attempts\', \'use_image_if_no_bounding_boxes\', \'name\'], varargs=None, keywords=None, defaults=[\'None\', \'None\', \'0.1\', \'None\', \'None\', \'None\', \'None\', \'None\'], " } + member_method { + name: "sobel_edges" + argspec: "args=[\'image\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "ssim" + argspec: "args=[\'img1\', \'img2\', \'max_val\'], varargs=None, keywords=None, defaults=None" + } + member_method { + name: "ssim_multiscale" + argspec: "args=[\'img1\', \'img2\', \'max_val\', \'power_factors\'], varargs=None, keywords=None, defaults=[\'(0.0448, 0.2856, 0.3001, 0.2363, 0.1333)\'], " + } member_method { name: "total_variation" argspec: "args=[\'images\', \'name\'], varargs=None, keywords=None, defaults=[\'None\'], " -- GitLab From 66788c60d65564775bcbcf4dc1734157228dbdba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 14:57:22 -0800 Subject: [PATCH 632/884] Fix bug in updating NodeMap when materializing shapes from ShapeN. Fix a similar bug in MaybeRemoveControlInput. Improve error message in dependency optimizer, so we can tell if the problem is in dependency optimizer itself or upstream of it. 
PiperOrigin-RevId: 188394863 --- .../grappler/optimizers/constant_folding.cc | 52 +++++++++++++------ .../optimizers/constant_folding_test.cc | 50 ++++++++++++++++++ .../optimizers/dependency_optimizer.cc | 4 +- 3 files changed, 90 insertions(+), 16 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 77804142e6..31dc1b73e1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -140,20 +140,20 @@ bool AllValuesAre(const TensorProto& tensor, const T& value) { // Add new_input as a control input to node if it does not already depend on it. // TODO(rmlarsen): Move the following two utility functions to utils.{h,cc} and // clean up code that should be using them. -bool MaybeAddControlInput(const string& new_input, NodeDef* node, +bool MaybeAddControlInput(const string& ctrl_input, NodeDef* node, GraphDef* graph, NodeMap* node_map) { bool already_exists = false; for (const string& input : node->input()) { - if (input == new_input || AsControlDependency(input) == new_input) { + if (input == ctrl_input || AsControlDependency(input) == ctrl_input) { already_exists = true; break; } } if (!already_exists) { const string ctrl_dep = - ConstantFolding::AddControlDependency(new_input, graph, node_map); + ConstantFolding::AddControlDependency(ctrl_input, graph, node_map); node->add_input(ctrl_dep); - node_map->AddOutput(NodeName(new_input), node->name()); + node_map->AddOutput(NodeName(ctrl_input), node->name()); } return !already_exists; } @@ -161,16 +161,27 @@ bool MaybeAddControlInput(const string& new_input, NodeDef* node, // Remove old_input as a control input to node. 
bool MaybeRemoveControlInput(const string& old_input, NodeDef* node, GraphDef* graph, NodeMap* node_map) { + bool removed_input = false; + bool update_node_map = true; + const string old_input_ctrl_dep = AsControlDependency(NodeName(old_input)); for (int i = 0; i < node->input_size(); ++i) { const string& input = node->input(i); - if (IsControlInput(input) && AsControlDependency(old_input) == input) { - node->mutable_input()->SwapElements(i, node->input_size() - 1); - node->mutable_input()->RemoveLast(); - node_map->RemoveOutput(NodeName(old_input), node->name()); - return true; + if (old_input_ctrl_dep == input) { + if (IsControlInput(input)) { + node->mutable_input()->SwapElements(i, node->input_size() - 1); + node->mutable_input()->RemoveLast(); + removed_input = true; + } else { + // There is a non-control input from the same node. + // Don't remove the output from the NodeMap. + update_node_map = false; + } } } - return false; + if (update_node_map) { + node_map->RemoveOutput(NodeName(old_input), node->name()); + } + return removed_input; } } // namespace @@ -353,7 +364,7 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { node_map_->AddOutput(NodeName(ctrl_dep), node->name()); } else { auto outputs = node_map_->GetOutputs(node->name()); - for (const auto& output : outputs) { + for (NodeDef* output : outputs) { for (int k = 0; k < output->input_size(); ++k) { int port; string node_name = ParseNodeName(output->input(k), &port); @@ -378,11 +389,22 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { *added_node->add_input() = ctrl_dep; node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); } - node_map_->UpdateInput(output->name(), - NodeName(output->input(k)), const_name); *output->mutable_input(k) = const_name; + node_map_->AddOutput(const_name, output->name()); } } + bool remove_output = true; + for (int k = 0; k < output->input_size(); ++k) { + int port; + string node_name = 
ParseNodeName(output->input(k), &port); + if (node_name == node->name()) { + remove_output = false; + break; + } + } + if (remove_output) { + node_map_->RemoveOutput(node->name(), output->name()); + } } } } @@ -1051,7 +1073,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) { node_map_->AddOutput(node->name(), const_index->name()); auto outputs = node_map_->GetOutputs(node->name()); - for (auto& output : outputs) { + for (NodeDef* output : outputs) { for (int i = 0; i < output->input_size(); i++) { int port; string node_name = ParseNodeName(output->input(i), &port); @@ -1142,7 +1164,7 @@ Status ConstantFolding::FoldNode(NodeDef* node, GraphDef* output_graph) { if (const_nodes.size() > 1) { auto outputs = node_map_->GetOutputs(node->name()); - for (const auto& output : outputs) { + for (NodeDef* output : outputs) { for (int i = 0; i < output->input_size(); i++) { int port; string node_name = ParseNodeName(output->input(i), &port); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 29dc93c257..4b9770889f 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -947,6 +947,56 @@ TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN) { EXPECT_EQ(9, found); } +TEST_F(ConstantFoldingTest, ShapeMaterializationShapeN_MultipleOutputs) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output v1 = ops::Variable(scope.WithOpName("v1"), {3, -1}, DT_FLOAT); + Output v2 = ops::Variable(scope.WithOpName("v2"), {4, 6}, DT_FLOAT); + auto s = ops::ShapeN(scope.WithOpName("s"), {v1, v2}); + auto id_n = ops::IdentityN(scope.WithOpName("id_n"), {s[0], s[1]}); + Output ia = ops::Identity(scope.WithOpName("ia"), id_n[0]); + Output ib = ops::Identity(scope.WithOpName("ib"), id_n[1]); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + 
item.fetch.push_back("ia"); + item.fetch.push_back("ib"); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + int found = 0; + for (const auto& node : output.node()) { + EXPECT_NE(AddPrefixToNodeName("s-matshapes-0", kConstantFoldingConst), + node.name()); + if (node.name() == "s") { + ++found; + EXPECT_EQ("ShapeN", node.op()); + EXPECT_EQ("v1", node.input(0)); + EXPECT_EQ("v2", node.input(1)); + } + if (node.name() == "id_n") { + ++found; + EXPECT_EQ("IdentityN", node.op()); + EXPECT_EQ("s", node.input(0)); + EXPECT_EQ(AddPrefixToNodeName("s-matshapes-1", kConstantFoldingConst), + node.input(1)); + } + if (node.name() == "ia") { + ++found; + EXPECT_EQ("id_n", node.input(0)); + } + if (node.name() == "ib") { + ++found; + EXPECT_EQ("Const", node.op()); + EXPECT_EQ("^s", node.input(0)); + EXPECT_EQ("^id_n", node.input(1)); + } + } + EXPECT_EQ(4, found); +} + TEST_F(ConstantFoldingTest, SwitchNodesEmptyFetch) { tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); ops::Variable v_in(scope.WithOpName("v_in"), {3}, DT_FLOAT); diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index bb4b916f46..a5b2572c9c 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -576,7 +576,9 @@ Status DependencyOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, // Remove redundant control dependencies. TF_RETURN_IF_ERROR(TransitiveReduction()); } else { - LOG(ERROR) << topo_sort_status.error_message(); + LOG(ERROR) << "Iteration = " << iteration + << ", topological sort failed with message: " + << topo_sort_status.error_message(); } // Turn nodes with only control outputs into NoOps, prune NoOp and Identity // nodes. 
-- GitLab From f43d695a833aef4bea81bae8d921bd9eeaed0462 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Thu, 8 Mar 2018 15:10:36 -0800 Subject: [PATCH 633/884] [XLA]: Enhancement to the while loop simplifier HLO pass. If a while-loop tuple element is initialized as a constant and isn't changed by the while-body, replace the use of the tuple element in while-condition and while-body with the constant value. This enables the simplification of while-loops that have 0/1 iteration and loop bound passed in through the while-loop tuple. Add test cases for while-loops with 0/1 iteration and loop bound passed in through the while-loop tuple. PiperOrigin-RevId: 188397087 --- .../xla/service/while_loop_simplifier.cc | 76 ++++++++++++++- .../xla/service/while_loop_simplifier_test.cc | 96 ++++++++++++++++++- 2 files changed, 170 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index c9d77c9376..1a93a880dd 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/while_loop_simplifier.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/hlo_evaluator.h" +#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/gtl/optional.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -605,6 +606,75 @@ static StatusOr TryRemoveWhileLoop(HloInstruction* while_op) { return false; } +static StatusOr TryPropagateConstant(HloInstruction* while_op) { + auto while_init = while_op->operand(0); + if (while_init->opcode() != HloOpcode::kTuple) { + return false; + } + + auto while_body = while_op->while_body(); + auto while_body_root = while_body->root_instruction(); + if (while_body_root->opcode() != HloOpcode::kTuple) { + return false; + } + + auto while_body_param = while_body->parameter_instruction(0); + const HloInstruction::InstructionVector& root_operands = + while_body_root->operands(); + + // Find the loop invariant tuple elements with constant init value and + // build a map from the tuple element index to the constant value. + tensorflow::gtl::FlatMap index_to_constant; + for (int i = 0; i < root_operands.size(); i++) { + HloInstruction* instr = root_operands[i]; + if (instr->opcode() == HloOpcode::kGetTupleElement && + instr->tuple_index() == i && instr->operand(0) == while_body_param) { + auto tuple_element = while_init->operand(i); + if (tuple_element->IsConstant()) { + VLOG(3) << "Found loop invariant tuple element " << i << " " + << tuple_element->ToString(); + index_to_constant[i] = tuple_element; + } + } + } + + if (index_to_constant.empty()) { + return false; + } + + // Replace the use of each constant tuple element in the loop_condition and + // loop_body with the corresponding constant value. 
+ auto propagate_constant = [&](HloComputation* computation) -> StatusOr { + HloInstruction* param = computation->parameter_instruction(0); + bool changed = false; + for (auto instr : param->users()) { + // Since only a while-loop with a tuple result reaches here, we can safely + // assume that `param` is a tuple and the first operand of the + // GetTupleElement instruction is a use of `param`. + if (instr->opcode() == HloOpcode::kGetTupleElement) { + VLOG(3) << "tuple index " << instr->tuple_index() << " " + << instr->ToString(); + auto iter = index_to_constant.find(instr->tuple_index()); + if (iter != index_to_constant.end()) { + const HloInstruction* hlo_constant = (*iter).second; + VLOG(3) << "Replace use of " << instr->ToString() << " with " + << hlo_constant->ToString(); + TF_RETURN_IF_ERROR(instr->ReplaceAllUsesWith( + computation->AddInstruction(hlo_constant->Clone()))); + changed = true; + } + } + } + return changed; + }; + + TF_ASSIGN_OR_RETURN(bool changed_cond, + propagate_constant(while_op->while_condition())); + TF_ASSIGN_OR_RETURN(bool changed_body, propagate_constant(while_body)); + + return changed_cond || changed_body; +} + StatusOr WhileLoopSimplifier::Run(HloModule* module) { XLA_VLOG_LINES(3, "WhileLoopSimplifier::Run(), before:\n" + module->ToString()); @@ -635,7 +705,11 @@ StatusOr WhileLoopSimplifier::Run(HloModule* module) { continue; } - StatusOr result = TryRemoveWhileLoop(while_op); + StatusOr result = TryPropagateConstant(while_op); + TF_RETURN_IF_ERROR(result.status()); + changed |= result.ValueOrDie(); + + result = TryRemoveWhileLoop(while_op); TF_RETURN_IF_ERROR(result.status()); if (result.ValueOrDie()) { changed = true; diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index cbea3e3cf2..396f942dc0 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ 
-30,6 +30,11 @@ class WhileLoopSimplifierTest : public HloVerifiedTestBase { protected: // Makes an HloModule that contains a loop with `num_iters` iteration. void MakeModuleWithSimpleLoop(int num_iters); + + // Similar to MakeModuleWithSimpleLoop except that the loop bound is passed to + // the loop-condition through an element of a tuple which is the + // loop-condition parameter. + void MakeModuleWithSimpleLoopTupleElementLoopBound(int num_iters); }; void WhileLoopSimplifierTest::MakeModuleWithSimpleLoop(int num_iters) { @@ -66,6 +71,45 @@ void WhileLoopSimplifierTest::MakeModuleWithSimpleLoop(int num_iters) { ParseAndVerifyModule(hlo_string.c_str()); } +void WhileLoopSimplifierTest::MakeModuleWithSimpleLoopTupleElementLoopBound( + int num_iters) { + string hlo_string_template = R"( + HloModule SimpleLoopWithIndirectLoopBound + SimpleLoopWithIndirectLoopBound.body { + loop_var.1 = (s32[], s32[3]{0}, s32[]) parameter(0) + get-tuple-element.1 = s32[] get-tuple-element(loop_var.1), index=0 + constant.1 = s32[] constant(1) + add = s32[] add(get-tuple-element.1, constant.1) + get-tuple-element.2 = s32[3]{0} get-tuple-element(loop_var.1), index=1 + multiply = s32[3]{0} multiply(get-tuple-element.2, get-tuple-element.2) + limit = s32[] get-tuple-element(loop_var.1), index=2 + ROOT tuple = (s32[], s32[3]{0}, s32[]) tuple(add, multiply, limit) + } + SimpleLoopWithIndirectLoopBound.condition { + loop_var.2 = (s32[], s32[3]{0}, s32[]) parameter(0) + get-tuple-element.3 = s32[] get-tuple-element(loop_var.2), index=0 + get-tuple-element.4 = s32[] get-tuple-element(loop_var.2), index=2 + ROOT less-than = pred[] less-than(get-tuple-element.3, get-tuple-element.4) + } + ENTRY SimpleLoopWithIndirectLoopBound { + constant.3 = s32[] constant(42) + constant.4 = s32[3]{0} constant({0, 1, 2}) + constant.2 = s32[] constant({{LOOP_BOUND}}) + tuple.1 = (s32[], s32[3]{0}, s32[]) tuple(constant.3, constant.4, + constant.2) + ROOT while = (s32[], s32[3]{0}, s32[]) while(tuple.1), + 
condition=SimpleLoopWithIndirectLoopBound.condition, + body=SimpleLoopWithIndirectLoopBound.body + } + )"; + + string hlo_string = tensorflow::str_util::StringReplace( + hlo_string_template, "{{LOOP_BOUND}}", + tensorflow::strings::StrCat(42 + num_iters), + /*replace_all=*/true); + ParseAndVerifyModule(hlo_string.c_str()); +} + TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { MakeModuleWithSimpleLoop(/*num_iters=*/0); HloModule* the_module = &module(); @@ -74,6 +118,15 @@ TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { op::Tuple(op::Constant(), op::Constant())); } +TEST_F(WhileLoopSimplifierTest, + LoopWithZeroIterationTupleElementLoopBoundSimplified) { + MakeModuleWithSimpleLoopTupleElementLoopBound(/*num_iters=*/0); + HloModule* the_module = &module(); + ASSERT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); + EXPECT_THAT(the_module->entry_computation()->root_instruction(), + op::Tuple(op::Constant(), op::Constant(), op::Constant())); +} + TEST_F(WhileLoopSimplifierTest, LoopWithOneIterationSimplified) { MakeModuleWithSimpleLoop(/*num_iters=*/1); HloModule* the_module = &module(); @@ -82,6 +135,15 @@ TEST_F(WhileLoopSimplifierTest, LoopWithOneIterationSimplified) { op::Tuple(op::Add(), op::Multiply())); } +TEST_F(WhileLoopSimplifierTest, + LoopWithOneIterationTupleELementLoopBoundSimplified) { + MakeModuleWithSimpleLoopTupleElementLoopBound(/*num_iters=*/1); + HloModule* the_module = &module(); + ASSERT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); + EXPECT_THAT(the_module->entry_computation()->root_instruction(), + op::Tuple(op::Add(), op::Multiply(), op::Constant())); +} + TEST_F(WhileLoopSimplifierTest, LoopWithTwoIterationsNotSimplified) { MakeModuleWithSimpleLoop(/*num_iters=*/2); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); @@ -364,7 +426,6 @@ TEST_F(WhileLoopSimplifierTest, LoopWithNonTupleBodyShapeNotSimplified) { HloModule BodyHasNonTupleRoot BodyHasNonTupleRoot.passthrough 
{ ROOT param = (s32[], s32[]) parameter(0) - get-tuple-element = s32[] get-tuple-element((s32[], s32[]) param), index=1 } BodyHasNonTupleRoot.always_true { param.1 = (s32[], s32[]) parameter(0) @@ -382,5 +443,38 @@ TEST_F(WhileLoopSimplifierTest, LoopWithNonTupleBodyShapeNotSimplified) { EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } +TEST_F(WhileLoopSimplifierTest, + LoopWithNonTupleBodyRootInstructionNotSimplified) { + const string hlo_string = R"( + HloModule SimpleLoop + SimpleLoop.body { + loop_var.1 = (s32[], s32[3]{0}) parameter(0) + get-tuple-element.1 = s32[] get-tuple-element(loop_var.1), index=0 + constant.1 = s32[] constant(1) + add = s32[] add(get-tuple-element.1, constant.1) + get-tuple-element.2 = s32[3]{0} get-tuple-element(loop_var.1), index=1 + multiply = s32[3]{0} multiply(get-tuple-element.2, get-tuple-element.2) + ROOT custom-call = (s32[], s32[3]{0}) custom-call(add, multiply), + custom_call_target="x" + } + SimpleLoop.condition { + loop_var.2 = (s32[], s32[3]{0}) parameter(0) + get-tuple-element.3 = s32[] get-tuple-element(loop_var.2), index=0 + constant.2 = s32[] constant(44) + ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2) + } + ENTRY SimpleLoop { + constant.3 = s32[] constant(42) + constant.4 = s32[3]{0} constant({0, 1, 2}) + tuple.1 = (s32[], s32[3]{0}) tuple(constant.3, constant.4) + ROOT while = (s32[], s32[3]{0}) while(tuple.1), condition= + SimpleLoop.condition, body=SimpleLoop.body + } + )"; + + ParseAndVerifyModule(hlo_string.c_str()); + EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); +} + } // namespace } // namespace xla -- GitLab From 49c848697ce6fe7dc95d847aea92b200aea3822e Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 8 Mar 2018 15:45:24 -0800 Subject: [PATCH 634/884] Fix typo in comment. 
PiperOrigin-RevId: 188403010 --- tensorflow/compiler/xla/tests/hlo_test_base.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index 413bb213fd..4d49b7071d 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -44,7 +44,7 @@ namespace xla { // enables, for one, explicitly building a graph of HLO instructions to run. // // This can also be used to write text/file-based test cases. Note that the test -// target is responsible for linking the needed backends. A covenient way to do +// target is responsible for linking the needed backends. A convenient way to do // this is to make it an xla_test: it will generate test targets linking with // the respective backends, which will be used as the test backend; the // interpreter backend is already linked with hlo_test_base so it will be the -- GitLab From e8f6485d88dbf4027917e3559519b2f363325479 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 8 Mar 2018 15:47:07 -0800 Subject: [PATCH 635/884] Fix StridedSlice PiperOrigin-RevId: 188403234 --- .../contrib/lite/kernels/strided_slice.cc | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/strided_slice.cc b/tensorflow/contrib/lite/kernels/strided_slice.cc index fb1e11e0ca..eb374d9031 100644 --- a/tensorflow/contrib/lite/kernels/strided_slice.cc +++ b/tensorflow/contrib/lite/kernels/strided_slice.cc @@ -48,7 +48,7 @@ struct StridedSliceContext { output = GetOutput(context, node, kOutputTensor); dims = NumDimensions(input); } - TfLiteStridedSliceParams* params; + const TfLiteStridedSliceParams* params; TfLiteTensor* input; TfLiteTensor* begin; TfLiteTensor* end; @@ -199,19 +199,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { strides.emplace_back(1); } - op_context.params->begin_mask = + int begin_mask = ReverseMaskBits(op_context.params->begin_mask, op_context.dims); - op_context.params->end_mask = - ReverseMaskBits(op_context.params->end_mask, op_context.dims); - op_context.params->shrink_axis_mask = + int end_mask = ReverseMaskBits(op_context.params->end_mask, op_context.dims); + int shrink_axis_mask = ReverseMaskBits(op_context.params->shrink_axis_mask, op_context.dims); -#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ - kernel_type::StridedSlice( \ - GetTensorData(op_context.input), \ - GetTensorDims(op_context.input), op_context.params->begin_mask, \ - op_context.params->end_mask, op_context.params->shrink_axis_mask, \ - starts, stops, strides, GetTensorData(op_context.output), \ +#define TF_LITE_STRIDED_SLICE(kernel_type, data_type) \ + kernel_type::StridedSlice( \ + GetTensorData(op_context.input), \ + GetTensorDims(op_context.input), begin_mask, end_mask, shrink_axis_mask, \ + starts, stops, strides, GetTensorData(op_context.output), \ GetTensorDims(op_context.output)) switch (op_context.input->type) { -- GitLab From 
7c3c5801d67a2d56d4015c3f505f3d89386cb394 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Thu, 8 Mar 2018 16:16:47 -0800 Subject: [PATCH 636/884] Return kTfLiteError if calling delegate-specific functions from non-delegate code. PiperOrigin-RevId: 188407931 --- tensorflow/contrib/lite/interpreter.cc | 33 +++++++++++++---- tensorflow/contrib/lite/interpreter_test.cc | 40 +++++++++++++++++++++ 2 files changed, 67 insertions(+), 6 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 2834dc49f9..4710488065 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -30,6 +30,27 @@ limitations under the License. namespace tflite { +namespace { + +// Stub method which returns kTfLiteError when the function is forbidden. +// We're registrating this function to several different function to save +// compiled binary size. Please note the restrictions: +// * The type of first parameter have to be `TfLiteContext*`. +// * All paramteters must be trivailly destructible. (E.g. No C++ class) +TfLiteStatus ForbiddenContextFunction(TfLiteContext* context, ...) { + context->ReportError(context, + "The function is forbidden if not calling in delegate."); + return kTfLiteError; +} + +// Set the ForbiddenContextFunction to a compatible function pointer. +template +void SetForbiddenContextFunction(FunctionType* func) { + *func = reinterpret_cast(ForbiddenContextFunction); +} + +} // namespace + // A trivial implementation of GraphInfo around the Interpreter. // NOTE: this interpreter info represents the subset of the // graph that is executed according to execution plan. 
Thus, @@ -74,9 +95,9 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) context_.gemm_context = nullptr; // Invalid to call these these except from TfLiteDelegate - context_.GetNodeAndRegistration = nullptr; - context_.ReplaceSubgraphsWithDelegateKernels = nullptr; - context_.GetExecutionPlan = nullptr; + SetForbiddenContextFunction(&context_.GetNodeAndRegistration); + SetForbiddenContextFunction(&context_.ReplaceSubgraphsWithDelegateKernels); + SetForbiddenContextFunction(&context_.GetExecutionPlan); // Reserve some space for the tensors to avoid excessive resizing. tensors_.reserve(kTensorsReservedCapacity); @@ -686,9 +707,9 @@ TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { TfLiteStatus status = delegate->Prepare(&context_, delegate); // Remove additional context info. - context_.GetNodeAndRegistration = nullptr; - context_.ReplaceSubgraphsWithDelegateKernels = nullptr; - context_.GetExecutionPlan = nullptr; + SetForbiddenContextFunction(&context_.GetNodeAndRegistration); + SetForbiddenContextFunction(&context_.ReplaceSubgraphsWithDelegateKernels); + SetForbiddenContextFunction(&context_.GetExecutionPlan); return status; } diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 2586c15287..17eb2f4b07 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -561,6 +561,46 @@ TEST(BasicInterpreter, TestCustomErrorReporter) { ASSERT_EQ(reporter.calls, 1); } +TEST(BasicInterpreter, TestUnsupportedDelegateFunctions) { + Interpreter interpreter; + ASSERT_EQ(interpreter.AddTensors(2), kTfLiteOk); + TfLiteRegistration registration = { + .init = nullptr, .free = nullptr, .prepare = nullptr, .invoke = nullptr}; + // These functions are only supported inside Delegate's Prepare function. + // The test verifies that these functions returns `kTfLiteError`, but not + // `kTfLiteOk` or just crashes. 
+ registration.prepare = [](TfLiteContext* context, TfLiteNode* node) { + { + TfLiteIntArray* execution_plan; + EXPECT_EQ(context->GetExecutionPlan(context, &execution_plan), + kTfLiteError); + } + { + TfLiteNode* node; + TfLiteRegistration* registration; + EXPECT_EQ( + context->GetNodeAndRegistration(context, 0, &node, &registration), + kTfLiteError); + } + { + TfLiteRegistration delegate_registration = {nullptr, nullptr, nullptr, + nullptr}; + TfLiteIntArray nodes_to_replace; + nodes_to_replace.size = 0; + EXPECT_EQ(context->ReplaceSubgraphsWithDelegateKernels( + context, delegate_registration, &nodes_to_replace, nullptr), + kTfLiteError); + } + return kTfLiteError; + }; + ASSERT_EQ(interpreter.SetInputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.SetOutputs({0}), kTfLiteOk); + ASSERT_EQ(interpreter.AddNodeWithParameters({0}, {1}, nullptr, 0, nullptr, + &registration), + kTfLiteOk); + EXPECT_EQ(interpreter.AllocateTensors(), kTfLiteError); +} + TEST(InterpreterTensorsCapacityTest, TestWithinHeadroom) { Interpreter interpreter; ASSERT_EQ(interpreter.AddTensors(Interpreter::kTensorsReservedCapacity), -- GitLab From cb19a43c0510b36b7f95886650f537303700404b Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Thu, 8 Mar 2018 16:39:37 -0800 Subject: [PATCH 637/884] [tf.data] Fix potential destruction race in IteratorGetNext. PiperOrigin-RevId: 188411125 --- tensorflow/core/kernels/data/iterator_ops.cc | 27 +++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 3fb96679da..6fe3746a73 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -868,8 +868,6 @@ class IteratorGetNextOp : public AsyncOpKernel { // owned thread pool.
thread_pool_->Schedule(std::bind( [this, ctx, iterator](DoneCallback done) { - core::ScopedUnref unref_iterator(iterator); - std::vector components; bool end_of_sequence = false; @@ -886,17 +884,22 @@ class IteratorGetNextOp : public AsyncOpKernel { }; IteratorContext iter_ctx(std::move(params)); - OP_REQUIRES_OK_ASYNC( - ctx, iterator->GetNext(&iter_ctx, &components, &end_of_sequence), - done); - OP_REQUIRES_ASYNC(ctx, !end_of_sequence, - errors::OutOfRange("End of sequence"), done); - - for (int i = 0; i < components.size(); ++i) { - // TODO(mrry): Check that the shapes match the shape attrs. - ctx->set_output(i, components[i]); + Status s = + iterator->GetNext(&iter_ctx, &components, &end_of_sequence); + // NOTE(mrry): We must unref the iterator before calling `done()`, to + // avoid destruction races. + iterator->Unref(); + + if (!s.ok()) { + ctx->SetStatus(s); + } else if (end_of_sequence) { + ctx->SetStatus(errors::OutOfRange("End of sequence")); + } else { + for (int i = 0; i < components.size(); ++i) { + // TODO(mrry): Check that the shapes match the shape attrs. + ctx->set_output(i, components[i]); + } } - done(); }, std::move(done))); -- GitLab From 44bcb41f7edae78b69ab52acbc58934242cf13b8 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Thu, 8 Mar 2018 16:40:24 -0800 Subject: [PATCH 638/884] Treat IdentityN nodes with a single input as regular Identity nodes. 
PiperOrigin-RevId: 188411260 --- tensorflow/core/grappler/op_types.cc | 3 ++ .../optimizers/dependency_optimizer_test.cc | 47 +++++++++++++++++++ 2 files changed, 50 insertions(+) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index fb46b584b2..8cf1402ae8 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -144,6 +144,9 @@ bool IsHistogramSummary(const NodeDef& node) { bool IsIdentity(const NodeDef& node) { const auto& op = node.op(); + if (op == "IdentityN" && node.attr().at("T").list().type_size() == 1) { + return true; + } return op == "Identity" || op == "RefIdentity"; } diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc index 08659cbf6f..b66cc17a72 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc @@ -548,6 +548,53 @@ TEST_F(DependencyOptimizerTest, IdentityInputs) { EXPECT_EQ("s:1", output.node(5).input(0)); } +TEST_F(DependencyOptimizerTest, IdentityN) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output b = ops::Placeholder(scope.WithOpName("b"), DT_BOOL); + Output x = ops::RandomUniform(scope.WithOpName("x"), {1, 2}, DT_FLOAT); + auto s = ops::Switch(scope.WithOpName("s"), x, b); + + // IdentityN nodes to be removed. + auto id_f = ops::IdentityN(scope.WithOpName("id_f"), {s.output_false}); + auto id_t = ops::IdentityN(scope.WithOpName("id_t"), {s.output_true}); + + // IdentityN node that can't be removed. 
+ auto id_b = + ops::IdentityN(scope.WithOpName("id_b"), {s.output_false, s.output_true}); + + // Outputs + Output out1 = ops::Identity(scope.WithOpName("out1"), id_f[0]); + Output out2 = ops::Identity(scope.WithOpName("out2"), id_t[0]); + Output out3 = ops::Identity(scope.WithOpName("out3"), id_b[0]); + Output out4 = ops::Identity(scope.WithOpName("out4"), id_b[1]); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + item.fetch = {"out1", "out2", "out3", "out4"}; + + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(9, output.node_size()); + EXPECT_EQ("out1", output.node(5).name()); + EXPECT_EQ(1, output.node(5).input_size()); + EXPECT_EQ("s", output.node(5).input(0)); + + EXPECT_EQ("out2", output.node(6).name()); + EXPECT_EQ(1, output.node(6).input_size()); + EXPECT_EQ("s:1", output.node(6).input(0)); + + EXPECT_EQ("out3", output.node(7).name()); + EXPECT_EQ(1, output.node(7).input_size()); + EXPECT_EQ("id_b", output.node(7).input(0)); + + EXPECT_EQ("out4", output.node(8).name()); + EXPECT_EQ(1, output.node(8).input_size()); + EXPECT_EQ("id_b:1", output.node(8).input(0)); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From d56eface20da6adf5a12507053c16ef22594739b Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Thu, 8 Mar 2018 16:45:45 -0800 Subject: [PATCH 639/884] Fixes a bug where the ProcFLR doesn't lookup existing instantiations in the distributed (ClusterFLR) case. As a result multiple instantiations for the same function were happening. 
PiperOrigin-RevId: 188411978 --- tensorflow/core/BUILD | 1 + .../process_function_library_runtime.cc | 55 ++++++++--- .../process_function_library_runtime.h | 32 +++++-- .../process_function_library_runtime_test.cc | 94 ++++++++++++++++++- 4 files changed, 160 insertions(+), 22 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index 0fbe4eba6e..f2b0d542dd 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3156,6 +3156,7 @@ tf_cc_test( ":core_cpu", ":core_cpu_internal", ":framework", + ":lib", ":test", ":test_main", ":testlib", diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index 929f5c67bc..44dc6f9459 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -25,6 +25,19 @@ namespace tensorflow { const char ProcessFunctionLibraryRuntime::kDefaultFLRDevice[] = "null"; +Status ProcessFunctionLibraryRuntime::FunctionData::DistributedInit( + DistributedFunctionLibraryRuntime* parent, const string& function_name, + const FunctionLibraryDefinition& lib_def, AttrSlice attrs, + const FunctionLibraryRuntime::InstantiateOptions& options) { + mutex_lock l(mu_); + if (!init_started_) { + init_started_ = true; + init_result_ = parent->Instantiate(function_name, lib_def, attrs, options, + &local_handle_); + } + return init_result_; +} + ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime( const DeviceMgr* device_mgr, Env* env, int graph_def_version, const FunctionLibraryDefinition* lib_def, @@ -167,7 +180,8 @@ FunctionLibraryRuntime::Handle ProcessFunctionLibraryRuntime::AddHandle( if (function_data_.count(h) != 0) return h; } h = next_handle_; - function_data_.insert({h, FunctionData(device_name, local_handle)}); + FunctionData* fd = new FunctionData(device_name, local_handle); + function_data_[h] = std::unique_ptr(fd); 
table_[function_key] = h; next_handle_++; return h; @@ -196,19 +210,19 @@ ProcessFunctionLibraryRuntime::GetHandleOnDevice( if (function_data_.count(handle) == 0) { return kInvalidLocalHandle; } - const FunctionData& function_data = function_data_[handle]; - if (function_data.target_device != device_name) { + FunctionData* function_data = function_data_[handle].get(); + if (function_data->target_device() != device_name) { return kInvalidLocalHandle; } - return function_data.local_handle; + return function_data->local_handle(); } string ProcessFunctionLibraryRuntime::GetDeviceName( FunctionLibraryRuntime::Handle handle) { mutex_lock l(mu_); CHECK_EQ(1, function_data_.count(handle)); - const FunctionData& function_data = function_data_[handle]; - return function_data.target_device; + FunctionData* function_data = function_data_[handle].get(); + return function_data->target_device(); } Status ProcessFunctionLibraryRuntime::Instantiate( @@ -225,11 +239,26 @@ Status ProcessFunctionLibraryRuntime::Instantiate( "Currently don't support instantiating functions on device: ", options.target); } - FunctionLibraryRuntime::Handle cluster_handle; - TF_RETURN_IF_ERROR(parent_->Instantiate(function_name, *lib_def_, attrs, - options, &cluster_handle)); + string function_key = Canonicalize(function_name, attrs); - *handle = AddHandle(function_key, options.target, cluster_handle); + FunctionData* f; + { + mutex_lock l(mu_); + FunctionLibraryRuntime::Handle h = + gtl::FindWithDefault(table_, function_key, kInvalidHandle); + if (h == kInvalidHandle || function_data_.count(h) == 0) { + h = next_handle_; + FunctionData* fd = new FunctionData(options.target, kInvalidHandle); + function_data_[h] = std::unique_ptr(fd); + table_[function_key] = h; + next_handle_++; + } + f = function_data_[h].get(); + *handle = h; + } + TF_RETURN_IF_ERROR( + f->DistributedInit(parent_, function_name, *lib_def_, attrs, options)); + return Status::OK(); } @@ -247,7 +276,7 @@ Status 
ProcessFunctionLibraryRuntime::ReleaseHandle( { mutex_lock l(mu_); CHECK_EQ(1, function_data_.count(handle)) << " handle: " << handle; - target_device = function_data_[handle].target_device; + target_device = function_data_[handle]->target_device(); } flr = GetFLR(target_device); if (flr != nullptr) { @@ -276,8 +305,8 @@ void ProcessFunctionLibraryRuntime::Run( done(errors::NotFound("Handle: ", handle, " not found.")); return; } - target_device = function_data_[handle].target_device; - local_handle = function_data_[handle].local_handle; + target_device = function_data_[handle]->target_device(); + local_handle = function_data_[handle]->local_handle(); } flr = GetFLR(target_device); if (flr != nullptr) { diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h index 0473e16d24..10619ba6ea 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.h +++ b/tensorflow/core/common_runtime/process_function_library_runtime.h @@ -145,14 +145,31 @@ class ProcessFunctionLibraryRuntime { mutable mutex mu_; - struct FunctionData { - const string target_device; - const FunctionLibraryRuntime::LocalHandle local_handle; - + class FunctionData { + public: FunctionData(const string& target_device, FunctionLibraryRuntime::LocalHandle local_handle) - : target_device(target_device), local_handle(local_handle) {} - FunctionData() : FunctionData("", -1) {} + : target_device_(target_device), local_handle_(local_handle) {} + + string target_device() { return target_device_; } + + FunctionLibraryRuntime::LocalHandle local_handle() { return local_handle_; } + + // Initializes the FunctionData object by potentially making an Initialize + // call to the DistributedFunctionLibraryRuntime. 
+ Status DistributedInit( + DistributedFunctionLibraryRuntime* parent, const string& function_name, + const FunctionLibraryDefinition& lib_def, AttrSlice attrs, + const FunctionLibraryRuntime::InstantiateOptions& options); + + private: + mutex mu_; + + const string target_device_; + FunctionLibraryRuntime::LocalHandle local_handle_ GUARDED_BY(mu_); + bool init_started_ GUARDED_BY(mu_) = false; + Status init_result_ GUARDED_BY(mu_); + Notification init_done_; }; const DeviceMgr* const device_mgr_; @@ -160,7 +177,8 @@ class ProcessFunctionLibraryRuntime { // Holds all the function invocations here. std::unordered_map table_ GUARDED_BY(mu_); - std::unordered_map + std::unordered_map> function_data_ GUARDED_BY(mu_); std::unordered_map> flr_map_; int next_handle_ GUARDED_BY(mu_); diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc index 439ba1ce96..ab1f919852 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc @@ -19,9 +19,11 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/function_testlib.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/framework/function.h" #include "tensorflow/core/framework/function_testlib.h" #include "tensorflow/core/framework/tensor_testutil.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/public/version.h" @@ -29,8 +31,32 @@ limitations under the License. 
namespace tensorflow { namespace { +class TestClusterFLR : public DistributedFunctionLibraryRuntime { + public: + TestClusterFLR() {} + + Status Instantiate(const string& function_name, + const FunctionLibraryDefinition& lib_def, AttrSlice attrs, + const FunctionLibraryRuntime::InstantiateOptions& options, + FunctionLibraryRuntime::LocalHandle* handle) { + mutex_lock l(mu_); + *handle = next_handle_; + next_handle_++; + return Status::OK(); + } + + void Run(const FunctionLibraryRuntime::Options& opts, + FunctionLibraryRuntime::LocalHandle handle, + gtl::ArraySlice args, std::vector* rets, + FunctionLibraryRuntime::DoneCallback done) {} + + private: + mutex mu_; + int next_handle_ GUARDED_BY(mu_) = 0; +}; + class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { - protected: + public: void Init(const std::vector& flib) { SessionOptions options; auto* device_count = options.config.mutable_device_count(); @@ -42,12 +68,20 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { for (const auto& fdef : flib) *(proto.add_function()) = fdef; lib_def_.reset(new FunctionLibraryDefinition(OpRegistry::Global(), proto)); OptimizerOptions opts; + cluster_flr_.reset(new TestClusterFLR()); proc_flr_.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(), - opts, nullptr /* cluster_flr */)); + opts, cluster_flr_.get())); rendezvous_ = new IntraProcessRendezvous(device_mgr_.get()); } + Status Instantiate( + const string& name, test::function::Attrs attrs, + const FunctionLibraryRuntime::InstantiateOptions& instantiate_opts, + FunctionLibraryRuntime::Handle* handle) { + return proc_flr_->Instantiate(name, attrs, instantiate_opts, handle); + } + Status Run(const string& name, FunctionLibraryRuntime::Options opts, test::function::Attrs attrs, const FunctionLibraryRuntime::InstantiateOptions& instantiate_opts, @@ -106,6 +140,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { 
std::vector devices_; std::unique_ptr device_mgr_; std::unique_ptr lib_def_; + std::unique_ptr cluster_flr_; std::unique_ptr proc_flr_; IntraProcessRendezvous* rendezvous_; }; @@ -250,5 +285,60 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, MultipleCallsDiffDeviceFindDevice) { rendezvous_->Unref(); } +TEST_F(ProcessFunctionLibraryRuntimeTest, ClusterFLRSerialTest) { + Init({test::function::FindDevice()}); + FunctionLibraryRuntime::Options opts; + opts.source_device = "/job:a/replica:0/task:0/cpu:0"; + opts.rendezvous = rendezvous_; + opts.remote_execution = true; + FunctionLibraryRuntime::InstantiateOptions instantiate_opts; + instantiate_opts.target = "/job:b/replica:0/task:0/device:CPU:0"; + FunctionLibraryRuntime::Handle h; + TF_CHECK_OK(Instantiate("FindDevice", + {{"_target", "/job:b/replica:0/task:0/device:CPU:0"}}, + instantiate_opts, &h)); + EXPECT_EQ(0, proc_flr_->GetHandleOnDevice( + "/job:b/replica:0/task:0/device:CPU:0", h)); + TF_CHECK_OK(Instantiate("FindDevice", + {{"_target", "/job:b/replica:0/task:0/device:CPU:0"}}, + instantiate_opts, &h)); + EXPECT_EQ(0, proc_flr_->GetHandleOnDevice( + "/job:b/replica:0/task:0/device:CPU:0", h)); + instantiate_opts.target = "/job:c/replica:0/task:0/device:CPU:0"; + TF_CHECK_OK(Instantiate("FindDevice", + {{"_target", "/job:c/replica:0/task:0/device:CPU:0"}}, + instantiate_opts, &h)); + EXPECT_EQ(1, proc_flr_->GetHandleOnDevice( + "/job:c/replica:0/task:0/device:CPU:0", h)); + rendezvous_->Unref(); +} + +TEST_F(ProcessFunctionLibraryRuntimeTest, ClusterFLRParallelTest) { + Init({test::function::FindDevice()}); + FunctionLibraryRuntime::Options opts; + opts.source_device = "/job:a/replica:0/task:0/cpu:0"; + opts.rendezvous = rendezvous_; + opts.remote_execution = true; + FunctionLibraryRuntime::InstantiateOptions instantiate_opts; + instantiate_opts.target = "/job:b/replica:0/task:0/device:CPU:0"; + + thread::ThreadPool* tp = new thread::ThreadPool(Env::Default(), "test", 4); + auto fn = [this, &instantiate_opts]() { 
+ FunctionLibraryRuntime::Handle h; + TF_CHECK_OK(Instantiate( + "FindDevice", {{"_target", "/job:b/replica:0/task:0/device:CPU:0"}}, + instantiate_opts, &h)); + EXPECT_EQ(0, proc_flr_->GetHandleOnDevice( + "/job:b/replica:0/task:0/device:CPU:0", h)); + }; + + for (int i = 0; i < 100; ++i) { + tp->Schedule(fn); + } + delete tp; + + rendezvous_->Unref(); +} + } // anonymous namespace } // namespace tensorflow -- GitLab From e7ec9100b45480710817ce6259bdbb4d4c2a48ee Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 16:56:18 -0800 Subject: [PATCH 640/884] Check df parameter > 0 for Chi2. PiperOrigin-RevId: 188413552 --- tensorflow/contrib/distributions/python/ops/chi2.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/distributions/python/ops/chi2.py b/tensorflow/contrib/distributions/python/ops/chi2.py index bdd5571c96..e610f469e5 100644 --- a/tensorflow/contrib/distributions/python/ops/chi2.py +++ b/tensorflow/contrib/distributions/python/ops/chi2.py @@ -21,6 +21,8 @@ from __future__ import print_function from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops from tensorflow.python.ops.distributions import gamma @@ -87,7 +89,11 @@ class Chi2(gamma.Gamma): # allow_nan_stats=True # through to the parent class results in unnecessary asserts. with ops.name_scope(name, values=[df]): - self._df = ops.convert_to_tensor(df, name="df") + with ops.control_dependencies([ + check_ops.assert_positive(df), + ] if validate_args else []): + self._df = array_ops.identity(df, name="df") + super(Chi2, self).__init__( concentration=0.5 * self._df, rate=constant_op.constant(0.5, dtype=self._df.dtype), -- GitLab From b49af5522c2d6a99acdc043aca8e826a537a3e80 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Thu, 8 Mar 2018 16:56:26 -0800 Subject: [PATCH 641/884] Restore cholesky_outer_product_test, to contains for CholeskyOuterProduct PiperOrigin-RevId: 188413575 --- .../bijectors/cholesky_outer_product_test.py | 172 ++++++++++++------ 1 file changed, 121 insertions(+), 51 deletions(-) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py index 0ff3530428..ab2338f4cb 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py @@ -18,70 +18,140 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np + from tensorflow.contrib.distributions.python.ops import bijectors -from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops -from tensorflow.python.ops.distributions import gamma as gamma_lib -from tensorflow.python.ops.distributions import transformed_distribution as transformed_distribution_lib from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency from tensorflow.python.platform import test -class InvertBijectorTest(test.TestCase): - """Tests the correctness of the Y = Invert(bij) transformation.""" +class CholeskyOuterProductBijectorTest(test.TestCase): + """Tests the correctness of the Y = X @ X.T transformation.""" - def testBijector(self): + def testBijectorMatrix(self): with self.test_session(): - for fwd in [ - bijectors.Identity(), - bijectors.Exp(event_ndims=1), - bijectors.Affine( - shift=[0., 1.], scale_diag=[2., 3.], event_ndims=1), - bijectors.Softplus(event_ndims=1), - bijectors.SoftmaxCentered(event_ndims=1), - bijectors.SigmoidCentered(), - 
]: - rev = bijectors.Invert(fwd) - self.assertEqual("_".join(["invert", fwd.name]), rev.name) - x = [[[1., 2.], - [2., 3.]]] - self.assertAllClose(fwd.inverse(x).eval(), rev.forward(x).eval()) - self.assertAllClose(fwd.forward(x).eval(), rev.inverse(x).eval()) - self.assertAllClose( - fwd.forward_log_det_jacobian(x).eval(), - rev.inverse_log_det_jacobian(x).eval()) - self.assertAllClose( - fwd.inverse_log_det_jacobian(x).eval(), - rev.forward_log_det_jacobian(x).eval()) + bijector = bijectors.CholeskyOuterProduct( + event_ndims=2, validate_args=True) + self.assertEqual("cholesky_outer_product", bijector.name) + x = [[[1., 0], [2, 1]], [[np.sqrt(2.), 0], [np.sqrt(8.), 1]]] + y = np.matmul(x, np.transpose(x, axes=(0, 2, 1))) + # Fairly easy to compute differentials since we have 2x2. + dx_dy = [[[2. * 1, 0, 0], + [2, 1, 0], + [0, 2 * 2, 2 * 1]], + [[2 * np.sqrt(2.), 0, 0], + [np.sqrt(8.), np.sqrt(2.), 0], + [0, 2 * np.sqrt(8.), 2 * 1]]] + ildj = -np.sum( + np.log(np.asarray(dx_dy).diagonal( + offset=0, axis1=1, axis2=2)), + axis=1) + self.assertAllEqual((2, 2, 2), bijector.forward(x).get_shape()) + self.assertAllEqual((2, 2, 2), bijector.inverse(y).get_shape()) + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose( + ildj, bijector.inverse_log_det_jacobian(y).eval(), atol=0., rtol=1e-7) + self.assertAllClose( + -bijector.inverse_log_det_jacobian(y).eval(), + bijector.forward_log_det_jacobian(x).eval(), + atol=0., + rtol=1e-7) - def testScalarCongruency(self): + def testBijectorScalar(self): with self.test_session(): - bijector = bijectors.Invert(bijectors.Exp()) - assert_scalar_congruency( - bijector, lower_x=1e-3, upper_x=1.5, rtol=0.05) + bijector = bijectors.CholeskyOuterProduct( + event_ndims=0, validate_args=True) + self.assertEqual("cholesky_outer_product", bijector.name) + x = [[[1., 5], + [2, 1]], + [[np.sqrt(2.), 3], + [np.sqrt(8.), 1]]] + y = np.square(x) + ildj = -np.log(2.) 
- np.log(x) + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose( + ildj, bijector.inverse_log_det_jacobian(y).eval(), atol=0., rtol=1e-7) + self.assertAllClose( + -bijector.inverse_log_det_jacobian(y).eval(), + bijector.forward_log_det_jacobian(x).eval(), + atol=0., + rtol=1e-7) - def testShapeGetters(self): + def testScalarCongruency(self): with self.test_session(): - bijector = bijectors.Invert(bijectors.SigmoidCentered(validate_args=True)) - x = tensor_shape.TensorShape([2]) - y = tensor_shape.TensorShape([]) - self.assertAllEqual(y, bijector.forward_event_shape(x)) - self.assertAllEqual( - y.as_list(), - bijector.forward_event_shape_tensor(x.as_list()).eval()) - self.assertAllEqual(x, bijector.inverse_event_shape(y)) - self.assertAllEqual( - x.as_list(), - bijector.inverse_event_shape_tensor(y.as_list()).eval()) + bijector = bijectors.CholeskyOuterProduct( + event_ndims=0, validate_args=True) + assert_scalar_congruency(bijector, lower_x=1e-3, upper_x=1.5, rtol=0.05) - def testDocstringExample(self): - with self.test_session(): - exp_gamma_distribution = ( - transformed_distribution_lib.TransformedDistribution( - distribution=gamma_lib.Gamma(concentration=1., rate=2.), - bijector=bijectors.Invert(bijectors.Exp()))) - self.assertAllEqual( - [], array_ops.shape(exp_gamma_distribution.sample()).eval()) + def testNoBatchStatic(self): + x = np.array([[1., 0], [2, 1]]) # np.linalg.cholesky(y) + y = np.array([[1., 2], [2, 5]]) # np.matmul(x, x.T) + with self.test_session() as sess: + y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x) + x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y) + [y_actual_, x_actual_] = sess.run([y_actual, x_actual]) + self.assertAllEqual([2, 2], y_actual.get_shape()) + self.assertAllEqual([2, 2], x_actual.get_shape()) + self.assertAllClose(y, y_actual_) + self.assertAllClose(x, x_actual_) + + def testNoBatchDeferred(self): + x = 
np.array([[1., 0], [2, 1]]) # np.linalg.cholesky(y) + y = np.array([[1., 2], [2, 5]]) # np.matmul(x, x.T) + with self.test_session() as sess: + x_pl = array_ops.placeholder(dtypes.float32) + y_pl = array_ops.placeholder(dtypes.float32) + y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x_pl) + x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y_pl) + [y_actual_, x_actual_] = sess.run([y_actual, x_actual], + feed_dict={x_pl: x, y_pl: y}) + self.assertEqual(None, y_actual.get_shape()) + self.assertEqual(None, x_actual.get_shape()) + self.assertAllClose(y, y_actual_) + self.assertAllClose(x, x_actual_) + + def testBatchStatic(self): + x = np.array([[[1., 0], + [2, 1]], + [[3., 0], + [1, 2]]]) # np.linalg.cholesky(y) + y = np.array([[[1., 2], + [2, 5]], + [[9., 3], + [3, 5]]]) # np.matmul(x, x.T) + with self.test_session() as sess: + y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x) + x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y) + [y_actual_, x_actual_] = sess.run([y_actual, x_actual]) + self.assertEqual([2, 2, 2], y_actual.get_shape()) + self.assertEqual([2, 2, 2], x_actual.get_shape()) + self.assertAllClose(y, y_actual_) + self.assertAllClose(x, x_actual_) + + def testBatchDeferred(self): + x = np.array([[[1., 0], + [2, 1]], + [[3., 0], + [1, 2]]]) # np.linalg.cholesky(y) + y = np.array([[[1., 2], + [2, 5]], + [[9., 3], + [3, 5]]]) # np.matmul(x, x.T) + with self.test_session() as sess: + x_pl = array_ops.placeholder(dtypes.float32) + y_pl = array_ops.placeholder(dtypes.float32) + y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x_pl) + x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y_pl) + [y_actual_, x_actual_] = sess.run([y_actual, x_actual], + feed_dict={x_pl: x, y_pl: y}) + self.assertEqual(None, y_actual.get_shape()) + self.assertEqual(None, x_actual.get_shape()) + self.assertAllClose(y, y_actual_) + self.assertAllClose(x, x_actual_) if __name__ 
== "__main__": -- GitLab From b04cbe64774858125147dfecc77f0d9cf68a9898 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Thu, 8 Mar 2018 17:15:03 -0800 Subject: [PATCH 642/884] Internal change PiperOrigin-RevId: 188416325 --- tensorflow/python/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index e0559f865d..3b050a8763 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -3258,6 +3258,10 @@ tf_py_wrap_cc( "util/transform_graph.i", "util/util.i", ], + win_def_file = select({ + "//tensorflow:windows": ":pywrap_tensorflow_filtered_def_file", + "//conditions:default": None, + }), deps = [ ":bfloat16_lib", ":cost_analyzer_lib", -- GitLab From 410647b29f7172ae8d4c525421a671907f505c86 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Thu, 8 Mar 2018 17:37:51 -0800 Subject: [PATCH 643/884] Remove no-longer-needed work-around for resource variables in Optimizer. PiperOrigin-RevId: 188419224 --- tensorflow/python/training/optimizer.py | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/tensorflow/python/training/optimizer.py b/tensorflow/python/training/optimizer.py index 9776b90ba4..7adaedef5b 100644 --- a/tensorflow/python/training/optimizer.py +++ b/tensorflow/python/training/optimizer.py @@ -40,19 +40,6 @@ from tensorflow.python.util import nest from tensorflow.python.util.tf_export import tf_export -def _get_variable_for(v): - """Returns the ResourceVariable responsible for v, or v if not necessary.""" - if context.executing_eagerly(): - return v - if v.op.type == "VarHandleOp": - for var in variables.trainable_variables(): - if (isinstance(var, resource_variable_ops.ResourceVariable) - and var.handle.op is v.op): - return var - raise ValueError("Got %s but could not locate source variable." % (str(v))) - return v - - def _deduplicate_indexed_slices(values, indices): """Sums `values` associated with any non-unique `indices`. 
@@ -549,7 +536,7 @@ class Optimizer( raise ValueError("No gradients provided for any variable: %s." % ([str(v) for _, _, v in converted_grads_and_vars],)) with ops.init_scope(): - self._create_slots([_get_variable_for(v) for v in var_list]) + self._create_slots(var_list) update_ops = [] with ops.name_scope(name, self._name) as name: self._prepare() -- GitLab From 99bb01a681f9993677a4e1086db7ee7879dc792f Mon Sep 17 00:00:00 2001 From: Alexandre Passos Date: Thu, 8 Mar 2018 18:12:54 -0800 Subject: [PATCH 644/884] Pulling Rendezvous initialization out of TFE_Context constructor. PiperOrigin-RevId: 188423386 --- tensorflow/c/eager/c_api.cc | 7 +++++-- tensorflow/c/eager/c_api_internal.h | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index dfe2089d60..6793bb548c 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -105,8 +105,11 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { if (!status->status.ok()) { return nullptr; } - return new TFE_Context(*opts, std::unique_ptr( - new tensorflow::DeviceMgr(devices))); + std::unique_ptr device_mgr( + new tensorflow::DeviceMgr(devices)); + tensorflow::Rendezvous* r = + new tensorflow::IntraProcessRendezvous(device_mgr.get()); + return new TFE_Context(*opts, std::move(device_mgr), r); } void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index f701f3483e..5bbfd577b4 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -48,14 +48,14 @@ TFE_ContextDevicePlacementPolicy PlacementPolicy( struct TFE_Context { explicit TFE_Context(const TFE_ContextOptions& opts, - std::unique_ptr device_mgr) + std::unique_ptr device_mgr, + tensorflow::Rendezvous* rendezvous) : soft_placement( opts.session_options.options.config.allow_soft_placement()), 
policy(PlacementPolicy(soft_placement, opts.policy)), device_manager(std::move(device_mgr)), devices(device_manager->ListDevices()), - rendezvous( - new tensorflow::IntraProcessRendezvous(device_manager.get())), + rendezvous(rendezvous), pflr(new tensorflow::ProcessFunctionLibraryRuntime( device_manager.get(), opts.session_options.options.env, TF_GRAPH_DEF_VERSION, &func_lib_def, {})), -- GitLab From 5fd341d3987fa04195b6469fb359493f63fa616c Mon Sep 17 00:00:00 2001 From: Akshay Agrawal Date: Thu, 8 Mar 2018 18:25:29 -0800 Subject: [PATCH 645/884] TFE: Implement __r*__ operators for `Dimension`. This lets you use Dimension objects in numerical computations; e.g., it lets you evaluate expressions like 3 + my_tensor.shape[0] when executing eagerly. At time of writing, without this change, `matplotlib.pyplot.plt(my_tensor, my_other_tensor)` fails when executing eagerly, but it works with this change. This change also makes it possible to right-multiply a dimension by a list (e.g., dimension * [3]); previously, only the left-multiply worked ([3] * dimension). PiperOrigin-RevId: 188424557 --- tensorflow/python/framework/tensor_shape.py | 97 +++++++++++++++++-- .../python/framework/tensor_shape_test.py | 16 +++ 2 files changed, 103 insertions(+), 10 deletions(-) diff --git a/tensorflow/python/framework/tensor_shape.py b/tensorflow/python/framework/tensor_shape.py index 6f2ab8408e..af2a5b1a7e 100644 --- a/tensorflow/python/framework/tensor_shape.py +++ b/tensorflow/python/framework/tensor_shape.py @@ -156,7 +156,7 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A Dimension whose value is the sum of `self` and `other`. @@ -167,6 +167,17 @@ class Dimension(object): else: return Dimension(self._value + other.value) + def __radd__(self, other): + """Returns the sum of `other` and `self`. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. 
+ + Returns: + A Dimension whose value is the sum of `self` and `other`. + """ + return self + other + def __sub__(self, other): """Returns the subtraction of `other` from `self`. @@ -180,10 +191,10 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: - A Dimension whose value is the subtraction of sum of `other` from `self`. + A Dimension whose value is the subtraction of `other` from `self`. """ other = as_dimension(other) if self._value is None or other.value is None: @@ -191,6 +202,21 @@ class Dimension(object): else: return Dimension(self._value - other.value) + def __rsub__(self, other): + """Returns the subtraction of `self` from `other`. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A Dimension whose value is the subtraction of `self` from `other`. + """ + other = as_dimension(other) + if self._value is None or other.value is None: + return Dimension(None) + else: + return Dimension(other.value - self._value) + def __mul__(self, other): """Returns the product of `self` and `other`. @@ -204,17 +230,32 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A Dimension whose value is the product of `self` and `other`. """ - other = as_dimension(other) + try: + other = as_dimension(other) + except (TypeError, ValueError): + return NotImplemented + if self._value is None or other.value is None: return Dimension(None) else: return Dimension(self._value * other.value) + def __rmul__(self, other): + """Returns the product of `self` and `other`. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A Dimension whose value is the product of `self` and `other`. + """ + return self * other + def __floordiv__(self, other): """Returns the quotient of `self` and `other` rounded down. 
@@ -228,17 +269,35 @@ class Dimension(object): ``` Args: - other: Another `Dimension`. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A `Dimension` whose value is the integer quotient of `self` and `other`. """ - other = as_dimension(other) + try: + other = as_dimension(other) + except (TypeError, ValueError): + return NotImplemented if self._value is None or other.value is None: return Dimension(None) else: return Dimension(self._value // other.value) + def __rfloordiv__(self, other): + """Returns the quotient of `other` and `self` rounded down. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A `Dimension` whose value is the integer quotient of `self` and `other`. + """ + other = as_dimension(other) + if self._value is None or other.value is None: + return Dimension(None) + else: + return Dimension(other.value // self._value) + def __div__(self, other): """DEPRECATED: Use `__floordiv__` via `x // y` instead. @@ -256,7 +315,7 @@ class Dimension(object): return self // other def __mod__(self, other): - """Returns `self` modulo `other. + """Returns `self` modulo `other`. Dimension moduli are computed as follows: @@ -268,17 +327,35 @@ class Dimension(object): ``` Args: - other: Another Dimension. + other: Another Dimension, or a value accepted by `as_dimension`. Returns: A Dimension whose value is `self` modulo `other`. """ - other = as_dimension(other) + try: + other = as_dimension(other) + except (TypeError, ValueError): + return NotImplemented if self._value is None or other.value is None: return Dimension(None) else: return Dimension(self._value % other.value) + def __rmod__(self, other): + """Returns `other` modulo `self`. + + Args: + other: Another Dimension, or a value accepted by `as_dimension`. + + Returns: + A Dimension whose value is `other` modulo `self`. 
+ """ + try: + other = as_dimension(other) + except (TypeError, ValueError): + return NotImplemented + return other % self + def __lt__(self, other): """Returns True if `self` is known to be less than `other`. diff --git a/tensorflow/python/framework/tensor_shape_test.py b/tensorflow/python/framework/tensor_shape_test.py index fffd86c7a6..4e8ce4d889 100644 --- a/tensorflow/python/framework/tensor_shape_test.py +++ b/tensorflow/python/framework/tensor_shape_test.py @@ -34,12 +34,20 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(tensor_shape.Dimension(15), dim + tensor_shape.Dimension(3)) self.assertEqual(tensor_shape.Dimension(15), dim + 3) + self.assertEqual(tensor_shape.Dimension(15), 3 + dim) + self.assertEqual(tensor_shape.Dimension(9), dim - 3) + self.assertEqual(tensor_shape.Dimension(1), 13 - dim) self.assertEqual(tensor_shape.Dimension(24), dim * tensor_shape.Dimension(2)) self.assertEqual(tensor_shape.Dimension(24), dim * 2) + self.assertEqual(tensor_shape.Dimension(24), 2 * dim) + self.assertEqual([4] * 12, [4] * dim) + self.assertEqual(12 * [4], dim * [4]) + self.assertEqual(tensor_shape.Dimension(24), 2 * dim) self.assertEqual( tensor_shape.Dimension(6), dim // tensor_shape.Dimension(2)) self.assertEqual(tensor_shape.Dimension(6), dim // 2) + self.assertEqual(tensor_shape.Dimension(0), 2 // dim) self.assertEqual(tensor_shape.Dimension(12), dim.merge_with(tensor_shape.Dimension(12))) self.assertEqual(tensor_shape.Dimension(12), dim.merge_with(12)) @@ -176,6 +184,14 @@ class DimensionTest(test_util.TensorFlowTestCase): self.assertEqual(str(tensor_shape.Dimension(7)), "7") self.assertEqual(str(tensor_shape.Dimension(None)), "?") + def testMod(self): + four = tensor_shape.Dimension(4) + nine = tensor_shape.Dimension(9) + self.assertEqual(nine % four, 1) + # test both __mod__ and __rmod__. 
+ self.assertEqual(nine % 4, 1) + self.assertEqual(4 % nine, 4) + class ShapeTest(test_util.TensorFlowTestCase): -- GitLab From 172aee510ec75043672a611ccc07de88c3320294 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 8 Mar 2018 18:37:35 -0800 Subject: [PATCH 646/884] Fix typo in error message PiperOrigin-RevId: 188425637 --- tensorflow/compiler/xla/service/shape_inference.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 915baecc56..2ff7ae97b7 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -2092,8 +2092,8 @@ ShapeInference::InferDegenerateDimensionBroadcastShape( const int64 start_num_dims = start_indices_shape.dimensions(0); if (ShapeUtil::Rank(operand_shape) != start_num_dims) { return InvalidArgument( - "Dynamic slice start number of dimensions %lld (%s) must match rank " - "%lld of slice input (%s).", + "Dynamic update slice start number of dimensions %lld (%s) must match " + "rank %lld of slice input (%s).", start_num_dims, ShapeUtil::HumanString(start_indices_shape).c_str(), ShapeUtil::Rank(operand_shape), ShapeUtil::HumanString(operand_shape).c_str()); -- GitLab From 2d9834e6fd1954c9ed996d259a71fd4ea30bed33 Mon Sep 17 00:00:00 2001 From: Andrew Harp Date: Thu, 8 Mar 2018 20:17:40 -0800 Subject: [PATCH 647/884] Adding AudioSpectrogram and MFCC operator support to tflite PiperOrigin-RevId: 188433328 --- tensorflow/contrib/lite/kernels/BUILD | 31 +++- .../contrib/lite/kernels/audio_spectrogram.cc | 165 ++++++++++++++++++ .../lite/kernels/audio_spectrogram_test.cc | 122 +++++++++++++ .../lite/kernels/internal/spectrogram.cc | 1 - tensorflow/contrib/lite/kernels/mfcc.cc | 154 ++++++++++++++++ tensorflow/contrib/lite/kernels/mfcc_test.cc | 104 +++++++++++ tensorflow/contrib/lite/kernels/register.cc | 14 ++ 7 files changed, 589 insertions(+), 2 
deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/audio_spectrogram.cc create mode 100644 tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc create mode 100644 tensorflow/contrib/lite/kernels/mfcc.cc create mode 100644 tensorflow/contrib/lite/kernels/mfcc_test.cc diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index b1a29701e0..c6c11b0aee 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -119,6 +119,7 @@ cc_library( srcs = [ "activations.cc", "add.cc", + "audio_spectrogram.cc", "basic_rnn.cc", "batch_to_space_nd.cc", "bidirectional_sequence_lstm.cc", @@ -140,6 +141,7 @@ cc_library( "lsh_projection.cc", "lstm.cc", "mean.cc", + "mfcc.cc", "mul.cc", "pad.cc", "pooling.cc", @@ -179,15 +181,42 @@ cc_library( "//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite:string_util", "//tensorflow/contrib/lite/kernels:gemm_support", + "//tensorflow/contrib/lite/kernels/internal:audio_utils", "//tensorflow/contrib/lite/kernels/internal:kernel_utils", "//tensorflow/contrib/lite/kernels/internal:optimized", "//tensorflow/contrib/lite/kernels/internal:optimized_base", "//tensorflow/contrib/lite/kernels/internal:quantization_util", "//tensorflow/contrib/lite/kernels/internal:reference", "//tensorflow/contrib/lite/kernels/internal:reference_base", - "//tensorflow/contrib/lite/kernels/internal:round", "//tensorflow/contrib/lite/kernels/internal:tensor_utils", "@farmhash_archive//:farmhash", + "@flatbuffers", + ], +) + +tf_cc_test( + name = "audio_spectrogram_test", + size = "small", + srcs = ["audio_spectrogram_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + "//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + "@flatbuffers", + ], +) + +tf_cc_test( + name = "mfcc_test", + size = "small", + srcs = ["mfcc_test.cc"], + deps = [ + ":builtin_ops", + "//tensorflow/contrib/lite:framework", + 
"//tensorflow/contrib/lite/kernels:test_util", + "@com_google_googletest//:gtest", + "@flatbuffers", ], ) diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc new file mode 100644 index 0000000000..5a17d3a598 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc @@ -0,0 +1,165 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/spectrogram.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" + +namespace tflite { +namespace ops { +namespace custom { +namespace audio_spectrogram { + +constexpr int kInputTensor = 0; +constexpr int kOutputTensor = 0; + +enum KernelType { + kReference, +}; + +typedef struct { + int window_size; + int stride; + bool magnitude_squared; + int output_height; + internal::Spectrogram* spectrogram; +} TfLiteAudioSpectrogramParams; + 
+void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new TfLiteAudioSpectrogramParams; + + const uint8_t* buffer_t = reinterpret_cast(buffer); + + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + data->window_size = m["window_size"].AsInt64(); + data->stride = m["stride"].AsInt64(); + data->magnitude_squared = m["magnitude_squared"].AsBool(); + + data->spectrogram = new internal::Spectrogram; + + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + auto* params = reinterpret_cast(buffer); + delete params->spectrogram; + delete params; +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); + TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* input = GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); + + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, input->type, output->type); + + TF_LITE_ENSURE(context, params->spectrogram->Initialize(params->window_size, + params->stride)); + const int64_t sample_count = input->dims->data[0]; + const int64_t length_minus_window = (sample_count - params->window_size); + if (length_minus_window < 0) { + params->output_height = 0; + } else { + params->output_height = 1 + (length_minus_window / params->stride); + } + TfLiteIntArray* output_size = TfLiteIntArrayCreate(3); + output_size->data[0] = input->dims->data[1]; + output_size->data[1] = params->output_height; + output_size->data[2] = params->spectrogram->output_frequency_channels(); + + return context->ResizeTensor(context, output, output_size); +} + +template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = + reinterpret_cast(node->user_data); + + TfLiteTensor* input = 
GetInput(context, node, kInputTensor); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE(context, params->spectrogram->Initialize(params->window_size, + params->stride)); + + const float* input_data = GetTensorData(input); + + const int64_t sample_count = input->dims->data[0]; + const int64_t channel_count = input->dims->data[1]; + + const int64_t output_width = params->spectrogram->output_frequency_channels(); + + float* output_flat = GetTensorData(output); + + std::vector input_for_channel(sample_count); + for (int64_t channel = 0; channel < channel_count; ++channel) { + float* output_slice = + output_flat + (channel * params->output_height * output_width); + for (int i = 0; i < sample_count; ++i) { + input_for_channel[i] = input_data[i * channel_count + channel]; + } + std::vector> spectrogram_output; + TF_LITE_ENSURE(context, + params->spectrogram->ComputeSquaredMagnitudeSpectrogram( + input_for_channel, &spectrogram_output)); + TF_LITE_ENSURE_EQ(context, spectrogram_output.size(), + params->output_height); + TF_LITE_ENSURE(context, spectrogram_output.empty() || + (spectrogram_output[0].size() == output_width)); + for (int row_index = 0; row_index < params->output_height; ++row_index) { + const std::vector& spectrogram_row = spectrogram_output[row_index]; + TF_LITE_ENSURE_EQ(context, spectrogram_row.size(), output_width); + float* output_row = output_slice + (row_index * output_width); + if (params->magnitude_squared) { + for (int i = 0; i < output_width; ++i) { + output_row[i] = spectrogram_row[i]; + } + } else { + for (int i = 0; i < output_width; ++i) { + output_row[i] = sqrtf(spectrogram_row[i]); + } + } + } + } + return kTfLiteOk; +} + +} // namespace audio_spectrogram + +TfLiteRegistration* Register_AUDIO_SPECTROGRAM() { + static TfLiteRegistration r = { + audio_spectrogram::Init, audio_spectrogram::Free, + audio_spectrogram::Prepare, + audio_spectrogram::Eval}; + return &r; +} + +} // namespace custom +} // namespace 
ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc new file mode 100644 index 0000000000..38708930d9 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc @@ -0,0 +1,122 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include +#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); + +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +class BaseAudioSpectrogramOpModel : public SingleOpModel { + public: + BaseAudioSpectrogramOpModel(const TensorData& input1, + const TensorData& output, int window_size, + int stride, bool magnitude_squared) { + input1_ = AddInput(input1); + output_ = AddOutput(output); + + flexbuffers::Builder fbb; + fbb.Map([&]() { + fbb.Int("window_size", window_size); + fbb.Int("stride", stride); + fbb.Bool("magnitude_squared", magnitude_squared); + }); + fbb.Finish(); + SetCustomOp("AudioSpectrogram", 
fbb.GetBuffer(), + Register_AUDIO_SPECTROGRAM); + BuildInterpreter({GetShape(input1_)}); + } + + int input1() { return input1_; } + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input1_; + int output_; +}; + +TEST(BaseAudioSpectrogramOpModel, NonSquaredTest) { + BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {8, 1}}, + {TensorType_FLOAT32, {}}, 8, 1, false); + m.PopulateTensor(m.input1(), + {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_EQ(3, output_shape.size()); + EXPECT_THAT(output_shape, ElementsAre(1, 1, 5)); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + {0.0f, 1.0f, 2.0f, 1.0f, 0.0f}, 1e-3))); +} + +TEST(SpectrogramOpTest, SquaredTest) { + BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {8, 1}}, + {TensorType_FLOAT32, {}}, 8, 1, true); + m.PopulateTensor(m.input1(), + {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_EQ(3, output_shape.size()); + EXPECT_THAT(output_shape, ElementsAre(1, 1, 5)); + + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + {0.f, 1.f, 4.f, 1.f, 0.f}, 1e-3))); +} + +TEST(SpectrogramOpTest, StrideTest) { + BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {10, 1}}, + {TensorType_FLOAT32, {}}, 8, 2, true); + m.PopulateTensor(m.input1(), {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, + 1.0f, 0.0f, 1.0f, 0.0f}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_THAT(output_shape, ElementsAre(1, 2, 5)); + EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( + {0, 1, 4, 1, 0, 1, 2, 1, 2, 1}, 1e-3))); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} 
diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc index 66ca694dc4..0e481a9d40 100644 --- a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc @@ -54,7 +54,6 @@ inline int Log2Floor(uint n) { log += shift; } } - assert(value == 1); return log; } diff --git a/tensorflow/contrib/lite/kernels/mfcc.cc b/tensorflow/contrib/lite/kernels/mfcc.cc new file mode 100644 index 0000000000..5dfcf8067e --- /dev/null +++ b/tensorflow/contrib/lite/kernels/mfcc.cc @@ -0,0 +1,154 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/internal/mfcc.h" +#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" +#include "tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h" +#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" +#include "tensorflow/contrib/lite/kernels/internal/tensor.h" +#include "tensorflow/contrib/lite/kernels/kernel_util.h" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace ops { +namespace custom { +namespace mfcc { + +enum KernelType { + kReference, +}; + +typedef struct { + float upper_frequency_limit; + float lower_frequency_limit; + int filterbank_channel_count; + int dct_coefficient_count; +} TfLiteMfccParams; + +constexpr int kInputTensorWav = 0; +constexpr int kInputTensorRate = 1; +constexpr int kOutputTensor = 0; + +void* Init(TfLiteContext* context, const char* buffer, size_t length) { + auto* data = new TfLiteMfccParams; + + const uint8_t* buffer_t = reinterpret_cast(buffer); + + const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); + data->upper_frequency_limit = m["upper_frequency_limit"].AsInt64(); + data->lower_frequency_limit = m["lower_frequency_limit"].AsInt64(); + data->filterbank_channel_count = m["filterbank_channel_count"].AsInt64(); + data->dct_coefficient_count = m["dct_coefficient_count"].AsInt64(); + return data; +} + +void Free(TfLiteContext* context, void* buffer) { + delete reinterpret_cast(buffer); +} + +TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->user_data); + + TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); + 
TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); + + TfLiteTensor* inputWav = GetInput(context, node, kInputTensorWav); + TfLiteTensor* inputRate = GetInput(context, node, kInputTensorRate); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + TF_LITE_ENSURE_EQ(context, NumDimensions(inputWav), 3); + TF_LITE_ENSURE_EQ(context, NumDimensions(inputRate), 1); + + TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); + TF_LITE_ENSURE_EQ(context, inputWav->type, output->type); + + TfLiteIntArray* output_size = TfLiteIntArrayCreate(3); + output_size->data[0] = inputWav->dims->data[0]; + output_size->data[1] = inputWav->dims->data[1]; + output_size->data[2] = params->dct_coefficient_count; + + return context->ResizeTensor(context, output, output_size); +} + +// Input is a single squared-magnitude spectrogram frame. The input spectrum +// is converted to linear magnitude and weighted into bands using a +// triangular mel filterbank, and a discrete cosine transform (DCT) of the +// values is taken. Output is populated with the lowest dct_coefficient_count +// of these values. 
+template +TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { + auto* params = reinterpret_cast(node->user_data); + + TfLiteTensor* inputWav = GetInput(context, node, kInputTensorWav); + TfLiteTensor* inputRate = GetInput(context, node, kInputTensorRate); + TfLiteTensor* output = GetOutput(context, node, kOutputTensor); + + const int32 sample_rate = *GetTensorData(inputRate); + + const int spectrogram_channels = inputWav->dims->data[2]; + const int spectrogram_samples = inputWav->dims->data[1]; + const int audio_channels = inputWav->dims->data[0]; + + internal::Mfcc mfcc; + mfcc.set_upper_frequency_limit(params->upper_frequency_limit); + mfcc.set_lower_frequency_limit(params->lower_frequency_limit); + mfcc.set_filterbank_channel_count(params->filterbank_channel_count); + mfcc.set_dct_coefficient_count(params->dct_coefficient_count); + + mfcc.Initialize(spectrogram_channels, sample_rate); + + const float* spectrogram_flat = GetTensorData(inputWav); + float* output_flat = GetTensorData(output); + + for (int audio_channel = 0; audio_channel < audio_channels; ++audio_channel) { + for (int spectrogram_sample = 0; spectrogram_sample < spectrogram_samples; + ++spectrogram_sample) { + const float* sample_data = + spectrogram_flat + + (audio_channel * spectrogram_samples * spectrogram_channels) + + (spectrogram_sample * spectrogram_channels); + std::vector mfcc_input(sample_data, + sample_data + spectrogram_channels); + std::vector mfcc_output; + mfcc.Compute(mfcc_input, &mfcc_output); + TF_LITE_ENSURE_EQ(context, params->dct_coefficient_count, + mfcc_output.size()); + float* output_data = output_flat + + (audio_channel * spectrogram_samples * + params->dct_coefficient_count) + + (spectrogram_sample * params->dct_coefficient_count); + for (int i = 0; i < params->dct_coefficient_count; ++i) { + output_data[i] = mfcc_output[i]; + } + } + } + + return kTfLiteOk; +} + +} // namespace mfcc + +TfLiteRegistration* Register_MFCC() { + static TfLiteRegistration r = 
{mfcc::Init, mfcc::Free, mfcc::Prepare, + mfcc::Eval}; + return &r; +} + +} // namespace custom +} // namespace ops +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/mfcc_test.cc b/tensorflow/contrib/lite/kernels/mfcc_test.cc new file mode 100644 index 0000000000..3f1b231f92 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/mfcc_test.cc @@ -0,0 +1,104 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include +#include +#include + +#include +#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" +#include "tensorflow/contrib/lite/interpreter.h" +#include "tensorflow/contrib/lite/kernels/register.h" +#include "tensorflow/contrib/lite/kernels/test_util.h" +#include "tensorflow/contrib/lite/model.h" + +namespace tflite { +namespace ops { +namespace custom { + +TfLiteRegistration* Register_MFCC(); + +namespace { + +using ::testing::ElementsAre; +using ::testing::ElementsAreArray; + +class BaseMfccOpModel : public SingleOpModel { + public: + BaseMfccOpModel(const TensorData& input1, const TensorData& input2, + const TensorData& output) { + input1_ = AddInput(input1); + input2_ = AddInput(input2); + output_ = AddOutput(output); + + flexbuffers::Builder fbb; + fbb.Map([&]() { + fbb.Int("upper_frequency_limit", 4000); + fbb.Int("lower_frequency_limit", 20); + fbb.Int("filterbank_channel_count", 40); + 
fbb.Int("dct_coefficient_count", 13); + }); + fbb.Finish(); + SetCustomOp("Mfcc", fbb.GetBuffer(), Register_MFCC); + + BuildInterpreter({GetShape(input1_), GetShape(input2_)}); + } + + int input1() { return input1_; } + int input2() { return input2_; } + std::vector GetOutput() { return ExtractVector(output_); } + std::vector GetOutputShape() { return GetTensorShape(output_); } + + protected: + int input1_; + int input2_; + int output_; +}; + +TEST(MfccOpTest, SimpleTest) { + BaseMfccOpModel m({TensorType_FLOAT32, {1, 1, 513}}, {TensorType_INT32, {1}}, + {TensorType_FLOAT32, {}}); + + std::vector data(513); + for (int i = 0; i < data.size(); ++i) { + data[i] = i + 1; + } + m.PopulateTensor(m.input1(), 0, data.data(), + data.data() + data.size()); + m.PopulateTensor(m.input2(), {22050}); + + m.Invoke(); + + std::vector output_shape = m.GetOutputShape(); + EXPECT_THAT(output_shape, ElementsAre(1, 1, 13)); + EXPECT_THAT( + m.GetOutput(), + ElementsAreArray(ArrayFloatNear( + {29.13970072, -6.41568601, -0.61903012, -0.96778652, -0.26819878, + -0.40907028, -0.15614748, -0.23203119, -0.10481487, -0.1543029, + -0.0769791, -0.10806114, -0.06047613}, + 1e-3))); +} + +} // namespace +} // namespace custom +} // namespace ops +} // namespace tflite + +int main(int argc, char** argv) { + ::tflite::LogToStderr(); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 9537b79a9a..369d3b9886 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -17,6 +17,14 @@ limitations under the License. 
namespace tflite { namespace ops { + +namespace custom { + +TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); +TfLiteRegistration* Register_MFCC(); + +} // namespace custom + namespace builtin { TfLiteRegistration* Register_RELU(); @@ -123,6 +131,12 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); AddBuiltin(BuiltinOperator_CAST, Register_CAST()); AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); + + // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that + // custom ops aren't always included by default. + AddCustom("Mfcc", tflite::ops::custom::Register_MFCC()); + AddCustom("AudioSpectrogram", + tflite::ops::custom::Register_AUDIO_SPECTROGRAM()); } TfLiteRegistration* BuiltinOpResolver::FindOp( -- GitLab From 45ef823633f2f1edd67a1fe02efb97e7014f4fee Mon Sep 17 00:00:00 2001 From: Amit Patankar Date: Thu, 8 Mar 2018 20:22:09 -0800 Subject: [PATCH 648/884] Removing certain attributes from pom-android.xml. 
PiperOrigin-RevId: 188433792 --- .../tensorflow-android/pom-android.xml.template | 2 -- .../java/maven/tensorflow-android/update.py | 15 ++++----------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/tensorflow/java/maven/tensorflow-android/pom-android.xml.template b/tensorflow/java/maven/tensorflow-android/pom-android.xml.template index 5cbd0c898d..37d2372d7b 100644 --- a/tensorflow/java/maven/tensorflow-android/pom-android.xml.template +++ b/tensorflow/java/maven/tensorflow-android/pom-android.xml.template @@ -20,10 +20,8 @@ UTF-8 - ${build_number} ${build_commit_id} ${build_type} - ${build_url} diff --git a/tensorflow/java/maven/tensorflow-android/update.py b/tensorflow/java/maven/tensorflow-android/update.py index 4ae666e4e5..2206d800ca 100644 --- a/tensorflow/java/maven/tensorflow-android/update.py +++ b/tensorflow/java/maven/tensorflow-android/update.py @@ -45,6 +45,9 @@ def get_json(url): def get_commit_id(build_info): """Fetch the git commit id from the build info json object.""" + release_commit_id = build_info.get('build_commit_id') + if release_commit_id: + return release_commit_id actions = build_info.get('actions') build_data = next( a for a in actions @@ -95,20 +98,12 @@ def main(): release_prefix = 'https://storage.googleapis.com/tensorflow/libtensorflow' info_url = '%s/android_buildinfo-%s.json' % (release_prefix, args.version) aar_url = '%s/tensorflow-%s.aar' % (release_prefix, args.version) - build_type = 'release-matrix-android2' + build_type = 'release-android' # Retrieve build information build_info = get_json(info_url) # Check all required build info is present - if build_info.get('result') != 'SUCCESS': - raise ValueError('Invalid json: %s' % build_info) - build_url = build_info.get('url') - if not build_url: - raise ValueError('Missing url: %s' % build_info) - build_number = build_info.get('number') - if not build_number: - raise ValueError('Missing build number: %s' % build_info) build_commit_id = get_commit_id(build_info) 
if not build_commit_id: raise ValueError('Missing commit id: %s' % build_info) @@ -119,9 +114,7 @@ def main(): f.write( template.substitute({ 'build_commit_id': build_commit_id, - 'build_number': build_number, 'build_type': build_type, - 'build_url': build_url, 'version': args.version })) -- GitLab From 7dbe0cf7ecc4d0560ec9081b443ada693e4e6096 Mon Sep 17 00:00:00 2001 From: Jingyue Wu Date: Thu, 8 Mar 2018 22:05:27 -0800 Subject: [PATCH 649/884] Collapse adjacent dimensions that have no paddings. For example, tf.pad(<4D tensor>, [[0, 0], [0, 0], [0, 0], [0, 1]]) is equivalent to a 2D pad, which is faster. PiperOrigin-RevId: 188440916 --- tensorflow/core/kernels/pad_op.cc | 124 ++++++++++++++++-- tensorflow/python/kernel_tests/pad_op_test.py | 25 ++++ 2 files changed, 138 insertions(+), 11 deletions(-) diff --git a/tensorflow/core/kernels/pad_op.cc b/tensorflow/core/kernels/pad_op.cc index 77c180873f..ce795414de 100644 --- a/tensorflow/core/kernels/pad_op.cc +++ b/tensorflow/core/kernels/pad_op.cc @@ -104,42 +104,144 @@ class PadOp : public OpKernel { return; } - Tensor* output = nullptr; - OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output)); + TensorShape collapsed_input_shape; + TensorShape collapsed_output_shape; + Tensor collapsed_paddings; + if (fixed_dims > 1 && + CollapseAdjacentNonPaddedDimensions( + in0.shape(), in1, output_shape, &collapsed_input_shape, + &collapsed_paddings, &collapsed_output_shape)) { + Tensor collapsed_input; + CHECK(collapsed_input.CopyFrom(in0, collapsed_input_shape)); + Tensor collapsed_output; + OP_REQUIRES_OK(context, context->allocate_temp(collapsed_input.dtype(), + collapsed_output_shape, + &collapsed_output)); + const Tensor& collapsed_paddings_ref = collapsed_paddings; + typename TTypes::ConstMatrix collapsed_paddings_matrix = + collapsed_paddings_ref.matrix(); + OperateWithVariableRank(context, collapsed_input_shape.dims(), + collapsed_input, collapsed_paddings_matrix, + pad_value, &collapsed_output); + 
+ Tensor output; + CHECK(output.CopyFrom(collapsed_output, output_shape)); + context->set_output(0, output); + } else { + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, output_shape, &output)); + OperateWithVariableRank(context, fixed_dims, in0, paddings, pad_value, + output); + } + } + + private: + // Collapses adjacent dimensions that are not padded to one dimension for + // speed. Returns true if any two dimensions are collapsed. For example, + // + // Pad(input_shape=[8, 28, 28, 3], + // paddings=[[0, 0], [0, 0], [0, 0], [0, 1]] + // is equivalent to + // Pad(input_shape=[6272, 3], + // paddings=[[0, 0], [0, 1]]) + // + // input_shape: the original input shape. + // paddings_as_tensor: the original paddings. + // output_shape: the original output shape. + // collapsed_input_shape: the input shape after collapsing. + // collapsed_paddings_as_tensor: the paddings after collapsing. + // collapsed_output_shape: the output shape after collapsing. + static bool CollapseAdjacentNonPaddedDimensions( + const TensorShape& input_shape, const Tensor& paddings_as_tensor, + const TensorShape& output_shape, TensorShape* collapsed_input_shape, + Tensor* collapsed_paddings_as_tensor, + TensorShape* collapsed_output_shape) { + bool collapsed = false; + typename TTypes::ConstMatrix paddings = + paddings_as_tensor.matrix(); + std::vector> collapsed_paddings; + int i = 0; + while (i < paddings.dimension(0)) { + if (paddings(i, 0) != 0 || paddings(i, 1) != 0) { + // If padded, copy the original dimension over. + collapsed_input_shape->InsertDim(collapsed_input_shape->dims(), + input_shape.dim_size(i)); + collapsed_output_shape->InsertDim(collapsed_output_shape->dims(), + output_shape.dim_size(i)); + collapsed_paddings.push_back({paddings(i, 0), paddings(i, 1)}); + ++i; + } else { + // If not padded, find the next dimension that is padded and collapse + // all dimensions in between to one dimension. 
+ int64 collapsed_input_dim_size = input_shape.dim_size(i); + int64 collapsed_output_dim_size = output_shape.dim_size(i); + ++i; + while (i < paddings.dimension(0) && paddings(i, 0) == 0 && + paddings(i, 1) == 0) { + collapsed = true; + collapsed_input_dim_size *= input_shape.dim_size(i); + collapsed_output_dim_size *= output_shape.dim_size(i); + ++i; + } + collapsed_input_shape->InsertDim(collapsed_input_shape->dims(), + collapsed_input_dim_size); + collapsed_output_shape->InsertDim(collapsed_output_shape->dims(), + collapsed_output_dim_size); + collapsed_paddings.push_back({0, 0}); + } + } + + // Copy collapsed_paddings to collapsed_paddings_as_tensor. + *collapsed_paddings_as_tensor = + Tensor(paddings_as_tensor.dtype(), + TensorShape({static_cast(collapsed_paddings.size()), 2})); + auto collapsed_paddings_as_matrix = + collapsed_paddings_as_tensor->matrix(); + for (size_t i = 0; i < collapsed_paddings.size(); ++i) { + collapsed_paddings_as_matrix(i, 0) = collapsed_paddings[i].first; + collapsed_paddings_as_matrix(i, 1) = collapsed_paddings[i].second; + } + return collapsed; + } + + void OperateWithVariableRank(OpKernelContext* context, int fixed_dims, + const Tensor& input, + typename TTypes::ConstMatrix paddings, + T pad_value, Tensor* output) { // Invoke the dims-specific implementation. switch (fixed_dims) { case 0: - Operate<0>(context, in0.tensor(), paddings, pad_value, output); + Operate<0>(context, input.tensor(), paddings, pad_value, output); break; case 1: // TODO(irving): Once Pad doesn't need a scalar special case, // change flat to tensor. That is, once !allow_legacy_scalars(). 
- Operate<1>(context, in0.flat(), paddings, pad_value, output); + Operate<1>(context, input.flat(), paddings, pad_value, output); break; case 2: - Operate<2>(context, in0.tensor(), paddings, pad_value, output); + Operate<2>(context, input.tensor(), paddings, pad_value, output); break; case 3: - Operate<3>(context, in0.tensor(), paddings, pad_value, output); + Operate<3>(context, input.tensor(), paddings, pad_value, output); break; case 4: - Operate<4>(context, in0.tensor(), paddings, pad_value, output); + Operate<4>(context, input.tensor(), paddings, pad_value, output); break; case 5: - Operate<5>(context, in0.tensor(), paddings, pad_value, output); + Operate<5>(context, input.tensor(), paddings, pad_value, output); break; case 6: - Operate<6>(context, in0.tensor(), paddings, pad_value, output); + Operate<6>(context, input.tensor(), paddings, pad_value, output); break; default: OP_REQUIRES(context, false, errors::InvalidArgument("Only ranks up to 6 supported: ", - in0.shape().DebugString())); + input.shape().DebugString())); } } - private: template void Operate(OpKernelContext* context, typename TTypes::ConstTensor input, diff --git a/tensorflow/python/kernel_tests/pad_op_test.py b/tensorflow/python/kernel_tests/pad_op_test.py index aaeb3b199e..236aa4abe1 100644 --- a/tensorflow/python/kernel_tests/pad_op_test.py +++ b/tensorflow/python/kernel_tests/pad_op_test.py @@ -336,5 +336,30 @@ class PadOpTest(test.TestCase): self.assertAllEqual(inp, out) self.assertShapeEqual(inp, tf_val) + def testCollapseAdjacentNonPaddedDimensions(self): + # pyformat: disable + for paddings_value in [[[0, 0], [0, 0], [0, 0], [0, 1]], + [[0, 0], [2, 3], [0, 0], [0, 0]], + [[0, 0], [0, 0], [0, 0], [0, 0]]]: + # pyformat: enable + inp = constant_op.constant(1.0, shape=[8, 28, 28, 3]) + paddings = constant_op.constant(paddings_value, dtype=dtypes.int32) + padded = array_ops.pad(inp, paddings) + middle = array_ops.slice(padded, [row[0] for row in paddings_value], + [dim.value for dim in 
inp.shape.dims]) + left = array_ops.slice(padded, [0, 0, 0, 0], + [row[0] for row in paddings_value]) + right = array_ops.slice( + padded, + [paddings_value[i][0] + inp.shape.dims[i].value for i in range(4)], + [-1, -1, -1, -1]) + with self.test_session(use_gpu=True): + self.assertAllEqual(inp.eval(), middle.eval()) + self.assertAllEqual( + np.zeros([row[0] for row in paddings_value]), left.eval()) + self.assertAllEqual( + np.zeros([row[1] for row in paddings_value]), right.eval()) + + if __name__ == "__main__": test.main() -- GitLab From 26b83da42fb47015aabd6ba1aa8e6d41ff8763dc Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Thu, 8 Mar 2018 23:28:53 -0800 Subject: [PATCH 650/884] Remove a layer of templatization With this change - HloTestBase always calls HloRunner with an array of non-owning Literal pointers as arguments - HloRunner no longer has a general LiteralPtr, but just provides explicit overloads for std::unique_ptr and Literal* This was prompted by a dependent change that needs to call HloTestBase::RunAndCompare with Literal* arguments. 
PiperOrigin-RevId: 188446331 --- tensorflow/compiler/xla/service/hlo_runner.cc | 2 +- tensorflow/compiler/xla/service/hlo_runner.h | 37 ++++++++----------- .../compiler/xla/tests/hlo_test_base.cc | 32 +++++++++------- tensorflow/compiler/xla/tests/hlo_test_base.h | 12 ++---- 4 files changed, 38 insertions(+), 45 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index 41b079eb79..d65befaf84 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -110,7 +110,7 @@ HloRunner::HloRunner(se::Platform* platform) { HloRunner::~HloRunner() {} -StatusOr> HloRunner::ExecuteInternal( +StatusOr> HloRunner::Execute( std::unique_ptr module, const tensorflow::gtl::ArraySlice arguments, bool run_hlo_passes) { diff --git a/tensorflow/compiler/xla/service/hlo_runner.h b/tensorflow/compiler/xla/service/hlo_runner.h index cbaebc68be..06ce22a5b9 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.h +++ b/tensorflow/compiler/xla/service/hlo_runner.h @@ -27,6 +27,7 @@ limitations under the License. #include "tensorflow/compiler/xla/status_macros.h" #include "tensorflow/compiler/xla/statusor.h" #include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" #include "tensorflow/compiler/xla/xla_data.pb.h" #include "tensorflow/core/lib/gtl/array_slice.h" #include "tensorflow/core/platform/stream_executor_no_cuda.h" @@ -64,17 +65,27 @@ class HloRunner { const std::string& filename, const DebugOptions& debug_options); // Executes the given module with given literals as input and returns the - // result as a Literal. The LiteralPtr type accepts Literal* or - // std::unique_ptr. + // result as a Literal. // // If run_hlo_passes is false, the module will be executed without Hlo // optimization. 
- template StatusOr> Execute( std::unique_ptr module, - const tensorflow::gtl::ArraySlice arguments, + const tensorflow::gtl::ArraySlice arguments, bool run_hlo_passes = true); + StatusOr> Execute( + std::unique_ptr module, + const tensorflow::gtl::ArraySlice> arguments, + bool run_hlo_passes = true) { + // Construct a vector of plain pointers for the arguments. + std::vector argument_pointers; + c_transform( + arguments, std::back_inserter(argument_pointers), + [](const std::unique_ptr& literal) { return literal.get(); }); + return Execute(std::move(module), argument_pointers, run_hlo_passes); + } + // If backend is not created in the constructor, creates and returns the // default backend. If creation fails, crashes the program. // @@ -83,11 +94,6 @@ class HloRunner { Backend& backend(); private: - StatusOr> ExecuteInternal( - std::unique_ptr module, - const tensorflow::gtl::ArraySlice arguments, - bool run_hlo_passes = true); - struct EigenThreadPoolWrapper; std::unique_ptr thread_pool_wrapper_; @@ -95,19 +101,6 @@ class HloRunner { std::unique_ptr backend_; }; -template -StatusOr> HloRunner::Execute( - std::unique_ptr module, - const tensorflow::gtl::ArraySlice arguments, - bool run_hlo_passes) { - // Construct a vector of plain pointers for the arguments. 
- std::vector argument_pointers; - for (const auto& argument : arguments) { - argument_pointers.push_back(&*argument); - } - return ExecuteInternal(std::move(module), argument_pointers, run_hlo_passes); -} - } // namespace xla #endif // TENSORFLOW_COMPILER_XLA_SERVICE_HLO_RUNNER_H_ diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.cc b/tensorflow/compiler/xla/tests/hlo_test_base.cc index 6723c99edb..5f62c44f25 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.cc +++ b/tensorflow/compiler/xla/tests/hlo_test_base.cc @@ -140,15 +140,10 @@ StatusOr> HloTestBase::MakeReferenceModule( return std::move(reference_module); } -template StatusOr<::testing::AssertionResult> HloTestBase::RunAndCompareInternal( - std::unique_ptr module, const ArraySlice arguments, + std::unique_ptr module, const ArraySlice arguments, const optional& error, bool run_hlo_passes, const std::function& reference_preprocessor) { - static_assert( - std::is_same::value || - std::is_same, LiteralPtr>::value, - "The LiteralPtr type only accepts Literal* or std::unique_ptr."); TF_RETURN_IF_ERROR( VerifyHloModule(*test_runner_.backend().platform(), module.get())); TF_ASSIGN_OR_RETURN(auto reference_module, @@ -165,9 +160,8 @@ StatusOr<::testing::AssertionResult> HloTestBase::RunAndCompareInternal( error); } -template ::testing::AssertionResult HloTestBase::RunAndCompare( - std::unique_ptr module, const ArraySlice arguments, + std::unique_ptr module, const ArraySlice arguments, const optional& error, const std::function& reference_preprocessor) { auto result = @@ -179,9 +173,8 @@ template return result.ValueOrDie(); } -template ::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses( - std::unique_ptr module, const ArraySlice arguments, + std::unique_ptr module, const ArraySlice arguments, const optional& error, const std::function& reference_preprocessor) { auto result = @@ -198,8 +191,14 @@ template const std::function& reference_preprocessor) { const auto& fake_arguments = 
MakeFakeArguments(module.get()).ConsumeValueOrDie(); - return RunAndCompare>( - std::move(module), fake_arguments, error, reference_preprocessor); + + std::vector fake_argument_ptrs; + c_transform( + fake_arguments, std::back_inserter(fake_argument_ptrs), + [](const std::unique_ptr& literal) { return literal.get(); }); + + return RunAndCompare(std::move(module), fake_argument_ptrs, error, + reference_preprocessor); } ::testing::AssertionResult HloTestBase::RunAndCompareNoHloPasses( @@ -207,8 +206,13 @@ template const std::function& reference_preprocessor) { const auto& fake_arguments = MakeFakeArguments(module.get()).ConsumeValueOrDie(); - return RunAndCompareNoHloPasses>( - std::move(module), fake_arguments, error, reference_preprocessor); + std::vector fake_argument_ptrs; + c_transform( + fake_arguments, std::back_inserter(fake_argument_ptrs), + [](const std::unique_ptr& literal) { return literal.get(); }); + + return RunAndCompareNoHloPasses(std::move(module), fake_argument_ptrs, error, + reference_preprocessor); } ::testing::AssertionResult HloTestBase::RunAndCompare( diff --git a/tensorflow/compiler/xla/tests/hlo_test_base.h b/tensorflow/compiler/xla/tests/hlo_test_base.h index 4d49b7071d..e375f13a44 100644 --- a/tensorflow/compiler/xla/tests/hlo_test_base.h +++ b/tensorflow/compiler/xla/tests/hlo_test_base.h @@ -104,8 +104,7 @@ class HloTestBase : public ::testing::Test { // Executes the given hlo module on two backends and compares results. // - // 'arguments': the input of the hlo module. The LiteralPtr type accepts - // Literal* or std::unique_ptr. + // 'arguments': the input of the hlo module. // // 'error': if has value, expects the results to be near (within the error // bound). Otherwise, expects the results to be equal. @@ -114,20 +113,18 @@ class HloTestBase : public ::testing::Test { // backend, but it might need to be tailored so that it is able to run on the // reference backend. Note that the program shape of the module must not be // modified. 
- template ::testing::AssertionResult RunAndCompare( std::unique_ptr module, - const tensorflow::gtl::ArraySlice arguments, + const tensorflow::gtl::ArraySlice arguments, const tensorflow::gtl::optional& error, const std::function& reference_preprocessor = nullptr) TF_MUST_USE_RESULT; // Same as above, except that the module will be executed without Hlo // optimization. - template ::testing::AssertionResult RunAndCompareNoHloPasses( std::unique_ptr module, - const tensorflow::gtl::ArraySlice arguments, + const tensorflow::gtl::ArraySlice arguments, const tensorflow::gtl::optional& error, const std::function& reference_preprocessor = nullptr) TF_MUST_USE_RESULT; @@ -232,10 +229,9 @@ class HloTestBase : public ::testing::Test { // Runs the module on two platforms with or without running hlo passes and // compares the results. Returns whether the results are near or equal. If any // error happens before the results are computed, returns the error status. - template StatusOr<::testing::AssertionResult> RunAndCompareInternal( std::unique_ptr module, - const tensorflow::gtl::ArraySlice arguments, + const tensorflow::gtl::ArraySlice arguments, const tensorflow::gtl::optional& error, bool run_hlo_passes, const std::function& reference_preprocessor); }; -- GitLab From 0004c829f69ff14058ce8679d4807c866f950ef6 Mon Sep 17 00:00:00 2001 From: Yifei Feng <1192265+yifeif@users.noreply.github.com> Date: Thu, 8 Mar 2018 23:56:44 -0800 Subject: [PATCH 651/884] Fix pylint error (#17575) --- tensorflow/contrib/py2tf/converters/single_return.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/py2tf/converters/single_return.py b/tensorflow/contrib/py2tf/converters/single_return.py index 90bc22008f..1194b98f5e 100644 --- a/tensorflow/contrib/py2tf/converters/single_return.py +++ b/tensorflow/contrib/py2tf/converters/single_return.py @@ -212,7 +212,7 @@ class DetectReturnInUnsupportedControlFlow(gast.NodeVisitor): def __init__(self): self.cant_return = 
False - super(gast.NodeVisitor, self).__init__() + super(DetectReturnInUnsupportedControlFlow, self).__init__() def visit_While(self, node): self.cant_return = True -- GitLab From d0c647ff2f6f3398252c9831c8b49e8a2c3c8db5 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 9 Mar 2018 00:09:03 -0800 Subject: [PATCH 652/884] Fix misleading comment. PiperOrigin-RevId: 188450336 --- tensorflow/compiler/xla/service/llvm_ir/ir_array.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h index b942717512..faa92d608c 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h @@ -76,8 +76,7 @@ class IrArray { llvm::IRBuilder<>* ir_builder); // Constructs an index from the given multi-dimensional index and the shape - // that it indexes into. Also, computes the linear index according to - // "shape". + // that it indexes into. // // Precondition: "shape" has a layout. Index(tensorflow::gtl::ArraySlice multidim, -- GitLab From 462756fcb33e2dd7c6f5132459612442d36d8476 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 9 Mar 2018 00:14:46 -0800 Subject: [PATCH 653/884] Fix cmake build errors for Linux (#17581) When trying to build TensorFlow with cmake for Linux, as was specified: ``` tensorflow/tools/ci_build/ci_build.sh CMAKE tensorflow/tools/ci_build/builds/cmake.sh ``` The following error was encountered: ``` grpc/src/grpc/libgrpc_unsecure.a(grpc_ares_wrapper.cc.o): In function `on_txt_done_cb(void*, int, int, unsigned char*, int)': grpc_ares_wrapper.cc:(.text+0x256): undefined reference to `ares_parse_txt_reply_ext' grpc_ares_wrapper.cc:(.text+0x267): undefined reference to `ares_strerror' grpc_ares_wrapper.cc:(.text+0x363): undefined reference to `ares_free_data' ``` This fix fixes the above issue with libcares.a in the cmake file. 
Signed-off-by: Yong Tang --- tensorflow/contrib/cmake/external/grpc.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/cmake/external/grpc.cmake b/tensorflow/contrib/cmake/external/grpc.cmake index a9f43a3ecb..17f65999fa 100644 --- a/tensorflow/contrib/cmake/external/grpc.cmake +++ b/tensorflow/contrib/cmake/external/grpc.cmake @@ -35,6 +35,7 @@ else() set(grpc_STATIC_LIBRARIES ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc++_unsecure.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgrpc_unsecure.a + ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/third_party/cares/cares/lib/libcares.a ${CMAKE_CURRENT_BINARY_DIR}/grpc/src/grpc/libgpr.a) endif() -- GitLab From 3c3d02b31fb8da4135f83dd5bcfd96f187ab2fd5 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Fri, 9 Mar 2018 00:15:06 -0800 Subject: [PATCH 654/884] Hide `os` from docs generator. (#17576) Delete `os` so the docs generator doesn't build docs for it. --- tensorflow/contrib/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/__init__.py b/tensorflow/contrib/__init__.py index bcf0d7b48b..669d611b01 100644 --- a/tensorflow/contrib/__init__.py +++ b/tensorflow/contrib/__init__.py @@ -95,6 +95,7 @@ from tensorflow.contrib.summary import summary from tensorflow.python.util.lazy_loader import LazyLoader ffmpeg = LazyLoader("ffmpeg", globals(), "tensorflow.contrib.ffmpeg") +del os del LazyLoader del absolute_import -- GitLab From bd8eb65ad20d0c72ebb02cd61f8e9a6420a189ac Mon Sep 17 00:00:00 2001 From: Dahan Gong Date: Fri, 9 Mar 2018 16:17:14 +0800 Subject: [PATCH 655/884] fix compilation errors on MSVC if IS_SLIM_BUILD (#17546) --- tensorflow/core/lib/io/record_reader.cc | 2 ++ tensorflow/core/lib/io/record_reader.h | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/lib/io/record_reader.cc b/tensorflow/core/lib/io/record_reader.cc index 254fdf115d..6de850bb20 100644 --- a/tensorflow/core/lib/io/record_reader.cc +++ 
b/tensorflow/core/lib/io/record_reader.cc @@ -205,7 +205,9 @@ Status RecordReader::SkipNBytes(uint64 offset) { if (options_.buffer_size > 0) { TF_RETURN_IF_ERROR(input_stream_->SkipNBytes(offset)); } +#if !defined(IS_SLIM_BUILD) } +#endif return Status::OK(); } // namespace io diff --git a/tensorflow/core/lib/io/record_reader.h b/tensorflow/core/lib/io/record_reader.h index 62dd2efb79..26278e0328 100644 --- a/tensorflow/core/lib/io/record_reader.h +++ b/tensorflow/core/lib/io/record_reader.h @@ -16,10 +16,10 @@ limitations under the License. #ifndef TENSORFLOW_LIB_IO_RECORD_READER_H_ #define TENSORFLOW_LIB_IO_RECORD_READER_H_ -#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/errors.h" #include "tensorflow/core/lib/core/stringpiece.h" -#if !defined(IS_SLIM_BUILD) #include "tensorflow/core/lib/io/inputstream_interface.h" +#if !defined(IS_SLIM_BUILD) #include "tensorflow/core/lib/io/zlib_compression_options.h" #include "tensorflow/core/lib/io/zlib_inputstream.h" #endif // IS_SLIM_BUILD -- GitLab From 60a21e25b0261369a15ca1d17505d7c3c82be967 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Fri, 9 Mar 2018 00:21:26 -0800 Subject: [PATCH 656/884] Fix broken link pointing to vulnerability reporting/SECURITY.md (#17453) The vulnerability reporting (SECURITY.md) has been moved to the top-level directory; this fix fixes the broken link inside tensorflow/docs_src/community/welcome.md Signed-off-by: Yong Tang --- tensorflow/docs_src/community/welcome.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index 9f6fe91b14..d2d3f9edae 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -65,5 +65,5 @@ please read the following list carefully: on GitHub. For example, use the issue tracker to request a new operation in TensorFlow. 
* To report vulnerabilities, please follow our - [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/SECURITY.md). + [vulnerability disclosure guidelines](https://github.com/tensorflow/tensorflow/blob/master/SECURITY.md). -- GitLab From 02fcab8e8abe75b350c116ed6b4382a9561c145c Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 9 Mar 2018 02:16:53 -0800 Subject: [PATCH 657/884] [StreamExecutor] Remove ThenDoHostCallbackForTest -- it's identical to ThenDoHostCallback. The reason this came about is: ThenDoHostCallback was once private, and ThenDoHostCallbackForTest was public. Then at some point ThenDoHostCallback became public, but the *ForTest one was never removed. PiperOrigin-RevId: 188459741 --- tensorflow/stream_executor/stream.cc | 6 ------ tensorflow/stream_executor/stream.h | 5 ++--- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/tensorflow/stream_executor/stream.cc b/tensorflow/stream_executor/stream.cc index 4d852e6e5a..6bbb5f0b2e 100644 --- a/tensorflow/stream_executor/stream.cc +++ b/tensorflow/stream_executor/stream.cc @@ -5020,12 +5020,6 @@ Stream &Stream::ThenTransformTensor(const dnn::BatchDescriptor &input_desc, return *this; } -Stream &Stream::ThenDoHostCallbackForTest(std::function callback) { - VLOG_CALL(PARAM(callback)); - - return ThenDoHostCallback(callback); -} - Stream &Stream::ThenDoHostCallback(std::function callback) { VLOG_CALL(PARAM(callback)); diff --git a/tensorflow/stream_executor/stream.h b/tensorflow/stream_executor/stream.h index 8cd0a0d3ba..d7d1131569 100644 --- a/tensorflow/stream_executor/stream.h +++ b/tensorflow/stream_executor/stream.h @@ -1968,16 +1968,15 @@ class Stream { // Entrains onto the stream a callback to the host (from the device). // Host callbacks block/occupy the stream just as device functions // (execute one at a time, block later stream operations). + // // Behavior is undefined when synchronizing using OpenCL user events. 
// Behavior is undefined if host callbacks call device routines or insert // them into any stream. + // // On certain platforms, ThenDoHostCallback is expected to have significant // negative effects on performance. Stream &ThenDoHostCallback(std::function callback); - // Identical to ThenDoHostCallback; only exposed for testing purposes. - Stream &ThenDoHostCallbackForTest(std::function callback); - // Returns the StreamExecutor (parent object) associated with this stream. StreamExecutor *parent() const { CHECK(parent_ != nullptr); -- GitLab From a0b69a790217b1673cbd82aeedd18aa8dfa74652 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 06:41:33 -0800 Subject: [PATCH 658/884] Convert Squeeze into Reshape and support squeezes on all dimensions. PiperOrigin-RevId: 188477922 --- tensorflow/contrib/lite/toco/BUILD | 1 + .../contrib/lite/toco/export_tensorflow.cc | 8 +- .../convert_squeeze_to_reshape.cc | 80 +++++++++++++++++++ .../graph_transformations.h | 1 + .../contrib/lite/toco/import_tensorflow.cc | 9 ++- tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + 6 files changed, 94 insertions(+), 6 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index 031db2bd7c..fe4e18ddd0 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -173,6 +173,7 @@ cc_library( "graph_transformations/convert_expanddims_to_reshape.cc", "graph_transformations/convert_pure_conv_to_depthwise.cc", "graph_transformations/convert_reorder_axes.cc", + "graph_transformations/convert_squeeze_to_reshape.cc", "graph_transformations/convert_trivial_addn_to_add.cc", "graph_transformations/convert_trivial_stack_to_reshape.cc", "graph_transformations/convert_trivial_transpose_to_reshape.cc", diff --git a/tensorflow/contrib/lite/toco/export_tensorflow.cc 
b/tensorflow/contrib/lite/toco/export_tensorflow.cc index 695def7ba3..22a23357b3 100644 --- a/tensorflow/contrib/lite/toco/export_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/export_tensorflow.cc @@ -1654,9 +1654,11 @@ void ConvertSqueezeOperator(const Model& model, const SqueezeOperator& src_op, const auto params_type = GetTensorFlowDataType(model, src_op.inputs[0]); (*new_op->mutable_attr())["T"].set_type(params_type); - auto& squeeze_dims = (*new_op->mutable_attr())["squeeze_dims"]; - for (int i : src_op.squeeze_dims) { - squeeze_dims.mutable_list()->add_i(i); + if (!src_op.squeeze_dims.empty()) { + auto& squeeze_dims = (*new_op->mutable_attr())["squeeze_dims"]; + for (int i : src_op.squeeze_dims) { + squeeze_dims.mutable_list()->add_i(i); + } } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc new file mode 100644 index 0000000000..e601284495 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc @@ -0,0 +1,80 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include +#include + +#include "absl/strings/str_cat.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +// Replaces a tf.squeeze operator with a reshape. +// Squeeze removes dimensions == 1 (if in the list of squeeze_dims). This +// means that the data layout will never change with this op, just the shape. +// By converting these to reshapes once we have run shape propagation we allow +// standard reshape optimization transforms to do their magic. +bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { + auto squeeze_it = model->operators.begin() + op_index; + if (squeeze_it->get()->type != OperatorType::kSqueeze) { + return false; + } + auto squeeze_op = static_cast(squeeze_it->get()); + CHECK_EQ(squeeze_op->inputs.size(), 1); + CHECK_EQ(squeeze_op->outputs.size(), 1); + + const auto& input_array = model->GetArray(squeeze_op->inputs[0]); + if (!input_array.has_shape()) { + // Yield until input dims have been resolved. + return false; + } + if (input_array.shape().dimensions_count() == 0) { + // Input array cannot be 0-D. + return false; + } + if (!model->HasArray(squeeze_op->outputs[0]) || + !model->GetArray(squeeze_op->outputs[0]).has_shape()) { + // Yield until shape propagation has set the output shape for us. + return false; + } + + // We use the output shape that has been calculated by shape propagation. 
+ const auto& output_shape = model->GetArray(squeeze_op->outputs[0]).shape(); + + auto* reshape_op = new TensorFlowReshapeOperator; + reshape_op->inputs = { + squeeze_op->inputs[0], + CreateInt32Array(model, squeeze_op->outputs[0] + "_shape", + output_shape.dims()), + }; + reshape_op->outputs = squeeze_op->outputs; + + AddMessageF("Replacing %s with %s", LogName(*squeeze_op), + LogName(*reshape_op)); + + // Replace the operator in the graph. + const auto reshape_it = model->operators.emplace(squeeze_it, reshape_op); + squeeze_it = reshape_it + 1; + CHECK_EQ(squeeze_it->get(), squeeze_op); + model->operators.erase(squeeze_it); + + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index f0739990ad..2958479dc2 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -114,6 +114,7 @@ void RunGraphTransformations(Model* model, const string& message, // List of all graph transformations DECLARE_GRAPH_TRANSFORMATION(ConvertExpandDimsToReshape) DECLARE_GRAPH_TRANSFORMATION(ConvertPureConvToDepthwise) +DECLARE_GRAPH_TRANSFORMATION(ConvertSqueezeToReshape) DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialAddNToAdd) DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialStackToReshape) DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialTransposeToReshape) diff --git a/tensorflow/contrib/lite/toco/import_tensorflow.cc b/tensorflow/contrib/lite/toco/import_tensorflow.cc index 50aeafdf8d..a7a50e6fc9 100644 --- a/tensorflow/contrib/lite/toco/import_tensorflow.cc +++ b/tensorflow/contrib/lite/toco/import_tensorflow.cc @@ -727,9 +727,12 @@ void ConvertSqueezeOperator(const NodeDef& node, op->inputs.push_back(node.input(0)); op->outputs.push_back(node.name()); - const auto& squeeze_dims = GetListAttr(node, "squeeze_dims"); - for (int i = 0; i < 
squeeze_dims.i_size(); ++i) { - op->squeeze_dims.push_back(squeeze_dims.i(i)); + // When omitted we are to squeeze all dimensions == 1. + if (HasAttr(node, "squeeze_dims")) { + const auto& squeeze_dims = GetListAttr(node, "squeeze_dims"); + for (int i = 0; i < squeeze_dims.i_size(); ++i) { + op->squeeze_dims.push_back(squeeze_dims.i(i)); + } } model->operators.emplace_back(op); diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 42e0a89017..8ca28922a6 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -52,6 +52,7 @@ void MakeGeneralGraphTransformationsSet( GraphTransformationsSet* transformations) { CHECK(transformations->empty()); transformations->Add(new ConvertExpandDimsToReshape); + transformations->Add(new ConvertSqueezeToReshape); transformations->Add(new ConvertTrivialAddNToAdd); transformations->Add(new ConvertTrivialStackToReshape); transformations->Add(new ConvertTrivialTransposeToReshape); -- GitLab From 6478a30b84a6620b853b450761e12f7075b7a43f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 06:45:57 -0800 Subject: [PATCH 659/884] Adding support for constant Gather ops. 
PiperOrigin-RevId: 188478173 --- tensorflow/contrib/lite/toco/BUILD | 1 + .../graph_transformations.h | 1 + .../resolve_constant_gather.cc | 134 ++++++++++++++++++ .../unpartition_embedding_lookup.cc | 3 + tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + 5 files changed, 140 insertions(+) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index fe4e18ddd0..bf4396486e 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -219,6 +219,7 @@ cc_library( "graph_transformations/resolve_constant_concatenation.cc", "graph_transformations/resolve_constant_fake_quant.cc", "graph_transformations/resolve_constant_fill.cc", + "graph_transformations/resolve_constant_gather.cc", "graph_transformations/resolve_constant_range.cc", "graph_transformations/resolve_constant_shape_or_rank.cc", "graph_transformations/resolve_constant_stack.cc", diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 2958479dc2..1447de1220 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -176,6 +176,7 @@ DECLARE_GRAPH_TRANSFORMATION(ResolveConstantShapeOrRank) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStack) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantStridedSlice) DECLARE_GRAPH_TRANSFORMATION(ResolveConstantFill) +DECLARE_GRAPH_TRANSFORMATION(ResolveConstantGather) DECLARE_GRAPH_TRANSFORMATION(ResolveMultiplyByZero) DECLARE_GRAPH_TRANSFORMATION(Dequantize) DECLARE_GRAPH_TRANSFORMATION(UnpartitionEmbeddingLookup) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc new file mode 100644 
index 0000000000..d999c2df94 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_gather.cc @@ -0,0 +1,134 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +namespace { + +// Gathers data from axis 0. 
+template +inline void Gather(const Array& input_array, int input_rank, + const Array& coords_array, Array* output_array) { + const Shape& input_shape = input_array.shape(); + const std::vector>& input_data = + input_array.GetBuffer().data; + const Shape& coords_shape = coords_array.shape(); + const std::vector& coords_data = + coords_array.GetBuffer().data; + + const Shape& output_shape = output_array->shape(); + std::vector>& output_data = + output_array->GetMutableBuffer().data; + output_data.resize(RequiredBufferSizeForShape(output_shape)); + + int rev_input_rank = input_shape.dimensions_count() - 1 - (input_rank - 1); + CHECK_EQ(coords_shape.dims(0), output_array->shape().dims(rev_input_rank)); + + int stride = 1; + for (int i = input_shape.dimensions_count() - 1; i >= input_rank - 1; --i) { + stride *= input_shape.dims(i); + } + + for (int i = 0; i < coords_shape.dims(0); ++i) { + DCHECK_GE(coords_data[i], 0); + DCHECK_LT(coords_data[i], input_shape.dims(rev_input_rank)); + DataType* out = output_data.data() + i * stride; + const DataType* in = input_data.data() + coords_data[i] * stride; + memcpy(out, in, sizeof(DataType) * stride); + } +} + +} // namespace + +// Resolves a constant Gather operation. +// This simply performs the gather and produces the output array with the +// appropriate values. +bool ResolveConstantGather::Run(Model* model, std::size_t op_index) { + auto it = model->operators.begin() + op_index; + const auto* base_op = it->get(); + if (base_op->type != OperatorType::kGather) { + return false; + } + const auto* op = static_cast(base_op); + + CHECK_EQ(op->inputs.size(), 2); + CHECK_EQ(op->outputs.size(), 1); + auto& output_array = model->GetArray(op->outputs[0]); + if (output_array.data_type == ArrayDataType::kNone) { + // Yield until the output type has been set by PropagateArrayDataTypes. + return false; + } + if (!output_array.has_shape()) { + // Yield until the output shape has been set by PropagateFixedShapes. 
+ return false; + } + + // Only handling axis=0 for now. + if (op->axis != 0) { + AddMessageF("%s has axis %d; only axis=0 is supported", LogName(*op), + op->axis); + return false; + } + + // We require constant inputs. + if (!IsConstantParameterArray(*model, op->inputs[0]) || + !IsConstantParameterArray(*model, op->inputs[1])) { + return false; + } + const Array& input_array = model->GetArray(op->inputs[0]); + const Array& coords_array = model->GetArray(op->inputs[1]); + CHECK(coords_array.data_type == ArrayDataType::kInt32) + << "Only int32 indices are supported"; + + CHECK(!output_array.buffer); + switch (output_array.data_type) { + case ArrayDataType::kFloat: + Gather(input_array, op->input_rank, coords_array, + &output_array); + break; + case ArrayDataType::kUint8: + Gather(input_array, op->input_rank, coords_array, + &output_array); + break; + case ArrayDataType::kInt32: + Gather(input_array, op->input_rank, coords_array, + &output_array); + break; + case ArrayDataType::kInt64: + Gather(input_array, op->input_rank, coords_array, + &output_array); + break; + default: + LOG(FATAL) << "Unsupported data type given to Gather op with output \"" + << op->outputs[0] << "\""; + break; + } + + // Erase input arrays if no longer used after we remove the op. + DeleteArrayIfUsedOnce(op->inputs[0], model); + DeleteArrayIfUsedOnce(op->inputs[1], model); + + // Erase the operator. 
+ model->operators.erase(it); + return true; +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc index 419fb9a799..48c326651f 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/unpartition_embedding_lookup.cc @@ -191,6 +191,8 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { model->GetOrCreateArray(gather_params_permute_op->outputs[0]); const auto& partition_array = model->GetArray(gather_ops[0]->inputs[0]); const auto& partition_array_dims = partition_array.shape().dims(); + gather_params_permute_op->input_rank = + partition_array.shape().dimensions_count(); auto& perm_array = model->GetOrCreateArray(gather_params_permute_op->inputs[1]); perm_array.data_type = ArrayDataType::kInt32; @@ -209,6 +211,7 @@ bool UnpartitionEmbeddingLookup::Run(Model* model, std::size_t op_index) { merged_gather_op->inputs = {gather_params_permute_op->outputs[0], mod_op->inputs[0]}; merged_gather_op->outputs = {stitch_op->outputs[0]}; + merged_gather_op->input_rank = partition_array.shape().dimensions_count(); model->operators.emplace(op_it, merged_gather_op); AddMessageF( diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index 8ca28922a6..ee3f7ab846 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -77,6 +77,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ResolveBatchNormalization); transformations->Add(new ResolveConstantBinaryOperator); transformations->Add(new ResolveConstantFill); + transformations->Add(new ResolveConstantGather); transformations->Add(new ResolveConstantRange); transformations->Add(new ResolveConstantStack); transformations->Add(new 
ResolveConstantStridedSlice); -- GitLab From 056c3167b8f6f829ecc2663c7df2bf2c1419747b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 09:06:30 -0800 Subject: [PATCH 660/884] Desugar IfExp nodes PiperOrigin-RevId: 188491604 --- tensorflow/contrib/py2tf/converters/BUILD | 12 ++ tensorflow/contrib/py2tf/converters/ifexp.py | 49 ++++++++ .../contrib/py2tf/converters/ifexp_test.py | 106 ++++++++++++++++++ tensorflow/contrib/py2tf/impl/conversion.py | 2 + 4 files changed, 169 insertions(+) create mode 100644 tensorflow/contrib/py2tf/converters/ifexp.py create mode 100644 tensorflow/contrib/py2tf/converters/ifexp_test.py diff --git a/tensorflow/contrib/py2tf/converters/BUILD b/tensorflow/contrib/py2tf/converters/BUILD index c85ad9200e..f624c42686 100644 --- a/tensorflow/contrib/py2tf/converters/BUILD +++ b/tensorflow/contrib/py2tf/converters/BUILD @@ -25,6 +25,7 @@ py_library( "control_flow.py", "decorators.py", "for_loops.py", + "ifexp.py", "list_comprehension.py", "lists.py", "logical_expressions.py", @@ -202,3 +203,14 @@ py_test( "//tensorflow/python:client_testlib", ], ) + +py_test( + name = "ifexp_test", + srcs = ["ifexp_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":test_lib", + "//tensorflow/contrib/py2tf/pyct", + "//tensorflow/python:client_testlib", + ], +) diff --git a/tensorflow/contrib/py2tf/converters/ifexp.py b/tensorflow/contrib/py2tf/converters/ifexp.py new file mode 100644 index 0000000000..5fd6f348af --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/ifexp.py @@ -0,0 +1,49 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Canonicalizes the ternary conditional operator.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf.pyct import templates +from tensorflow.contrib.py2tf.pyct import transformer + + +class IfExp(transformer.Base): + """Canonicalizes all IfExp nodes into plain conditionals.""" + + def visit_IfExp(self, node): + template = """ + py2tf_utils.run_cond(test, lambda: body, lambda: orelse) + """ + desugared_ifexp = templates.replace_as_expression( + template, test=node.test, body=node.body, orelse=node.orelse) + return desugared_ifexp + + +def transform(node, context): + """Desugar IfExp nodes into plain conditionals. + + Args: + node: an AST node to transform + context: a context object + + Returns: + new_node: an AST with no IfExp nodes, only conditionals. + """ + + node = IfExp(context).visit(node) + return node diff --git a/tensorflow/contrib/py2tf/converters/ifexp_test.py b/tensorflow/contrib/py2tf/converters/ifexp_test.py new file mode 100644 index 0000000000..9c357ef35b --- /dev/null +++ b/tensorflow/contrib/py2tf/converters/ifexp_test.py @@ -0,0 +1,106 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for ifexp module.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.contrib.py2tf import utils +from tensorflow.contrib.py2tf.converters import converter_test_base +from tensorflow.contrib.py2tf.converters import ifexp +from tensorflow.python.platform import test + + +class IfExpTest(converter_test_base.TestCase): + + def compiled_fn(self, test_fn, *args): + node = self.parse_and_analyze(test_fn, {}) + node = ifexp.transform(node, self.ctx) + module = self.compiled(node, *args) + return module + + def test_simple(self): + + def test_fn(x): + return 1 if x else 0 + + with self.compiled_fn(test_fn) as result: + result.py2tf_util = utils + for x in [0, 1]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_fn(self): + + def f(x): + return 3 * x + + def test_fn(x): + y = f(x * x if x > 0 else x) + return y + + with self.compiled_fn(test_fn) as result: + result.py2tf_util = utils + result.f = f + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_exp(self): + + def test_fn(x): + return x * x if x > 0 else x + + with self.compiled_fn(test_fn) as result: + result.py2tf_util = utils + for x in [-2, 2]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_nested(self): + + def test_fn(x): + return x * x if x > 0 else x if x else 1 + + with self.compiled_fn(test_fn) as result: + result.py2tf_util = utils + for x in [-2, 0, 2]: + 
self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_in_cond(self): + + def test_fn(x): + if x > 0: + return x * x if x < 5 else x * x * x + return -x + + with self.compiled_fn(test_fn) as result: + result.py2tf_util = utils + for x in [-2, 2, 5]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + def test_assign_in_cond(self): + + def test_fn(x): + if x > 0: + x = -x if x < 5 else x + return x + + with self.compiled_fn(test_fn) as result: + result.py2tf_util = utils + for x in [-2, 2, 5]: + self.assertEqual(test_fn(x), result.test_fn(x)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/py2tf/impl/conversion.py b/tensorflow/contrib/py2tf/impl/conversion.py index 8a3cf9cd0a..37b24ab55f 100644 --- a/tensorflow/contrib/py2tf/impl/conversion.py +++ b/tensorflow/contrib/py2tf/impl/conversion.py @@ -29,6 +29,7 @@ from tensorflow.contrib.py2tf.converters import continue_statements from tensorflow.contrib.py2tf.converters import control_flow from tensorflow.contrib.py2tf.converters import decorators from tensorflow.contrib.py2tf.converters import for_loops +from tensorflow.contrib.py2tf.converters import ifexp from tensorflow.contrib.py2tf.converters import lists from tensorflow.contrib.py2tf.converters import logical_expressions from tensorflow.contrib.py2tf.converters import name_scopes @@ -307,6 +308,7 @@ def node_to_graph(node, ctx, nocompile_decorators): # source. # TODO(mdan): Is it feasible to reconstruct intermediate source code? ctx.source_code = None + node = ifexp.transform(node, ctx) node, deps = decorators.transform(node, nocompile_decorators) node = break_statements.transform(node, ctx) node = asserts.transform(node, ctx) -- GitLab From 7fbfa59b1d970eb5e3a27b12ef38315ab556faef Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 09:13:14 -0800 Subject: [PATCH 661/884] Enable the Grappler arithmetic optimizer for all python tests. 
PiperOrigin-RevId: 188492233 --- tensorflow/python/framework/test_util.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 9fc1154201..cfe8b19cb3 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -898,8 +898,6 @@ class TensorFlowTestCase(googletest.TestCase): config.graph_options.optimizer_options.opt_level = -1 config.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) - config.graph_options.rewrite_options.arithmetic_optimization = ( - rewriter_config_pb2.RewriterConfig.OFF) return config if graph is None: -- GitLab From 68a478d6c002014ae610452b77c5606ec11ad334 Mon Sep 17 00:00:00 2001 From: jinghuangintel Date: Fri, 9 Mar 2018 09:20:38 -0800 Subject: [PATCH 662/884] [Intel MKL-DNN]: added MKLDNN dilated convolution support (#17160) * added MKLDNN dilated conv support * Resolving conflict caused by removal of testCPUConv2DNCHWUnimplemented unit test. 
* fixing conflicts in conv_ops_test.py * changed the mkl test flag * changed the mkl test flag ii * addressed the comment --- tensorflow/core/graph/mkl_layout_pass.cc | 5 + .../core/kernels/mkl_conv_grad_filter_ops.cc | 81 ++++++---- .../core/kernels/mkl_conv_grad_input_ops.cc | 18 ++- tensorflow/core/kernels/mkl_conv_ops.cc | 146 ++++++++++++------ tensorflow/core/kernels/mkl_conv_ops.h | 117 ++++++++++---- tensorflow/core/ops/nn_ops.cc | 8 + .../python/kernel_tests/conv_ops_test.py | 20 +-- 7 files changed, 269 insertions(+), 126 deletions(-) diff --git a/tensorflow/core/graph/mkl_layout_pass.cc b/tensorflow/core/graph/mkl_layout_pass.cc index 02038c5d77..f6a9d8e19a 100644 --- a/tensorflow/core/graph/mkl_layout_pass.cc +++ b/tensorflow/core/graph/mkl_layout_pass.cc @@ -3528,11 +3528,13 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, string data_format; string padding; std::vector strides; + std::vector dilations; bool use_cudnn_on_gpu; // Get all attributes from old node. TF_CHECK_OK(GetNodeAttr(orig_node->def(), "T", &T)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(orig_node->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(orig_node->def(), "data_format", &data_format)); TF_CHECK_OK( @@ -3541,6 +3543,7 @@ void MklLayoutRewritePass::CopyAttrsConv2D(const Node* orig_node, // Add attributes to new node. 
nb->Attr("T", T); nb->Attr("strides", strides); + nb->Attr("dilations", dilations); nb->Attr("padding", padding); nb->Attr("data_format", data_format); nb->Attr("use_cudnn_on_gpu", use_cudnn_on_gpu); @@ -3778,12 +3781,14 @@ Status MklLayoutRewritePass::MergeConv2DWithBiasAdd(std::unique_ptr* g, DataType T_pred, T_succ; string padding; std::vector strides; + std::vector dilations; string data_format_pred, data_format_succ; bool use_cudnn_on_gnu; TF_CHECK_OK(GetNodeAttr(pred->def(), "T", &T_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "T", &T_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "padding", &padding)); TF_CHECK_OK(GetNodeAttr(pred->def(), "strides", &strides)); + TF_CHECK_OK(GetNodeAttr(pred->def(), "dilations", &dilations)); TF_CHECK_OK(GetNodeAttr(pred->def(), "data_format", &data_format_pred)); TF_CHECK_OK(GetNodeAttr(succ->def(), "data_format", &data_format_succ)); TF_CHECK_OK(GetNodeAttr(pred->def(), "use_cudnn_on_gpu", &use_cudnn_on_gnu)); diff --git a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc index 1401bc65a4..e0706568b1 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_filter_ops.cc @@ -444,6 +444,7 @@ class MklConv2DCustomBackpropFilterOp ~MklConv2DCustomBackpropFilterOp() {} private: + const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -492,7 +493,9 @@ class MklConv2DCustomBackpropFilterOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, const memory::dims& strides, + Tensor** output_tensor, + const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -518,31 +521,32 
@@ class MklConv2DCustomBackpropFilterOp bias_grad->SetOpMemDesc(bias_grad_dims, memory::format::x); } - // Create convolution backward weights primitive. - auto bwd_desc = - (biasEnabled && (bias_grad != nullptr)) - ? convolution_backward_weights::desc( - convolution_direct, input->GetOpMemDesc(), - output->GetOpMemDesc(), bias_grad->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, - padding) - : convolution_backward_weights::desc( - convolution_direct, input->GetOpMemDesc(), - output->GetOpMemDesc(), outbackprop->GetOpMemDesc(), strides, - padding_l, padding_r, padding); - - auto bwd_pd = convolution_backward_weights::primitive_desc( - bwd_desc, cpu_engine, conv_fwd_pd); - - // Allocate output tensor. - AllocateOutputTensor(context, bwd_pd, bwd_output_dims, bwd_output_format, - output_tensor); - - CHECK_NOTNULL(*output_tensor); - // Set buffer handle using allocated output tensor. - output->SetUsrMemDataHandle(*output_tensor); - if (biasEnabled && (bias_grad != nullptr)) { + // Create convolution backward weights with bias primitive. + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + bias_grad->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding) : + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + bias_grad->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + conv_fwd_pd); + + // Allocate output tensor. + AllocateOutputTensor(context, bwd_pd, bwd_output_dims, + bwd_output_format, output_tensor); + + CHECK_NOTNULL(*output_tensor); + // Set buffer handle using allocated output tensor. 
+ output->SetUsrMemDataHandle(*output_tensor); + // Allocate bias_grad tensor TensorShape bias_grad_shape({depth}); Tensor* bias_grad_tensor = nullptr; @@ -553,11 +557,32 @@ class MklConv2DCustomBackpropFilterOp memory::desc({bias_grad_dims}, MklDnnType(), memory::format::x); bias_grad->SetUsrMem(bias_grad_md, bias_grad_tensor); bias_grad->SetUsrMemDataHandle(bias_grad_tensor); - } - if (biasEnabled && (bias_grad != nullptr)) { - PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, bias_grad); + PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output, + bias_grad); } else { + // Create convolution backward weights primitive. + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding) : + convolution_backward_weights::desc(convolution_direct, + input->GetOpMemDesc(), output->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); + auto bwd_pd = convolution_backward_weights::primitive_desc(bwd_desc, + cpu_engine, + conv_fwd_pd); + + // Allocate output tensor. + AllocateOutputTensor(context, bwd_pd, bwd_output_dims, + bwd_output_format, output_tensor); + + CHECK_NOTNULL(*output_tensor); + // Set buffer handle using allocated output tensor. 
+ output->SetUsrMemDataHandle(*output_tensor); PrepareAndExecutePrimitive(bwd_pd, input, outbackprop, output); } } diff --git a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc index eeed009531..d203c04934 100644 --- a/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_grad_input_ops.cc @@ -369,6 +369,7 @@ class MklConv2DCustomBackpropInputOp private: const int kInputIndex_Filter = 1, kInputIndex_InputSizes = 0, kInputIndex_OutBackProp = 2; + const int kDilationH = 0, kDilationW = 1; void ValidateMklShapes(const MklDnnShape& input_mkl_shape, const MklDnnShape& filter_mkl_shape, const MklDnnShape& obp_mkl_shape) { @@ -419,7 +420,9 @@ class MklConv2DCustomBackpropInputOp const convolution_forward::primitive_desc& conv_fwd_pd, MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, MklDnnData* output, - Tensor** output_tensor, const memory::dims& strides, + Tensor** output_tensor, + const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, const memory::dims& padding_r, padding_kind padding, const memory::dims& bwd_output_dims, @@ -432,9 +435,16 @@ class MklConv2DCustomBackpropInputOp CHECK_NOTNULL(output_tensor); // Create convolution backward data primitive. - auto bwd_desc = convolution_backward_data::desc( - convolution_direct, output->GetOpMemDesc(), filter->GetOpMemDesc(), - outbackprop->GetOpMemDesc(), strides, padding_l, padding_r, padding); + // Use dilated convolution in case dilate rates are greater than zero. + auto bwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0) ? 
+ convolution_backward_data::desc(convolution_direct, + output->GetOpMemDesc(), filter->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), strides, + dilations, padding_l, padding_r, padding): + convolution_backward_data::desc(convolution_direct, + output->GetOpMemDesc(), filter->GetOpMemDesc(), + outbackprop->GetOpMemDesc(), + strides, padding_l, padding_r, padding); auto bwd_pd = convolution_backward_data::primitive_desc( bwd_desc, cpu_engine, conv_fwd_pd); diff --git a/tensorflow/core/kernels/mkl_conv_ops.cc b/tensorflow/core/kernels/mkl_conv_ops.cc index 1440da8f82..f0818eb96d 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.cc +++ b/tensorflow/core/kernels/mkl_conv_ops.cc @@ -493,6 +493,7 @@ class MklConv2DOp : public OpKernel { ~MklConv2DOp() {} explicit MklConv2DOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); OP_REQUIRES_OK(context, context->GetAttr("strides", &strides_)); string data_format; OP_REQUIRES_OK(context, context->GetAttr("data_format", &data_format)); @@ -509,6 +510,20 @@ class MklConv2DOp : public OpKernel { errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + const int64 dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + const int64 dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + const int64 dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + const int64 dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, dilation_n == 1 && dilation_c == 1, + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + 
errors::InvalidArgument("Dilated rates should be larger than 0.")); } void Compute(OpKernelContext* context) override { @@ -530,17 +545,19 @@ class MklConv2DOp : public OpKernel { MklDnnData filter(&cpu_engine); MklDnnData output(&cpu_engine); - memory::dims src_dims, filter_dims, padding_l, padding_r, strides; + memory::dims src_dims, filter_dims, padding_l, padding_r, + dilations, strides; memory::dims output_dims_tf_order, output_dims_mkl_order; // Get shapes of input tensors in MKL-DNN order - MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, + dilations_); auto src_tf_shape = GetTfShape(context, kInputIndex_Src); auto filter_tf_shape = GetTfShape(context, kInputIndex_Filter); conv_utl.GetConvFwdSizesInMklOrder( src_tf_shape, filter_tf_shape, &src_dims, &filter_dims, &strides, - &output_dims_tf_order, &output_dims_mkl_order, &padding_l, - &padding_r); + &dilations, &output_dims_tf_order, &output_dims_mkl_order, + &padding_l, &padding_r); if (!context->status().ok()) return; // Check for corner case - if there is nothing to compute, return. @@ -553,6 +570,7 @@ class MklConv2DOp : public OpKernel { // Need semantics for Null MKL tensor MklDnnShape output_mkl_shape; output_mkl_shape.SetMklTensor(false); + AllocateOutputSetMklShape(context, kOutputIndex_Dst, &output_tensor, src_tf_shape, output_mkl_shape); @@ -596,55 +614,79 @@ class MklConv2DOp : public OpKernel { filter.SetOpMemDesc(filter_dims, memory::format::any); output.SetOpMemDesc(output_dims_mkl_order, memory::format::any); - // If bias is enabled, then do the same steps as above for bias. + // MKLDNN dilation starts from 0. 
+ dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + if (biasEnabled) { - MklDnnData bias(&cpu_engine); - memory::dims bias_size; - conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); - const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); - bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); - bias.SetOpMemDesc(bias_size, memory::format::any); - - // Create convolution primitive with Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), bias.GetOpMemDesc(), output.GetOpMemDesc(), - strides, padding_l, padding_r, TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, - filter_out_tensor); + // Create convolution primitive with Bias. + MklDnnData bias(&cpu_engine); + memory::dims bias_size; + conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_size); + const Tensor& bias_tensor = MklGetInput(context, kInputIndex_Bias); + bias.SetUsrMem(bias_size, memory::format::x, &bias_tensor); + bias.SetOpMemDesc(bias_size, memory::format::any); + + // Create convolution primitive with Bias. + // Use MKLDNN dilated convolution in case of dilated rate (>0). + auto conv_desc = (dilations[kDilationH] > 0 || + dilations[kDilationW] > 0) ? 
+ convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), + output.GetOpMemDesc(), strides, dilations, + padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)): + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), bias.GetOpMemDesc(), + output.GetOpMemDesc(), strides, + padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, + output_dims_mkl_order, tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, &bias, &output, + filter_out_tensor); } else { - // Create convolution primitive without Bias. - auto conv_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, src.GetOpMemDesc(), - filter.GetOpMemDesc(), output.GetOpMemDesc(), strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_)); - - auto conv_prim_desc = - convolution_forward::primitive_desc(conv_desc, cpu_engine); - AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, - tf_fmt, &output_tensor); - // Set data handle for output. - output.SetUsrMemDataHandle(output_tensor); - - Tensor* filter_out_tensor = nullptr; - AllocateFilterOutputTensor(context, conv_prim_desc, - TFShapeToMklDnnDims(filter_tf_shape), - &filter_out_tensor); - PrepareAndExecuteNet(conv_prim_desc, &src, &filter, nullptr, &output, - filter_out_tensor); + // Create convolution primitive without Bias. + // Use MKLDNN dilated convolution in case of dilated rate (>0). + auto conv_desc = (dilations[kDilationH] > 0 || + dilations[kDilationW] > 0) ? 
+ convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)): + convolution_forward::desc(prop_kind::forward, + convolution_direct, src.GetOpMemDesc(), + filter.GetOpMemDesc(), output.GetOpMemDesc(), + strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); + + auto conv_prim_desc = convolution_forward::primitive_desc(conv_desc, + cpu_engine); + AllocateOutputTensor(context, conv_prim_desc, output_dims_mkl_order, + tf_fmt, &output_tensor); + // Set data handle for output. + output.SetUsrMemDataHandle(output_tensor); + + Tensor* filter_out_tensor = nullptr; + AllocateFilterOutputTensor(context, conv_prim_desc, + TFShapeToMklDnnDims(filter_tf_shape), + &filter_out_tensor); + PrepareAndExecuteNet(conv_prim_desc, &src, &filter, + nullptr, &output, filter_out_tensor); } } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -658,10 +700,12 @@ class MklConv2DOp : public OpKernel { private: std::vector strides_; + std::vector dilations_; Padding padding_; TensorFormat data_format_; const int kInputIndex_Src = 0, kInputIndex_Filter = 1, kInputIndex_Bias = 2; const int kOutputIndex_Dst = 0, kOutputIndex_Filter = 1; + const int kDilationH = 0, kDilationW = 1; // Allocate output tensor. void AllocateOutputTensor( diff --git a/tensorflow/core/kernels/mkl_conv_ops.h b/tensorflow/core/kernels/mkl_conv_ops.h index 9dd88221a8..7ca10db895 100644 --- a/tensorflow/core/kernels/mkl_conv_ops.h +++ b/tensorflow/core/kernels/mkl_conv_ops.h @@ -58,13 +58,16 @@ class MklDnnConvUtil { protected: OpKernelContext* context_; // We don't own this. 
std::vector strides_; + std::vector dilations_; Padding padding_; TensorFormat data_format_; public: MklDnnConvUtil(OpKernelContext* context, const std::vector& strides, - Padding pad, TensorFormat fm) - : context_(context), strides_(strides), padding_(pad), data_format_(fm) {} + Padding pad, TensorFormat fm, + const std::vector& dilations) : + context_(context), strides_(strides), padding_(pad), + data_format_(fm), dilations_(dilations) {} virtual ~MklDnnConvUtil() { context_ = nullptr; } @@ -78,6 +81,16 @@ class MklDnnConvUtil { *strides = {stride_rows, stride_cols}; } + // Calculate Convolution dilations + virtual inline void GetDilationsInMklOrder(memory::dims *dilations) { + // For now we take the dilation from the second and third dimensions only + // (we do not support dilation on the batch or depth dimension). + CHECK_NOTNULL(dilations); + int dilations_rows = GetTensorDim(dilations_, data_format_, 'H'); + int dilations_cols = GetTensorDim(dilations_, data_format_, 'W'); + *dilations = {dilations_rows, dilations_cols}; + } + // Calculate Convolution input size in MKL-DNN order. MKL-DNN // requires input in NCHW format. Function does not return anything. // But errors arising from sanity checks are returned in context's @@ -213,7 +226,8 @@ class MklDnnConvUtil { // TODO(nhasabni): Add similar function for input and filter in MklShape. 
virtual inline void GetOutputAndPadSizeInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, - const memory::dims& strides, memory::dims* output_dims_tf_order, + const memory::dims& strides, const memory::dims& dilations, + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -232,6 +246,8 @@ class MklDnnConvUtil { // Stride is vector of 2 elements: {s_r, s_c} int stride_rows = strides[0]; int stride_cols = strides[1]; + int dilation_rows = dilations[0]; + int dilation_cols = dilations[1]; // Output batch is same as input batch. int out_batch = GetTensorDim(input_shape, data_format_, 'N'); @@ -241,11 +257,13 @@ class MklDnnConvUtil { int64 out_rows = 0, out_cols = 0; int64 pad_top = 0, pad_bottom = 0, pad_left, pad_right; - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_rows, filter_rows, stride_rows, padding_, + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2(input_rows, filter_rows, + dilation_rows, stride_rows, padding_, &out_rows, &pad_top, &pad_bottom)); - OP_REQUIRES_OK(context_, GetWindowedOutputSizeVerbose( - input_cols, filter_cols, stride_cols, padding_, + OP_REQUIRES_OK(context_, + GetWindowedOutputSizeVerboseV2(input_cols, filter_cols, + dilation_cols, stride_cols, padding_, &out_cols, &pad_left, &pad_right)); // Tensorflow output is in data_format order. (NHWC or NCHW) @@ -271,7 +289,8 @@ class MklDnnConvUtil { // // Function does not return anything, but sets error in context status. 
inline void GetOutputAndPadSizeInMklOrder( - size_t src_index, size_t filter_index, const memory::dims& strides, + size_t src_index, size_t filter_index, + const memory::dims& strides, const memory::dims& dilations, memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(output_dims_tf_order); @@ -286,9 +305,9 @@ class MklDnnConvUtil { errors::InvalidArgument("input must be 4-dimensional", input_tf_shape.DebugString())); - GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, strides, - output_dims_tf_order, output_dims_mkl_order, - pad_l, pad_r); + GetOutputAndPadSizeInMklOrder(input_tf_shape, filter_tf_shape, + strides, dilations, output_dims_tf_order, + output_dims_mkl_order, pad_l, pad_r); } // Wrapper function to calculate input, filter, and output sizes of @@ -300,12 +319,14 @@ class MklDnnConvUtil { inline void GetConvFwdSizesInMklOrder( const TensorShape& input_shape, const TensorShape& filter_shape, memory::dims* input_dims, memory::dims* filter_dims, - memory::dims* strides, memory::dims* output_dims_tf_order, + memory::dims* strides, memory::dims *dilations, + memory::dims* output_dims_tf_order, memory::dims* output_dims_mkl_order, memory::dims* pad_l, memory::dims* pad_r) { CHECK_NOTNULL(input_dims); CHECK_NOTNULL(filter_dims); CHECK_NOTNULL(strides); + CHECK_NOTNULL(dilations); CHECK_NOTNULL(output_dims_tf_order); CHECK_NOTNULL(output_dims_mkl_order); CHECK_NOTNULL(pad_l); @@ -316,7 +337,9 @@ class MklDnnConvUtil { GetFilterSizeInMklOrder(input_shape, filter_shape, filter_dims); if (!context_->status().ok()) return; GetStridesInMklOrder(strides); - GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, *strides, + GetDilationsInMklOrder(dilations); + GetOutputAndPadSizeInMklOrder(input_shape, filter_shape, + *strides, *dilations, output_dims_tf_order, output_dims_mkl_order, pad_l, pad_r); if (!context_->status().ok()) return; @@ -344,7 +367,21 @@ class 
MklConv2DBackpropCommonOp : public OpKernel { context, (stride_n == 1 && stride_c == 1), errors::InvalidArgument("Current implementation does not yet support " "strides in the batch and depth dimensions.")); - + OP_REQUIRES_OK(context, context->GetAttr("dilations", &dilations_)); + OP_REQUIRES(context, dilations_.size() == 4, + errors::InvalidArgument("Sliding window dilations field must " + "specify 4 dimensions")); + int dilation_n = GetTensorDim(dilations_, data_format_, 'N'); + int dilation_c = GetTensorDim(dilations_, data_format_, 'C'); + int dilation_h = GetTensorDim(dilations_, data_format_, 'H'); + int dilation_w = GetTensorDim(dilations_, data_format_, 'W'); + OP_REQUIRES(context, (dilation_n == 1 && dilation_c == 1), + errors::InvalidArgument( + "Current implementation does not yet support " + "dilations in the batch and depth dimensions.")); + OP_REQUIRES( + context, dilation_h > 0 && dilation_w > 0, + errors::InvalidArgument("Dilated rates should be larger than 0.")); OP_REQUIRES_OK(context, context->GetAttr("padding", &padding_)); } @@ -406,15 +443,16 @@ class MklConv2DBackpropCommonOp : public OpKernel { // By default, all dims are in MKL order. Only dims in TF order // are those with prefix tf_order. memory::dims outbprop_dims, fwd_input_dims, fwd_filter_dims; - memory::dims padding_l, padding_r, strides, fwd_output_dims; + memory::dims padding_l, padding_r, dilations, strides, fwd_output_dims; memory::dims fwd_output_dims_tf_order; // Get forward convolution parameters. 
- MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_); + MklDnnConvUtil conv_utl(context, strides_, padding_, data_format_, + dilations_); conv_utl.GetConvFwdSizesInMklOrder( input_tf_shape, filter_tf_shape, &fwd_input_dims, &fwd_filter_dims, - &strides, &fwd_output_dims_tf_order, &fwd_output_dims, &padding_l, - &padding_r); + &strides, &dilations, &fwd_output_dims_tf_order, &fwd_output_dims, + &padding_l, &padding_r); if (!context->status().ok()) return; // Create Convolution forward descriptor since Convolution backward @@ -437,10 +475,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { memory::format::hwio); // Tensorflow Output of Conv2D is in data_format order. auto fwd_out_md = memory::desc(fwd_output_dims, MklDnnType(), tf_fmt); - auto fwd_desc = convolution_forward::desc( - prop_kind::forward, convolution_direct, fwd_input_md, fwd_filter_md, - fwd_out_md, strides, padding_l, padding_r, - TFPaddingToMklDnnPadding(padding_)); + + const int kDilationH = 0, kDilationW = 1; + dilations[kDilationH] -= 1; + dilations[kDilationW] -= 1; + auto fwd_desc = (dilations[kDilationH] > 0 || dilations[kDilationW] > 0)? + convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_input_md, + fwd_filter_md, fwd_out_md, + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)) : + convolution_forward::desc(prop_kind::forward, + convolution_direct, fwd_input_md, + fwd_filter_md, fwd_out_md, + strides, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_)); auto fwd_pd = convolution_forward::primitive_desc(fwd_desc, cpu_engine); // Create memory for user data. Describe how the inputs and outputs of @@ -485,8 +534,9 @@ class MklConv2DBackpropCommonOp : public OpKernel { // Operator-specific call to create and execute primitive. 
CreatePrimitive(context, cpu_engine, fwd_pd, &input, &filter, - &outbackprop, &output, &output_tensor, strides, padding_l, - padding_r, TFPaddingToMklDnnPadding(padding_), + &outbackprop, &output, &output_tensor, + strides, dilations, padding_l, padding_r, + TFPaddingToMklDnnPadding(padding_), bwd_output_dims, bwd_output_format); } catch (mkldnn::error& e) { string error_msg = "Status: " + std::to_string(e.status) + @@ -535,20 +585,21 @@ class MklConv2DBackpropCommonOp : public OpKernel { virtual memory::format GetOutputFormat(const memory::format data_format) = 0; /// Create and execute the primitive storing output in the output_tensor. - virtual void CreatePrimitive( - OpKernelContext* context, const engine& cpu_engine, - const convolution_forward::primitive_desc& conv_fwd_pd, - MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, - MklDnnData* output, Tensor** output_tensor, - const memory::dims& strides, const memory::dims& padding_l, - const memory::dims& padding_r, padding_kind padding, - const memory::dims& bwd_output_dims, - memory::format bwd_output_format) = 0; + virtual void CreatePrimitive(OpKernelContext* context, + const engine& cpu_engine, + const convolution_forward::primitive_desc& conv_fwd_pd, + MklDnnData* input, MklDnnData* filter, MklDnnData* outbackprop, + MklDnnData* output, Tensor** output_tensor, const memory::dims& strides, + const memory::dims& dilations, const memory::dims& padding_l, + const memory::dims& padding_r, padding_kind padding, + const memory::dims& bwd_output_dims, + memory::format bwd_output_format) = 0; // Get the data_format {NCHW, NHWC} TensorFormat GetTFDataFormat() { return data_format_; } private: + std::vector dilations_; std::vector strides_; Padding padding_; TensorFormat data_format_; diff --git a/tensorflow/core/ops/nn_ops.cc b/tensorflow/core/ops/nn_ops.cc index 910fbaca9e..d6a0f38033 100644 --- a/tensorflow/core/ops/nn_ops.cc +++ b/tensorflow/core/ops/nn_ops.cc @@ -1498,6 +1498,7 @@ 
REGISTER_OP("_MklConv2D") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn(shape_inference::Conv2DShape) .Doc(R"doc( MKL version of Conv2D operator. Uses MKL DNN APIs to perform 2D convolution. @@ -1516,6 +1517,7 @@ REGISTER_OP("__MklDummyConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( Dummy node that enables fusing Conv2D and BiasAdd operator for MKL. This node does not perform anything. It is just created as an intermediate output of @@ -1541,6 +1543,7 @@ REGISTER_OP("_MklConv2DWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2D and BiasAdd operator. Uses MKL DNN APIs to perform 2D convolution and add Bias to the output of convolution. @@ -1563,6 +1566,7 @@ REGISTER_OP("_MklConv2DBackpropFilter") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(1, &s)); @@ -1589,6 +1593,7 @@ REGISTER_OP("__MklDummyConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. 
@@ -1633,6 +1638,7 @@ REGISTER_OP("_MklConv2DBackpropFilterWithBias") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle input_shape; // Fetch the data_format attribute, which may not exist. @@ -1668,6 +1674,7 @@ REGISTER_OP("_MklConv2DWithBiasBackpropBias") .Attr("T: {half, float, double}") .Attr("strides: list(int)") .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .Doc(R"doc( MKL version of Conv2DBackpropBias. Uses MKL DNN APIs to compute the gradients of convolution with respect to the bias. @@ -1690,6 +1697,7 @@ REGISTER_OP("_MklConv2DBackpropInput") .Attr("use_cudnn_on_gpu: bool = true") .Attr(GetPaddingAttrString()) .Attr(GetConvnetDataFormatAttrString()) + .Attr("dilations: list(int) = [1, 1, 1, 1]") .SetShapeFn([](InferenceContext* c) { ShapeHandle s; TF_RETURN_IF_ERROR(c->MakeShapeFromShapeTensor(0, &s)); diff --git a/tensorflow/python/kernel_tests/conv_ops_test.py b/tensorflow/python/kernel_tests/conv_ops_test.py index f4fe01f868..25525cc128 100644 --- a/tensorflow/python/kernel_tests/conv_ops_test.py +++ b/tensorflow/python/kernel_tests/conv_ops_test.py @@ -970,7 +970,7 @@ class Conv2DTest(test.TestCase): self.assertArrayNear(value_2.flatten(), value.flatten(), err) def testConv2D2x2Depth3ValidBackpropFilterStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 6, 1], @@ -984,7 +984,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): 
self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -998,7 +998,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropFilterDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 2, 3, 1], @@ -1012,7 +1012,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 4, 3], @@ -1026,7 +1026,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DKernelSizeMatchesInputSizeBackpropFilterDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropFilterDilation( input_sizes=[1, 3, 3, 1], @@ -1040,7 +1040,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputStride1x1Dilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 6, 1], @@ -1054,7 +1054,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth1ValidBackpropInputDilation1x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 2, 3, 1], @@ -1068,7 +1068,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2DEmptyBackpropInputDilation1x2(self): - if 
test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[0, 2, 3, 1], @@ -1082,7 +1082,7 @@ class Conv2DTest(test.TestCase): err=1e-5) def testConv2D2x2Depth3ValidBackpropInputDilation2x1(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): # The GPU version of this test is not very stable. So adjusting the # error threshold to 1e-4. @@ -1098,7 +1098,7 @@ class Conv2DTest(test.TestCase): err=1e-4) def testConv2DKernelSizeMatchesInputSizeBackpropInputDilation2x2(self): - if test.is_gpu_available(cuda_only=True): + if test.is_gpu_available(cuda_only=True) or test_util.IsMklEnabled(): for (data_format, use_gpu) in GetTestConfigs(): self._RunAndVerifyBackpropInputDilation( input_sizes=[1, 3, 3, 1], -- GitLab From 96a7b1443f6b652c04957ac8c53d6597be434697 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 09:39:21 -0800 Subject: [PATCH 663/884] Use the multithreaded conv only when threads are available. PiperOrigin-RevId: 188495357 --- tensorflow/contrib/lite/kernels/conv.cc | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index b93a416351..6821a22226 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -43,6 +43,8 @@ namespace conv { enum KernelType { kReference, kGenericOptimized, // Neon-free + // kMultithreadOptimized is a mixture of an Eigen-based kernel when threads + // are available and kGenericOptimized when we must use only one thread. kMultithreadOptimized, // The kernel uses use CBLAS interface for matrix multiplication. // It's fast when an optimized CBLAS implementation is available (e.g. 
Apple @@ -75,6 +77,8 @@ struct OpData { bool need_hwcn_weights; bool have_weights_been_transposed; bool need_im2col; + + bool run_multithreaded_kernel; }; void* Init(TfLiteContext* context, const char* buffer, size_t length) { @@ -83,6 +87,14 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { // to carry information from Prepare() to Eval(). auto* data = new OpData; gemm_support::IncrementUsageCounter(context); + + // TODO(ahentz): This is the gemmlowp context, which really only applies to + // quantized kernels. However, Interpreter::SetNumThreads() should also be + // setting the number of kernel on Eigen, so this works OK as a proxy for + // now. + int num_threads = gemm_support::GetFromContext(context)->max_num_threads(); + data->run_multithreaded_kernel = num_threads != 1; + return data; } @@ -137,7 +149,8 @@ static TfLiteStatus AllocateTemporaryTensorsIfRequired(TfLiteContext* context, // buffer to store the results. // This path is only used for float processing, so only create the buffer if // we're running with that data type. - data->need_hwcn_weights = (input->type == kTfLiteFloat32); + data->need_hwcn_weights = + (input->type == kTfLiteFloat32 && data->run_multithreaded_kernel); int temporaries_count = 0; if (data->need_im2col) { @@ -449,8 +462,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { // separate ops to avoid dispatch overhead here. switch (input->type) { // Already know in/outtypes are same. 
case kTfLiteFloat32: - EvalFloat(context, node, params, data, input, filter, bias, - im2col, hwcn_weights, output); + if (data->run_multithreaded_kernel) { + EvalFloat(context, node, params, data, input, filter, bias, + im2col, hwcn_weights, output); + } else { + EvalFloat(context, node, params, data, input, filter, + bias, im2col, hwcn_weights, output); + } break; case kTfLiteUInt8: EvalQuantized(context, node, params, data, input, filter, -- GitLab From 41a12df5de7d767a1a872348f3ba630350fcc78e Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Fri, 9 Mar 2018 09:48:05 -0800 Subject: [PATCH 664/884] Ensure that the rank of the input to tf.Unique is 1 as shape inference time instead of letting the kernel catch invalid inputs. PiperOrigin-RevId: 188496351 --- tensorflow/core/ops/array_ops.cc | 4 +++- tensorflow/core/ops/array_ops_test.cc | 6 +++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/ops/array_ops.cc b/tensorflow/core/ops/array_ops.cc index eeb458a287..a1027f1422 100644 --- a/tensorflow/core/ops/array_ops.cc +++ b/tensorflow/core/ops/array_ops.cc @@ -1168,7 +1168,9 @@ REGISTER_OP("Unique") .SetShapeFn([](InferenceContext* c) { c->set_output(0, c->Vector(InferenceContext::kUnknownDim)); c->set_output(1, c->input(0)); - return Status::OK(); + // Assert that the input rank is 1. 
+ ShapeHandle dummy; + return c->WithRank(c->input(0), 1, &dummy); }); REGISTER_OP("UniqueV2") diff --git a/tensorflow/core/ops/array_ops_test.cc b/tensorflow/core/ops/array_ops_test.cc index 86d64635f4..cf5bb5ad84 100644 --- a/tensorflow/core/ops/array_ops_test.cc +++ b/tensorflow/core/ops/array_ops_test.cc @@ -368,7 +368,11 @@ TEST(ArrayOpsTest, ShapeN_ShapeFn) { TEST(ArrayOpsTest, Unique_ShapeFn) { ShapeInferenceTestOp op("Unique"); INFER_OK(op, "?", "[?];in0"); - INFER_OK(op, "[1,2,3,?,5]", "[?];in0"); + INFER_OK(op, "[5]", "[?];in0"); + INFER_ERROR( + "Shape must be rank 1 but is rank 5 for '' (op: '') with input shapes: " + "[1,2,3,?,5].", + op, "[1,2,3,?,5]"); } TEST(ArrayOpsTest, UniqueWithCounts_ShapeFn) { -- GitLab From 48fc3bc388b09c67482db9751b6eab1d89ae140e Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 10:22:16 -0800 Subject: [PATCH 665/884] Implement partial constant folding for Concat. PiperOrigin-RevId: 188501394 --- .../core/grappler/costs/graph_properties.cc | 7 + .../core/grappler/costs/graph_properties.h | 2 + .../grappler/costs/graph_properties_test.cc | 27 ++++ tensorflow/core/grappler/op_types.cc | 6 + tensorflow/core/grappler/op_types.h | 2 + .../grappler/optimizers/constant_folding.cc | 143 ++++++++++++++++-- .../grappler/optimizers/constant_folding.h | 2 +- .../optimizers/constant_folding_test.cc | 100 ++++++++++-- 8 files changed, 261 insertions(+), 28 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 243ca9121c..817247e379 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -1182,5 +1182,12 @@ GraphProperties::GetOutputProperties(const string& node_name) const { return missing_properties_; } +void GraphProperties::ClearInputProperties(const string& node_name) { + input_properties_.erase(node_name); +} +void GraphProperties::ClearOutputProperties(const 
string& node_name) { + output_properties_.erase(node_name); +} + } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 6fc53a7f2e..5aa4962072 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -64,6 +64,8 @@ class GraphProperties { const string& node_name) const; const std::vector& GetOutputProperties( const string& node_name) const; + void ClearInputProperties(const string& node_name); + void ClearOutputProperties(const string& node_name); static void FillTensorPropertiesFromContext( const shape_inference::ShapeHandle&, const DataType&, diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc b/tensorflow/core/grappler/costs/graph_properties_test.cc index 5012069118..284d9d409b 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -113,6 +113,33 @@ TEST_F(GraphPropertiesTest, StaticProperties) { } } +TEST_F(GraphPropertiesTest, ClearProperties) { + TrivialTestGraphInputYielder fake_input(4, 1, 10, false, + cluster_->GetDeviceNames()); + GrapplerItem item; + CHECK(fake_input.NextItem(&item)); + + GraphProperties properties(item); + Status s = properties.InferStatically(true); + TF_CHECK_OK(s); + + for (const auto& node : item.graph.node()) { + if (node.op() == "RandomStandardNormal") { + EXPECT_EQ(1, properties.GetInputProperties(node.name()).size()); + const auto props = properties.GetOutputProperties(node.name()); + properties.ClearOutputProperties(node.name()); + const auto cleared_props = properties.GetOutputProperties(node.name()); + EXPECT_TRUE(cleared_props.empty()); + } else if (node.op() == "AddN") { + const auto in_props = properties.GetInputProperties(node.name()); + EXPECT_EQ(1, in_props.size()); + properties.ClearInputProperties(node.name()); + const auto cleared_props 
= properties.GetInputProperties(node.name()); + EXPECT_TRUE(cleared_props.empty()); + } + } +} + TEST_F(GraphPropertiesTest, DynamicProperties) { TrivialTestGraphInputYielder fake_input(4, 1, 10, false, cluster_->GetDeviceNames()); diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 8cf1402ae8..ae71094079 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -72,6 +72,10 @@ bool IsComplex(const NodeDef& node) { return node.op() == "Complex"; } bool IsComplexAbs(const NodeDef& node) { return node.op() == "ComplexAbs"; } +bool IsConcat(const NodeDef& node) { + return node.op() == "Concat" || node.op() == "ConcatV2"; +} + bool IsConcatOffset(const NodeDef& node) { return node.op() == "ConcatOffset"; } bool IsConstant(const NodeDef& node) { return node.op() == "Const"; } @@ -213,6 +217,8 @@ bool IsNextIteration(const NodeDef& node) { return op == "NextIteration" || op == "RefNextIteration"; } +bool IsPack(const NodeDef& node) { return node.op() == "Pack"; } + bool IsPad(const NodeDef& node) { const auto& op = node.op(); return op == "Pad" || op == "PadV2"; diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index a7c33ef97b..690275da7c 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -40,6 +40,7 @@ bool IsCast(const NodeDef& node); bool IsComplex(const NodeDef& node); bool IsComplexAbs(const NodeDef& node); bool IsConj(const NodeDef& node); +bool IsConcat(const NodeDef& node); bool IsConcatOffset(const NodeDef& node); bool IsConstant(const NodeDef& node); bool IsConv2D(const NodeDef& node); @@ -85,6 +86,7 @@ bool IsMul(const NodeDef& node); bool IsMatMul(const NodeDef& node); bool IsNextIteration(const NodeDef& node); bool IsPad(const NodeDef& node); +bool IsPack(const NodeDef& node); bool IsNoOp(const NodeDef& node); bool IsNotEqual(const NodeDef& node); bool IsPlaceholder(const NodeDef& node); diff --git 
a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 31dc1b73e1..4036ea3f16 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1510,7 +1510,7 @@ Status ConstantFolding::ReplaceOperationWithConstant( } Status ConstantFolding::SimplifyGraph(GraphDef* output, - const GraphProperties& properties, + GraphProperties* properties, bool use_shape_info) { const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE; for (int i = 0; i < output->node_size(); ++i) { @@ -1520,7 +1520,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (use_shape_info && (IsShuffle(*node) || IsReverse(*node) || IsTranspose(*node))) { const auto& shape = - properties.GetInputProperties(node->name())[0].shape(); + properties->GetInputProperties(node->name())[0].shape(); // The node is replaceable iff // unknown_rank == false && (dim_size == 0 || all dims have size 1) bool replaceable = !shape.unknown_rank(); @@ -1649,7 +1649,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, graph_modified_ = true; continue; } - if (use_shape_info && IsSimplifiableReshape(*node, properties)) { + if (use_shape_info && IsSimplifiableReshape(*node, *properties)) { DataType output_type = node->attr().at("T").type(); node->set_op("Identity"); node->clear_attr(); @@ -1667,8 +1667,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, // Simplify arithmetic operations with ones or zeros. 
if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_any_div) && - properties.HasInputProperties(node->name()) && - properties.HasOutputProperties(node->name())) { + properties->HasInputProperties(node->name()) && + properties->HasOutputProperties(node->name())) { const NodeDef* x = node_map_->GetNode(node->input(0)); const NodeDef* y = node_map_->GetNode(node->input(1)); if (x == nullptr || y == nullptr) { @@ -1676,12 +1676,12 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, node->DebugString()); } const TensorShapeProto& output_shape = - properties.GetOutputProperties(node->name())[0].shape(); + properties->GetOutputProperties(node->name())[0].shape(); // Simplify element-wise multiplication by ones or addition/subtraction // of zeros. const TensorShapeProto& y_shape = - properties.GetInputProperties(node->name())[1].shape(); + properties->GetInputProperties(node->name())[1].shape(); const bool x_is_zero = IsZeros(*x); const bool x_is_one = IsOnes(*x); const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); @@ -1708,7 +1708,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } const TensorShapeProto& x_shape = - properties.GetInputProperties(node->name())[0].shape(); + properties->GetInputProperties(node->name())[0].shape(); const bool y_is_zero = IsZeros(*y); const bool y_is_one = IsOnes(*y); const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); @@ -1921,13 +1921,11 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, // folding of ops when more than one but not all inputs are constant. // For AddN and AccumulateNV2, we may furthermore reorder inputs, since // addition is commutative. - // TODO(rmlarsen): Concat/Pack/ParallelConcat which are not commutative, so - // we have to preserve order and can only push consecutive runs of constant - // inputs into sub-nodes. 
+ const int num_non_control_inputs = NumNonControlInputs(*node); if (IsAggregate(*node) && IsCommutative(*node) && - NumNonControlInputs(*node) > 2) { + num_non_control_inputs > 2) { const int num_control_inputs = - node->input_size() - NumNonControlInputs(*node); + node->input_size() - num_non_control_inputs; std::vector const_inputs; std::vector nonconst_inputs; for (int i = 0; i < node->input_size(); ++i) { @@ -1943,7 +1941,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } // Promote AccumulateNV2 with all constant inputs to AddN, since it is // a fake node that cannot be constant folded by itself. - if (const_inputs.size() == NumNonControlInputs(*node) && + if (const_inputs.size() == num_non_control_inputs && node->op() == "AccumulateNV2") { node->set_op("AddN"); node->mutable_attr()->erase("shape"); @@ -1953,7 +1951,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const string new_node_name = OptimizedNodeName( *node, strings::StrCat("_partial_split_", const_inputs.size())); if (1 < const_inputs.size() && - const_inputs.size() < NumNonControlInputs(*node) && + const_inputs.size() < num_non_control_inputs && !node_map_->NodeExists(new_node_name)) { NodeDef* added_node = output->add_node(); *added_node = *node; @@ -1987,8 +1985,121 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const_inputs.size() - 1); (*node->mutable_attr())["N"].set_i(node->input_size() - num_control_inputs); + properties->ClearInputProperties(node->name()); (*added_node->mutable_attr())["N"].set_i(const_inputs.size()); graph_modified_ = true; + continue; + } + } + + // Partial constant folding for Concat which is not commutative, so + // we have to preserve order and can only push consecutive runs of constant + // inputs into sub-nodes. 
+ if (IsConcat(*node) && num_non_control_inputs > 3) { + bool already_optimized = false; + const string optimized = strings::StrCat(node->name(), "_partial_split_"); + for (const string& input : node->input()) { + if (input.rfind(optimized) != string::npos) { + already_optimized = true; + break; + } + } + if (already_optimized) { + continue; + } + int axis_arg = -1; + int begin = 0; + int end = num_non_control_inputs; + if (node->op() == "Concat") { + begin = 1; + axis_arg = 0; + } else if (node->op() == "ConcatV2") { + end = num_non_control_inputs - 1; + axis_arg = num_non_control_inputs - 1; + } else { + continue; + } + + const NodeDef* axis_arg_node = + node_map_->GetNode(NodeName(node->input(axis_arg))); + if (axis_arg_node == nullptr || !IsReallyConstant(*axis_arg_node)) { + // We cannot constant fold Concat unless we know the axis. + // Skip node. + continue; + } + + // We search for consecutive runs of constant inputs in the range + // [begin:end[ and push then down into child nodes. + std::vector> constant_input_runs; + int first = begin; + int last = begin; + while (last < end) { + while (first < end && !IsReallyConstant(*node_map_->GetNode( + NodeName(node->input(first))))) { + ++first; + } + // Invariant: node[first] is constant || first >= end. + last = first + 1; + while (last < end && IsReallyConstant(*node_map_->GetNode( + NodeName(node->input(last))))) { + ++last; + } + // Invariant: node[last] is not constant || last >= end + // Discard intervals shorter than 2 elements. + if (first < end && (last - first) > 1) { + constant_input_runs.emplace_back(first, last); + } + first = last; + } + + std::set inputs_to_delete; + for (auto interval : constant_input_runs) { + // Push the constant inputs in the interval to a child node than can be + // constant folded. 
+ const string new_node_name = OptimizedNodeName( + *node, strings::StrCat("_partial_split_", interval.first)); + if (node_map_->NodeExists(new_node_name)) { + break; + } + NodeDef* added_node = output->add_node(); + *added_node = *node; + added_node->set_name(new_node_name); + node_map_->AddNode(added_node->name(), added_node); + added_node->clear_input(); + for (int i = interval.first; i < interval.second; ++i) { + added_node->add_input(node->input(i)); + node_map_->UpdateOutput(NodeName(node->input(i)), node->name(), + added_node->name()); + if (i != interval.first) { + inputs_to_delete.insert(i); + } + } + added_node->add_input(node->input(axis_arg)); + (*added_node->mutable_attr())["N"].set_i(interval.second - + interval.first); + node_map_->AddOutput(NodeName(node->input(axis_arg)), + added_node->name()); + + // Overwrite the first constant input with the result of the added + // child node. + node->set_input(interval.first, added_node->name()); + node_map_->AddOutput(added_node->name(), node->name()); + } + if (!constant_input_runs.empty()) { + graph_modified_ = true; + if (!inputs_to_delete.empty()) { + // Fix up the inputs to the original node. 
+ std::vector tmp(node->input().begin(), node->input().end()); + node->clear_input(); + for (int i = 0; i < tmp.size(); ++i) { + if (inputs_to_delete.find(i) == inputs_to_delete.end()) { + node->add_input(tmp[i]); + } + } + (*node->mutable_attr())["N"].set_i(node->input_size() - 1); + properties->ClearInputProperties(node->name()); + } + continue; } } } @@ -2030,7 +2141,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, TF_RETURN_IF_ERROR(FoldGraph(output)); node_map_.reset(new NodeMap(output)); - TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info)); + TF_RETURN_IF_ERROR(SimplifyGraph(output, &properties, can_use_shape_info)); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 2fd59c7f9c..13ecfcd281 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -92,7 +92,7 @@ class ConstantFolding : public GraphOptimizer { bool IsSimplifiableReduction(const NodeDef& node) const; bool IsSimplifiableReshape(const NodeDef& node, const GraphProperties& properties) const; - Status SimplifyGraph(GraphDef* output, const GraphProperties& properties, + Status SimplifyGraph(GraphDef* output, GraphProperties* properties, bool use_shape_info); Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 4b9770889f..9214695eb6 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -188,20 +188,19 @@ TEST_F(ConstantFoldingTest, NeutralElement) { Output sub1 = ops::Sub(s.WithOpName("sub1"), x, zeros); Output sub2 = ops::Sub(s.WithOpName("sub2"), zeros, y); Output concat = - ops::Concat(s.WithOpName("concat"), - {mul1, mul2, mul3, mul4, 
mul5, mul6, div1, div2, matmul1, - matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}, - 0); + ops::Stack(s.WithOpName("stack"), + {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1, + matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - item.fetch = {"concat", "matmul3", "matmul4"}; + item.fetch = {"stack", "matmul3", "matmul4"}; ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(28, output.node_size()); + EXPECT_EQ(27, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); const string& name = node.name(); @@ -1626,19 +1625,19 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { Output acc4 = fun(s.WithOpName("acc4"), {c1, y, c2}); Output acc5 = fun(s.WithOpName("acc5"), {x, c1, c2}); Output acc6 = fun(s.WithOpName("acc6"), {x, c1, y, c2}); - Output concat = ops::Concat(s.WithOpName("concat"), - {acc0, acc1, acc2, acc3, acc4, acc5, acc6}, 0); + Output stack = ops::Stack(s.WithOpName("stack"), + {acc0, acc1, acc2, acc3, acc4, acc5, acc6}); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - item.fetch = {"concat"}; + item.fetch = {"stack"}; ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(17, output.node_size()); + EXPECT_EQ(16, output.node_size()); for (const NodeDef& node : output.node()) { if (node.name() == "acc0") { EXPECT_EQ("Const", node.op()); @@ -1696,7 +1695,86 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { } } -TEST_F(ConstantFoldingTest, IdenticalN) { +TEST_F(ConstantFoldingTest, PartialFolding_Concat) { + Scope s = Scope::NewRootScope(); + Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + 
Output y = ops::Placeholder(s.WithOpName("y"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output z = ops::Placeholder(s.WithOpName("z"), DT_FLOAT, + ops::Placeholder::Shape(TensorShape({2, 2}))); + Output axis = ops::Const(s.WithOpName("axis"), 0, {}); + Output c1 = ops::Const(s.WithOpName("c1"), 1.0f, {2, 2}); + Output c2 = ops::Const(s.WithOpName("c2"), 2.0f, {2, 2}); + Output concat0 = ops::Concat(s.WithOpName("concat0"), {c1, c2, c1}, axis); + Output concat1 = ops::Concat(s.WithOpName("concat1"), {x, y, z}, axis); + Output concat2 = ops::Concat(s.WithOpName("concat2"), {c1, x, y}, axis); + Output concat3 = ops::Concat(s.WithOpName("concat3"), {c1, c2, z}, axis); + Output concat4 = ops::Concat(s.WithOpName("concat4"), {c1, y, c2}, axis); + Output concat5 = ops::Concat(s.WithOpName("concat5"), {x, c1, c2}, axis); + Output concat6 = ops::Concat(s.WithOpName("concat6"), {x, c1, y, c2}, axis); + Output concat7 = ops::Concat(s.WithOpName("concat7"), {x, y, c1, c2}, axis); + Output concat8 = ops::Concat(s.WithOpName("concat8"), {x, c1, c2, y}, axis); + Output concat9 = ops::Concat(s.WithOpName("concat9"), {c1, c2, x, y}, axis); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {"concat0", "concat1", "concat2", "concat3", "concat4", + "concat5", "concat6", "concat7", "concat8", "concat9"}; + + ConstantFolding optimizer(nullptr /* cpu_device */); + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + EXPECT_EQ(21, output.node_size()); + for (int i = 0; i < output.node_size(); ++i) { + const NodeDef& node = output.node(i); + if (node.name() == "concat0") { + EXPECT_EQ("Const", node.op()); + } else if (node.name() == "concat3") { + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("ConstantFolding/concat3_partial_split_0", node.input(0)); + EXPECT_EQ("z", node.input(1)); + EXPECT_EQ("axis", node.input(2)); + } else if (node.name() == "concat5") { + EXPECT_EQ(3, 
node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("ConstantFolding/concat5_partial_split_1", node.input(1)); + EXPECT_EQ("axis", node.input(2)); + } else if (node.name() == "concat7") { + EXPECT_EQ(4, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("y", node.input(1)); + EXPECT_EQ("ConstantFolding/concat7_partial_split_2", node.input(2)); + EXPECT_EQ("axis", node.input(3)); + } else if (node.name() == "concat8") { + EXPECT_EQ(4, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("ConstantFolding/concat8_partial_split_1", node.input(1)); + EXPECT_EQ("y", node.input(2)); + EXPECT_EQ("axis", node.input(3)); + } else if (node.name() == "concat9") { + EXPECT_EQ(4, node.input_size()); + EXPECT_EQ("ConstantFolding/concat9_partial_split_0", node.input(0)); + EXPECT_EQ("x", node.input(1)); + EXPECT_EQ("y", node.input(2)); + EXPECT_EQ("axis", node.input(3)); + } else if (StringPiece(node.name()).starts_with("ConstantFolding/")) { + EXPECT_EQ("Const", node.op()); + } else { + EXPECT_EQ(item.graph.node(i).DebugString(), node.DebugString()); + } + } + + auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}); + auto tensors = EvaluateNodes(output, {"concat0"}); + EXPECT_EQ(1, tensors_expected.size()); + EXPECT_EQ(1, tensors.size()); + test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); +} + +TEST_F(ConstantFoldingTest, PartialFolding_IdentityN) { tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); Output x = ops::Placeholder(scope.WithOpName("x"), DT_FLOAT, ops::Placeholder::Shape(TensorShape({}))); -- GitLab From 58d5fa05a67b65979708f541336c2c11bfed978e Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 9 Mar 2018 10:30:25 -0800 Subject: [PATCH 666/884] [XLA:GPU] Convert FusionMergerTest to use module strings. This is a nice cleanup, but it also makes this a proper unit test -- the module strings we use are post fusion. (Without module strings, fusion computations are a real pain to create.) 
PiperOrigin-RevId: 188502642 --- .../xla/service/gpu/fusion_merger_test.cc | 566 ++++++------------ 1 file changed, 176 insertions(+), 390 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc index c0def27525..2217776c7d 100644 --- a/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc +++ b/tensorflow/compiler/xla/service/gpu/fusion_merger_test.cc @@ -27,250 +27,10 @@ namespace { namespace op = xla::testing::opcode_matchers; -class FusionMergerTest : public HloTestBase { - protected: - FusionMergerTest() : module_(CreateNewModule()) {} - - // Builds the following computation: - // - // Param - // / | \ - // / | \ - // OnesVec GTE(0) GTE(1) GTE(2) - // \ / \ / - // Add Add OnesVec - // \ / \ / - // \ Add Mul OnesVec - // \ | | / - // \ Mul Add - // \ | / - // \ | / - // Tuple - // - HloComputation* BuildComputation0() { - auto builder = HloComputation::Builder(TestName() + ".Computation0"); - // Create param instruction to access computation state. - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape3_, "param")); - - // Create GetTupleElement instructions for each tuple element. - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, param, 1)); - auto gte2 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, param, 2)); - - // Create const vector of ones to be used in element-wise computations. - auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR1({1.f, 1.f, 1.f, 1.f}))); - - // Create simple fusable computation for tuple element 0 (wont get merged). 
- auto out0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, one_vec, gte0)); - - // Create fusable computation which is dependent on second and third tuple - // elements (will initially be fused on its own). - auto add1 = builder.AddInstruction( - HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, gte1, gte2)); - - // Create two sub-computations, both of which are users of 'add1'. - - // First sub-computation: out1 = Mul(Add(add1, one_vec), one_vec) - auto add2 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, add1, one_vec)); - auto out1 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, add2, one_vec)); - - // Second sub-computation: out2 = Add(Mul(add1, one_vec), one_vec) - auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, add1, one_vec)); - auto out2 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, mul0, one_vec)); - - // Create output Tuple. - builder.AddInstruction(HloInstruction::CreateTuple({out0, out1, out2})); - return module_->AddEntryComputation(builder.Build()); - } - - // Builds the following computation: - // - // Param - // / \ - // GTE(0) GTE(1) - // | | \ / - // | | Mul - // \ \ | - // \ Mul - // \ | - // OnesVec Mul OnesVec - // \ / \ / - // OnesVec Add Mul OnesVec - // \ | | / - // Mul Add - // \ / - // \ / - // Tuple - // - HloComputation* BuildComputation1() { - auto builder = HloComputation::Builder(TestName() + ".Computation1"); - Shape tuple_shape2_ = ShapeUtil::MakeTupleShape({data_shape_, data_shape_}); - // Create param instruction to access computation state. - auto state = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape2_, "state")); - - // Create shared sub-computation (will initially be fused on its own). 
- auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, state, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, state, 2)); - // Calculate the flops we need to generate for this shared computation - // to exceed the threshold flops_to_bytes_ratio. - // Note that bytes transferred is multiplied by 3 because there are two - // operands and one output of size 'data_shape_'. - const int64 flops_needed = FusionMerger::GetThresholdFlopsToBytesRatio() * - ShapeUtil::ByteSizeOf(data_shape_) * 3; - const int64 vec_elements = ShapeUtil::ElementsIn(data_shape_); - const int64 iters = (flops_needed + vec_elements - 1) / vec_elements; - - auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, gte0, gte1)); - for (int i = 0; i < iters; ++i) { - mul0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, gte0, mul0)); - } - - // Create two sub-computations, both of which are users of 'mul0'. - auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR1({1.f, 1.f, 1.f, 1.f}))); - - // First sub-computation: out0 = Mul(Add(mul0, one_vec), one_vec) - auto add0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, mul0, one_vec)); - auto out0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, add0, one_vec)); - - // Second sub-computation: out1 = Add(Mul(mul0, one_vec), one_vec) - auto mul1 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, mul0, one_vec)); - auto out1 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, mul1, one_vec)); - - // Create output Tuple. 
- builder.AddInstruction(HloInstruction::CreateTuple({out0, out1})); - return module_->AddEntryComputation(builder.Build()); - } - - // Builds the following computation: - // - // Param - // / | | \ - // / | | \ - // / | | \ - // GTE(0) GTE(1) GTE(2) GTE(3) - // \ / / / - // Add / / - // \ / / - // Add / - // \ / - // \ / - // OnesVec Add OnesVec - // \ / \ / - // OnesVec Add Mul OnesVec - // \ | | / - // Mul Add - // \ / - // \ / - // Tuple - // - HloComputation* BuildComputation2(bool add_extra_input) { - auto builder = HloComputation::Builder(TestName() + ".Computation2"); - Shape state_shape = add_extra_input ? tuple_shape4_ : tuple_shape3_; - // Create param instruction to access computation state. - auto state = builder.AddInstruction( - HloInstruction::CreateParameter(0, state_shape, "state")); - - // Create GetTupleElement instructions for each tuple element. - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, state, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, state, 1)); - auto gte2 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, state, 2)); - - // Create shared fusable computation that reduces its operands. - auto reduce0 = builder.AddInstruction( - HloInstruction::CreateBinary(data_shape_, HloOpcode::kAdd, gte0, gte1)); - auto reduce_out = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, reduce0, gte2)); - if (add_extra_input) { - auto gte3 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape_, state, 3)); - reduce_out = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, reduce_out, gte3)); - } - - // Create two fusable sub-computations which are dependent on shared - // computation 'reduce_out'. 
- auto one_vec = builder.AddInstruction(HloInstruction::CreateConstant( - Literal::CreateR1({1.f, 1.f, 1.f, 1.f}))); - - // First sub-computation: out0 = Mul(Add(reduce_out, one_vec), one_vec) - auto add2 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, reduce_out, one_vec)); - auto out0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, add2, one_vec)); - - // Second sub-computation: out1 = Add(Mul(reduce_out, one_vec), one_vec) - auto mul0 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kMultiply, reduce_out, one_vec)); - auto out1 = builder.AddInstruction(HloInstruction::CreateBinary( - data_shape_, HloOpcode::kAdd, mul0, one_vec)); - - // Create output Tuple. - builder.AddInstruction(HloInstruction::CreateTuple({out0, out1})); - return module_->AddEntryComputation(builder.Build()); - } - - Shape data_shape_ = ShapeUtil::MakeShape(F32, {4}); - Shape tuple_shape2_ = ShapeUtil::MakeTupleShape({data_shape_, data_shape_}); - Shape tuple_shape3_ = - ShapeUtil::MakeTupleShape({data_shape_, data_shape_, data_shape_}); - Shape tuple_shape4_ = ShapeUtil::MakeTupleShape( - {data_shape_, data_shape_, data_shape_, data_shape_}); - - std::unique_ptr module_; -}; +class FusionMergerTest : public HloTestBase {}; // Tests that we can merge a fusion instruction that is below threshold. 
// -// Original computation: -// -// Param -// / | \ -// / | \ -// OnesVec GTE(0) GTE(1) GTE(2) -// \ / \ / -// Add Add OnesVec -// \ / \ / -// \ Add Mul OnesVec -// \ | | / -// \ Mul Add -// \ | / -// \ | / -// Tuple -// -// Computation after fusion passes: -// -// Param -// / \ -// Fusion3 Fusion2 -// | / \ -// \ Fusion0 Fusion1 -// \ | / -// \ | / -// Tuple -// // Computation after fusion merger pass (Fusion2 is merged into Fusion0 and // Fusion1): // Param @@ -280,19 +40,50 @@ class FusionMergerTest : public HloTestBase { // Tuple // TEST_F(FusionMergerTest, MergeSharedFusionInstruction) { - auto computation = BuildComputation0(); - // Run standard fusion passes. - EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/false) - .Run(module_.get()) - .ValueOrDie()); - EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) - .Run(module_.get()) - .ValueOrDie()); - // Run fusion merger pass, which should merge the shared fusion instruction - // into its two users. - EXPECT_TRUE(FusionMerger().Run(module_.get()).ValueOrDie()); - - auto* root = computation->root_instruction(); + auto module = tools::Parse(R"( +HloModule MergeSharedFusionInstruction + +comp.3 { + constant.param_0 = f32[4]{0} parameter(0) + param.param_1.2 = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(1) + get-tuple-element.6 = f32[4]{0} get-tuple-element(param.param_1.2), index=0 + ROOT add.7 = f32[4]{0} add(constant.param_0, get-tuple-element.6) +} + +comp.2 { + param.param_1.1 = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) + get-tuple-element.4 = f32[4]{0} get-tuple-element(param.param_1.1), index=1 + get-tuple-element.5 = f32[4]{0} get-tuple-element(param.param_1.1), index=2 + ROOT add.6 = f32[4]{0} add(get-tuple-element.4, get-tuple-element.5) +} + +comp.1 { + add.1.param_1.1 = f32[4]{0} parameter(1) + constant.param_1.3 = f32[4]{0} parameter(0) + add.5 = f32[4]{0} add(add.1.param_1.1, constant.param_1.3) + ROOT multiply.3 = f32[4]{0} multiply(add.5, constant.param_1.3) +} + +comp { + 
add.1.param_1 = f32[4]{0} parameter(1) + constant.param_1.1 = f32[4]{0} parameter(0) + multiply.2 = f32[4]{0} multiply(add.1.param_1, constant.param_1.1) + ROOT add.4 = f32[4]{0} add(multiply.2, constant.param_1.1) +} + +ENTRY MergeSharedFusionInstruction.Computation0 { + constant = f32[4]{0} constant({1, 1, 1, 1}) + param = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) + fusion.3 = f32[4]{0} fusion(constant, param), kind=kLoop, calls=comp.3 + fusion.4 = f32[4]{0} fusion(param), kind=kLoop, calls=comp.2 + fusion.5 = f32[4]{0} fusion(constant, fusion.4), kind=kLoop, calls=comp.1 + fusion.6 = f32[4]{0} fusion(constant, fusion.4), kind=kLoop, calls=comp + ROOT tuple = (f32[4]{0}, f32[4]{0}, f32[4]{0}) tuple(fusion.3, fusion.5, fusion.6) +})") + .ValueOrDie(); + EXPECT_TRUE(FusionMerger().Run(module.get()).ValueOrDie()); + + auto* root = module->entry_computation()->root_instruction(); EXPECT_EQ(HloOpcode::kTuple, root->opcode()); // Check operand 0 (not merged). Should have 4 instructions. auto* operand0 = root->operand(0); @@ -311,162 +102,158 @@ TEST_F(FusionMergerTest, MergeSharedFusionInstruction) { // Tests that we do not merge a fusion instruction that above flops to bytes // threshold. // -// Original computation: -// -// Param -// / \ -// GTE(0) GTE(1) -// | | \ / -// | | Mul -// \ \ | -// \ Mul -// \ | -// OnesVec Mul OnesVec -// \ / \ / -// OnesVec Add Mul OnesVec -// \ | | / -// Mul Add -// \ / -// \ / -// Tuple -// -// Computation after fusion passes and fusion merger pass (Fusion2 is not -// merged because it exceeds the threshold flops to bytes ratio). -// -// Param -// | -// Fusion2 -// / \ -// Fusion0 Fusion1 -// \ / -// Tuple -// +// Fusion2 is not merged because it exceeds the threshold flops-to-bytes ratio. TEST_F(FusionMergerTest, FlopsToBytesRatioThresholdExceeded) { - BuildComputation1(); - // Run standard fusion passes. 
- EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/false) - .Run(module_.get()) - .ValueOrDie()); - EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) - .Run(module_.get()) - .ValueOrDie()); + auto module = tools::Parse(R"( +HloModule FlopsToBytesRatioThresholdExceeded + +comp.2 { + state.param_1.1 = (f32[4]{0}, f32[4]{0}) parameter(0) + get-tuple-element.3 = f32[4]{0} get-tuple-element(state.param_1.1), index=0 + get-tuple-element.4 = f32[4]{0} get-tuple-element(state.param_1.1), index=2 + multiply.29 = f32[4]{0} multiply(get-tuple-element.3, get-tuple-element.4) + multiply.30 = f32[4]{0} multiply(get-tuple-element.3, multiply.29) + multiply.31 = f32[4]{0} multiply(get-tuple-element.3, multiply.30) + multiply.32 = f32[4]{0} multiply(get-tuple-element.3, multiply.31) + multiply.33 = f32[4]{0} multiply(get-tuple-element.3, multiply.32) + multiply.34 = f32[4]{0} multiply(get-tuple-element.3, multiply.33) + multiply.35 = f32[4]{0} multiply(get-tuple-element.3, multiply.34) + multiply.36 = f32[4]{0} multiply(get-tuple-element.3, multiply.35) + multiply.37 = f32[4]{0} multiply(get-tuple-element.3, multiply.36) + multiply.38 = f32[4]{0} multiply(get-tuple-element.3, multiply.37) + multiply.39 = f32[4]{0} multiply(get-tuple-element.3, multiply.38) + multiply.40 = f32[4]{0} multiply(get-tuple-element.3, multiply.39) + ROOT multiply.41 = f32[4]{0} multiply(get-tuple-element.3, multiply.40) +} + +comp.1 { + multiply.12.param_1.1 = f32[4]{0} parameter(1) + constant.param_1.3 = f32[4]{0} parameter(0) + add.3 = f32[4]{0} add(multiply.12.param_1.1, constant.param_1.3) + ROOT multiply.16 = f32[4]{0} multiply(add.3, constant.param_1.3) +} + +comp { + multiply.12.param_1 = f32[4]{0} parameter(1) + constant.param_1.1 = f32[4]{0} parameter(0) + multiply.15 = f32[4]{0} multiply(multiply.12.param_1, constant.param_1.1) + ROOT add.2 = f32[4]{0} add(multiply.15, constant.param_1.1) +} + +ENTRY FlopsToBytesRatioThresholdExceeded.Computation1 { + constant = f32[4]{0} constant({1, 
1, 1, 1}) + state = (f32[4]{0}, f32[4]{0}) parameter(0) + fusion.2 = f32[4]{0} fusion(state), kind=kLoop, calls=comp.2 + fusion.3 = f32[4]{0} fusion(constant, fusion.2), kind=kLoop, calls=comp.1 + fusion.4 = f32[4]{0} fusion(constant, fusion.2), kind=kLoop, calls=comp + ROOT tuple = (f32[4]{0}, f32[4]{0}) tuple(fusion.3, fusion.4) +})") + .ValueOrDie(); // Run fusion merger pass, which should detect that the flops/bytes of the // shared fusion instruction exceeds the threshold ratio, and therefore // cannot be merged with other fusion instructions. - EXPECT_FALSE(FusionMerger().Run(module_.get()).ValueOrDie()); + EXPECT_FALSE(FusionMerger().Run(module.get()).ValueOrDie()); } // Tests that threshold for bytes transferred if merged is exceeded. // -// Original computation: -// -// Param -// / | | \ -// / | | \ -// / | | \ -// GTE(0) GTE(1) GTE(2) GTE(3) -// \ / / / -// Add / / -// \ / / -// Add / -// \ / -// \ / -// OnesVec Add OnesVec -// \ / \ / -// OnesVec Add Mul OnesVec -// \ | | / -// Mul Add -// \ / -// \ / -// Tuple -// -// Computation after fusion passes and fusion merger pass. Fusion2 is not -// merged because it exceeds the threshold bytes transferred. This is because -// the bytes read by Fusion2 (when replicated if the instruction is merged -// into Fusion0 and Fusion1) would exceed the bytes transferred threshold. -// -// Param -// | -// Fusion2 -// / \ -// Fusion0 Fusion1 -// \ / -// Tuple -// +// Fusion2 is not merged because it exceeds the threshold bytes transferred. +// This is because the bytes read by Fusion2 (when replicated if the instruction +// is merged into Fusion0 and Fusion1) would exceed the bytes transferred +// threshold. TEST_F(FusionMergerTest, BytesTransferredThresholdExeceeded) { - BuildComputation2(/*add_extra_input=*/true); - // Run standard fusion passes. 
- EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/false) - .Run(module_.get()) - .ValueOrDie()); - EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) - .Run(module_.get()) - .ValueOrDie()); + auto module = tools::Parse(R"( +HloModule BytesTransferredThresholdExeceeded + +comp.2 { + state.param_1.1 = (f32[4]{0}, f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) + get-tuple-element.7 = f32[4]{0} get-tuple-element(state.param_1.1), index=0 + get-tuple-element.8 = f32[4]{0} get-tuple-element(state.param_1.1), index=1 + add.9 = f32[4]{0} add(get-tuple-element.7, get-tuple-element.8) + get-tuple-element.9 = f32[4]{0} get-tuple-element(state.param_1.1), index=2 + add.10 = f32[4]{0} add(add.9, get-tuple-element.9) + get-tuple-element.10 = f32[4]{0} get-tuple-element(state.param_1.1), index=3 + ROOT add.11 = f32[4]{0} add(add.10, get-tuple-element.10) +} + +comp.1 { + add.2.param_1.1 = f32[4]{0} parameter(1) + constant.param_1.3 = f32[4]{0} parameter(0) + add.6 = f32[4]{0} add(add.2.param_1.1, constant.param_1.3) + ROOT multiply.3 = f32[4]{0} multiply(add.6, constant.param_1.3) +} + +comp { + add.2.param_1 = f32[4]{0} parameter(1) + constant.param_1.1 = f32[4]{0} parameter(0) + multiply.2 = f32[4]{0} multiply(add.2.param_1, constant.param_1.1) + ROOT add.5 = f32[4]{0} add(multiply.2, constant.param_1.1) +} + +ENTRY BytesTransferredThresholdExeceeded.Computation2 { + constant = f32[4]{0} constant({1, 1, 1, 1}) + state = (f32[4]{0}, f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) + fusion.2 = f32[4]{0} fusion(state), kind=kLoop, calls=comp.2 + fusion.3 = f32[4]{0} fusion(constant, fusion.2), kind=kLoop, calls=comp.1 + fusion.4 = f32[4]{0} fusion(constant, fusion.2), kind=kLoop, calls=comp + ROOT tuple = (f32[4]{0}, f32[4]{0}) tuple(fusion.3, fusion.4) +})") + .ValueOrDie(); // Run fusion merger pass, which should detect that the net bytes transferred // (if merged) would increase. 
- EXPECT_FALSE(FusionMerger().Run(module_.get()).ValueOrDie()); + EXPECT_FALSE(FusionMerger().Run(module.get()).ValueOrDie()); } // Tests that threshold for bytes transferred if merged is not exceeded. // -// Original computation: -// -// Param -// / | \ -// / | \ -// / | \ -// GTE(0) GTE(1) GTE(2) -// \ / / -// Add / -// \ / -// OnesVec Add OnesVec -// \ / \ / -// OnesVec Add Mul OnesVec -// \ / \ / -// Mul Add -// \ / -// \ / -// Tuple -// -// Computation after fusion passes: -// -// Param -// | -// Fusion2 -// / \ -// Fusion0 Fusion1 -// \ / -// Tuple -// -// Computation after fusion merger pass (Fusion2 is merged into Fusion0 and -// Fusion1, because bytes read from Param by Fusion2 is reduced for this test -// which makes the merge operation into its operand below the bytes -// transferred threshold. -// -// Param -// / \ -// Fusion0 Fusion1 -// \ / -// Tuple -// +// Fusion2 is merged into Fusion0 and Fusion1, because bytes read from Param by +// Fusion2 is reduced for this test which makes the merge operation into its +// operand below the bytes transferred threshold. TEST_F(FusionMergerTest, BytesTransferredThresholdNotExeceeded) { - BuildComputation2(/*add_extra_input=*/false); - // Run standard fusion passes. 
- EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/false) - .Run(module_.get()) - .ValueOrDie()); - EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) - .Run(module_.get()) - .ValueOrDie()); + auto module = tools::Parse(R"( +HloModule BytesTransferredThresholdNotExeceeded + +comp.2 { + state.param_1.1 = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) + get-tuple-element.5 = f32[4]{0} get-tuple-element(state.param_1.1), index=0 + get-tuple-element.6 = f32[4]{0} get-tuple-element(state.param_1.1), index=1 + add.7 = f32[4]{0} add(get-tuple-element.5, get-tuple-element.6) + get-tuple-element.7 = f32[4]{0} get-tuple-element(state.param_1.1), index=2 + ROOT add.8 = f32[4]{0} add(add.7, get-tuple-element.7) +} + +comp.1 { + add.1.param_1.1 = f32[4]{0} parameter(1) + constant.param_1.3 = f32[4]{0} parameter(0) + add.5 = f32[4]{0} add(add.1.param_1.1, constant.param_1.3) + ROOT multiply.3 = f32[4]{0} multiply(add.5, constant.param_1.3) +} + +comp { + add.1.param_1 = f32[4]{0} parameter(1) + constant.param_1.1 = f32[4]{0} parameter(0) + multiply.2 = f32[4]{0} multiply(add.1.param_1, constant.param_1.1) + ROOT add.4 = f32[4]{0} add(multiply.2, constant.param_1.1) +} + +ENTRY BytesTransferredThresholdNotExeceeded.Computation2 { + constant = f32[4]{0} constant({1, 1, 1, 1}) + state = (f32[4]{0}, f32[4]{0}, f32[4]{0}) parameter(0) + fusion.2 = f32[4]{0} fusion(state), kind=kLoop, calls=comp.2 + fusion.3 = f32[4]{0} fusion(constant, fusion.2), kind=kLoop, calls=comp.1 + fusion.4 = f32[4]{0} fusion(constant, fusion.2), kind=kLoop, calls=comp + ROOT tuple = (f32[4]{0}, f32[4]{0}) tuple(fusion.3, fusion.4) +})") + .ValueOrDie(); // Run fusion merger pass, which should detect that the net bytes transferred // (if merged) would not increase. 
- EXPECT_TRUE(FusionMerger().Run(module_.get()).ValueOrDie()); + EXPECT_TRUE(FusionMerger().Run(module.get()).ValueOrDie()); } // Check that we're willing to merge f1_computation into f2_computation, even // though f2 is an input fusion node. TEST_F(FusionMergerTest, WillMergeIntoInputFusion) { - const char* const kModule = R"( + auto module = tools::Parse(R"( HloModule m f1_computation { @@ -492,9 +279,8 @@ TEST_F(FusionMergerTest, WillMergeIntoInputFusion) { p0 = f32[10]{0} parameter(0) f1 = f32[10]{0} fusion(p0), kind=kLoop, calls=f1_computation ROOT f2 = f32[] fusion(f1), kind=kInput, calls=f2_computation - } - )"; - auto module = tools::Parse(kModule).ValueOrDie(); + })") + .ValueOrDie(); EXPECT_TRUE(FusionMerger().Run(module.get()).ValueOrDie()); EXPECT_THAT(module->entry_computation()->root_instruction(), op::Fusion(op::Parameter())); -- GitLab From 87dab2d8289750c9d34f26d7d5fb18475dff985b Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 10:33:28 -0800 Subject: [PATCH 667/884] Automated g4 rollback of changelist 188397087 PiperOrigin-RevId: 188503184 --- .../xla/service/while_loop_simplifier.cc | 76 +-------------- .../xla/service/while_loop_simplifier_test.cc | 96 +------------------ 2 files changed, 2 insertions(+), 170 deletions(-) diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier.cc b/tensorflow/compiler/xla/service/while_loop_simplifier.cc index 1a93a880dd..c9d77c9376 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier.cc @@ -16,7 +16,6 @@ limitations under the License. 
#include "tensorflow/compiler/xla/service/while_loop_simplifier.h" #include "tensorflow/compiler/xla/service/call_inliner.h" #include "tensorflow/compiler/xla/service/hlo_evaluator.h" -#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/gtl/optional.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" @@ -606,75 +605,6 @@ static StatusOr TryRemoveWhileLoop(HloInstruction* while_op) { return false; } -static StatusOr TryPropagateConstant(HloInstruction* while_op) { - auto while_init = while_op->operand(0); - if (while_init->opcode() != HloOpcode::kTuple) { - return false; - } - - auto while_body = while_op->while_body(); - auto while_body_root = while_body->root_instruction(); - if (while_body_root->opcode() != HloOpcode::kTuple) { - return false; - } - - auto while_body_param = while_body->parameter_instruction(0); - const HloInstruction::InstructionVector& root_operands = - while_body_root->operands(); - - // Find the loop invariant tuple elements with constant init value and - // build a map from the tuple element index to the constant value. - tensorflow::gtl::FlatMap index_to_constant; - for (int i = 0; i < root_operands.size(); i++) { - HloInstruction* instr = root_operands[i]; - if (instr->opcode() == HloOpcode::kGetTupleElement && - instr->tuple_index() == i && instr->operand(0) == while_body_param) { - auto tuple_element = while_init->operand(i); - if (tuple_element->IsConstant()) { - VLOG(3) << "Found loop invariant tuple element " << i << " " - << tuple_element->ToString(); - index_to_constant[i] = tuple_element; - } - } - } - - if (index_to_constant.empty()) { - return false; - } - - // Replace the use of each constant tuple element in the loop_condition and - // loop_body with the corresponding constant value. 
- auto propagate_constant = [&](HloComputation* computation) -> StatusOr { - HloInstruction* param = computation->parameter_instruction(0); - bool changed = false; - for (auto instr : param->users()) { - // Since only a while-loop with a tuple result reaches here, we can safely - // assume that `param` is a tuple and the first operand of the - // GetTupleElement instruction is a use of `param`. - if (instr->opcode() == HloOpcode::kGetTupleElement) { - VLOG(3) << "tuple index " << instr->tuple_index() << " " - << instr->ToString(); - auto iter = index_to_constant.find(instr->tuple_index()); - if (iter != index_to_constant.end()) { - const HloInstruction* hlo_constant = (*iter).second; - VLOG(3) << "Replace use of " << instr->ToString() << " with " - << hlo_constant->ToString(); - TF_RETURN_IF_ERROR(instr->ReplaceAllUsesWith( - computation->AddInstruction(hlo_constant->Clone()))); - changed = true; - } - } - } - return changed; - }; - - TF_ASSIGN_OR_RETURN(bool changed_cond, - propagate_constant(while_op->while_condition())); - TF_ASSIGN_OR_RETURN(bool changed_body, propagate_constant(while_body)); - - return changed_cond || changed_body; -} - StatusOr WhileLoopSimplifier::Run(HloModule* module) { XLA_VLOG_LINES(3, "WhileLoopSimplifier::Run(), before:\n" + module->ToString()); @@ -705,11 +635,7 @@ StatusOr WhileLoopSimplifier::Run(HloModule* module) { continue; } - StatusOr result = TryPropagateConstant(while_op); - TF_RETURN_IF_ERROR(result.status()); - changed |= result.ValueOrDie(); - - result = TryRemoveWhileLoop(while_op); + StatusOr result = TryRemoveWhileLoop(while_op); TF_RETURN_IF_ERROR(result.status()); if (result.ValueOrDie()) { changed = true; diff --git a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc index 396f942dc0..cbea3e3cf2 100644 --- a/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc +++ b/tensorflow/compiler/xla/service/while_loop_simplifier_test.cc @@ 
-30,11 +30,6 @@ class WhileLoopSimplifierTest : public HloVerifiedTestBase { protected: // Makes an HloModule that contains a loop with `num_iters` iteration. void MakeModuleWithSimpleLoop(int num_iters); - - // Similar to MakeModuleWithSimpleLoop except that the loop bound is passed to - // the loop-condition through an element of a tuple which is the - // loop-condition parameter. - void MakeModuleWithSimpleLoopTupleElementLoopBound(int num_iters); }; void WhileLoopSimplifierTest::MakeModuleWithSimpleLoop(int num_iters) { @@ -71,45 +66,6 @@ void WhileLoopSimplifierTest::MakeModuleWithSimpleLoop(int num_iters) { ParseAndVerifyModule(hlo_string.c_str()); } -void WhileLoopSimplifierTest::MakeModuleWithSimpleLoopTupleElementLoopBound( - int num_iters) { - string hlo_string_template = R"( - HloModule SimpleLoopWithIndirectLoopBound - SimpleLoopWithIndirectLoopBound.body { - loop_var.1 = (s32[], s32[3]{0}, s32[]) parameter(0) - get-tuple-element.1 = s32[] get-tuple-element(loop_var.1), index=0 - constant.1 = s32[] constant(1) - add = s32[] add(get-tuple-element.1, constant.1) - get-tuple-element.2 = s32[3]{0} get-tuple-element(loop_var.1), index=1 - multiply = s32[3]{0} multiply(get-tuple-element.2, get-tuple-element.2) - limit = s32[] get-tuple-element(loop_var.1), index=2 - ROOT tuple = (s32[], s32[3]{0}, s32[]) tuple(add, multiply, limit) - } - SimpleLoopWithIndirectLoopBound.condition { - loop_var.2 = (s32[], s32[3]{0}, s32[]) parameter(0) - get-tuple-element.3 = s32[] get-tuple-element(loop_var.2), index=0 - get-tuple-element.4 = s32[] get-tuple-element(loop_var.2), index=2 - ROOT less-than = pred[] less-than(get-tuple-element.3, get-tuple-element.4) - } - ENTRY SimpleLoopWithIndirectLoopBound { - constant.3 = s32[] constant(42) - constant.4 = s32[3]{0} constant({0, 1, 2}) - constant.2 = s32[] constant({{LOOP_BOUND}}) - tuple.1 = (s32[], s32[3]{0}, s32[]) tuple(constant.3, constant.4, - constant.2) - ROOT while = (s32[], s32[3]{0}, s32[]) while(tuple.1), - 
condition=SimpleLoopWithIndirectLoopBound.condition, - body=SimpleLoopWithIndirectLoopBound.body - } - )"; - - string hlo_string = tensorflow::str_util::StringReplace( - hlo_string_template, "{{LOOP_BOUND}}", - tensorflow::strings::StrCat(42 + num_iters), - /*replace_all=*/true); - ParseAndVerifyModule(hlo_string.c_str()); -} - TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { MakeModuleWithSimpleLoop(/*num_iters=*/0); HloModule* the_module = &module(); @@ -118,15 +74,6 @@ TEST_F(WhileLoopSimplifierTest, LoopWithZeroIterationSimiplified) { op::Tuple(op::Constant(), op::Constant())); } -TEST_F(WhileLoopSimplifierTest, - LoopWithZeroIterationTupleElementLoopBoundSimplified) { - MakeModuleWithSimpleLoopTupleElementLoopBound(/*num_iters=*/0); - HloModule* the_module = &module(); - ASSERT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); - EXPECT_THAT(the_module->entry_computation()->root_instruction(), - op::Tuple(op::Constant(), op::Constant(), op::Constant())); -} - TEST_F(WhileLoopSimplifierTest, LoopWithOneIterationSimplified) { MakeModuleWithSimpleLoop(/*num_iters=*/1); HloModule* the_module = &module(); @@ -135,15 +82,6 @@ TEST_F(WhileLoopSimplifierTest, LoopWithOneIterationSimplified) { op::Tuple(op::Add(), op::Multiply())); } -TEST_F(WhileLoopSimplifierTest, - LoopWithOneIterationTupleELementLoopBoundSimplified) { - MakeModuleWithSimpleLoopTupleElementLoopBound(/*num_iters=*/1); - HloModule* the_module = &module(); - ASSERT_TRUE(WhileLoopSimplifier().Run(the_module).ValueOrDie()); - EXPECT_THAT(the_module->entry_computation()->root_instruction(), - op::Tuple(op::Add(), op::Multiply(), op::Constant())); -} - TEST_F(WhileLoopSimplifierTest, LoopWithTwoIterationsNotSimplified) { MakeModuleWithSimpleLoop(/*num_iters=*/2); EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); @@ -426,6 +364,7 @@ TEST_F(WhileLoopSimplifierTest, LoopWithNonTupleBodyShapeNotSimplified) { HloModule BodyHasNonTupleRoot BodyHasNonTupleRoot.passthrough 
{ ROOT param = (s32[], s32[]) parameter(0) + get-tuple-element = s32[] get-tuple-element((s32[], s32[]) param), index=1 } BodyHasNonTupleRoot.always_true { param.1 = (s32[], s32[]) parameter(0) @@ -443,38 +382,5 @@ TEST_F(WhileLoopSimplifierTest, LoopWithNonTupleBodyShapeNotSimplified) { EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); } -TEST_F(WhileLoopSimplifierTest, - LoopWithNonTupleBodyRootInstructionNotSimplified) { - const string hlo_string = R"( - HloModule SimpleLoop - SimpleLoop.body { - loop_var.1 = (s32[], s32[3]{0}) parameter(0) - get-tuple-element.1 = s32[] get-tuple-element(loop_var.1), index=0 - constant.1 = s32[] constant(1) - add = s32[] add(get-tuple-element.1, constant.1) - get-tuple-element.2 = s32[3]{0} get-tuple-element(loop_var.1), index=1 - multiply = s32[3]{0} multiply(get-tuple-element.2, get-tuple-element.2) - ROOT custom-call = (s32[], s32[3]{0}) custom-call(add, multiply), - custom_call_target="x" - } - SimpleLoop.condition { - loop_var.2 = (s32[], s32[3]{0}) parameter(0) - get-tuple-element.3 = s32[] get-tuple-element(loop_var.2), index=0 - constant.2 = s32[] constant(44) - ROOT less-than = pred[] less-than(get-tuple-element.3, constant.2) - } - ENTRY SimpleLoop { - constant.3 = s32[] constant(42) - constant.4 = s32[3]{0} constant({0, 1, 2}) - tuple.1 = (s32[], s32[3]{0}) tuple(constant.3, constant.4) - ROOT while = (s32[], s32[3]{0}) while(tuple.1), condition= - SimpleLoop.condition, body=SimpleLoop.body - } - )"; - - ParseAndVerifyModule(hlo_string.c_str()); - EXPECT_FALSE(WhileLoopSimplifier().Run(&module()).ValueOrDie()); -} - } // namespace } // namespace xla -- GitLab From 0ebfee36ed65f3540c216f10b8ec326b7b52db3a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 10:39:50 -0800 Subject: [PATCH 668/884] Make SetNumThreads apply to the eigen threads. (This creates a dependency on eigen!) 
PiperOrigin-RevId: 188504172 --- tensorflow/contrib/lite/context.h | 5 ++ tensorflow/contrib/lite/interpreter.cc | 7 ++- tensorflow/contrib/lite/kernels/BUILD | 17 ++++++ tensorflow/contrib/lite/kernels/conv.cc | 10 ++-- .../contrib/lite/kernels/eigen_support.cc | 52 +++++++++++++++++++ .../contrib/lite/kernels/eigen_support.h | 34 ++++++++++++ .../contrib/lite/kernels/gemm_support.cc | 7 +-- .../contrib/lite/kernels/gemm_support.h | 3 -- 8 files changed, 116 insertions(+), 19 deletions(-) create mode 100644 tensorflow/contrib/lite/kernels/eigen_support.cc create mode 100644 tensorflow/contrib/lite/kernels/eigen_support.h diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index 23946dd26e..6491d8c86a 100644 --- a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -324,9 +324,14 @@ typedef struct TfLiteContext { struct TfLiteContext*, TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, TfLiteDelegate* delegate); + // Number of threads that are recommended to subsystems like gemmlowp and + // eigen. + int recommended_num_threads; + // TODO(ahentz): we should create a more general mechanism for this sort of // library-global objects. 
void* gemm_context; + void* eigen_context; } TfLiteContext; typedef struct _TfLiteRegistration { diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 4710488065..819782a3c6 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -92,7 +92,9 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) context_.AddTensors = AddTensors; context_.tensors = nullptr; context_.tensors_size = 0; + context_.eigen_context = nullptr; context_.gemm_context = nullptr; + context_.recommended_num_threads = 0; // Invalid to call these these except from TfLiteDelegate SetForbiddenContextFunction(&context_.GetNodeAndRegistration); @@ -691,10 +693,7 @@ void Interpreter::UseNNAPI(bool enable) { } void Interpreter::SetNumThreads(int num_threads) { - // TODO(ahentz): this forces us to link against gemmlowp even when the ops - // don't use it. We should implement some dynamic mechanism for this sort of - // library-specific initialization. 
- tflite::gemm_support::SetMaxNumThreads(&context_, num_threads); + context_.recommended_num_threads = num_threads; } TfLiteStatus Interpreter::ModifyGraphWithDelegate(TfLiteDelegate* delegate) { diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index c6c11b0aee..9c63269324 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -40,6 +40,22 @@ cc_library( ], ) +cc_library( + name = "eigen_support", + srcs = [ + "eigen_support.cc", + ], + hdrs = [ + "eigen_support.h", + ], + copts = tflite_copts(), + deps = [ + ":op_macros", + "//tensorflow/contrib/lite:context", + "//third_party/eigen3", + ], +) + cc_library( name = "gemm_support", srcs = [ @@ -175,6 +191,7 @@ cc_library( }), deps = [ ":activation_functor", + ":eigen_support", ":kernel_util", ":op_macros", "//tensorflow/contrib/lite:builtin_op_data", diff --git a/tensorflow/contrib/lite/kernels/conv.cc b/tensorflow/contrib/lite/kernels/conv.cc index 6821a22226..b91ba1a03d 100644 --- a/tensorflow/contrib/lite/kernels/conv.cc +++ b/tensorflow/contrib/lite/kernels/conv.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/contrib/lite/builtin_op_data.h" #include "tensorflow/contrib/lite/context.h" +#include "tensorflow/contrib/lite/kernels/eigen_support.h" #include "tensorflow/contrib/lite/kernels/gemm_support.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/cblas_conv.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/multithreaded_conv.h" @@ -87,18 +88,15 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) { // to carry information from Prepare() to Eval(). auto* data = new OpData; gemm_support::IncrementUsageCounter(context); + eigen_support::IncrementUsageCounter(context); - // TODO(ahentz): This is the gemmlowp context, which really only applies to - // quantized kernels. 
However, Interpreter::SetNumThreads() should also be - // setting the number of kernel on Eigen, so this works OK as a proxy for - // now. - int num_threads = gemm_support::GetFromContext(context)->max_num_threads(); - data->run_multithreaded_kernel = num_threads != 1; + data->run_multithreaded_kernel = context->recommended_num_threads != 1; return data; } void Free(TfLiteContext* context, void* buffer) { + eigen_support::DecrementUsageCounter(context); gemm_support::DecrementUsageCounter(context); delete reinterpret_cast(buffer); } diff --git a/tensorflow/contrib/lite/kernels/eigen_support.cc b/tensorflow/contrib/lite/kernels/eigen_support.cc new file mode 100644 index 0000000000..1435a45672 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/eigen_support.cc @@ -0,0 +1,52 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include "tensorflow/contrib/lite/kernels/eigen_support.h" + +#include "third_party/eigen3/Eigen/Core" +#include "tensorflow/contrib/lite/kernels/op_macros.h" + +namespace tflite { +namespace eigen_support { + +struct RefCountedEigenContext { + int num_references = 0; +}; + +void IncrementUsageCounter(TfLiteContext* context) { + auto* ptr = reinterpret_cast(context->eigen_context); + if (ptr == nullptr) { + Eigen::setNbThreads(context->recommended_num_threads); + + ptr = new RefCountedEigenContext; + ptr->num_references = 0; + context->eigen_context = ptr; + } + ptr->num_references++; +} + +void DecrementUsageCounter(TfLiteContext* context) { + auto* ptr = reinterpret_cast(context->eigen_context); + if (ptr == nullptr) { + TF_LITE_FATAL( + "Call to DecrementUsageCounter() not preceded by " + "IncrementUsageCounter()"); + } + if (--ptr->num_references == 0) { + delete ptr; + } +} + +} // namespace eigen_support +} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/eigen_support.h b/tensorflow/contrib/lite/kernels/eigen_support.h new file mode 100644 index 0000000000..d47e691123 --- /dev/null +++ b/tensorflow/contrib/lite/kernels/eigen_support.h @@ -0,0 +1,34 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#ifndef TENSORFLOW_CONTRIB_LITE_KERNELS_EIGEN_SUPPORT_H_ +#define TENSORFLOW_CONTRIB_LITE_KERNELS_EIGEN_SUPPORT_H_ + +#include "tensorflow/contrib/lite/context.h" + +namespace tflite { +namespace eigen_support { + +// Let the framework know that the op will be using Eigen. If necessary a set of +// temporary Eigen objects might be created and placed in 'context'. +void IncrementUsageCounter(TfLiteContext* context); + +// Let the framework know that the op stopped using Eigen. If there are no more +// usages all temporary Eigen objects will be deleted. +void DecrementUsageCounter(TfLiteContext* context); + +} // namespace eigen_support +} // namespace tflite + +#endif // TENSORFLOW_CONTRIB_LITE_KERNELS_EIGEN_SUPPORT_H_ diff --git a/tensorflow/contrib/lite/kernels/gemm_support.cc b/tensorflow/contrib/lite/kernels/gemm_support.cc index eb2b0aacf7..df8a9c8cee 100644 --- a/tensorflow/contrib/lite/kernels/gemm_support.cc +++ b/tensorflow/contrib/lite/kernels/gemm_support.cc @@ -29,6 +29,7 @@ void IncrementUsageCounter(TfLiteContext* context) { if (ptr == nullptr) { ptr = new RefCountedGemmContext; ptr->gemm_context_ = new gemmlowp::GemmContext(); + ptr->gemm_context_->set_max_num_threads(context->recommended_num_threads); ptr->num_references_ = 0; context->gemm_context = ptr; } @@ -58,11 +59,5 @@ gemmlowp::GemmContext* GetFromContext(TfLiteContext* context) { return ptr->gemm_context_; } -void SetMaxNumThreads(TfLiteContext* context, int num_threads) { - IncrementUsageCounter(context); - GetFromContext(context)->set_max_num_threads(num_threads); - DecrementUsageCounter(context); -} - } // namespace gemm_support } // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/gemm_support.h b/tensorflow/contrib/lite/kernels/gemm_support.h index 466781cbce..37af772c68 100644 --- a/tensorflow/contrib/lite/kernels/gemm_support.h +++ b/tensorflow/contrib/lite/kernels/gemm_support.h @@ -45,9 
+45,6 @@ void IncrementUsageCounter(TfLiteContext* context); // 'context'. If there are no more usages the GemmContext will be deleted. void DecrementUsageCounter(TfLiteContext* context); -// Set the maximum number threads available for gemmlowp operations. -void SetMaxNumThreads(TfLiteContext* context, int num_threads); - } // namespace gemm_support } // namespace tflite -- GitLab From eaff882e8e3868f6f8dfde56347ec408592154a0 Mon Sep 17 00:00:00 2001 From: Justin Lebar Date: Fri, 9 Mar 2018 11:11:44 -0800 Subject: [PATCH 669/884] [XLA:GPU] Don't fuse get-tuple-element. Fusing GTE works, but it's slower than not fusing. (In some sense, GTE is *always* fused; it's just that our "implicit fusion" implementation is faster than our explicit fusion implementation.) PiperOrigin-RevId: 188509801 --- .../xla/service/gpu/instruction_fusion.cc | 7 +++- .../service/gpu/instruction_fusion_test.cc | 42 +++++++------------ 2 files changed, 22 insertions(+), 27 deletions(-) diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc index 84504d29e0..f6576cd8e0 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion.cc @@ -25,6 +25,12 @@ namespace gpu { namespace { bool IsFusile(const HloInstruction& hlo) { + // Don't fuse get-tuple-element on GPU: We can, but it's slower than not + // fusing. We never generate kernels for unfused GTEs. Instead, if an + // unfused GTE is an input to a kernel (including a fusion kernel), we + // compute the address of the GTE at the top of the kernel. Often we know the + // address of the GTE result statically, so we can do this without chasing any + // pointers. 
return (hlo.IsElementwise() && hlo.operand_count() > 0) || hlo.opcode() == HloOpcode::kBitcast || hlo.opcode() == HloOpcode::kBroadcast || @@ -32,7 +38,6 @@ bool IsFusile(const HloInstruction& hlo) { hlo.opcode() == HloOpcode::kDynamicSlice || hlo.opcode() == HloOpcode::kDynamicUpdateSlice || hlo.opcode() == HloOpcode::kFusion || - hlo.opcode() == HloOpcode::kGetTupleElement || hlo.opcode() == HloOpcode::kPad || hlo.opcode() == HloOpcode::kReduce || hlo.opcode() == HloOpcode::kReduceWindow || diff --git a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc index c81dbb7bf3..f383d19035 100644 --- a/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc +++ b/tensorflow/compiler/xla/service/gpu/instruction_fusion_test.cc @@ -138,32 +138,6 @@ TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfDotUnfused) { .ValueOrDie()); } -TEST_F(InstructionFusionTest, GetTupleElementFused) { - HloComputation::Builder builder(TestName()); - Shape data_shape = ShapeUtil::MakeShape(F32, {8}); - Shape tuple_shape = ShapeUtil::MakeTupleShape({data_shape, data_shape}); - auto param = builder.AddInstruction( - HloInstruction::CreateParameter(0, tuple_shape, "param")); - auto gte0 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, param, 0)); - auto gte1 = builder.AddInstruction( - HloInstruction::CreateGetTupleElement(data_shape, param, 1)); - builder.AddInstruction( - HloInstruction::CreateBinary(data_shape, HloOpcode::kAdd, gte0, gte1)); - auto module = CreateNewModule(); - auto computation = module->AddEntryComputation(builder.Build()); - EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true) - .Run(module.get()) - .ValueOrDie()); - HloInstruction* root = computation->root_instruction(); - EXPECT_EQ(HloOpcode::kFusion, root->opcode()); - HloInstruction* fused_root = root->fused_expression_root(); - EXPECT_EQ(HloOpcode::kAdd, fused_root->opcode()); - // Check that 
operands of 'fused_root' are GTE. - EXPECT_EQ(HloOpcode::kGetTupleElement, fused_root->operand(0)->opcode()); - EXPECT_EQ(HloOpcode::kGetTupleElement, fused_root->operand(1)->opcode()); -} - // Tests that broadcasts fused into a fusion with a reduce root. TEST_F(InstructionFusionTest, BroadcastIntoReduce) { auto module = tools::Parse(R"( @@ -238,5 +212,21 @@ TEST_F(InstructionFusionTest, AddIntoBitcast) { op::Bitcast(op::Add(op::Parameter(), op::Parameter()))); } +TEST_F(InstructionFusionTest, DontFuseGTE) { + auto module = tools::Parse(R"( + HloModule test_module + ENTRY DontFuseGTE { + p0 = (f32[10], f32[10]) parameter(0) + gte0 = f32[10] get-tuple-element(p0), index=0 + gte1 = f32[10] get-tuple-element(p0), index=1 + ROOT add = f32[10] add(gte0, gte1) + })") + .ValueOrDie(); + + EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true) + .Run(module.get()) + .ValueOrDie()); +} + } // namespace gpu } // namespace xla -- GitLab From 46c2d1a6c4c65883fa4a37f3737f1cdc0eebceef Mon Sep 17 00:00:00 2001 From: Mingxing Tan Date: Fri, 9 Mar 2018 11:29:40 -0800 Subject: [PATCH 670/884] Add bool type for tflite. 
PiperOrigin-RevId: 188512706 --- tensorflow/contrib/lite/toco/tooling_util.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/toco/tooling_util.cc b/tensorflow/contrib/lite/toco/tooling_util.cc index 48aad89b8c..e70291ad0e 100644 --- a/tensorflow/contrib/lite/toco/tooling_util.cc +++ b/tensorflow/contrib/lite/toco/tooling_util.cc @@ -1354,6 +1354,8 @@ void UseDefaultMinMaxRangeValues(Model* model, double default_ranges_min, int ElementSize(ArrayDataType data_type) { switch (data_type) { + case ArrayDataType::kBool: + return sizeof(bool); case ArrayDataType::kFloat: return 4; case ArrayDataType::kInt8: @@ -1379,7 +1381,7 @@ int ElementSize(ArrayDataType data_type) { LOG(FATAL) << "Transient arrays with strings are not supported yet"; return 0; default: - LOG(FATAL) << "Should not get here."; + LOG(FATAL) << "Unknown data_type = " << static_cast(data_type); return 0; } } -- GitLab From 61a744fffbcc68e453aafc6eaa2c7ff2318a3584 Mon Sep 17 00:00:00 2001 From: Jacques Pienaar Date: Fri, 9 Mar 2018 11:37:04 -0800 Subject: [PATCH 671/884] Add more debugging output, filtering by int_type in XLA test, more tests. 
PiperOrigin-RevId: 188513895 --- tensorflow/compiler/tests/BUILD | 2 +- tensorflow/compiler/tests/binary_ops_test.py | 106 ++++++++++++++++-- tensorflow/compiler/tests/xla_test.py | 11 +- .../compiler/xla/service/hlo_instruction.cc | 1 + .../compiler/xla/service/hlo_verifier.cc | 7 +- .../compiler/xla/service/shape_inference.cc | 5 +- .../compiler/xla/tests/dynamic_ops_test.cc | 8 +- 7 files changed, 121 insertions(+), 19 deletions(-) diff --git a/tensorflow/compiler/tests/BUILD b/tensorflow/compiler/tests/BUILD index 4143aa1f80..85a2adab28 100644 --- a/tensorflow/compiler/tests/BUILD +++ b/tensorflow/compiler/tests/BUILD @@ -98,7 +98,7 @@ tf_xla_py_test( tf_xla_py_test( name = "binary_ops_test", - size = "small", + size = "medium", srcs = ["binary_ops_test.py"], shard_count = 5, tags = [ diff --git a/tensorflow/compiler/tests/binary_ops_test.py b/tensorflow/compiler/tests/binary_ops_test.py index 6bcfed7b69..ba7b9bacd2 100644 --- a/tensorflow/compiler/tests/binary_ops_test.py +++ b/tensorflow/compiler/tests/binary_ops_test.py @@ -232,11 +232,16 @@ class BinaryOpsTest(XLATestCase): expected=np.right_shift(lhs, rhs)) if dtype in [np.int8, np.int16, np.int32, np.int64]: - lhs = np.array([-1, -5, -3, -14], dtype=dtype) - rhs = np.array([5, 0, 1, 11], dtype=dtype) - self._testBinary( - bitwise_ops.right_shift, lhs, rhs, - expected=np.right_shift(lhs, rhs)) + lhs = np.array([-1, -5, -3, -14, -2], dtype=dtype) + rhs = np.array([5, 0, 1, 11, 36], dtype=dtype) + # HLO has saturating shift behavior. 
+ bits = np.ceil( + np.log(np.iinfo(dtype).max - np.iinfo(dtype).min) / np.log(2)) + expected = [ + np.right_shift(l, r) if r < bits else np.sign(l) + for l, r in zip(lhs, rhs) + ] + self._testBinary(bitwise_ops.right_shift, lhs, rhs, expected=expected) def testNumericOps(self): for dtype in self.numeric_types: @@ -255,12 +260,18 @@ class BinaryOpsTest(XLATestCase): np.array([[1], [2]], dtype=dtype), dtype(7), expected=np.array([[8], [9]], dtype=dtype)) + self._testBinary( + math_ops.add, + np.array([0xffffffff, 0xfffffffff, 1, 1], dtype=np.int64), + np.array([1, 1, 0xffffffff, 0xfffffffff], dtype=np.int64), + expected=np.array( + [1 << 32, 1 << 36, 1 << 32, 1 << 36], dtype=np.int64)) self._testBinary( math_ops.subtract, - np.array([1, 2], dtype=dtype), - np.array([10, 20], dtype=dtype), - expected=np.array([-9, -18], dtype=dtype)) + np.array([1, 2, 100], dtype=dtype), + np.array([10, 20, -1], dtype=dtype), + expected=np.array([-9, -18, 101], dtype=dtype)) self._testBinary( math_ops.subtract, dtype(5), @@ -668,6 +679,11 @@ class BinaryOpsTest(XLATestCase): np.array([[10], [7], [2]], dtype=np.float32), np.float32(7), expected=np.array([[False], [False], [True]], dtype=np.bool)) + self._testBinary( + less_op, + np.array([[10], [7], [2], [-1]], dtype=np.int64), + np.int64(7), + expected=np.array([[False], [False], [True], [True]], dtype=np.bool)) for less_equal_op in [math_ops.less_equal, (lambda x, y: x <= y)]: self._testBinary( @@ -686,6 +702,80 @@ class BinaryOpsTest(XLATestCase): np.float32(7), expected=np.array([[False], [True], [True]], dtype=np.bool)) + def testS64Comparisons(self): + for op in [(lambda x, y: x < y), (lambda x, y: x <= y), + (lambda x, y: x >= y), (lambda x, y: x > y)]: + lhs = np.array( + [ + np.int64(0x000000007FFFFFFF), + np.int64(0x000000007FFFFFFF), + np.int64(0x0000000080000000), + np.int64(0x0000000080000000), + np.int64(0x0000000080000001), + np.int64(0x00000000FFFF0000), + np.int64(0x00000000FFFF0000), + np.int64(0x00000000FFFFFFFE), + 
np.int64(0x00000000FFFFFFFF), + np.int64(0x00000000FFFFFFFF), + np.int64(0x0000000100000000), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(0x0000000200000002), + np.int64(-0x7FFFFFFF00000002), + np.int64(-0x7FFFFFFF00000002), + np.int64(-0x7FFFFFFF00000001), + np.int64(-0x7FFFFFFF00000001), + np.int64(-0x7FFFFFFF00000001), + np.int64(-0x7FFFFFFF00000001), + np.int64(0x7ffffffefff00010), + np.int64(0x7ffffffefff00010), + np.int64(-1), + np.int64(-1) + ], + dtype=np.int64) + rhs = np.array( + [ + np.int64(0x000000007FFFFFFE), + np.int64(0x000000007FFFFFFF), + np.int64(0x000000007FFFFFFF), + np.int64(0x0000000080000000), + np.int64(0x0000000080000001), + np.int64(0x00000000FFFF0000), + np.int64(0x00000000FFFF0001), + np.int64(0x00000000FFFFFFFF), + np.int64(0x00000000FFFFFFFE), + np.int64(0x00000000FFFFFFFF), + np.int64(0x00000000FFFFFFFF), + np.int64(0x0000000100000001), + np.int64(0x0000000100000002), + np.int64(0x0000000100000003), + np.int64(0x0000000200000001), + np.int64(0x0000000200000002), + np.int64(0x0000000200000003), + np.int64(0x0000000300000001), + np.int64(0x0000000300000002), + np.int64(0x0000000300000003), + np.int64(0x00000000FFFFFFFF), + np.int64(-0x7FFFFFFF00000001), + np.int64(0x00000000FFFFFFFE), + np.int64(0x00000000FFFFFFFF), + np.int64(-0x7FFFFFFF00000002), + np.int64(-0x7FFFFFFF00000001), + np.int64(0x00000000FFFFFFFF), + np.int64(-0x7FFFFFFF00000001), + np.int64(-2), + np.int64(-1) + ], + dtype=np.int64) + expected = np.array([op(l, r) for l, r in zip(lhs, rhs)], dtype=np.bool) + self._testBinary(op, lhs, rhs, expected=expected) + def testBroadcasting(self): """Tests broadcasting behavior of an operator.""" diff --git a/tensorflow/compiler/tests/xla_test.py b/tensorflow/compiler/tests/xla_test.py index 
7e1f5c76ed..cc778f1c3c 100644 --- a/tensorflow/compiler/tests/xla_test.py +++ b/tensorflow/compiler/tests/xla_test.py @@ -71,14 +71,14 @@ class XLATestCase(test.TestCase): self._all_types = set( [dtype.as_numpy_dtype for dtype in self._all_tf_types]) - self.int_types = set([dtype.as_numpy_dtype for dtype in self.int_tf_types]) + self._int_types = set([dtype.as_numpy_dtype for dtype in self.int_tf_types]) self._float_types = set( [dtype.as_numpy_dtype for dtype in self._float_tf_types]) self.complex_types = set([ dtype.as_numpy_dtype for dtype in self.complex_tf_types ]) - self._numeric_types = set( - self.int_types | self._float_types | self.complex_types) + self._numeric_types = set(self._int_types | self._float_types + | self.complex_types) # Parse the manifest file, if any, into a regex identifying tests to # disable @@ -130,6 +130,11 @@ class XLATestCase(test.TestCase): name = '{}.{}'.format(type(self).__name__, self._testMethodName) return self._float_tf_types - self._method_types_filter.get(name, set()) + @property + def int_types(self): + name = '{}.{}'.format(type(self).__name__, self._testMethodName) + return self._int_types - self._method_types_filter.get(name, set()) + @property def numeric_tf_types(self): name = '{}.{}'.format(type(self).__name__, self._testMethodName) diff --git a/tensorflow/compiler/xla/service/hlo_instruction.cc b/tensorflow/compiler/xla/service/hlo_instruction.cc index af9d772b00..d33add23d0 100644 --- a/tensorflow/compiler/xla/service/hlo_instruction.cc +++ b/tensorflow/compiler/xla/service/hlo_instruction.cc @@ -182,6 +182,7 @@ StatusOr> HloInstruction::CreateFromProto( /* static */ std::unique_ptr HloInstruction::CreateGetTupleElement(const Shape& shape, HloInstruction* operand, int64 index) { + CHECK(ShapeUtil::IsTuple(operand->shape())); auto instruction = WrapUnique(new HloInstruction(HloOpcode::kGetTupleElement, shape)); instruction->tuple_index_ = index; diff --git a/tensorflow/compiler/xla/service/hlo_verifier.cc 
b/tensorflow/compiler/xla/service/hlo_verifier.cc index b1fd068115..8c875698eb 100644 --- a/tensorflow/compiler/xla/service/hlo_verifier.cc +++ b/tensorflow/compiler/xla/service/hlo_verifier.cc @@ -762,11 +762,14 @@ StatusOr HloVerifier::Run(HloModule* module) { } else if (instruction->opcode() == HloOpcode::kBroadcast) { // If you see this failure then someone has confused the difference // between the HLO broadcast op, and the UserComputation broadcast - // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I + // op. See https://groups.google.com/forum/#!topic/xla-dev/9LqijHmTt_I // or ComputationLowerer::Visit() TF_RET_CHECK(instruction->dimensions().size() == ShapeUtil::Rank(instruction->operand(0)->shape())) - << "Broadcast HLO has invalid number of dimensions."; + << "Broadcast HLO (" << instruction->ToShortString() + << ") has invalid number of dimensions: " + << instruction->dimensions().size() + << " != " << ShapeUtil::Rank(instruction->operand(0)->shape()); } else if (instruction->opcode() == HloOpcode::kWhile) { auto* while_cond = instruction->while_condition(); auto* while_body = instruction->while_body(); diff --git a/tensorflow/compiler/xla/service/shape_inference.cc b/tensorflow/compiler/xla/service/shape_inference.cc index 2ff7ae97b7..74f744a62b 100644 --- a/tensorflow/compiler/xla/service/shape_inference.cc +++ b/tensorflow/compiler/xla/service/shape_inference.cc @@ -193,7 +193,10 @@ tensorflow::Status VerifyReducerShape(const ProgramShape& reducer_shape, const Shape& accumulator_shape = reducer_shape.result(); if (ShapeUtil::Rank(accumulator_shape) != 0) { - return InvalidArgument("Reduction function must have rank 0."); + return InvalidArgument( + "Reduction function must have rank 0 (rank %lld reduction function " + "given).", + ShapeUtil::Rank(accumulator_shape)); } // Check that the accumulator can be passed in as the first argument. 
diff --git a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc index 877dc7db0e..4f354e6aef 100644 --- a/tensorflow/compiler/xla/tests/dynamic_ops_test.cc +++ b/tensorflow/compiler/xla/tests/dynamic_ops_test.cc @@ -206,19 +206,19 @@ XLA_TEST_F(DynamicSliceTest, Int32R1BF16) { TestR1(); } XLA_TEST_F(DynamicSliceTest, Int32R1) { TestR1(); } XLA_TEST_F(DynamicSliceTest, Int32R1Wrap) { TestR1Wrap(); } XLA_TEST_F(DynamicSliceTest, Int64R1) { TestR1(); } -XLA_TEST_F(DynamicSliceTest, UInt64R1) { TestR1(); } +XLA_TEST_F(DynamicSliceTest, UInt64R1) { TestR1(); } XLA_TEST_F(DynamicSliceTest, Int32R2BF16) { TestR2(); } XLA_TEST_F(DynamicSliceTest, Int32R2) { TestR2(); } XLA_TEST_F(DynamicSliceTest, Int32R2Wrap) { TestR2Wrap(); } -XLA_TEST_F(DynamicSliceTest, Int64R2) { TestR2(); } +XLA_TEST_F(DynamicSliceTest, Int64R2) { TestR2(); } XLA_TEST_F(DynamicSliceTest, UInt64R2) { TestR2(); } XLA_TEST_F(DynamicSliceTest, Int32R3BF16) { TestR3(); } XLA_TEST_F(DynamicSliceTest, Int32R3) { TestR3(); } XLA_TEST_F(DynamicSliceTest, Int32R3Wrap) { TestR3Wrap(); } XLA_TEST_F(DynamicSliceTest, Int64R3) { TestR3(); } -XLA_TEST_F(DynamicSliceTest, UInt64R3) { TestR3(); } +XLA_TEST_F(DynamicSliceTest, UInt64R3) { TestR3(); } XLA_TEST_F(DynamicSliceTest, Int32R1Pred) { // Slice at dimension start. @@ -506,7 +506,7 @@ XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_CPU_PARALLEL(Int32R1BF16)) { } XLA_TEST_F(DynamicUpdateSliceTest, Int32R1) { TestR1(); } XLA_TEST_F(DynamicUpdateSliceTest, Int64R1) { TestR1(); } -XLA_TEST_F(DynamicUpdateSliceTest, UInt64R1) { TestR1(); } +XLA_TEST_F(DynamicUpdateSliceTest, UInt64R1) { TestR1(); } // TODO(b/71820067): The CPU parallel backend failed for this on 2018-01-10. 
XLA_TEST_F(DynamicUpdateSliceTest, DISABLED_ON_CPU_PARALLEL(Int32R2BF16)) { -- GitLab From 20dfc25c378c600fac683e62dc8a1ed2a522711c Mon Sep 17 00:00:00 2001 From: Rohan Jain Date: Fri, 9 Mar 2018 12:20:32 -0800 Subject: [PATCH 672/884] Allowing for FunctionLibraryRuntime::Run calls to not be provided with a runner to execute kernels with. In that case, it defaults to using the threadpool provided by the device. Also makes sure each device has a default threadpool to fall back on. PiperOrigin-RevId: 188520648 --- tensorflow/c/eager/runtime_test.cc | 2 +- .../core/common_runtime/direct_session.cc | 2 +- .../common_runtime/direct_session_test.cc | 54 +------- tensorflow/core/common_runtime/function.cc | 33 ++++- tensorflow/core/common_runtime/function.h | 4 +- .../core/common_runtime/function_test.cc | 116 ++++++++++++++---- .../core/common_runtime/function_testlib.cc | 53 ++++++++ .../core/common_runtime/function_testlib.h | 16 +++ .../process_function_library_runtime.cc | 27 ++-- .../process_function_library_runtime.h | 3 + .../process_function_library_runtime_test.cc | 4 +- .../core/distributed_runtime/graph_mgr.cc | 3 +- 12 files changed, 221 insertions(+), 96 deletions(-) diff --git a/tensorflow/c/eager/runtime_test.cc b/tensorflow/c/eager/runtime_test.cc index 643153058c..4f75d27887 100644 --- a/tensorflow/c/eager/runtime_test.cc +++ b/tensorflow/c/eager/runtime_test.cc @@ -41,7 +41,7 @@ class TestEnv { device_mgr_.reset(new DeviceMgr({device})); flib_runtime_ = NewFunctionLibraryRuntime(device_mgr_.get(), Env::Default(), device, TF_GRAPH_DEF_VERSION, - &flib_def_, {}, nullptr); + &flib_def_, nullptr, {}, nullptr); } FunctionLibraryRuntime* function_library_runtime() const { diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index ecbffcbf6c..9def58cb9c 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1181,7 +1181,7 @@ Status 
DirectSession::GetOrCreateExecutors( } func_info->proc_flr.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), options_.env, graph_def_version, - func_info->flib_def.get(), optimizer_opts)); + func_info->flib_def.get(), optimizer_opts, thread_pools_[0].first)); GraphOptimizer optimizer(optimizer_opts); for (auto iter = graphs.begin(); iter != graphs.end(); ++iter) { diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index b75a4f76d9..6fe0cba1e5 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -23,6 +23,7 @@ limitations under the License. #include "tensorflow/core/common_runtime/device_factory.h" #include "tensorflow/core/common_runtime/device_mgr.h" +#include "tensorflow/core/common_runtime/function_testlib.h" #include "tensorflow/core/framework/allocator.h" #include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/op_kernel.h" @@ -868,59 +869,14 @@ TEST(DirectSessionTest, TestTimeoutCleanShutdown) { TF_ASSERT_OK(session->Close()); } -class BlockingOpState { - public: - void AwaitState(int awaiting_state) { - mutex_lock ml(mu_); - while (state_ != awaiting_state) { - cv_.wait(ml); - } - } - void MoveToState(int expected_current, int next) { - mutex_lock ml(mu_); - CHECK_EQ(expected_current, state_); - state_ = next; - cv_.notify_all(); - } - - private: - mutex mu_; - condition_variable cv_; - int state_ = 0; -}; -static BlockingOpState* blocking_op_state = nullptr; - -// BlockingOp blocks on the global state, -// and also updates it when it is unblocked and finishing computation. 
-class BlockingOp : public OpKernel { - public: - explicit BlockingOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} - void Compute(OpKernelContext* ctx) override { - blocking_op_state->MoveToState(0, 1); - blocking_op_state->AwaitState(2); - blocking_op_state->MoveToState(2, 3); - - Tensor* out = nullptr; - const Tensor& in = ctx->input(0); - OP_REQUIRES_OK(ctx, ctx->allocate_output(0, in.shape(), &out)); - out->flat() = in.flat(); - } -}; -REGISTER_KERNEL_BUILDER(Name("BlockingOp").Device(DEVICE_CPU), BlockingOp); -REGISTER_OP("BlockingOp").Input("x: float").Output("y: float").Doc(""); - static void TestSessionInterOpThreadsImpl(bool use_function_lib, bool use_global_pools) { + using test::function::blocking_op_state; + using test::function::BlockingOpState; + FunctionDefLibrary library_graph_def; if (use_function_lib) { - const string lib = R"proto( - signature: { - name: "BlockingOpFn" input_arg: { name: "x" type: DT_FLOAT } - output_arg: { name: "y" type: DT_FLOAT }} - node_def: { name: "y" op: "BlockingOp" input: "x" } - ret: { key: "y" value: "y:y:0" } )proto"; - CHECK(protobuf::TextFormat::ParseFromString( - lib, library_graph_def.add_function())); + *library_graph_def.add_function() = test::function::BlockingOpFn(); } FunctionLibraryDefinition flib(OpRegistry::Global(), library_graph_def); diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index 3e937ceb64..effe53c961 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -34,6 +34,7 @@ limitations under the License. 
#include "tensorflow/core/graph/gradients.h" #include "tensorflow/core/graph/graph_constructor.h" #include "tensorflow/core/graph/optimizer_cse.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/platform/macros.h" @@ -141,6 +142,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { FunctionLibraryRuntimeImpl(const DeviceMgr* dmgr, Env* env, Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, + thread::ThreadPool* default_thread_pool, const OptimizerOptions& optimizer_options, CustomKernelCreator custom_kernel_creator, ProcessFunctionLibraryRuntime* parent); @@ -194,6 +196,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { const FunctionLibraryDefinition* const base_lib_def_; GraphOptimizer optimizer_; const CustomKernelCreator custom_kernel_creator_; + Executor::Args::Runner default_runner_; const string device_name_; std::function get_func_sig_; @@ -243,6 +246,7 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime { FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl( const DeviceMgr* dmgr, Env* env, Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, + thread::ThreadPool* default_thread_pool, const OptimizerOptions& optimizer_options, CustomKernelCreator custom_kernel_creator, ProcessFunctionLibraryRuntime* parent) @@ -253,6 +257,7 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl( base_lib_def_(lib_def), optimizer_(optimizer_options), custom_kernel_creator_(std::move(custom_kernel_creator)), + default_runner_(nullptr), device_name_(device_ == nullptr ? 
ProcessFunctionLibraryRuntime::kDefaultFLRDevice : device_->name()), @@ -264,6 +269,18 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl( create_kernel_ = [this](const NodeDef& ndef, OpKernel** kernel) { return CreateKernel(ndef, kernel); }; + thread::ThreadPool* pool = nullptr; + if (device_ != nullptr) { + pool = device_->tensorflow_device_thread_pool(); + } + if (pool == nullptr) { + pool = default_thread_pool; + } + if (pool != nullptr) { + default_runner_ = [pool](Executor::Args::Closure c) { + pool->Schedule(std::move(c)); + }; + } } FunctionLibraryRuntimeImpl::~FunctionLibraryRuntimeImpl() { @@ -768,6 +785,9 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, return; } + if (run_opts.runner == nullptr) { + run_opts.runner = &default_runner_; + } DCHECK(run_opts.runner != nullptr); Executor::Args* exec_args = new Executor::Args; @@ -854,6 +874,9 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle, done(s); return; } + if (run_opts.runner == nullptr) { + run_opts.runner = &default_runner_; + } DCHECK(run_opts.runner != nullptr); Executor::Args* exec_args = new Executor::Args; @@ -942,21 +965,21 @@ void RegisterDefaultCustomKernelCreator(CustomKernelCreator cb) { std::unique_ptr NewFunctionLibraryRuntime( const DeviceMgr* device_mgr, Env* env, Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, - const OptimizerOptions& optimizer_options, + thread::ThreadPool* thread_pool, const OptimizerOptions& optimizer_options, CustomKernelCreator custom_kernel_creator, ProcessFunctionLibraryRuntime* parent) { return std::unique_ptr(new FunctionLibraryRuntimeImpl( - device_mgr, env, device, graph_def_version, lib_def, optimizer_options, - std::move(custom_kernel_creator), parent)); + device_mgr, env, device, graph_def_version, lib_def, thread_pool, + optimizer_options, std::move(custom_kernel_creator), parent)); } std::unique_ptr NewFunctionLibraryRuntime( const DeviceMgr* device_mgr, Env* 
env, Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, - const OptimizerOptions& optimizer_options, + thread::ThreadPool* thread_pool, const OptimizerOptions& optimizer_options, ProcessFunctionLibraryRuntime* parent) { return NewFunctionLibraryRuntime(device_mgr, env, device, graph_def_version, - lib_def, optimizer_options, + lib_def, thread_pool, optimizer_options, GetCustomCreatorSingleton()->Get(), parent); } diff --git a/tensorflow/core/common_runtime/function.h b/tensorflow/core/common_runtime/function.h index 477340d87a..a0f9fcae0a 100644 --- a/tensorflow/core/common_runtime/function.h +++ b/tensorflow/core/common_runtime/function.h @@ -55,7 +55,7 @@ void RegisterDefaultCustomKernelCreator(CustomKernelCreator cb); std::unique_ptr NewFunctionLibraryRuntime( const DeviceMgr* device_mgr, Env* env, Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, - const OptimizerOptions& optimizer_options, + thread::ThreadPool* thread_pool, const OptimizerOptions& optimizer_options, CustomKernelCreator custom_kernel_creator, ProcessFunctionLibraryRuntime* parent); @@ -65,7 +65,7 @@ std::unique_ptr NewFunctionLibraryRuntime( std::unique_ptr NewFunctionLibraryRuntime( const DeviceMgr* device_mgr, Env* env, Device* device, int graph_def_version, const FunctionLibraryDefinition* lib_def, - const OptimizerOptions& optimizer_options, + thread::ThreadPool* thread_pool, const OptimizerOptions& optimizer_options, ProcessFunctionLibraryRuntime* parent); // FunctionLibraryRuntime::GetFunctionBody returns a description of an diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index 63ad0d231c..d7e5f0018e 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -38,6 +38,7 @@ limitations under the License. 
#include "tensorflow/core/lib/core/notification.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/core/threadpool.h" #include "tensorflow/core/platform/test.h" #include "tensorflow/core/public/session_options.h" #include "tensorflow/core/public/version.h" @@ -135,7 +136,8 @@ TEST_F(FunctionTest, WXPlusB) { class FunctionLibraryRuntimeTest : public ::testing::Test { protected: - void Init(const std::vector& flib) { + void Init(const std::vector& flib, + thread::ThreadPool* default_thread_pool = nullptr) { SessionOptions options; auto* device_count = options.config.mutable_device_count(); device_count->insert({"CPU", 3}); @@ -149,7 +151,7 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { device_mgr_.reset(new DeviceMgr(devices_)); pflr_.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(), - opts, nullptr /* cluster_flr */)); + opts, default_thread_pool, nullptr /* cluster_flr */)); flr0_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); flr1_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:1"); flr2_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:2"); @@ -158,16 +160,20 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle, FunctionLibraryRuntime::Options opts, - const std::vector& args, std::vector rets) { + const std::vector& args, std::vector rets, + bool add_runner = true) { std::atomic call_count(0); std::function)> runner = [&call_count](std::function fn) { ++call_count; test::function::FunctionTestSchedClosure(fn); }; - + if (add_runner) { + opts.runner = &runner; + } else { + opts.runner = nullptr; + } Notification done; - opts.runner = &runner; std::vector out; Status status; flr->Run(opts, handle, args, &out, [&status, &done](const Status& s) { @@ -183,7 +189,9 @@ class FunctionLibraryRuntimeTest : 
public ::testing::Test { *rets[i] = out[i]; } - EXPECT_GE(call_count, 1); // Test runner is used. + if (add_runner) { + EXPECT_GE(call_count, 1); // Test runner is used. + } return Status::OK(); } @@ -204,24 +212,25 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { Status InstantiateAndRun(FunctionLibraryRuntime* flr, const string& name, test::function::Attrs attrs, const std::vector& args, - std::vector rets) { + std::vector rets, bool add_runner = true) { return InstantiateAndRun(flr, name, attrs, FunctionLibraryRuntime::InstantiateOptions(), args, - std::move(rets)); + std::move(rets), add_runner); } Status InstantiateAndRun( FunctionLibraryRuntime* flr, const string& name, test::function::Attrs attrs, const FunctionLibraryRuntime::InstantiateOptions& options, - const std::vector& args, std::vector rets) { + const std::vector& args, std::vector rets, + bool add_runner = true) { FunctionLibraryRuntime::Handle handle; Status status = flr->Instantiate(name, attrs, options, &handle); if (!status.ok()) { return status; } FunctionLibraryRuntime::Options opts; - status = Run(flr, handle, opts, args, rets); + status = Run(flr, handle, opts, args, rets, add_runner); if (!status.ok()) return status; // Release the handle and try running again. It should not succeed. 
@@ -237,16 +246,20 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { } Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle, - FunctionLibraryRuntime::Options opts, CallFrameInterface* frame) { + FunctionLibraryRuntime::Options opts, CallFrameInterface* frame, + bool add_runner = true) { std::atomic call_count(0); std::function)> runner = [&call_count](std::function fn) { ++call_count; test::function::FunctionTestSchedClosure(fn); }; - + if (add_runner) { + opts.runner = &runner; + } else { + opts.runner = nullptr; + } Notification done; - opts.runner = &runner; std::vector out; Status status; flr->Run(opts, handle, frame, [&status, &done](const Status& s) { @@ -258,7 +271,9 @@ class FunctionLibraryRuntimeTest : public ::testing::Test { return status; } - EXPECT_GE(call_count, 1); // Test runner is used. + if (add_runner) { + EXPECT_GE(call_count, 1); // Test runner is used. + } return Status::OK(); } @@ -447,7 +462,7 @@ TEST_F(FunctionLibraryRuntimeTest, StateHandle) { { // Simple case: instantiating with no state_handle. 
for (int32 expected : {6, 4}) { - TF_CHECK_OK(Run(flr0_, handle, opts, {}, {&y})); + TF_CHECK_OK(Run(flr0_, handle, opts, {}, {&y}, true)); test::ExpectTensorEqual(y, test::AsTensor({expected})); } } @@ -460,7 +475,7 @@ TEST_F(FunctionLibraryRuntimeTest, StateHandle) { Instantiate(flr0_, "RandomUniformWrapper", {}, &handle_non_isolated)); EXPECT_EQ(handle, handle_non_isolated); for (int32 expected : {0, 1}) { - TF_CHECK_OK(Run(flr0_, handle_non_isolated, opts, {}, {&y})); + TF_CHECK_OK(Run(flr0_, handle_non_isolated, opts, {}, {&y}, true)); test::ExpectTensorEqual(y, test::AsTensor({expected})); } } @@ -475,7 +490,7 @@ TEST_F(FunctionLibraryRuntimeTest, StateHandle) { &handle_isolated)); EXPECT_NE(handle, handle_isolated); for (int32 expected : {6, 4, 0, 1}) { - TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y})); + TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y}, true)); test::ExpectTensorEqual(y, test::AsTensor({expected})); } } @@ -490,7 +505,7 @@ TEST_F(FunctionLibraryRuntimeTest, StateHandle) { &handle_isolated)); EXPECT_NE(handle, handle_isolated); for (int32 expected : {6, 4, 0, 1}) { - TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y})); + TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y}, true)); test::ExpectTensorEqual(y, test::AsTensor({expected})); } } @@ -507,7 +522,7 @@ TEST_F(FunctionLibraryRuntimeTest, StateHandle) { &handle_isolated)); EXPECT_NE(handle, handle_isolated); for (int32 expected : {6, 4, 0, 1}) { - TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y})); + TF_CHECK_OK(Run(flr0_, handle_isolated, opts, {}, {&y}, true)); test::ExpectTensorEqual(y, test::AsTensor({expected})); } TF_CHECK_OK(flr0_->ReleaseHandle(handle_isolated)); @@ -515,6 +530,59 @@ TEST_F(FunctionLibraryRuntimeTest, StateHandle) { } } +TEST_F(FunctionLibraryRuntimeTest, DefaultThreadpool) { + using test::function::blocking_op_state; + using test::function::BlockingOpState; + + thread::ThreadPool* tp = new thread::ThreadPool(Env::Default(), 
"FLRTest", 1); + Init({test::function::BlockingOpFn(), test::function::XTimesTwo()}, tp); + + auto x = test::AsScalar(1.3); + Tensor y; + blocking_op_state = new BlockingOpState(); + + thread::ThreadPool* tp1 = new thread::ThreadPool(Env::Default(), "tp1", 5); + bool finished_running = false; + tp1->Schedule([&x, &y, &finished_running, this]() { + TF_CHECK_OK(InstantiateAndRun(flr0_, "BlockingOpFn", {}, {x}, {&y}, + false /* add_runner */)); + finished_running = true; + }); + + // InstantiateAndRun shouldn't finish because BlockingOpFn should be blocked. + EXPECT_FALSE(finished_running); + + FunctionLibraryRuntime::Handle h; + TF_CHECK_OK(Instantiate(flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, &h)); + + auto x1 = test::AsTensor({1, 2, 3, 4}); + Tensor y1; + std::atomic num_done(0); + FunctionLibraryRuntime::Options opts; + for (int i = 0; i < 4; ++i) { + tp1->Schedule([&h, &x1, &y1, &opts, &num_done, this]() { + TF_CHECK_OK(Run(flr0_, h, opts, {x1}, {&y1}, false /* add_runner */)); + num_done.fetch_add(1); + }); + } + // All the 4 Run() calls should be blocked because the runner is occupied. + EXPECT_EQ(0, num_done.load()); + + blocking_op_state->AwaitState(1); + blocking_op_state->MoveToState(1, 2); + // Now the runner should be unblocked and all the other Run() calls should + // proceed. 
+ blocking_op_state->AwaitState(3); + blocking_op_state->MoveToState(3, 0); + delete tp1; + EXPECT_TRUE(finished_running); + EXPECT_EQ(4, num_done.load()); + + delete blocking_op_state; + blocking_op_state = nullptr; + delete tp; +} + TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) { Init({test::function::XTimesTwo(), test::function::XTimesFour(), test::function::XTimes16()}); @@ -787,7 +855,7 @@ TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); auto x4_x2_scale = ops::Const( - s.WithOpName("x4/x2/scale/_12__cf__6") + s.WithOpName("x4/x2/scale/_12__cf__10") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 2.0f); auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale); @@ -993,13 +1061,13 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) { auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1); auto scale = ops::Const( - s.WithOpName("scale/_6__cf__11") + s.WithOpName("scale/_6__cf__15") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 2.0f); auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale); auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x); auto const0 = ops::Const( - s.WithOpName("Func/_1/sy/_5__cf__10") + s.WithOpName("Func/_1/sy/_5__cf__14") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 0, {0}); auto func1_rx = ops::internal::BroadcastGradientArgs( @@ -1247,14 +1315,14 @@ TEST_F(FunctionLibraryRuntimeTest, CrossDevice) { opts.rendezvous = new IntraProcessRendezvous(device_mgr_.get()); opts.source_device = "/device:CPU:1"; // Run on flr1_, flr2_ and make sure that the device it ran on was cpu:1. 
- TF_CHECK_OK(Run(flr1_, handle, opts, {}, {&y})); + TF_CHECK_OK(Run(flr1_, handle, opts, {}, {&y}, true)); test::ExpectTensorEqual( y, test::AsTensor({"/job:localhost/replica:0/task:0/device:CPU:1"}, TensorShape({}))); opts.remote_execution = true; opts.source_device = "/job:localhost/replica:0/task:0/cpu:2"; - TF_CHECK_OK(Run(flr2_, handle, opts, {}, {&y})); + TF_CHECK_OK(Run(flr2_, handle, opts, {}, {&y}, true)); test::ExpectTensorEqual( y, test::AsTensor({"/job:localhost/replica:0/task:0/device:CPU:1"}, diff --git a/tensorflow/core/common_runtime/function_testlib.cc b/tensorflow/core/common_runtime/function_testlib.cc index 87733ed2db..1720ee64c0 100644 --- a/tensorflow/core/common_runtime/function_testlib.cc +++ b/tensorflow/core/common_runtime/function_testlib.cc @@ -58,6 +58,59 @@ FunctionDef FindDevice() { {{{"device_name"}, "FindDeviceOp", {}, {}}}); } +void BlockingOpState::AwaitState(int awaiting_state) { + mutex_lock ml(mu_); + while (state_ != awaiting_state) { + cv_.wait(ml); + } +} + +void BlockingOpState::MoveToState(int expected_current, int next) { + mutex_lock ml(mu_); + CHECK_EQ(expected_current, state_); + state_ = next; + cv_.notify_all(); +} + +BlockingOpState* blocking_op_state = nullptr; + +// BlockingOp blocks on the global state, +// and also updates it when it is unblocked and finishing computation. 
+class BlockingOp : public OpKernel { + public: + explicit BlockingOp(OpKernelConstruction* ctx) : OpKernel(ctx) {} + void Compute(OpKernelContext* ctx) override { + blocking_op_state->MoveToState(0, 1); + blocking_op_state->AwaitState(2); + blocking_op_state->MoveToState(2, 3); + + Tensor* out = nullptr; + const Tensor& in = ctx->input(0); + OP_REQUIRES_OK(ctx, ctx->allocate_output(0, in.shape(), &out)); + out->flat() = in.flat(); + } +}; +REGISTER_KERNEL_BUILDER(Name("BlockingOp").Device(DEVICE_CPU), BlockingOp); +REGISTER_OP("BlockingOp") + .Input("x: float") + .Output("y: float") + .Doc("") + .SetShapeFn(shape_inference::UnknownShape); + +FunctionDef BlockingOpFn() { + return FDH::Define( + // Name + "BlockingOpFn", + // Args + {"x: float"}, + // Return values + {"y: float"}, + // Attr def + {}, + // Nodes + {{{"y"}, "BlockingOp", {"x"}, {}}}); +} + // TODO(phawkins): replace with C++ API for calling functions, when that exists. Output Call(Scope* scope, const string& op_name, const string& fn_name, gtl::ArraySlice inputs) { diff --git a/tensorflow/core/common_runtime/function_testlib.h b/tensorflow/core/common_runtime/function_testlib.h index 3ddb26de92..fb967a6123 100644 --- a/tensorflow/core/common_runtime/function_testlib.h +++ b/tensorflow/core/common_runtime/function_testlib.h @@ -25,6 +25,22 @@ namespace function { // {} -> y:DT_STRING (device where this op runs). FunctionDef FindDevice(); +class BlockingOpState { + public: + void AwaitState(int awaiting_state); + + void MoveToState(int expected_current, int next); + + private: + mutex mu_; + condition_variable cv_; + int state_ = 0; +}; + +extern BlockingOpState* blocking_op_state; + +FunctionDef BlockingOpFn(); + // Adds a function call to the given scope and returns the output for the node. // TODO(phawkins): replace with C++ API for calling functions, when that exists. 
Output Call(Scope* scope, const string& op_name, const string& fn_name, diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index 44dc6f9459..07c657a741 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -42,21 +42,23 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime( const DeviceMgr* device_mgr, Env* env, int graph_def_version, const FunctionLibraryDefinition* lib_def, const OptimizerOptions& optimizer_options, + thread::ThreadPool* default_thread_pool, DistributedFunctionLibraryRuntime* parent) : device_mgr_(device_mgr), lib_def_(lib_def), + default_thread_pool_(default_thread_pool), next_handle_(0), parent_(parent) { if (device_mgr == nullptr) { - flr_map_[nullptr] = - NewFunctionLibraryRuntime(nullptr, env, nullptr, graph_def_version, - lib_def, optimizer_options, this); + flr_map_[nullptr] = NewFunctionLibraryRuntime( + nullptr, env, nullptr, graph_def_version, lib_def, default_thread_pool, + optimizer_options, this); return; } for (Device* d : device_mgr->ListDevices()) { - flr_map_[d] = - NewFunctionLibraryRuntime(device_mgr, env, d, graph_def_version, - lib_def, optimizer_options, this); + flr_map_[d] = NewFunctionLibraryRuntime( + device_mgr, env, d, graph_def_version, lib_def, default_thread_pool, + optimizer_options, this); } } @@ -65,21 +67,23 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime( const FunctionLibraryDefinition* lib_def, const OptimizerOptions& optimizer_options, CustomKernelCreator custom_kernel_creator, + thread::ThreadPool* default_thread_pool, DistributedFunctionLibraryRuntime* parent) : device_mgr_(device_mgr), lib_def_(lib_def), + default_thread_pool_(default_thread_pool), next_handle_(0), parent_(parent) { if (device_mgr == nullptr) { flr_map_[nullptr] = NewFunctionLibraryRuntime( - nullptr, env, nullptr, 
graph_def_version, lib_def, optimizer_options, - std::move(custom_kernel_creator), this); + nullptr, env, nullptr, graph_def_version, lib_def, default_thread_pool, + optimizer_options, std::move(custom_kernel_creator), this); return; } for (Device* d : device_mgr->ListDevices()) { flr_map_[d] = NewFunctionLibraryRuntime( - device_mgr, env, d, graph_def_version, lib_def, optimizer_options, - custom_kernel_creator, this); + device_mgr, env, d, graph_def_version, lib_def, default_thread_pool, + optimizer_options, custom_kernel_creator, this); } } @@ -370,7 +374,8 @@ Status ProcessFunctionLibraryRuntime::Clone( out_lib_def->reset(new FunctionLibraryDefinition(*lib_def_)); out_pflr->reset(new ProcessFunctionLibraryRuntime( device_mgr_, env, graph_def_version, out_lib_def->get(), - optimizer_options, std::move(custom_kernel_creator), parent_)); + optimizer_options, std::move(custom_kernel_creator), default_thread_pool_, + parent_)); return Status::OK(); } diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.h b/tensorflow/core/common_runtime/process_function_library_runtime.h index 10619ba6ea..d69e8bc2a0 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.h +++ b/tensorflow/core/common_runtime/process_function_library_runtime.h @@ -33,6 +33,7 @@ class ProcessFunctionLibraryRuntime { const DeviceMgr* device_mgr, Env* env, int graph_def_version, const FunctionLibraryDefinition* lib_def, const OptimizerOptions& optimizer_options, + thread::ThreadPool* thread_pool = nullptr, DistributedFunctionLibraryRuntime* parent = nullptr); // With `custom_kernel_creator`. 
@@ -41,6 +42,7 @@ class ProcessFunctionLibraryRuntime { const FunctionLibraryDefinition* lib_def, const OptimizerOptions& optimizer_options, CustomKernelCreator custom_kernel_creator, + thread::ThreadPool* thread_pool, DistributedFunctionLibraryRuntime* parent); // Sends `tensors_to_send` from `source_device` to `target_device` using @@ -174,6 +176,7 @@ class ProcessFunctionLibraryRuntime { const DeviceMgr* const device_mgr_; const FunctionLibraryDefinition* lib_def_; + thread::ThreadPool* default_thread_pool_; // Holds all the function invocations here. std::unordered_map table_ GUARDED_BY(mu_); diff --git a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc index ab1f919852..2da67b084a 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime_test.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime_test.cc @@ -71,7 +71,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test { cluster_flr_.reset(new TestClusterFLR()); proc_flr_.reset(new ProcessFunctionLibraryRuntime( device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(), - opts, cluster_flr_.get())); + opts, nullptr, cluster_flr_.get())); rendezvous_ = new IntraProcessRendezvous(device_mgr_.get()); } @@ -153,7 +153,7 @@ TEST_F(ProcessFunctionLibraryRuntimeTest, GetFLRNull) { std::unique_ptr proc_flr( new ProcessFunctionLibraryRuntime( nullptr /* device_mgr */, Env::Default(), TF_GRAPH_DEF_VERSION, - lib_def.get(), opts, nullptr /* cluster_flr */)); + lib_def.get(), opts, nullptr, nullptr /* cluster_flr */)); FunctionLibraryRuntime* flr = proc_flr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice); EXPECT_NE(flr, nullptr); diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index 7878ebb5f0..9768a244f2 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ 
b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -134,7 +134,8 @@ Status GraphMgr::InitItem(const string& session, const GraphDef& gdef, item->proc_flr.reset(new ProcessFunctionLibraryRuntime( device_mgr_, worker_env_->env, gdef.versions().producer(), - item->lib_def.get(), graph_options.optimizer_options(), cluster_flr)); + item->lib_def.get(), graph_options.optimizer_options(), + worker_env_->compute_pool, cluster_flr)); // Constructs the graph out of "gdef". Graph graph(OpRegistry::Global()); -- GitLab From 3374643a2d1a00f57acf501023e487f101c7a04c Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 12:39:41 -0800 Subject: [PATCH 673/884] Move warm_starting_util from third_party/tensorflow/python/estimator to third_party/tensorflow/python/training (move WarmStartSettings definition to third_party/tensorflow/python/estimator/estimator.py), and make _warm_start() public under tf.train.warm_start(). WarmStartSettings and VocabInfo are both available under tf.estimator, and VocabInfo is also available under tf.train. 
PiperOrigin-RevId: 188522820 --- tensorflow/python/BUILD | 19 ++ tensorflow/python/estimator/BUILD | 40 +-- .../canned/dnn_linear_combined_test.py | 4 +- .../estimator/canned/dnn_testing_utils.py | 10 +- .../estimator/canned/linear_testing_utils.py | 9 +- tensorflow/python/estimator/estimator.py | 193 +++++++++++++- tensorflow/python/estimator/estimator_lib.py | 4 +- tensorflow/python/training/training.py | 4 + .../warm_starting_util.py | 246 +++--------------- .../warm_starting_util_test.py | 85 +++--- .../tensorflow.estimator.-vocab-info.pbtxt | 4 +- ...rflow.estimator.-warm-start-settings.pbtxt | 4 +- .../golden/tensorflow.train.-vocab-info.pbtxt | 39 +++ .../tools/api/golden/tensorflow.train.pbtxt | 8 + 14 files changed, 351 insertions(+), 318 deletions(-) rename tensorflow/python/{estimator => training}/warm_starting_util.py (67%) rename tensorflow/python/{estimator => training}/warm_starting_util_test.py (94%) create mode 100644 tensorflow/tools/api/golden/tensorflow.train.-vocab-info.pbtxt diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 3b050a8763..ccc1f4c18c 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4001,6 +4001,25 @@ py_test( ], ) +py_test( + name = "warm_starting_util_test", + size = "small", + srcs = ["training/warm_starting_util_test.py"], + srcs_version = "PY2AND3", + deps = [ + ":array_ops", + ":client_testlib", + ":dtypes", + ":framework_ops", + ":init_ops", + ":training", + ":variable_scope", + ":variables", + "//tensorflow/python/feature_column", + "//third_party/py/numpy", + ], +) + py_test( name = "monitored_session_test", size = "medium", diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index c519fd557a..e3a6708d67 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -37,7 +37,6 @@ py_library( ":parsing_utils", ":run_config", ":training", - ":warm_starting_util", "//tensorflow/python:util", ], ) @@ -278,12 +277,12 @@ 
py_library( srcs = ["canned/dnn_testing_utils.py"], srcs_version = "PY2AND3", deps = [ + ":estimator", ":head", ":metric_keys", ":model_fn", ":numpy_io", ":prediction_keys", - ":warm_starting_util", "//tensorflow/core:protos_all_py", "//tensorflow/python:array_ops", "//tensorflow/python:check_ops", @@ -427,7 +426,6 @@ py_library( ":model_fn", ":run_config", ":util", - ":warm_starting_util", "//tensorflow/core:protos_all_py", "//tensorflow/python:client", "//tensorflow/python:control_flow_ops", @@ -868,39 +866,3 @@ py_test( "//tensorflow/python:training", ], ) - -py_library( - name = "warm_starting_util", - srcs = ["warm_starting_util.py"], - srcs_version = "PY2AND3", - deps = [ - "//tensorflow/python:array_ops", - "//tensorflow/python:framework_ops", - "//tensorflow/python:platform", - "//tensorflow/python:state_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//tensorflow/python/feature_column", - ], -) - -py_test( - name = "warm_starting_util_test", - size = "small", - srcs = ["warm_starting_util_test.py"], - srcs_version = "PY2AND3", - deps = [ - ":warm_starting_util", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:dtypes", - "//tensorflow/python:framework_ops", - "//tensorflow/python:init_ops", - "//tensorflow/python:training", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - "//tensorflow/python/feature_column", - "//third_party/py/numpy", - ], -) diff --git a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py index 84675bf2a4..d275695eb3 100644 --- a/tensorflow/python/estimator/canned/dnn_linear_combined_test.py +++ b/tensorflow/python/estimator/canned/dnn_linear_combined_test.py @@ -26,7 +26,7 @@ import six from tensorflow.core.example import example_pb2 from tensorflow.core.example import feature_pb2 -from tensorflow.python.estimator 
import warm_starting_util +from tensorflow.python.estimator import estimator from tensorflow.python.estimator.canned import dnn_linear_combined from tensorflow.python.estimator.canned import dnn_testing_utils from tensorflow.python.estimator.canned import linear_testing_utils @@ -866,7 +866,7 @@ class DNNLinearCombinedWarmStartingTest(test.TestCase): learning_rate=0.0), # The provided regular expression will only warm-start the deep # portion of the model. - warm_start_from=warm_starting_util.WarmStartSettings( + warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=dnn_lc_classifier.model_dir, vars_to_warm_start='.*(dnn).*'))) diff --git a/tensorflow/python/estimator/canned/dnn_testing_utils.py b/tensorflow/python/estimator/canned/dnn_testing_utils.py index 706575985f..9a7d088778 100644 --- a/tensorflow/python/estimator/canned/dnn_testing_utils.py +++ b/tensorflow/python/estimator/canned/dnn_testing_utils.py @@ -27,8 +27,8 @@ import six from tensorflow.core.framework import summary_pb2 from tensorflow.python.client import session as tf_session +from tensorflow.python.estimator import estimator from tensorflow.python.estimator import model_fn -from tensorflow.python.estimator import warm_starting_util from tensorflow.python.estimator.canned import head as head_lib from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.estimator.canned import prediction_keys @@ -828,7 +828,7 @@ class BaseDNNWarmStartingTest(object): optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0), # The provided regular expression will only warm-start the city # embedding, not the kernels and biases of the hidden weights. 
- warm_start_from=warm_starting_util.WarmStartSettings( + warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=dnn_classifier.model_dir, vars_to_warm_start='.*(city).*')) @@ -892,7 +892,7 @@ class BaseDNNWarmStartingTest(object): dimension=2) # We can create our VocabInfo object from the new and old occupation # FeatureColumn's. - occupation_vocab_info = warm_starting_util.VocabInfo( + occupation_vocab_info = estimator.VocabInfo( new_vocab=new_occupation.categorical_column.vocabulary_file, new_vocab_size=new_occupation.categorical_column.vocabulary_size, num_oov_buckets=new_occupation.categorical_column.num_oov_buckets, @@ -907,7 +907,7 @@ class BaseDNNWarmStartingTest(object): feature_columns=[occupation], n_classes=4, optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0), - warm_start_from=warm_starting_util.WarmStartSettings( + warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=dnn_classifier.model_dir, var_name_to_vocab_info={ OCCUPATION_EMBEDDING_NAME: occupation_vocab_info @@ -978,7 +978,7 @@ class BaseDNNWarmStartingTest(object): optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0), # The 'city' variable correspond to the 'locality' variable in the # previous model. 
- warm_start_from=warm_starting_util.WarmStartSettings( + warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=dnn_classifier.model_dir, var_name_to_prev_var_name={ CITY_EMBEDDING_NAME: diff --git a/tensorflow/python/estimator/canned/linear_testing_utils.py b/tensorflow/python/estimator/canned/linear_testing_utils.py index 3e9183cf1b..8e506a7631 100644 --- a/tensorflow/python/estimator/canned/linear_testing_utils.py +++ b/tensorflow/python/estimator/canned/linear_testing_utils.py @@ -31,7 +31,6 @@ from tensorflow.core.example import feature_pb2 from tensorflow.python.client import session as tf_session from tensorflow.python.estimator import estimator from tensorflow.python.estimator import run_config -from tensorflow.python.estimator import warm_starting_util from tensorflow.python.estimator.canned import linear from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.estimator.export import export @@ -1968,7 +1967,7 @@ class BaseLinearWarmStartingTest(object): optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0), # The provided regular expression will only warm-start the age variable # and not the bias. - warm_start_from=warm_starting_util.WarmStartSettings( + warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=linear_classifier.model_dir, vars_to_warm_start='.*(age).*')) @@ -2016,7 +2015,7 @@ class BaseLinearWarmStartingTest(object): vocabulary_size=len(new_vocab_list)) # We can create our VocabInfo object from the new and old occupation # FeatureColumn's. 
- occupation_vocab_info = warm_starting_util.VocabInfo( + occupation_vocab_info = estimator.VocabInfo( new_vocab=new_occupation.vocabulary_file, new_vocab_size=new_occupation.vocabulary_size, num_oov_buckets=new_occupation.num_oov_buckets, @@ -2030,7 +2029,7 @@ class BaseLinearWarmStartingTest(object): feature_columns=[occupation], n_classes=4, optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0), - warm_start_from=warm_starting_util.WarmStartSettings( + warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=linear_classifier.model_dir, var_name_to_vocab_info={ OCCUPATION_WEIGHT_NAME: occupation_vocab_info @@ -2082,7 +2081,7 @@ class BaseLinearWarmStartingTest(object): optimizer=gradient_descent.GradientDescentOptimizer(learning_rate=0.0), # The 'age' variable correspond to the 'age_in_years' variable in the # previous model. - warm_start_from=warm_starting_util.WarmStartSettings( + warm_start_from=estimator.WarmStartSettings( ckpt_to_initialize_from=linear_classifier.model_dir, var_name_to_prev_var_name={ AGE_WEIGHT_NAME: AGE_WEIGHT_NAME.replace('age', 'age_in_years') diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 6c402d8dc9..41a13587d1 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -19,6 +19,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import collections import copy import os import tempfile @@ -35,7 +36,6 @@ from tensorflow.python.eager import context from tensorflow.python.estimator import model_fn as model_fn_lib from tensorflow.python.estimator import run_config from tensorflow.python.estimator import util -from tensorflow.python.estimator import warm_starting_util from tensorflow.python.estimator.export.export import build_all_signature_defs from tensorflow.python.estimator.export.export import get_temp_export_dir from 
tensorflow.python.estimator.export.export import get_timestamped_export_dir @@ -55,6 +55,7 @@ from tensorflow.python.training import monitored_session from tensorflow.python.training import saver from tensorflow.python.training import training from tensorflow.python.training import training_util +from tensorflow.python.training import warm_starting_util from tensorflow.python.util import compat from tensorflow.python.util import compat_internal from tensorflow.python.util import nest @@ -217,8 +218,8 @@ class Estimator(object): self._params = copy.deepcopy(params or {}) # pylint: disable=protected-access - self._warm_start_settings = ( - warm_starting_util._get_default_warm_start_settings(warm_start_from)) + self._warm_start_settings = _get_default_warm_start_settings( + warm_start_from) # pylint: enable=protected-access @property @@ -830,7 +831,7 @@ class Estimator(object): logging.info('Warm-starting with WarmStartSettings: %s' % (self._warm_start_settings,)) # pylint: disable=protected-access - warm_starting_util._warm_start(self._warm_start_settings) + warm_starting_util.warm_start(*self._warm_start_settings) # pylint: enable=protected-access # Check if the user created a loss summary, and add one if they didn't. # We assume here that the summary is called 'loss'. If it is not, we will @@ -1152,3 +1153,187 @@ class _DatasetInitializerHook(training.SessionRunHook): def after_create_session(self, session, coord): del coord session.run(self._initializer) + +VocabInfo = warm_starting_util.VocabInfo # pylint: disable=invalid-name + + +@tf_export('estimator.WarmStartSettings') +class WarmStartSettings( + collections.namedtuple('WarmStartSettings', [ + 'ckpt_to_initialize_from', + 'vars_to_warm_start', + 'var_name_to_vocab_info', + 'var_name_to_prev_var_name', + ])): + """Settings for warm-starting in Estimators. 
+ + Example Use with canned `DNNEstimator`: + + ``` + emb_vocab_file = tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_vocabulary_file( + "sc_vocab_file", "new_vocab.txt", vocab_size=100), + dimension=8) + emb_vocab_list = tf.feature_column.embedding_column( + tf.feature_column.categorical_column_with_vocabulary_list( + "sc_vocab_list", vocabulary_list=["a", "b"]), + dimension=8) + estimator = tf.estimator.DNNClassifier( + hidden_units=[128, 64], feature_columns=[emb_vocab_file, emb_vocab_list], + warm_start_from=ws) + ``` + + where `ws` could be defined as: + + Warm-start all weights in the model (input layer and hidden weights). + Either the directory or a specific checkpoint can be provided (in the case + of the former, the latest checkpoint will be used): + + ``` + ws = WarmStartSettings(ckpt_to_initialize_from="/tmp") + ws = WarmStartSettings(ckpt_to_initialize_from="/tmp/model-1000") + ``` + + Warm-start only the embeddings (input layer): + + ``` + ws = WarmStartSettings(ckpt_to_initialize_from="/tmp", + vars_to_warm_start=".*input_layer.*") + ``` + + Warm-start all weights but the embedding parameters corresponding to + `sc_vocab_file` have a different vocab from the one used in the current + model: + + ``` + vocab_info = tf.estimator.VocabInfo( + new_vocab=sc_vocab_file.vocabulary_file, + new_vocab_size=sc_vocab_file.vocabulary_size, + num_oov_buckets=sc_vocab_file.num_oov_buckets, + old_vocab="old_vocab.txt" + ) + ws = WarmStartSettings( + ckpt_to_initialize_from="/tmp", + var_name_to_vocab_info={ + "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info + }) + ``` + + Warm-start only `sc_vocab_file` embeddings (and no other variables), which + have a different vocab from the one used in the current model: + + ``` + vocab_info = tf.estimator.VocabInfo( + new_vocab=sc_vocab_file.vocabulary_file, + new_vocab_size=sc_vocab_file.vocabulary_size, + num_oov_buckets=sc_vocab_file.num_oov_buckets, + 
old_vocab="old_vocab.txt" + ) + ws = WarmStartSettings( + ckpt_to_initialize_from="/tmp", + vars_to_warm_start=None, + var_name_to_vocab_info={ + "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info + }) + ``` + + Warm-start all weights but the parameters corresponding to `sc_vocab_file` + have a different vocab from the one used in current checkpoint, and only + 100 of those entries were used: + + ``` + vocab_info = tf.estimator.VocabInfo( + new_vocab=sc_vocab_file.vocabulary_file, + new_vocab_size=sc_vocab_file.vocabulary_size, + num_oov_buckets=sc_vocab_file.num_oov_buckets, + old_vocab="old_vocab.txt", + old_vocab_size=100 + ) + ws = WarmStartSettings( + ckpt_to_initialize_from="/tmp", + var_name_to_vocab_info={ + "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info + }) + ``` + + Warm-start all weights but the parameters corresponding to `sc_vocab_file` + have a different vocab from the one used in current checkpoint and the + parameters corresponding to `sc_vocab_list` have a different name from the + current checkpoint: + + ``` + vocab_info = tf.estimator.VocabInfo( + new_vocab=sc_vocab_file.vocabulary_file, + new_vocab_size=sc_vocab_file.vocabulary_size, + num_oov_buckets=sc_vocab_file.num_oov_buckets, + old_vocab="old_vocab.txt", + old_vocab_size=100 + ) + ws = WarmStartSettings( + ckpt_to_initialize_from="/tmp", + var_name_to_vocab_info={ + "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info + }, + var_name_to_prev_var_name={ + "input_layer/sc_vocab_list_embedding/embedding_weights": + "old_tensor_name" + }) + ``` + + Attributes: + ckpt_to_initialize_from: [Required] A string specifying the directory with + checkpoint file(s) or path to checkpoint from which to warm-start the + model parameters. + vars_to_warm_start: [Optional] A regular expression that captures which + variables to warm-start (see tf.get_collection). Defaults to `'.*'`, + which warm-starts all variables. 
If `None` is explicitly given, only + variables specified in `var_name_to_vocab_info` will be warm-started. + var_name_to_vocab_info: [Optional] Dict of variable names (strings) to + VocabInfo. The variable names should be "full" variables, not the names + of the partitions. If not explicitly provided, the variable is assumed to + have no vocabulary. + var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to + name of the previously-trained variable in `ckpt_to_initialize_from`. If + not explicitly provided, the name of the variable is assumed to be same + between previous checkpoint and current model. + """ + + def __new__(cls, + ckpt_to_initialize_from, + vars_to_warm_start='.*', + var_name_to_vocab_info=None, + var_name_to_prev_var_name=None): + if not ckpt_to_initialize_from: + raise ValueError( + '`ckpt_to_initialize_from` MUST be set in WarmStartSettings') + return super(WarmStartSettings, cls).__new__( + cls, + ckpt_to_initialize_from, + vars_to_warm_start, + var_name_to_vocab_info or {}, + var_name_to_prev_var_name or {}, + ) + + +def _get_default_warm_start_settings(warm_start_from): + """Returns default WarmStartSettings. + + Args: + warm_start_from: Either a string representing the filepath of a checkpoint + to initialize from, or an instance of WarmStartSettings. + + Returns: + Either None or an instance of WarmStartSettings. + + Raises: + ValueError: If warm_start_from is not None but is neither a string nor an + instance of WarmStartSettings. 
+ """ + if warm_start_from is None: + return None + if isinstance(warm_start_from, six.string_types): + return WarmStartSettings(ckpt_to_initialize_from=warm_start_from) + elif isinstance(warm_start_from, WarmStartSettings): + return warm_start_from + else: + raise ValueError('warm_start_from must be a string or a WarmStartSettings') diff --git a/tensorflow/python/estimator/estimator_lib.py b/tensorflow/python/estimator/estimator_lib.py index 01699e7399..be8930b3cb 100644 --- a/tensorflow/python/estimator/estimator_lib.py +++ b/tensorflow/python/estimator/estimator_lib.py @@ -30,6 +30,8 @@ from tensorflow.python.estimator.canned.linear import LinearRegressor from tensorflow.python.estimator.canned.parsing_utils import classifier_parse_example_spec from tensorflow.python.estimator.canned.parsing_utils import regressor_parse_example_spec from tensorflow.python.estimator.estimator import Estimator +from tensorflow.python.estimator.estimator import VocabInfo +from tensorflow.python.estimator.estimator import WarmStartSettings from tensorflow.python.estimator.export import export_lib as export from tensorflow.python.estimator.exporter import Exporter from tensorflow.python.estimator.exporter import FinalExporter @@ -41,8 +43,6 @@ from tensorflow.python.estimator.run_config import RunConfig from tensorflow.python.estimator.training import EvalSpec from tensorflow.python.estimator.training import train_and_evaluate from tensorflow.python.estimator.training import TrainSpec -from tensorflow.python.estimator.warm_starting_util import VocabInfo -from tensorflow.python.estimator.warm_starting_util import WarmStartSettings from tensorflow.python.util.all_util import remove_undocumented diff --git a/tensorflow/python/training/training.py b/tensorflow/python/training/training.py index e623e27a21..6880cfc4db 100644 --- a/tensorflow/python/training/training.py +++ b/tensorflow/python/training/training.py @@ -95,6 +95,8 @@ See the @{$python/train} guide. 
@@load_variable @@list_variables @@init_from_checkpoint +@@warm_start +@@VocabInfo """ # Optimizers. @@ -188,6 +190,8 @@ from tensorflow.python.training.training_util import get_global_step from tensorflow.python.training.training_util import assert_global_step from tensorflow.python.training.training_util import create_global_step from tensorflow.python.training.training_util import get_or_create_global_step +from tensorflow.python.training.warm_starting_util import VocabInfo +from tensorflow.python.training.warm_starting_util import warm_start from tensorflow.python.pywrap_tensorflow import do_quantize_training_on_graphdef from tensorflow.python.pywrap_tensorflow import NewCheckpointReader from tensorflow.python.util.tf_export import tf_export diff --git a/tensorflow/python/estimator/warm_starting_util.py b/tensorflow/python/training/warm_starting_util.py similarity index 67% rename from tensorflow/python/estimator/warm_starting_util.py rename to tensorflow/python/training/warm_starting_util.py index adb013f5c6..4d4fb394c1 100644 --- a/tensorflow/python/estimator/warm_starting_util.py +++ b/tensorflow/python/training/warm_starting_util.py @@ -33,7 +33,7 @@ from tensorflow.python.training import saver from tensorflow.python.util.tf_export import tf_export -@tf_export("estimator.VocabInfo") +@tf_export("train.VocabInfo", "estimator.VocabInfo") class VocabInfo( collections.namedtuple("VocabInfo", [ "new_vocab", @@ -43,7 +43,7 @@ class VocabInfo( "old_vocab_size", "backup_initializer", ])): - """Vocabulary information for WarmStartSettings. + """Vocabulary information for warm-starting. See @{tf.estimator.WarmStartSettings$WarmStartSettings} for examples of using VocabInfo to warm-start. 
@@ -83,164 +83,6 @@ class VocabInfo( ) -@tf_export("estimator.WarmStartSettings") -class WarmStartSettings( - collections.namedtuple("WarmStartSettings", [ - "ckpt_to_initialize_from", - "vars_to_warm_start", - "var_name_to_vocab_info", - "var_name_to_prev_var_name", - ])): - """Settings for warm-starting in Estimators. - - Example Use with canned `DNNEstimator`: - - ``` - emb_vocab_file = tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_vocabulary_file( - "sc_vocab_file", "new_vocab.txt", vocab_size=100), - dimension=8) - emb_vocab_list = tf.feature_column.embedding_column( - tf.feature_column.categorical_column_with_vocabulary_list( - "sc_vocab_list", vocabulary_list=["a", "b"]), - dimension=8) - estimator = tf.estimator.DNNClassifier( - hidden_units=[128, 64], feature_columns=[emb_vocab_file, emb_vocab_list], - warm_start_from=ws) - ``` - - where `ws` could be defined as: - - Warm-start all weights in the model (input layer and hidden weights). - Either the directory or a specific checkpoint can be provided (in the case - of the former, the latest checkpoint will be used): - - ``` - ws = WarmStartSettings(ckpt_to_initialize_from="/tmp") - ws = WarmStartSettings(ckpt_to_initialize_from="/tmp/model-1000") - ``` - - Warm-start only the embeddings (input layer): - - ``` - ws = WarmStartSettings(ckpt_to_initialize_from="/tmp", - vars_to_warm_start=".*input_layer.*") - ``` - - Warm-start all weights but the embedding parameters corresponding to - `sc_vocab_file` have a different vocab from the one used in the current - model: - - ``` - vocab_info = ws_util.VocabInfo( - new_vocab=sc_vocab_file.vocabulary_file, - new_vocab_size=sc_vocab_file.vocabulary_size, - num_oov_buckets=sc_vocab_file.num_oov_buckets, - old_vocab="old_vocab.txt" - ) - ws = WarmStartSettings( - ckpt_to_initialize_from="/tmp", - var_name_to_vocab_info={ - "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info - }) - ``` - - Warm-start only `sc_vocab_file` 
embeddings (and no other variables), which - have a different vocab from the one used in the current model: - - ``` - vocab_info = ws_util.VocabInfo( - new_vocab=sc_vocab_file.vocabulary_file, - new_vocab_size=sc_vocab_file.vocabulary_size, - num_oov_buckets=sc_vocab_file.num_oov_buckets, - old_vocab="old_vocab.txt" - ) - ws = WarmStartSettings( - ckpt_to_initialize_from="/tmp", - vars_to_warm_start=None, - var_name_to_vocab_info={ - "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info - }) - ``` - - Warm-start all weights but the parameters corresponding to `sc_vocab_file` - have a different vocab from the one used in current checkpoint, and only - 100 of those entries were used: - - ``` - vocab_info = ws_util.VocabInfo( - new_vocab=sc_vocab_file.vocabulary_file, - new_vocab_size=sc_vocab_file.vocabulary_size, - num_oov_buckets=sc_vocab_file.num_oov_buckets, - old_vocab="old_vocab.txt", - old_vocab_size=100 - ) - ws = WarmStartSettings( - ckpt_to_initialize_from="/tmp", - var_name_to_vocab_info={ - "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info - }) - ``` - - Warm-start all weights but the parameters corresponding to `sc_vocab_file` - have a different vocab from the one used in current checkpoint and the - parameters corresponding to `sc_vocab_list` have a different name from the - current checkpoint: - - ``` - vocab_info = ws_util.VocabInfo( - new_vocab=sc_vocab_file.vocabulary_file, - new_vocab_size=sc_vocab_file.vocabulary_size, - num_oov_buckets=sc_vocab_file.num_oov_buckets, - old_vocab="old_vocab.txt", - old_vocab_size=100 - ) - ws = WarmStartSettings( - ckpt_to_initialize_from="/tmp", - var_name_to_vocab_info={ - "input_layer/sc_vocab_file_embedding/embedding_weights": vocab_info - }, - var_name_to_prev_var_name={ - "input_layer/sc_vocab_list_embedding/embedding_weights": - "old_tensor_name" - }) - ``` - - Attributes: - ckpt_to_initialize_from: [Required] A string specifying the directory with - checkpoint file(s) or path 
to checkpoint from which to warm-start the - model parameters. - vars_to_warm_start: [Optional] A regular expression that captures which - variables to warm-start (see tf.get_collection). Defaults to `'.*'`, - which warm-starts all variables. If `None` is explicitly given, only - variables specified in `var_name_to_vocab_info` will be warm-started. - var_name_to_vocab_info: [Optional] Dict of variable names (strings) to - VocabInfo. The variable names should be "full" variables, not the names - of the partitions. If not explicitly provided, the variable is assumed to - have no vocabulary. - var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to - name of the previously-trained variable in `ckpt_to_initialize_from`. If - not explicitly provided, the name of the variable is assumed to be same - between previous checkpoint and current model. - """ - - def __new__(cls, - ckpt_to_initialize_from, - vars_to_warm_start=".*", - var_name_to_vocab_info=None, - var_name_to_prev_var_name=None): - if not ckpt_to_initialize_from: - raise ValueError( - "`ckpt_to_initialize_from` MUST be set in WarmStartSettings") - return super(WarmStartSettings, cls).__new__( - cls, - ckpt_to_initialize_from, - vars_to_warm_start, - var_name_to_vocab_info or {}, - var_name_to_prev_var_name or {}, - ) - - def _is_variable(x): return (isinstance(x, variables_lib.Variable) or isinstance(x, resource_variable_ops.ResourceVariable)) @@ -375,8 +217,7 @@ def _warm_start_var_with_vocab(var, full_shape=slice_info.full_shape, var_offset=slice_info.var_offset) - # TODO(eddz): Support WarmStartSettings where class vocabularies need - # remapping too. + # TODO(eddz): Support cases where class vocabularies need remapping too. 
init = checkpoint_ops._load_and_remap_matrix_initializer( ckpt_path=checkpoint_utils._get_checkpoint_filename(prev_ckpt), old_tensor_name=prev_tensor_name, @@ -396,32 +237,53 @@ def _warm_start_var_with_vocab(var, # pylint: enable=protected-access -def _warm_start(warm_start_settings): +@tf_export("train.warm_start") +def warm_start(ckpt_to_initialize_from, + vars_to_warm_start=".*", + var_name_to_vocab_info=None, + var_name_to_prev_var_name=None): """Warm-starts a model using the given settings. If you are using a tf.estimator.Estimator, this will automatically be called during training. Args: - warm_start_settings: An object of `WarmStartSettings`. + ckpt_to_initialize_from: [Required] A string specifying the directory with + checkpoint file(s) or path to checkpoint from which to warm-start the + model parameters. + vars_to_warm_start: [Optional] A regular expression that captures which + variables to warm-start (see tf.get_collection). Defaults to `'.*'`, + which warm-starts all variables. If `None` is explicitly given, only + variables specified in `var_name_to_vocab_info` will be warm-started. + var_name_to_vocab_info: [Optional] Dict of variable names (strings) to + VocabInfo. The variable names should be "full" variables, not the names + of the partitions. If not explicitly provided, the variable is assumed to + have no vocabulary. + var_name_to_prev_var_name: [Optional] Dict of variable names (strings) to + name of the previously-trained variable in `ckpt_to_initialize_from`. If + not explicitly provided, the name of the variable is assumed to be same + between previous checkpoint and current model. Raises: ValueError: If the WarmStartSettings contains prev_var_name or VocabInfo configuration for variable names that are not used. This is to ensure a stronger check for variable configuration than relying on users to examine the logs. 
""" - logging.info("Warm-starting from: %s", - (warm_start_settings.ckpt_to_initialize_from,)) + if var_name_to_vocab_info is None: + var_name_to_vocab_info = {} + if var_name_to_prev_var_name is None: + var_name_to_prev_var_name = {} + logging.info("Warm-starting from: %s", (ckpt_to_initialize_from,)) # We have to deal with partitioned variables, since get_collection flattens # out the list. grouped_variables = {} - # Both warm_start_settings.vars_to_warm_start = '.*' and - # warm_start_settings.vars_to_warm_start = None will match everything here. + # Both vars_to_warm_start = '.*' and + # vars_to_warm_start = None will match everything here. for v in ops.get_collection( # TODO(eddz): Allow for different collections here (to support # warm-starting accumulators). ops.GraphKeys.TRAINABLE_VARIABLES, - scope=warm_start_settings.vars_to_warm_start): + scope=vars_to_warm_start): if not isinstance(v, list): var_name = _infer_var_name([v]) else: @@ -437,10 +299,10 @@ def _warm_start(warm_start_settings): vocab_info_used = set() for var_name, variable in six.iteritems(grouped_variables): - prev_var_name = warm_start_settings.var_name_to_prev_var_name.get(var_name) + prev_var_name = var_name_to_prev_var_name.get(var_name) if prev_var_name: prev_var_name_used.add(var_name) - vocab_info = warm_start_settings.var_name_to_vocab_info.get(var_name) + vocab_info = var_name_to_vocab_info.get(var_name) if vocab_info: vocab_info_used.add(var_name) logging.info( @@ -460,16 +322,16 @@ def _warm_start(warm_start_settings): variable, current_vocab_path=vocab_info.new_vocab, current_vocab_size=vocab_info.new_vocab_size, - prev_ckpt=warm_start_settings.ckpt_to_initialize_from, + prev_ckpt=ckpt_to_initialize_from, prev_vocab_path=vocab_info.old_vocab, previous_vocab_size=vocab_info.old_vocab_size, current_oov_buckets=vocab_info.num_oov_buckets, prev_tensor_name=prev_var_name, initializer=vocab_info.backup_initializer) else: - # For the special value of 
warm_start_settings.vars_to_warm_start = None, + # For the special value of vars_to_warm_start = None, # we only warm-start variables with explicitly specified vocabularies. - if warm_start_settings.vars_to_warm_start: + if vars_to_warm_start: logging.info("Warm-starting variable: {}; prev_var_name: {}".format( var_name, prev_var_name or "Unchanged")) # Because we use a default empty list in grouped_variables, single @@ -477,48 +339,22 @@ def _warm_start(warm_start_settings): # for init_from_checkpoint logic to work correctly. if len(variable) == 1: variable = variable[0] - _warm_start_var(variable, warm_start_settings.ckpt_to_initialize_from, - prev_var_name) + _warm_start_var(variable, ckpt_to_initialize_from, prev_var_name) prev_var_name_not_used = set( - warm_start_settings.var_name_to_prev_var_name.keys()) - prev_var_name_used - vocab_info_not_used = set( - warm_start_settings.var_name_to_vocab_info.keys()) - vocab_info_used + var_name_to_prev_var_name.keys()) - prev_var_name_used + vocab_info_not_used = set(var_name_to_vocab_info.keys()) - vocab_info_used if prev_var_name_not_used: raise ValueError( "You provided the following variables in " - "warm_start_settings.var_name_to_prev_var_name that were not used: " + "var_name_to_prev_var_name that were not used: " "{0}. Perhaps you misspelled them? Here is the list of viable " "variable names: {1}".format(prev_var_name_not_used, grouped_variables.keys())) if vocab_info_not_used: raise ValueError( "You provided the following variables in " - "warm_start_settings.var_name_to_vocab_info that were not used: {0}. " + "var_name_to_vocab_info that were not used: {0}. " " Perhaps you misspelled them? Here is the list of viable variable " "names: {1}".format(vocab_info_not_used, grouped_variables.keys())) - - -def _get_default_warm_start_settings(warm_start_from): - """Returns default WarmStartSettings. 
- - Args: - warm_start_from: Either a string representing the filepath of a checkpoint - to initialize from, or an instance of WarmStartSettings. - - Returns: - Either None or an instance of WarmStartSettings. - - Raises: - ValueError: If warm_start_from is not None but is neither a string nor an - instance of WarmStartSettings. - """ - if warm_start_from is None: - return None - if isinstance(warm_start_from, six.string_types): - return WarmStartSettings(ckpt_to_initialize_from=warm_start_from) - elif isinstance(warm_start_from, WarmStartSettings): - return warm_start_from - else: - raise ValueError("warm_start_from must be a string or a WarmStartSettings") diff --git a/tensorflow/python/estimator/warm_starting_util_test.py b/tensorflow/python/training/warm_starting_util_test.py similarity index 94% rename from tensorflow/python/estimator/warm_starting_util_test.py rename to tensorflow/python/training/warm_starting_util_test.py index 3985d9ebd0..6e445d8bd1 100644 --- a/tensorflow/python/estimator/warm_starting_util_test.py +++ b/tensorflow/python/training/warm_starting_util_test.py @@ -22,7 +22,6 @@ import os import numpy as np import six -from tensorflow.python.estimator import warm_starting_util as ws_util from tensorflow.python.feature_column import feature_column as fc from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -32,6 +31,7 @@ from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import saver as saver_lib +from tensorflow.python.training import warm_starting_util as ws_util ones = init_ops.ones_initializer norms = init_ops.truncated_normal_initializer @@ -330,9 +330,7 @@ class WarmStartingUtilTest(test.TestCase): with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_int], partitioner) - ws_util._warm_start( - 
ws_util.WarmStartSettings( - self.get_temp_dir(), vars_to_warm_start=".*sc_int.*")) + ws_util.warm_start(self.get_temp_dir(), vars_to_warm_start=".*sc_int.*") sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. self._assert_cols_to_vars(cols_to_vars, {sc_int: [prev_int_val]}, sess) @@ -361,9 +359,8 @@ class WarmStartingUtilTest(test.TestCase): with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([sc_hash], partitioner) - ws_util._warm_start( - ws_util.WarmStartSettings( - self.get_temp_dir(), vars_to_warm_start=".*sc_hash.*")) + ws_util.warm_start( + self.get_temp_dir(), vars_to_warm_start=".*sc_hash.*") sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. self._assert_cols_to_vars(cols_to_vars, {sc_hash: [prev_hash_val]}, @@ -398,9 +395,8 @@ class WarmStartingUtilTest(test.TestCase): cols_to_vars = self._create_linear_model([sc_vocab], partitioner) # Since old vocab is not explicitly set in WarmStartSettings, the old # vocab is assumed to be same as new vocab. - ws_util._warm_start( - ws_util.WarmStartSettings( - self.get_temp_dir(), vars_to_warm_start=".*sc_vocab.*")) + ws_util.warm_start( + self.get_temp_dir(), vars_to_warm_start=".*sc_vocab.*") sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]}, @@ -435,11 +431,10 @@ class WarmStartingUtilTest(test.TestCase): cols_to_vars = self._create_linear_model([sc_vocab], partitioner) # Since old vocab is not explicitly set in WarmStartSettings, the old # vocab is assumed to be same as new vocab. - ws_util._warm_start( - ws_util.WarmStartSettings( - # Explicitly provide the file prefix instead of just the dir. 
- os.path.join(self.get_temp_dir(), "model-0"), - vars_to_warm_start=".*sc_vocab.*")) + ws_util.warm_start( + # Explicitly provide the file prefix instead of just the dir. + os.path.join(self.get_temp_dir(), "model-0"), + vars_to_warm_start=".*sc_vocab.*") sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. self._assert_cols_to_vars(cols_to_vars, {sc_vocab: [prev_vocab_val]}, @@ -485,13 +480,12 @@ class WarmStartingUtilTest(test.TestCase): num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=old_vocab_path, old_vocab_size=old_vocab_size) - warm_start_settings = ws_util.WarmStartSettings( + ws_util.warm_start( ckpt_to_initialize_from=self.get_temp_dir(), vars_to_warm_start=".*sc_vocab.*", var_name_to_vocab_info={ "linear_model/sc_vocab/weights": vocab_info }) - ws_util._warm_start(warm_start_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. 'banana' isn't in the # first two entries of the old vocabulary, so it's newly initialized. @@ -523,9 +517,8 @@ class WarmStartingUtilTest(test.TestCase): with ops.Graph().as_default() as g: with self.test_session(graph=g) as sess: cols_to_vars = self._create_linear_model([real_bucket], partitioner) - ws_util._warm_start( - ws_util.WarmStartSettings( - self.get_temp_dir(), vars_to_warm_start=".*real_bucketized.*")) + ws_util.warm_start( + self.get_temp_dir(), vars_to_warm_start=".*real_bucketized.*") sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. 
self._assert_cols_to_vars(cols_to_vars, @@ -606,12 +599,11 @@ class WarmStartingUtilTest(test.TestCase): new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=vocab_path) - ws_util._warm_start( - ws_util.WarmStartSettings( - self.get_temp_dir(), - var_name_to_vocab_info={ - "linear_model/sc_vocab/weights": vocab_info - })) + ws_util.warm_start( + self.get_temp_dir(), + var_name_to_vocab_info={ + "linear_model/sc_vocab/weights": vocab_info + }) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. self._assert_cols_to_vars(cols_to_vars, { @@ -668,7 +660,7 @@ class WarmStartingUtilTest(test.TestCase): new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=prev_vocab_path) - ws_settings = ws_util.WarmStartSettings( + ws_util.warm_start( self.get_temp_dir(), vars_to_warm_start=".*(sc_keys|sc_vocab).*", var_name_to_vocab_info={ @@ -678,7 +670,6 @@ class WarmStartingUtilTest(test.TestCase): ws_util._infer_var_name(cols_to_vars[sc_keys]): "some_other_name" }) - ws_util._warm_start(ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. Var corresponding to # sc_hash should not be warm-started. Var corresponding to sc_vocab @@ -732,7 +723,7 @@ class WarmStartingUtilTest(test.TestCase): new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=prev_vocab_path) - ws_settings = ws_util.WarmStartSettings( + ws_util.warm_start( self.get_temp_dir(), vars_to_warm_start=".*(sc_keys|sc_vocab).*", var_name_to_vocab_info={ @@ -742,7 +733,6 @@ class WarmStartingUtilTest(test.TestCase): ws_util._infer_var_name(cols_to_vars[sc_keys]): "some_other_name" }) - ws_util._warm_start(ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. Var corresponding to # sc_hash should not be warm-started. 
Var corresponding to sc_vocab @@ -796,7 +786,7 @@ class WarmStartingUtilTest(test.TestCase): new_vocab_size=sc_vocab.vocabulary_size, num_oov_buckets=sc_vocab.num_oov_buckets, old_vocab=prev_vocab_path) - ws_settings = ws_util.WarmStartSettings( + ws_util.warm_start( self.get_temp_dir(), # The special value of None here will ensure that only the variable # specified in var_name_to_vocab_info (sc_vocab embedding) is @@ -812,7 +802,6 @@ class WarmStartingUtilTest(test.TestCase): ws_util._infer_var_name(cols_to_vars[sc_keys]): "some_other_name" }) - ws_util._warm_start(ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. Var corresponding to # sc_vocab should be correctly warm-started after vocab remapping, @@ -874,13 +863,12 @@ class WarmStartingUtilTest(test.TestCase): # use a truncated normal initializer. backup_initializer=init_ops.random_uniform_initializer( minval=0.42, maxval=0.42)) - ws_settings = ws_util.WarmStartSettings( + ws_util.warm_start( self.get_temp_dir(), var_name_to_vocab_info={ ws_util._infer_var_name(cols_to_vars[emb_vocab_column]): vocab_info }) - ws_util._warm_start(ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. Var corresponding to # emb_vocab_column should be correctly warm-started after vocab @@ -947,13 +935,12 @@ class WarmStartingUtilTest(test.TestCase): # use a truncated normal initializer. backup_initializer=init_ops.random_uniform_initializer( minval=0.42, maxval=0.42)) - ws_settings = ws_util.WarmStartSettings( + ws_util.warm_start( self.get_temp_dir(), vars_to_warm_start=".*sc_vocab.*", var_name_to_vocab_info={ "linear_model/sc_vocab_embedding/embedding_weights": vocab_info }) - ws_util._warm_start(ws_settings) sess.run(variables.global_variables_initializer()) # Verify weights were correctly warm-started. Var corresponding to # emb_vocab should be correctly warm-started after vocab remapping. 
@@ -973,7 +960,6 @@ class WarmStartingUtilTest(test.TestCase): }, sess) def testErrorConditions(self): - self.assertRaises(ValueError, ws_util.WarmStartSettings, None) x = variable_scope.get_variable( "x", shape=[4, 1], @@ -983,9 +969,6 @@ class WarmStartingUtilTest(test.TestCase): # List of PartitionedVariable is invalid type when warm-starting with vocab. self.assertRaises(TypeError, ws_util._warm_start_var_with_vocab, [x], "/tmp", 5, "/tmp", "/tmp") - # Keys of type other than FeatureColumn. - self.assertRaises(TypeError, ws_util._warm_start, {"StringType": x}, - ws_util.WarmStartSettings("/tmp")) # Unused variable names raises ValueError. with ops.Graph().as_default(): @@ -997,18 +980,16 @@ class WarmStartingUtilTest(test.TestCase): partitioner=lambda shape, dtype: [2, 1]) self._write_checkpoint(sess) - self.assertRaises(ValueError, ws_util._warm_start, - ws_util.WarmStartSettings( - self.get_temp_dir(), - var_name_to_vocab_info={ - "y": ws_util.VocabInfo("", 1, 0, "") - })) - self.assertRaises(ValueError, ws_util._warm_start, - ws_util.WarmStartSettings( - self.get_temp_dir(), - var_name_to_prev_var_name={ - "y": "y2" - })) + self.assertRaises( + ValueError, + ws_util.warm_start, + self.get_temp_dir(), + var_name_to_vocab_info={"y": ws_util.VocabInfo("", 1, 0, "")}) + self.assertRaises( + ValueError, + ws_util.warm_start, + self.get_temp_dir(), + var_name_to_prev_var_name={"y": "y2"}) if __name__ == "__main__": diff --git a/tensorflow/tools/api/golden/tensorflow.estimator.-vocab-info.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-vocab-info.pbtxt index a16e3aedae..5301b94eb3 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-vocab-info.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-vocab-info.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.estimator.VocabInfo" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "backup_initializer" diff --git 
a/tensorflow/tools/api/golden/tensorflow.estimator.-warm-start-settings.pbtxt b/tensorflow/tools/api/golden/tensorflow.estimator.-warm-start-settings.pbtxt index afdd6bb058..43f5343359 100644 --- a/tensorflow/tools/api/golden/tensorflow.estimator.-warm-start-settings.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.estimator.-warm-start-settings.pbtxt @@ -1,7 +1,7 @@ path: "tensorflow.estimator.WarmStartSettings" tf_class { - is_instance: "" - is_instance: "" + is_instance: "" + is_instance: "" is_instance: "" member { name: "ckpt_to_initialize_from" diff --git a/tensorflow/tools/api/golden/tensorflow.train.-vocab-info.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.-vocab-info.pbtxt new file mode 100644 index 0000000000..4ce7cb1111 --- /dev/null +++ b/tensorflow/tools/api/golden/tensorflow.train.-vocab-info.pbtxt @@ -0,0 +1,39 @@ +path: "tensorflow.train.VocabInfo" +tf_class { + is_instance: "" + is_instance: "" + is_instance: "" + member { + name: "backup_initializer" + mtype: "" + } + member { + name: "new_vocab" + mtype: "" + } + member { + name: "new_vocab_size" + mtype: "" + } + member { + name: "num_oov_buckets" + mtype: "" + } + member { + name: "old_vocab" + mtype: "" + } + member { + name: "old_vocab_size" + mtype: "" + } + member_method { + name: "__init__" + } + member_method { + name: "count" + } + member_method { + name: "index" + } +} diff --git a/tensorflow/tools/api/golden/tensorflow.train.pbtxt b/tensorflow/tools/api/golden/tensorflow.train.pbtxt index 3b06aafa9f..c75ee474aa 100644 --- a/tensorflow/tools/api/golden/tensorflow.train.pbtxt +++ b/tensorflow/tools/api/golden/tensorflow.train.pbtxt @@ -224,6 +224,10 @@ tf_module { name: "SyncReplicasOptimizer" mtype: "" } + member { + name: "VocabInfo" + mtype: "" + } member { name: "WorkerSessionCreator" mtype: "" @@ -436,6 +440,10 @@ tf_module { name: "update_checkpoint_state" argspec: "args=[\'save_dir\', \'model_checkpoint_path\', \'all_model_checkpoint_paths\', \'latest_filename\'], 
varargs=None, keywords=None, defaults=[\'None\', \'None\'], " } + member_method { + name: "warm_start" + argspec: "args=[\'ckpt_to_initialize_from\', \'vars_to_warm_start\', \'var_name_to_vocab_info\', \'var_name_to_prev_var_name\'], varargs=None, keywords=None, defaults=[\'.*\', \'None\', \'None\'], " + } member_method { name: "write_graph" argspec: "args=[\'graph_or_graph_def\', \'logdir\', \'name\', \'as_text\'], varargs=None, keywords=None, defaults=[\'True\'], " -- GitLab From c8789853bf7a07e9eecfebcf9a7ff43360c7ed3b Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Fri, 9 Mar 2018 12:57:56 -0800 Subject: [PATCH 674/884] Automated g4 rollback of changelist 188433328 PiperOrigin-RevId: 188525171 --- tensorflow/contrib/lite/kernels/BUILD | 31 +--- .../contrib/lite/kernels/audio_spectrogram.cc | 165 ------------------ .../lite/kernels/audio_spectrogram_test.cc | 122 ------------- .../lite/kernels/internal/spectrogram.cc | 1 + tensorflow/contrib/lite/kernels/mfcc.cc | 154 ---------------- tensorflow/contrib/lite/kernels/mfcc_test.cc | 104 ----------- tensorflow/contrib/lite/kernels/register.cc | 14 -- 7 files changed, 2 insertions(+), 589 deletions(-) delete mode 100644 tensorflow/contrib/lite/kernels/audio_spectrogram.cc delete mode 100644 tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc delete mode 100644 tensorflow/contrib/lite/kernels/mfcc.cc delete mode 100644 tensorflow/contrib/lite/kernels/mfcc_test.cc diff --git a/tensorflow/contrib/lite/kernels/BUILD b/tensorflow/contrib/lite/kernels/BUILD index 9c63269324..b8ab6d96a0 100644 --- a/tensorflow/contrib/lite/kernels/BUILD +++ b/tensorflow/contrib/lite/kernels/BUILD @@ -135,7 +135,6 @@ cc_library( srcs = [ "activations.cc", "add.cc", - "audio_spectrogram.cc", "basic_rnn.cc", "batch_to_space_nd.cc", "bidirectional_sequence_lstm.cc", @@ -157,7 +156,6 @@ cc_library( "lsh_projection.cc", "lstm.cc", "mean.cc", - "mfcc.cc", "mul.cc", "pad.cc", "pooling.cc", @@ -198,42 +196,15 @@ cc_library( 
"//tensorflow/contrib/lite:framework", "//tensorflow/contrib/lite:string_util", "//tensorflow/contrib/lite/kernels:gemm_support", - "//tensorflow/contrib/lite/kernels/internal:audio_utils", "//tensorflow/contrib/lite/kernels/internal:kernel_utils", "//tensorflow/contrib/lite/kernels/internal:optimized", "//tensorflow/contrib/lite/kernels/internal:optimized_base", "//tensorflow/contrib/lite/kernels/internal:quantization_util", "//tensorflow/contrib/lite/kernels/internal:reference", "//tensorflow/contrib/lite/kernels/internal:reference_base", + "//tensorflow/contrib/lite/kernels/internal:round", "//tensorflow/contrib/lite/kernels/internal:tensor_utils", "@farmhash_archive//:farmhash", - "@flatbuffers", - ], -) - -tf_cc_test( - name = "audio_spectrogram_test", - size = "small", - srcs = ["audio_spectrogram_test.cc"], - deps = [ - ":builtin_ops", - "//tensorflow/contrib/lite:framework", - "//tensorflow/contrib/lite/kernels:test_util", - "@com_google_googletest//:gtest", - "@flatbuffers", - ], -) - -tf_cc_test( - name = "mfcc_test", - size = "small", - srcs = ["mfcc_test.cc"], - deps = [ - ":builtin_ops", - "//tensorflow/contrib/lite:framework", - "//tensorflow/contrib/lite/kernels:test_util", - "@com_google_googletest//:gtest", - "@flatbuffers", ], ) diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram.cc deleted file mode 100644 index 5a17d3a598..0000000000 --- a/tensorflow/contrib/lite/kernels/audio_spectrogram.cc +++ /dev/null @@ -1,165 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include "tensorflow/contrib/lite/builtin_op_data.h" -#include "tensorflow/contrib/lite/context.h" -#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" -#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" -#include "tensorflow/contrib/lite/kernels/internal/spectrogram.h" -#include "tensorflow/contrib/lite/kernels/internal/tensor.h" -#include "tensorflow/contrib/lite/kernels/kernel_util.h" -#include "tensorflow/contrib/lite/kernels/op_macros.h" - -#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace audio_spectrogram { - -constexpr int kInputTensor = 0; -constexpr int kOutputTensor = 0; - -enum KernelType { - kReference, -}; - -typedef struct { - int window_size; - int stride; - bool magnitude_squared; - int output_height; - internal::Spectrogram* spectrogram; -} TfLiteAudioSpectrogramParams; - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - auto* data = new TfLiteAudioSpectrogramParams; - - const uint8_t* buffer_t = reinterpret_cast(buffer); - - const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); - data->window_size = m["window_size"].AsInt64(); - data->stride = m["stride"].AsInt64(); - data->magnitude_squared = m["magnitude_squared"].AsBool(); - - data->spectrogram = new internal::Spectrogram; - - return data; -} - -void Free(TfLiteContext* context, void* buffer) { - auto* params = 
reinterpret_cast(buffer); - delete params->spectrogram; - delete params; -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - auto* params = - reinterpret_cast(node->user_data); - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 1); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - - TF_LITE_ENSURE_EQ(context, NumDimensions(input), 2); - - TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); - TF_LITE_ENSURE_EQ(context, input->type, output->type); - - TF_LITE_ENSURE(context, params->spectrogram->Initialize(params->window_size, - params->stride)); - const int64_t sample_count = input->dims->data[0]; - const int64_t length_minus_window = (sample_count - params->window_size); - if (length_minus_window < 0) { - params->output_height = 0; - } else { - params->output_height = 1 + (length_minus_window / params->stride); - } - TfLiteIntArray* output_size = TfLiteIntArrayCreate(3); - output_size->data[0] = input->dims->data[1]; - output_size->data[1] = params->output_height; - output_size->data[2] = params->spectrogram->output_frequency_channels(); - - return context->ResizeTensor(context, output, output_size); -} - -template -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* params = - reinterpret_cast(node->user_data); - - TfLiteTensor* input = GetInput(context, node, kInputTensor); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - - TF_LITE_ENSURE(context, params->spectrogram->Initialize(params->window_size, - params->stride)); - - const float* input_data = GetTensorData(input); - - const int64_t sample_count = input->dims->data[0]; - const int64_t channel_count = input->dims->data[1]; - - const int64_t output_width = params->spectrogram->output_frequency_channels(); - - float* output_flat = GetTensorData(output); - - std::vector input_for_channel(sample_count); - for (int64_t 
channel = 0; channel < channel_count; ++channel) { - float* output_slice = - output_flat + (channel * params->output_height * output_width); - for (int i = 0; i < sample_count; ++i) { - input_for_channel[i] = input_data[i * channel_count + channel]; - } - std::vector> spectrogram_output; - TF_LITE_ENSURE(context, - params->spectrogram->ComputeSquaredMagnitudeSpectrogram( - input_for_channel, &spectrogram_output)); - TF_LITE_ENSURE_EQ(context, spectrogram_output.size(), - params->output_height); - TF_LITE_ENSURE(context, spectrogram_output.empty() || - (spectrogram_output[0].size() == output_width)); - for (int row_index = 0; row_index < params->output_height; ++row_index) { - const std::vector& spectrogram_row = spectrogram_output[row_index]; - TF_LITE_ENSURE_EQ(context, spectrogram_row.size(), output_width); - float* output_row = output_slice + (row_index * output_width); - if (params->magnitude_squared) { - for (int i = 0; i < output_width; ++i) { - output_row[i] = spectrogram_row[i]; - } - } else { - for (int i = 0; i < output_width; ++i) { - output_row[i] = sqrtf(spectrogram_row[i]); - } - } - } - } - return kTfLiteOk; -} - -} // namespace audio_spectrogram - -TfLiteRegistration* Register_AUDIO_SPECTROGRAM() { - static TfLiteRegistration r = { - audio_spectrogram::Init, audio_spectrogram::Free, - audio_spectrogram::Prepare, - audio_spectrogram::Eval}; - return &r; -} - -} // namespace custom -} // namespace ops -} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc b/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc deleted file mode 100644 index 38708930d9..0000000000 --- a/tensorflow/contrib/lite/kernels/audio_spectrogram_test.cc +++ /dev/null @@ -1,122 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include -#include - -#include -#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" -#include "tensorflow/contrib/lite/interpreter.h" -#include "tensorflow/contrib/lite/kernels/register.h" -#include "tensorflow/contrib/lite/kernels/test_util.h" -#include "tensorflow/contrib/lite/model.h" - -namespace tflite { -namespace ops { -namespace custom { - -TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); - -namespace { - -using ::testing::ElementsAre; -using ::testing::ElementsAreArray; - -class BaseAudioSpectrogramOpModel : public SingleOpModel { - public: - BaseAudioSpectrogramOpModel(const TensorData& input1, - const TensorData& output, int window_size, - int stride, bool magnitude_squared) { - input1_ = AddInput(input1); - output_ = AddOutput(output); - - flexbuffers::Builder fbb; - fbb.Map([&]() { - fbb.Int("window_size", window_size); - fbb.Int("stride", stride); - fbb.Bool("magnitude_squared", magnitude_squared); - }); - fbb.Finish(); - SetCustomOp("AudioSpectrogram", fbb.GetBuffer(), - Register_AUDIO_SPECTROGRAM); - BuildInterpreter({GetShape(input1_)}); - } - - int input1() { return input1_; } - std::vector GetOutput() { return ExtractVector(output_); } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - protected: - int input1_; - int output_; -}; - -TEST(BaseAudioSpectrogramOpModel, NonSquaredTest) { - BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {8, 1}}, - {TensorType_FLOAT32, {}}, 8, 1, false); - m.PopulateTensor(m.input1(), - 
{-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f}); - - m.Invoke(); - - std::vector output_shape = m.GetOutputShape(); - EXPECT_EQ(3, output_shape.size()); - EXPECT_THAT(output_shape, ElementsAre(1, 1, 5)); - - EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( - {0.0f, 1.0f, 2.0f, 1.0f, 0.0f}, 1e-3))); -} - -TEST(SpectrogramOpTest, SquaredTest) { - BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {8, 1}}, - {TensorType_FLOAT32, {}}, 8, 1, true); - m.PopulateTensor(m.input1(), - {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, 1.0f, 0.0f}); - - m.Invoke(); - - std::vector output_shape = m.GetOutputShape(); - EXPECT_EQ(3, output_shape.size()); - EXPECT_THAT(output_shape, ElementsAre(1, 1, 5)); - - EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( - {0.f, 1.f, 4.f, 1.f, 0.f}, 1e-3))); -} - -TEST(SpectrogramOpTest, StrideTest) { - BaseAudioSpectrogramOpModel m({TensorType_FLOAT32, {10, 1}}, - {TensorType_FLOAT32, {}}, 8, 2, true); - m.PopulateTensor(m.input1(), {-1.0f, 0.0f, 1.0f, 0.0f, -1.0f, 0.0f, - 1.0f, 0.0f, 1.0f, 0.0f}); - - m.Invoke(); - - std::vector output_shape = m.GetOutputShape(); - EXPECT_THAT(output_shape, ElementsAre(1, 2, 5)); - EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear( - {0, 1, 4, 1, 0, 1, 2, 1, 2, 1}, 1e-3))); -} - -} // namespace -} // namespace custom -} // namespace ops -} // namespace tflite - -int main(int argc, char** argv) { - ::tflite::LogToStderr(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc index 0e481a9d40..66ca694dc4 100644 --- a/tensorflow/contrib/lite/kernels/internal/spectrogram.cc +++ b/tensorflow/contrib/lite/kernels/internal/spectrogram.cc @@ -54,6 +54,7 @@ inline int Log2Floor(uint n) { log += shift; } } + assert(value == 1); return log; } diff --git a/tensorflow/contrib/lite/kernels/mfcc.cc b/tensorflow/contrib/lite/kernels/mfcc.cc deleted file mode 
100644 index 5dfcf8067e..0000000000 --- a/tensorflow/contrib/lite/kernels/mfcc.cc +++ /dev/null @@ -1,154 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ -#include "tensorflow/contrib/lite/kernels/internal/mfcc.h" -#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" -#include "tensorflow/contrib/lite/builtin_op_data.h" -#include "tensorflow/contrib/lite/context.h" -#include "tensorflow/contrib/lite/kernels/internal/mfcc_dct.h" -#include "tensorflow/contrib/lite/kernels/internal/mfcc_mel_filterbank.h" -#include "tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h" -#include "tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h" -#include "tensorflow/contrib/lite/kernels/internal/tensor.h" -#include "tensorflow/contrib/lite/kernels/kernel_util.h" -#include "tensorflow/contrib/lite/kernels/op_macros.h" - -namespace tflite { -namespace ops { -namespace custom { -namespace mfcc { - -enum KernelType { - kReference, -}; - -typedef struct { - float upper_frequency_limit; - float lower_frequency_limit; - int filterbank_channel_count; - int dct_coefficient_count; -} TfLiteMfccParams; - -constexpr int kInputTensorWav = 0; -constexpr int kInputTensorRate = 1; -constexpr int kOutputTensor = 0; - -void* Init(TfLiteContext* context, const char* buffer, size_t length) { - auto* data = new TfLiteMfccParams; - 
- const uint8_t* buffer_t = reinterpret_cast(buffer); - - const flexbuffers::Map& m = flexbuffers::GetRoot(buffer_t, length).AsMap(); - data->upper_frequency_limit = m["upper_frequency_limit"].AsInt64(); - data->lower_frequency_limit = m["lower_frequency_limit"].AsInt64(); - data->filterbank_channel_count = m["filterbank_channel_count"].AsInt64(); - data->dct_coefficient_count = m["dct_coefficient_count"].AsInt64(); - return data; -} - -void Free(TfLiteContext* context, void* buffer) { - delete reinterpret_cast(buffer); -} - -TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->user_data); - - TF_LITE_ENSURE_EQ(context, NumInputs(node), 2); - TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1); - - TfLiteTensor* inputWav = GetInput(context, node, kInputTensorWav); - TfLiteTensor* inputRate = GetInput(context, node, kInputTensorRate); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - - TF_LITE_ENSURE_EQ(context, NumDimensions(inputWav), 3); - TF_LITE_ENSURE_EQ(context, NumDimensions(inputRate), 1); - - TF_LITE_ENSURE_EQ(context, output->type, kTfLiteFloat32); - TF_LITE_ENSURE_EQ(context, inputWav->type, output->type); - - TfLiteIntArray* output_size = TfLiteIntArrayCreate(3); - output_size->data[0] = inputWav->dims->data[0]; - output_size->data[1] = inputWav->dims->data[1]; - output_size->data[2] = params->dct_coefficient_count; - - return context->ResizeTensor(context, output, output_size); -} - -// Input is a single squared-magnitude spectrogram frame. The input spectrum -// is converted to linear magnitude and weighted into bands using a -// triangular mel filterbank, and a discrete cosine transform (DCT) of the -// values is taken. Output is populated with the lowest dct_coefficient_count -// of these values. 
-template -TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) { - auto* params = reinterpret_cast(node->user_data); - - TfLiteTensor* inputWav = GetInput(context, node, kInputTensorWav); - TfLiteTensor* inputRate = GetInput(context, node, kInputTensorRate); - TfLiteTensor* output = GetOutput(context, node, kOutputTensor); - - const int32 sample_rate = *GetTensorData(inputRate); - - const int spectrogram_channels = inputWav->dims->data[2]; - const int spectrogram_samples = inputWav->dims->data[1]; - const int audio_channels = inputWav->dims->data[0]; - - internal::Mfcc mfcc; - mfcc.set_upper_frequency_limit(params->upper_frequency_limit); - mfcc.set_lower_frequency_limit(params->lower_frequency_limit); - mfcc.set_filterbank_channel_count(params->filterbank_channel_count); - mfcc.set_dct_coefficient_count(params->dct_coefficient_count); - - mfcc.Initialize(spectrogram_channels, sample_rate); - - const float* spectrogram_flat = GetTensorData(inputWav); - float* output_flat = GetTensorData(output); - - for (int audio_channel = 0; audio_channel < audio_channels; ++audio_channel) { - for (int spectrogram_sample = 0; spectrogram_sample < spectrogram_samples; - ++spectrogram_sample) { - const float* sample_data = - spectrogram_flat + - (audio_channel * spectrogram_samples * spectrogram_channels) + - (spectrogram_sample * spectrogram_channels); - std::vector mfcc_input(sample_data, - sample_data + spectrogram_channels); - std::vector mfcc_output; - mfcc.Compute(mfcc_input, &mfcc_output); - TF_LITE_ENSURE_EQ(context, params->dct_coefficient_count, - mfcc_output.size()); - float* output_data = output_flat + - (audio_channel * spectrogram_samples * - params->dct_coefficient_count) + - (spectrogram_sample * params->dct_coefficient_count); - for (int i = 0; i < params->dct_coefficient_count; ++i) { - output_data[i] = mfcc_output[i]; - } - } - } - - return kTfLiteOk; -} - -} // namespace mfcc - -TfLiteRegistration* Register_MFCC() { - static TfLiteRegistration r = 
{mfcc::Init, mfcc::Free, mfcc::Prepare, - mfcc::Eval}; - return &r; -} - -} // namespace custom -} // namespace ops -} // namespace tflite diff --git a/tensorflow/contrib/lite/kernels/mfcc_test.cc b/tensorflow/contrib/lite/kernels/mfcc_test.cc deleted file mode 100644 index 3f1b231f92..0000000000 --- a/tensorflow/contrib/lite/kernels/mfcc_test.cc +++ /dev/null @@ -1,104 +0,0 @@ -/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -==============================================================================*/ - -#include -#include -#include - -#include -#include "third_party/flatbuffers/include/flatbuffers/flexbuffers.h" -#include "tensorflow/contrib/lite/interpreter.h" -#include "tensorflow/contrib/lite/kernels/register.h" -#include "tensorflow/contrib/lite/kernels/test_util.h" -#include "tensorflow/contrib/lite/model.h" - -namespace tflite { -namespace ops { -namespace custom { - -TfLiteRegistration* Register_MFCC(); - -namespace { - -using ::testing::ElementsAre; -using ::testing::ElementsAreArray; - -class BaseMfccOpModel : public SingleOpModel { - public: - BaseMfccOpModel(const TensorData& input1, const TensorData& input2, - const TensorData& output) { - input1_ = AddInput(input1); - input2_ = AddInput(input2); - output_ = AddOutput(output); - - flexbuffers::Builder fbb; - fbb.Map([&]() { - fbb.Int("upper_frequency_limit", 4000); - fbb.Int("lower_frequency_limit", 20); - fbb.Int("filterbank_channel_count", 40); - 
fbb.Int("dct_coefficient_count", 13); - }); - fbb.Finish(); - SetCustomOp("Mfcc", fbb.GetBuffer(), Register_MFCC); - - BuildInterpreter({GetShape(input1_), GetShape(input2_)}); - } - - int input1() { return input1_; } - int input2() { return input2_; } - std::vector GetOutput() { return ExtractVector(output_); } - std::vector GetOutputShape() { return GetTensorShape(output_); } - - protected: - int input1_; - int input2_; - int output_; -}; - -TEST(MfccOpTest, SimpleTest) { - BaseMfccOpModel m({TensorType_FLOAT32, {1, 1, 513}}, {TensorType_INT32, {1}}, - {TensorType_FLOAT32, {}}); - - std::vector data(513); - for (int i = 0; i < data.size(); ++i) { - data[i] = i + 1; - } - m.PopulateTensor(m.input1(), 0, data.data(), - data.data() + data.size()); - m.PopulateTensor(m.input2(), {22050}); - - m.Invoke(); - - std::vector output_shape = m.GetOutputShape(); - EXPECT_THAT(output_shape, ElementsAre(1, 1, 13)); - EXPECT_THAT( - m.GetOutput(), - ElementsAreArray(ArrayFloatNear( - {29.13970072, -6.41568601, -0.61903012, -0.96778652, -0.26819878, - -0.40907028, -0.15614748, -0.23203119, -0.10481487, -0.1543029, - -0.0769791, -0.10806114, -0.06047613}, - 1e-3))); -} - -} // namespace -} // namespace custom -} // namespace ops -} // namespace tflite - -int main(int argc, char** argv) { - ::tflite::LogToStderr(); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tensorflow/contrib/lite/kernels/register.cc b/tensorflow/contrib/lite/kernels/register.cc index 369d3b9886..9537b79a9a 100644 --- a/tensorflow/contrib/lite/kernels/register.cc +++ b/tensorflow/contrib/lite/kernels/register.cc @@ -17,14 +17,6 @@ limitations under the License. 
namespace tflite { namespace ops { - -namespace custom { - -TfLiteRegistration* Register_AUDIO_SPECTROGRAM(); -TfLiteRegistration* Register_MFCC(); - -} // namespace custom - namespace builtin { TfLiteRegistration* Register_RELU(); @@ -131,12 +123,6 @@ BuiltinOpResolver::BuiltinOpResolver() { AddBuiltin(BuiltinOperator_LOG_SOFTMAX, Register_LOG_SOFTMAX()); AddBuiltin(BuiltinOperator_CAST, Register_CAST()); AddBuiltin(BuiltinOperator_DEQUANTIZE, Register_DEQUANTIZE()); - - // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that - // custom ops aren't always included by default. - AddCustom("Mfcc", tflite::ops::custom::Register_MFCC()); - AddCustom("AudioSpectrogram", - tflite::ops::custom::Register_AUDIO_SPECTROGRAM()); } TfLiteRegistration* BuiltinOpResolver::FindOp( -- GitLab From ff783fe97e25cb901395eb8ae8746ca5c56bca39 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 13:00:12 -0800 Subject: [PATCH 675/884] Automated g4 rollback of changelist 188492233 PiperOrigin-RevId: 188525453 --- tensorflow/python/framework/test_util.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index cfe8b19cb3..9fc1154201 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -898,6 +898,8 @@ class TensorFlowTestCase(googletest.TestCase): config.graph_options.optimizer_options.opt_level = -1 config.graph_options.rewrite_options.constant_folding = ( rewriter_config_pb2.RewriterConfig.OFF) + config.graph_options.rewrite_options.arithmetic_optimization = ( + rewriter_config_pb2.RewriterConfig.OFF) return config if graph is None: -- GitLab From 4c686715368a87c2490fc2041f2d828c59170ce9 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 9 Mar 2018 13:24:23 -0800 Subject: [PATCH 676/884] Internal Change PiperOrigin-RevId: 188528771 --- .../lite/kernels/strided_slice_test.cc | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tensorflow/contrib/lite/kernels/strided_slice_test.cc b/tensorflow/contrib/lite/kernels/strided_slice_test.cc index 5cac04b383..5c98c5f431 100644 --- a/tensorflow/contrib/lite/kernels/strided_slice_test.cc +++ b/tensorflow/contrib/lite/kernels/strided_slice_test.cc @@ -522,6 +522,28 @@ TEST(StridedSliceOpTest, In3D_IdentityShrinkAxis7) { EXPECT_TRUE(m.GetOutputShape().empty()); EXPECT_THAT(m.GetOutput(), ElementsAreArray({1})); } + +// This tests catches a very subtle bug that was fixed by cl/188403234. +TEST(StridedSliceOpTest, RunTwice) { + StridedSliceOpModel m({2, 3}, {2}, {2}, {2}, 1, 0, 0, 0, 0); + + auto setup_inputs = [&m]() { + m.SetInput({1, 2, 3, 4, 5, 6}); + m.SetBegin({1, 0}); + m.SetEnd({2, 2}); + m.SetStrides({1, 1}); + }; + + setup_inputs(); + m.Invoke(); + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 4, 5})); + + setup_inputs(); + m.Invoke(); + // Prior to cl/188403234 this was {4, 5}. + EXPECT_THAT(m.GetOutput(), ElementsAreArray({1, 2, 4, 5})); +} + } // namespace } // namespace tflite -- GitLab From 8fd38155b4e1d7fb1cb8b0583b51a7df2e15c92d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 13:26:41 -0800 Subject: [PATCH 677/884] LSTM support: Add non-uint8 quantized operators. 
PiperOrigin-RevId: 188529107 --- .../internal/optimized/optimized_ops.h | 197 ++++++++++++++++++ .../internal/reference/reference_ops.h | 184 +++++++++++++++- 2 files changed, 380 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h index f1937228f6..6bbc213cc6 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/optimized/optimized_ops.h @@ -610,6 +610,58 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, input_offset, output_pipeline); } +inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, int16* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + gemmlowp::ScopedProfilingLabel label("FullyConnected/Uint8Int16"); + // This is a copy of the reference implementation. We do not currently have a + // properly optimized version. + (void)gemm_context; // only used in properly optimized code. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_EQ(output_offset, 0); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. 
+ const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0); + const int accum_depth = ArraySize(filter_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum = bias_data[out_c]; + // Accumulation loop. + for (int d = 0; d < accum_depth; ++d) { + int16 input_val = input_data[b * accum_depth + d] + input_offset; + int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset; + accum += filter_val * input_val; + } + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = MultiplyByQuantizedMultiplier(accum, output_multiplier, + -output_shift); + // Saturate, cast to int16, and store to output array. + accum = std::max(accum, output_activation_min - output_offset); + accum = std::min(accum, output_activation_max - output_offset); + accum += output_offset; + output_data[out_c + output_depth * b] = accum; + } + } +} + // legacy, for compatibility with old checked-in code template void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, @@ -1599,6 +1651,39 @@ inline void Add(int left_shift, const uint8* input1_data, } } +template +inline void Add(const int16* input1_data, const Dims<4>& input1_dims, + int input1_shift, const int16* input2_data, + const Dims<4>& input2_dims, int input2_shift, + int16* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Add/Int16"); + // This is a copy of the reference implementation. We do not currently have a + // properly optimized version. 
+ static_assert(Ac == FusedActivationFunctionType::kNone, ""); + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input2_dims), flat_size); + + TFLITE_DCHECK(input1_shift == 0 || input2_shift == 0); + TFLITE_DCHECK_GE(input1_shift, 0); + TFLITE_DCHECK_GE(input2_shift, 0); + const int16* not_shift_input = input1_shift == 0 ? input1_data : input2_data; + const int16* shift_input = input1_shift == 0 ? input2_data : input1_data; + const int input_shift = input1_shift == 0 ? input2_shift : input1_shift; + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_shift)); + F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled); + output_data[i] = result.raw(); + } +} + template void Add(const int32* input1_data, const Dims<4>& input1_dims, const int32* input2_data, const Dims<4>& input2_dims, @@ -1873,6 +1958,57 @@ void Mul(const int32* input1_data, const Dims<4>& input1_dims, } } +inline void Mul(const int16* input1_data, const Dims<4>& input1_dims, + const int16* input2_data, const Dims<4>& input2_dims, + int16* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mul/Int16"); + // This is a copy of the reference implementation. We do not currently have a + // properly optimized version. + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input2_dims), flat_size); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. 
+ using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = + F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + output_data[i] = unclamped_result.raw(); + } +} + +inline void Mul(const int16* input1_data, const Dims<4>& input1_dims, + const int16* input2_data, const Dims<4>& input2_dims, + int32 output_offset, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mul/Int16Uint8"); + // This is a copy of the reference implementation. We do not currently have a + // properly optimized version. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input2_dims), flat_size); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = + F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + int16 rescaled_result = + gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); + int16 clamped_result = + std::min(output_activation_max - output_offset, rescaled_result); + clamped_result = + std::max(output_activation_min - output_offset, clamped_result); + output_data[i] = output_offset + clamped_result; + } +} + // TODO(jiawen): We can implement BroadcastMul on buffers of arbitrary // dimensionality if the runtime code does a single loop over one dimension // that handles broadcasting as the base case. The code generator would then @@ -3632,6 +3768,28 @@ inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, } } +inline void Logistic(const int16* input_data, const Dims<4>& input_dims, + int16* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Logistic/Int16"); + // This is a copy of the reference implementation. 
We do not currently have a + // properly optimized version. + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. + using F3 = gemmlowp::FixedPoint; + + const F3 input = F3::FromRaw(input_data[i]); + F0 output = gemmlowp::logistic(input); + output_data[i] = output.raw(); + } +} + inline void Tanh(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { gemmlowp::ScopedProfilingLabel label("Tanh"); @@ -3790,6 +3948,45 @@ inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, output_data[c] = output_val; } } + +inline void Tanh(const int16* input_data, const Dims<4>& input_dims, + int input_left_shift, int16* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Tanh/Int16"); + // This is a copy of the reference implementation. We do not currently have a + // properly optimized version. + + // Support for shifts is limited until we have a parameterized version of + // SaturatingRoundingMultiplyByPOT(). + TFLITE_DCHECK_GE(input_left_shift, 0); + TFLITE_DCHECK_LE(input_left_shift, 1); + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); + + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. 
+ using F3 = gemmlowp::FixedPoint; + + if (input_left_shift == 0) { + for (int i = 0; i < flat_size; i++) { + F3 input = F3::FromRaw(input_data[i]); + F0 output = gemmlowp::tanh(input); + output_data[i] = output.raw(); + } + } else { + for (int i = 0; i < flat_size; i++) { + F3 input = F3::FromRaw( + gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i])); + F0 output = gemmlowp::tanh(input); + output_data[i] = output.raw(); + } + } +} + inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, int32 zero_point, double scale, float* output_data, const Dims<4>& output_dims) { diff --git a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h index 84f6cf6e4f..d3d15edf4c 100644 --- a/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h +++ b/tensorflow/contrib/lite/kernels/internal/reference/reference_ops.h @@ -551,6 +551,55 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, } } +inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, + int32 input_offset, const uint8* filter_data, + const Dims<4>& filter_dims, int32 filter_offset, + const int32* bias_data, const Dims<4>& bias_dims, + int32 output_offset, int32 output_multiplier, + int output_shift, int32 output_activation_min, + int32 output_activation_max, int16* output_data, + const Dims<4>& output_dims, + gemmlowp::GemmContext* gemm_context) { + (void)gemm_context; // only used in optimized code. + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + TFLITE_DCHECK_EQ(output_offset, 0); + // TODO(benoitjacob): This really should be: + // const int batches = ArraySize(output_dims, 1); + // but the current --variable_batch hack consists in overwriting the 3rd + // dimension with the runtime batch size, as we don't keep track for each + // array of which dimension is the batch dimension in it. 
+ const int batches = ArraySize(output_dims, 1) * ArraySize(output_dims, 2) * + ArraySize(output_dims, 3); + const int output_depth = MatchingArraySize(filter_dims, 1, output_dims, 0); + const int accum_depth = ArraySize(filter_dims, 0); + TFLITE_DCHECK(IsPackedWithoutStrides(input_dims)); + TFLITE_DCHECK(IsPackedWithoutStrides(filter_dims)); + for (int b = 0; b < batches; ++b) { + for (int out_c = 0; out_c < output_depth; ++out_c) { + // Internal accumulation. + // Initialize accumulator with the bias-value. + int32 accum = bias_data[out_c]; + // Accumulation loop. + for (int d = 0; d < accum_depth; ++d) { + int16 input_val = input_data[b * accum_depth + d] + input_offset; + int16 filter_val = filter_data[out_c * accum_depth + d] + filter_offset; + accum += filter_val * input_val; + } + // Down-scale the final int32 accumulator to the scale used by our + // (16-bit, typically 3 integer bits) fixed-point format. The quantized + // multiplier and shift here have been pre-computed offline + // (e.g. by toco). + accum = MultiplyByQuantizedMultiplier(accum, output_multiplier, + -output_shift); + // Saturate, cast to int16, and store to output array. 
+ accum = std::max(accum, output_activation_min - output_offset); + accum = std::min(accum, output_activation_max - output_offset); + accum += output_offset; + output_data[out_c + output_depth * b] = accum; + } + } +} + // legacy, for compatibility with old checked-in code template void FullyConnected(const uint8* input_data, const Dims<4>& input_dims, @@ -903,6 +952,36 @@ inline void Add(int left_shift, const uint8* input1_data, } } +template +inline void Add(const int16* input1_data, const Dims<4>& input1_dims, + int input1_shift, const int16* input2_data, + const Dims<4>& input2_dims, int input2_shift, + int16* output_data, const Dims<4>& output_dims) { + static_assert(Ac == FusedActivationFunctionType::kNone, ""); + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input2_dims), flat_size); + + TFLITE_DCHECK(input1_shift == 0 || input2_shift == 0); + TFLITE_DCHECK_GE(input1_shift, 0); + TFLITE_DCHECK_GE(input2_shift, 0); + const int16* not_shift_input = input1_shift == 0 ? input1_data : input2_data; + const int16* shift_input = input1_shift == 0 ? input2_data : input1_data; + const int input_shift = input1_shift == 0 ? input2_shift : input1_shift; + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + + F0 input_ready_scaled = F0::FromRaw(not_shift_input[i]); + F0 scaled_input = + F0::FromRaw(gemmlowp::RoundingDivideByPOT(shift_input[i], input_shift)); + F0 result = gemmlowp::SaturatingAdd(scaled_input, input_ready_scaled); + output_data[i] = result.raw(); + } +} + // TODO(jiawen): We can implement BroadcastAdd on buffers of arbitrary // dimensionality if the runtime code does a single loop over one dimension // that handles broadcasting as the base case. 
The code generator would then @@ -1184,6 +1263,53 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, } } +inline void Mul(const int16* input1_data, const Dims<4>& input1_dims, + const int16* input2_data, const Dims<4>& input2_dims, + int16* output_data, const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mul/Int16"); + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input2_dims), flat_size); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = + F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + output_data[i] = unclamped_result.raw(); + } +} + +inline void Mul(const int16* input1_data, const Dims<4>& input1_dims, + const int16* input2_data, const Dims<4>& input2_dims, + int32 output_offset, int32 output_activation_min, + int32 output_activation_max, uint8* output_data, + const Dims<4>& output_dims) { + gemmlowp::ScopedProfilingLabel label("Mul/Int16Uint8"); + TFLITE_DCHECK_LE(output_activation_min, output_activation_max); + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input1_dims), flat_size); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input2_dims), flat_size); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. 
+ using F0 = gemmlowp::FixedPoint; + + F0 unclamped_result = + F0::FromRaw(input1_data[i]) * F0::FromRaw(input2_data[i]); + int16 rescaled_result = + gemmlowp::RoundingDivideByPOT(unclamped_result.raw(), 8); + int16 clamped_result = + std::min(output_activation_max - output_offset, rescaled_result); + clamped_result = + std::max(output_activation_min - output_offset, clamped_result); + output_data[i] = output_offset + clamped_result; + } +} + // legacy, for compatibility with old checked-in code template inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims, @@ -2317,11 +2443,13 @@ inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled); const FixedPoint0 output_val_f0 = gemmlowp::logistic(input_val_f4); + // Convert from Q0.31 to Q23.8. using gemmlowp::RoundingDivideByPOT; int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 23); if (output_val_s32 == 256) { output_val_s32 = 255; } + // Reinterpret as U0.8. TFLITE_DCHECK_GE(output_val_s32, 0); TFLITE_DCHECK_LE(output_val_s32, 255); output_val = static_cast(output_val_s32); @@ -2333,6 +2461,25 @@ inline void Logistic(const uint8* input_data, const Dims<4>& input_dims, } } +inline void Logistic(const int16* input_data, const Dims<4>& input_dims, + int16* output_data, const Dims<4>& output_dims) { + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); + + for (int i = 0; i < flat_size; i++) { + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. 
+ using F3 = gemmlowp::FixedPoint; + + const F3 input = F3::FromRaw(input_data[i]); + F0 output = gemmlowp::logistic(input); + output_data[i] = output.raw(); + } +} + inline void Tanh(const float* input_data, const Dims<4>& input_dims, float* output_data, const Dims<4>& output_dims) { const int batches = MatchingArraySize(input_dims, 3, output_dims, 3); @@ -2382,13 +2529,14 @@ inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, const FixedPoint4 input_val_f4 = FixedPoint4::FromRaw(input_val_rescaled); const FixedPoint0 output_val_f0 = gemmlowp::tanh(input_val_f4); - + // Convert from Q0.31 to Q24.7. using gemmlowp::RoundingDivideByPOT; int32 output_val_s32 = RoundingDivideByPOT(output_val_f0.raw(), 24); output_val_s32 += output_zero_point; if (output_val_s32 == 256) { output_val_s32 = 255; } + // Reinterpret as Q0.7, encoded in uint8. TFLITE_DCHECK_GE(output_val_s32, 0); TFLITE_DCHECK_LE(output_val_s32, 255); output_val = static_cast(output_val_s32); @@ -2400,6 +2548,40 @@ inline void Tanh(const uint8* input_data, const Dims<4>& input_dims, } } +inline void Tanh(const int16* input_data, const Dims<4>& input_dims, + int input_left_shift, int16* output_data, + const Dims<4>& output_dims) { + // Support for shifts is limited until we have a parameterized version of + // SaturatingRoundingMultiplyByPOT(). + TFLITE_DCHECK_GE(input_left_shift, 0); + TFLITE_DCHECK_LE(input_left_shift, 1); + + const int flat_size = RequiredBufferSizeForDims(output_dims); + TFLITE_DCHECK_EQ(RequiredBufferSizeForDims(input_dims), flat_size); + + // F0 uses 0 integer bits, range [-1, 1]. + // This is the return type of math functions such as tanh, logistic, + // whose range is in [-1, 1]. + using F0 = gemmlowp::FixedPoint; + // F3 uses 3 integer bits, range [-8, 8], the input range expected here. 
+ using F3 = gemmlowp::FixedPoint; + + if (input_left_shift == 0) { + for (int i = 0; i < flat_size; i++) { + F3 input = F3::FromRaw(input_data[i]); + F0 output = gemmlowp::tanh(input); + output_data[i] = output.raw(); + } + } else { + for (int i = 0; i < flat_size; i++) { + F3 input = F3::FromRaw( + gemmlowp::SaturatingRoundingMultiplyByPOT<1>(input_data[i])); + F0 output = gemmlowp::tanh(input); + output_data[i] = output.raw(); + } + } +} + inline void Dequantize(const uint8* input_data, const Dims<4>& input_dims, int32 zero_point, double scale, float* output_data, const Dims<4>& output_dims) { -- GitLab From eeaf414ee2ed4d90ce451d622a2f19e301639529 Mon Sep 17 00:00:00 2001 From: Kay Zhu Date: Fri, 9 Mar 2018 13:47:36 -0800 Subject: [PATCH 678/884] [XLA:Doc] Fix an typo "Alternately" -> "Alternatively". PiperOrigin-RevId: 188532135 --- tensorflow/docs_src/performance/xla/operation_semantics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/performance/xla/operation_semantics.md b/tensorflow/docs_src/performance/xla/operation_semantics.md index 8162382846..11a4ef4312 100644 --- a/tensorflow/docs_src/performance/xla/operation_semantics.md +++ b/tensorflow/docs_src/performance/xla/operation_semantics.md @@ -257,7 +257,7 @@ the range between the minimum and maximum, else returns the minimum value if the operand is below this range or the maximum value if the operand is above this range. That is, `clamp(a, x, b) = min(max(a, x), b)`. -All three arrays must be the same shape. Alternately, as a restricted form of +All three arrays must be the same shape. Alternatively, as a restricted form of [broadcasting](broadcasting.md), `min` and/or `max` can be a scalar of type `T`. Example with scalar `min` and `max`: -- GitLab From a60c7785325fce1d6c0c388c7ab348ac228a8032 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 9 Mar 2018 13:54:17 -0800 Subject: [PATCH 679/884] Remove the nondeterminism from a test for initializing variables from checkpoints. PiperOrigin-RevId: 188533156 --- .../python/training/checkpoint_utils_test.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tensorflow/python/training/checkpoint_utils_test.py b/tensorflow/python/training/checkpoint_utils_test.py index 640bd665cb..2bb95b80ff 100644 --- a/tensorflow/python/training/checkpoint_utils_test.py +++ b/tensorflow/python/training/checkpoint_utils_test.py @@ -158,23 +158,23 @@ class CheckpointsTest(test.TestCase): "some_scope", initializer=init_ops.zeros_initializer()): my1 = variable_scope.get_variable("my1", [1, 10]) - # At this point, my1.initialized_value() will add ops that reference - # the zeros initializer of my1. - before = variables.Variable(my1.initialized_value(), name="before") + before = my1.initialized_value() checkpoint_utils.init_from_checkpoint(checkpoint_dir, {"var1": my1}) - # At this point, my1.initialized_value() will add ops that reference - # the newly set initializer of my1. 
- after = variables.Variable(my1.initialized_value(), name="after") + after = my1.initialized_value() + + self.assertAllEqual(session.run(before), [[0.0] * 10]) + self.assertAllEqual(session.run(after), v1) session.run(variables.global_variables_initializer()) + self.assertAllEqual(session.run(my1), v1) self.assertAllEqual(session.run(my1.initialized_value()), v1) - self.assertAllClose(session.run(before), [[0.0] * 10]) + self.assertAllClose(session.run(before), v1) self.assertAllClose(session.run(after), v1) with self.assertRaises(AssertionError): - self.assertAllClose(session.run(before), session.run(after)) + self.assertAllClose(v1, [[0.0] * 10]) def testInitWithScopeDoesNotCaptureSuffixes(self): checkpoint_dir = self.get_temp_dir() -- GitLab From 381f000714b3ecd4be79f057c7a230d5152fa3dd Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Fri, 9 Mar 2018 14:00:23 -0800 Subject: [PATCH 680/884] Add smart_case that calls smart_cond. PiperOrigin-RevId: 188534066 --- tensorflow/contrib/framework/__init__.py | 2 + tensorflow/python/framework/smart_cond.py | 29 ++++++ .../python/framework/smart_cond_test.py | 50 +++++++++- tensorflow/python/ops/control_flow_ops.py | 91 +++++++++++++------ 4 files changed, 144 insertions(+), 28 deletions(-) diff --git a/tensorflow/contrib/framework/__init__.py b/tensorflow/contrib/framework/__init__.py index 21f9651318..3398b3fd1c 100644 --- a/tensorflow/contrib/framework/__init__.py +++ b/tensorflow/contrib/framework/__init__.py @@ -90,6 +90,7 @@ See the @{$python/contrib.framework} guide. 
@@smart_cond @@smart_constant_value +@@smart_case @@CriticalSection @@ -108,6 +109,7 @@ from tensorflow.contrib.framework.python.ops import * from tensorflow.python.framework.ops import prepend_name_scope from tensorflow.python.framework.ops import strip_name_scope +from tensorflow.python.framework.smart_cond import smart_case from tensorflow.python.framework.smart_cond import smart_cond from tensorflow.python.framework.smart_cond import smart_constant_value from tensorflow.python.framework.tensor_spec import BoundedTensorSpec diff --git a/tensorflow/python/framework/smart_cond.py b/tensorflow/python/framework/smart_cond.py index 7bd9f47d5a..c7ff23e4ff 100644 --- a/tensorflow/python/framework/smart_cond.py +++ b/tensorflow/python/framework/smart_cond.py @@ -92,3 +92,32 @@ def smart_constant_value(pred): raise TypeError("`pred` must be a Tensor, or a Python bool, or 1 or 0. " "Found instead: %s" % pred) return pred_value + + +def smart_case(pred_fn_pairs, default=None, exclusive=False, name="smart_case"): + """Like tf.case, except attempts to statically evaluate predicates. + + If any predicate in `pred_fn_pairs` is a bool or has a constant value, the + associated callable will be called or omitted depending on its value. + Otherwise this functions like tf.case. + + Args: + pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor and a + callable which returns a list of tensors. + default: Optional callable that returns a list of tensors. + exclusive: True iff at most one predicate is allowed to evaluate to `True`. + name: A name for this operation (optional). + + Returns: + The tensors returned by the first pair whose predicate evaluated to True, or + those returned by `default` if none does. + + Raises: + TypeError: If `pred_fn_pairs` is not a list/dictionary. + TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples. + TypeError: If `fns[i]` is not callable for any i, or `default` is not + callable. 
+ """ + return control_flow_ops._case_helper( # pylint: disable=protected-access + smart_cond, pred_fn_pairs, default, exclusive, name, + allow_python_preds=True) diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py index 3070355980..582ce81e7a 100644 --- a/tensorflow/python/framework/smart_cond_test.py +++ b/tensorflow/python/framework/smart_cond_test.py @@ -28,6 +28,10 @@ from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest +def raise_exception(): + raise RuntimeError("did not expect to be called") + + @test_util.with_c_api class SmartCondTest(test_util.TensorFlowTestCase): @@ -68,8 +72,6 @@ class SmartCondTest(test_util.TensorFlowTestCase): y = constant_op.constant(2) # x * y > 0 can be evaluated at graph construction time, so the false # branch shouldn't be evaluated at all. - def raise_exception(): - raise RuntimeError("did not expect to be called") z = smart_cond.smart_cond(x * y > 0, lambda: constant_op.constant(1), raise_exception) self.assertEqual(z.eval(feed_dict={x: 1}), 1) @@ -98,5 +100,49 @@ class SmartCondTest(test_util.TensorFlowTestCase): smart_cond.smart_cond(True, lambda: x) +@test_util.with_c_api +class SmartCaseTest(test_util.TensorFlowTestCase): + + def testTrue(self): + x = array_ops.placeholder(dtype=dtypes.int32, shape=[]) + conditions = [(True, lambda: constant_op.constant(1)), + (x == 0, raise_exception)] + y = smart_cond.smart_case(conditions, default=raise_exception, + exclusive=False) + z = smart_cond.smart_case(conditions, default=raise_exception, + exclusive=True) + with session.Session() as sess: + # No feed_dict necessary + self.assertEqual(sess.run(y), 1) + self.assertEqual(sess.run(z), 1) + + def testFalse(self): + conditions = [(False, raise_exception)] + y = smart_cond.smart_case(conditions, + default=lambda: constant_op.constant(1), + exclusive=False) + z = smart_cond.smart_case(conditions, + default=lambda: constant_op.constant(1), 
+ exclusive=True) + with session.Session() as sess: + self.assertEqual(sess.run(y), 1) + self.assertEqual(sess.run(z), 1) + + def testMix(self): + # Constant expression evaluation only works with the C API enabled. + if not ops._USE_C_API: return + + x = array_ops.placeholder(dtype=dtypes.int32, shape=[]) + y = constant_op.constant(10) + conditions = [(x > 1, lambda: constant_op.constant(1)), + (y < 1, raise_exception), + (False, raise_exception), + (True, lambda: constant_op.constant(3))] + z = smart_cond.smart_case(conditions, default=raise_exception) + with session.Session() as sess: + self.assertEqual(sess.run(z, feed_dict={x: 2}), 1) + self.assertEqual(sess.run(z, feed_dict={x: 0}), 3) + + if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/ops/control_flow_ops.py b/tensorflow/python/ops/control_flow_ops.py index a2f52de749..ff4f452bed 100644 --- a/tensorflow/python/ops/control_flow_ops.py +++ b/tensorflow/python/ops/control_flow_ops.py @@ -3484,15 +3484,17 @@ def _case_create_default_action(predicates, actions): return default_action, other_predicates, other_actions -def _case_verify_and_canonicalize_args(pred_fn_pairs, exclusive, name): +def _case_verify_and_canonicalize_args(pred_fn_pairs, exclusive, name, + allow_python_preds): """Verifies input arguments for the case function. Args: - pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor and a - callable which returns a list of tensors. + pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor, + and a callable which returns a list of tensors. exclusive: True iff at most one predicate is allowed to evaluate to `True`. name: A name for the case operation. - + allow_python_preds: if true, pred_fn_pairs may contain Python bools in + addition to boolean Tensors Raises: TypeError: If `pred_fn_pairs` is not a list/dictionary. TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples. 
@@ -3517,14 +3519,69 @@ def _case_verify_and_canonicalize_args(pred_fn_pairs, exclusive, name): if not isinstance(pred_fn_pair, _basetuple) or len(pred_fn_pair) != 2: raise TypeError("Each entry in pred_fn_pairs must be a 2-tuple") pred, fn = pred_fn_pair - if pred.dtype != dtypes.bool: - raise TypeError("pred must be of type bool: %s", pred.name) + + if isinstance(pred, ops.Tensor): + if pred.dtype != dtypes.bool: + raise TypeError("pred must be Tensor of type bool: %s" % pred.name) + elif not allow_python_preds: + raise TypeError("pred must be a Tensor, got: %s" % pred) + elif not isinstance(pred, bool): + raise TypeError("pred must be a Tensor or bool, got: %s" % pred) + if not callable(fn): raise TypeError("fn for pred %s must be callable." % pred.name) + predicates, actions = zip(*pred_fn_pairs) return predicates, actions +def _case_helper(cond_fn, pred_fn_pairs, default, + exclusive, name, allow_python_preds=False, **cond_kwargs): + """Implementation of case that allows for different cond functions. + + Args: + cond_fn: method that has signature and semantics of `cond` above. + pred_fn_pairs: Dict or list of pairs of a boolean scalar tensor, and a + callable which returns a list of tensors. + default: Optional callable that returns a list of tensors. + exclusive: True iff at most one predicate is allowed to evaluate to `True`. + name: A name for this operation (optional). + allow_python_preds: if true, pred_fn_pairs may contain Python bools in + addition to boolean Tensors + **cond_kwargs: keyword arguments that will be passed to `cond_fn`. + + Returns: + The tensors returned by the first pair whose predicate evaluated to True, or + those returned by `default` if none does. + + Raises: + TypeError: If `pred_fn_pairs` is not a list/dictionary. + TypeError: If `pred_fn_pairs` is a list but does not contain 2-tuples. + TypeError: If `fns[i]` is not callable for any i, or `default` is not + callable. 
+ """ + predicates, actions = _case_verify_and_canonicalize_args( + pred_fn_pairs, exclusive, name, allow_python_preds) + with ops.name_scope(name, "case", [predicates]): + if default is None: + default, predicates, actions = _case_create_default_action( + predicates, actions) + fn = default + # To eval conditions in direct order we create nested conditions in reverse: + # cond_fn(c[0], true_fn=.., false_fn=cond_fn(c[1], ...)) + for predicate, action in reversed(list(zip(predicates, actions))): + fn = functools.partial( + cond_fn, predicate, true_fn=action, false_fn=fn, **cond_kwargs) + if exclusive: + with ops.control_dependencies([ + _assert_at_most_n_true( + predicates, n=1, msg="Input error: exclusive=True") + ]): + return fn() + else: + return fn() + + @tf_export("case") def case(pred_fn_pairs, default=None, @@ -3615,26 +3672,8 @@ def case(pred_fn_pairs, TypeError: If `fns[i]` is not callable for any i, or `default` is not callable. """ - predicates, actions = _case_verify_and_canonicalize_args( - pred_fn_pairs, exclusive, name) - with ops.name_scope(name, "case", [predicates]): - if default is None: - default, predicates, actions = _case_create_default_action( - predicates, actions) - fn = default - # To eval conditions in direct order we create nested conditions in reverse: - # cond(c[0], true_fn=.., false_fn=cond(c[1], ...)) - for predicate, action in reversed(list(zip(predicates, actions))): - fn = functools.partial( - cond, predicate, true_fn=action, false_fn=fn, strict=strict) - if exclusive: - with ops.control_dependencies([ - _assert_at_most_n_true( - predicates, n=1, msg="Input error: exclusive=True") - ]): - return fn() - else: - return fn() + return _case_helper(cond, pred_fn_pairs, default, exclusive, name, + allow_python_preds=False, strict=strict) class XLAControlFlowContext(ControlFlowContext): -- GitLab From 2c45be4f9f1af11e35fa64cb799a099a84d17504 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Fri, 9 Mar 2018 14:15:23 -0800 Subject: [PATCH 
681/884] [TF:XLA] Bump open source llvm revision to r326989 PiperOrigin-RevId: 188536576 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 38acb1a6b2..d7c3e3702f 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/195a164675af86f390f9816e53291013d1b551d7.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/195a164675af86f390f9816e53291013d1b551d7.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/636e2230de961637b059b9cd15799daef32544f8.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/636e2230de961637b059b9cd15799daef32544f8.tar.gz", ], - sha256 = "57a8333f8e6095d49f1e597ca18e591aba8a89d417f4b58bceffc5fe1ffcc02b", - strip_prefix = "llvm-195a164675af86f390f9816e53291013d1b551d7", + sha256 = "44f08a32ac48eca545fd6eac4d5ef3a9cea4382f805b87dce38340255e7d2138", + strip_prefix = "llvm-636e2230de961637b059b9cd15799daef32544f8", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 4ffc1043866d688023ed2942bb8b02e803c42891 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 9 Mar 2018 14:17:03 -0800 Subject: [PATCH 682/884] Automated g4 rollback of changelist 188501394 PiperOrigin-RevId: 188536863 --- .../core/grappler/costs/graph_properties.cc | 7 - .../core/grappler/costs/graph_properties.h | 2 - .../grappler/costs/graph_properties_test.cc | 27 ---- tensorflow/core/grappler/op_types.cc | 6 - tensorflow/core/grappler/op_types.h | 2 - .../grappler/optimizers/constant_folding.cc | 143 ++---------------- .../grappler/optimizers/constant_folding.h | 2 +- .../optimizers/constant_folding_test.cc | 100 ++---------- 8 files changed, 28 insertions(+), 261 deletions(-) diff --git a/tensorflow/core/grappler/costs/graph_properties.cc b/tensorflow/core/grappler/costs/graph_properties.cc index 817247e379..243ca9121c 100644 --- a/tensorflow/core/grappler/costs/graph_properties.cc +++ b/tensorflow/core/grappler/costs/graph_properties.cc @@ -1182,12 +1182,5 @@ GraphProperties::GetOutputProperties(const string& node_name) const { return missing_properties_; } -void GraphProperties::ClearInputProperties(const string& node_name) { - input_properties_.erase(node_name); -} -void GraphProperties::ClearOutputProperties(const string& node_name) { - output_properties_.erase(node_name); -} - } // end namespace grappler } // end namespace tensorflow diff --git a/tensorflow/core/grappler/costs/graph_properties.h b/tensorflow/core/grappler/costs/graph_properties.h index 5aa4962072..6fc53a7f2e 100644 --- a/tensorflow/core/grappler/costs/graph_properties.h +++ b/tensorflow/core/grappler/costs/graph_properties.h @@ -64,8 +64,6 @@ class GraphProperties { const string& node_name) const; const std::vector& GetOutputProperties( const string& node_name) const; - void ClearInputProperties(const string& node_name); - void ClearOutputProperties(const string& node_name); static void FillTensorPropertiesFromContext( const shape_inference::ShapeHandle&, const DataType&, diff --git a/tensorflow/core/grappler/costs/graph_properties_test.cc 
b/tensorflow/core/grappler/costs/graph_properties_test.cc index 284d9d409b..5012069118 100644 --- a/tensorflow/core/grappler/costs/graph_properties_test.cc +++ b/tensorflow/core/grappler/costs/graph_properties_test.cc @@ -113,33 +113,6 @@ TEST_F(GraphPropertiesTest, StaticProperties) { } } -TEST_F(GraphPropertiesTest, ClearProperties) { - TrivialTestGraphInputYielder fake_input(4, 1, 10, false, - cluster_->GetDeviceNames()); - GrapplerItem item; - CHECK(fake_input.NextItem(&item)); - - GraphProperties properties(item); - Status s = properties.InferStatically(true); - TF_CHECK_OK(s); - - for (const auto& node : item.graph.node()) { - if (node.op() == "RandomStandardNormal") { - EXPECT_EQ(1, properties.GetInputProperties(node.name()).size()); - const auto props = properties.GetOutputProperties(node.name()); - properties.ClearOutputProperties(node.name()); - const auto cleared_props = properties.GetOutputProperties(node.name()); - EXPECT_TRUE(cleared_props.empty()); - } else if (node.op() == "AddN") { - const auto in_props = properties.GetInputProperties(node.name()); - EXPECT_EQ(1, in_props.size()); - properties.ClearInputProperties(node.name()); - const auto cleared_props = properties.GetInputProperties(node.name()); - EXPECT_TRUE(cleared_props.empty()); - } - } -} - TEST_F(GraphPropertiesTest, DynamicProperties) { TrivialTestGraphInputYielder fake_input(4, 1, 10, false, cluster_->GetDeviceNames()); diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index ae71094079..8cf1402ae8 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -72,10 +72,6 @@ bool IsComplex(const NodeDef& node) { return node.op() == "Complex"; } bool IsComplexAbs(const NodeDef& node) { return node.op() == "ComplexAbs"; } -bool IsConcat(const NodeDef& node) { - return node.op() == "Concat" || node.op() == "ConcatV2"; -} - bool IsConcatOffset(const NodeDef& node) { return node.op() == "ConcatOffset"; } bool 
IsConstant(const NodeDef& node) { return node.op() == "Const"; } @@ -217,8 +213,6 @@ bool IsNextIteration(const NodeDef& node) { return op == "NextIteration" || op == "RefNextIteration"; } -bool IsPack(const NodeDef& node) { return node.op() == "Pack"; } - bool IsPad(const NodeDef& node) { const auto& op = node.op(); return op == "Pad" || op == "PadV2"; diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index 690275da7c..a7c33ef97b 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -40,7 +40,6 @@ bool IsCast(const NodeDef& node); bool IsComplex(const NodeDef& node); bool IsComplexAbs(const NodeDef& node); bool IsConj(const NodeDef& node); -bool IsConcat(const NodeDef& node); bool IsConcatOffset(const NodeDef& node); bool IsConstant(const NodeDef& node); bool IsConv2D(const NodeDef& node); @@ -86,7 +85,6 @@ bool IsMul(const NodeDef& node); bool IsMatMul(const NodeDef& node); bool IsNextIteration(const NodeDef& node); bool IsPad(const NodeDef& node); -bool IsPack(const NodeDef& node); bool IsNoOp(const NodeDef& node); bool IsNotEqual(const NodeDef& node); bool IsPlaceholder(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 4036ea3f16..31dc1b73e1 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1510,7 +1510,7 @@ Status ConstantFolding::ReplaceOperationWithConstant( } Status ConstantFolding::SimplifyGraph(GraphDef* output, - GraphProperties* properties, + const GraphProperties& properties, bool use_shape_info) { const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE; for (int i = 0; i < output->node_size(); ++i) { @@ -1520,7 +1520,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (use_shape_info && (IsShuffle(*node) || IsReverse(*node) || IsTranspose(*node))) { const auto& 
shape = - properties->GetInputProperties(node->name())[0].shape(); + properties.GetInputProperties(node->name())[0].shape(); // The node is replaceable iff // unknown_rank == false && (dim_size == 0 || all dims have size 1) bool replaceable = !shape.unknown_rank(); @@ -1649,7 +1649,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, graph_modified_ = true; continue; } - if (use_shape_info && IsSimplifiableReshape(*node, *properties)) { + if (use_shape_info && IsSimplifiableReshape(*node, properties)) { DataType output_type = node->attr().at("T").type(); node->set_op("Identity"); node->clear_attr(); @@ -1667,8 +1667,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, // Simplify arithmetic operations with ones or zeros. if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_any_div) && - properties->HasInputProperties(node->name()) && - properties->HasOutputProperties(node->name())) { + properties.HasInputProperties(node->name()) && + properties.HasOutputProperties(node->name())) { const NodeDef* x = node_map_->GetNode(node->input(0)); const NodeDef* y = node_map_->GetNode(node->input(1)); if (x == nullptr || y == nullptr) { @@ -1676,12 +1676,12 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, node->DebugString()); } const TensorShapeProto& output_shape = - properties->GetOutputProperties(node->name())[0].shape(); + properties.GetOutputProperties(node->name())[0].shape(); // Simplify element-wise multiplication by ones or addition/subtraction // of zeros. 
const TensorShapeProto& y_shape = - properties->GetInputProperties(node->name())[1].shape(); + properties.GetInputProperties(node->name())[1].shape(); const bool x_is_zero = IsZeros(*x); const bool x_is_one = IsOnes(*x); const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); @@ -1708,7 +1708,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } const TensorShapeProto& x_shape = - properties->GetInputProperties(node->name())[0].shape(); + properties.GetInputProperties(node->name())[0].shape(); const bool y_is_zero = IsZeros(*y); const bool y_is_one = IsOnes(*y); const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); @@ -1921,11 +1921,13 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, // folding of ops when more than one but not all inputs are constant. // For AddN and AccumulateNV2, we may furthermore reorder inputs, since // addition is commutative. - const int num_non_control_inputs = NumNonControlInputs(*node); + // TODO(rmlarsen): Concat/Pack/ParallelConcat which are not commutative, so + // we have to preserve order and can only push consecutive runs of constant + // inputs into sub-nodes. if (IsAggregate(*node) && IsCommutative(*node) && - num_non_control_inputs > 2) { + NumNonControlInputs(*node) > 2) { const int num_control_inputs = - node->input_size() - num_non_control_inputs; + node->input_size() - NumNonControlInputs(*node); std::vector const_inputs; std::vector nonconst_inputs; for (int i = 0; i < node->input_size(); ++i) { @@ -1941,7 +1943,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } // Promote AccumulateNV2 with all constant inputs to AddN, since it is // a fake node that cannot be constant folded by itself. 
- if (const_inputs.size() == num_non_control_inputs && + if (const_inputs.size() == NumNonControlInputs(*node) && node->op() == "AccumulateNV2") { node->set_op("AddN"); node->mutable_attr()->erase("shape"); @@ -1951,7 +1953,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const string new_node_name = OptimizedNodeName( *node, strings::StrCat("_partial_split_", const_inputs.size())); if (1 < const_inputs.size() && - const_inputs.size() < num_non_control_inputs && + const_inputs.size() < NumNonControlInputs(*node) && !node_map_->NodeExists(new_node_name)) { NodeDef* added_node = output->add_node(); *added_node = *node; @@ -1985,121 +1987,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, const_inputs.size() - 1); (*node->mutable_attr())["N"].set_i(node->input_size() - num_control_inputs); - properties->ClearInputProperties(node->name()); (*added_node->mutable_attr())["N"].set_i(const_inputs.size()); graph_modified_ = true; - continue; - } - } - - // Partial constant folding for Concat which is not commutative, so - // we have to preserve order and can only push consecutive runs of constant - // inputs into sub-nodes. 
- if (IsConcat(*node) && num_non_control_inputs > 3) { - bool already_optimized = false; - const string optimized = strings::StrCat(node->name(), "_partial_split_"); - for (const string& input : node->input()) { - if (input.rfind(optimized) != string::npos) { - already_optimized = true; - break; - } - } - if (already_optimized) { - continue; - } - int axis_arg = -1; - int begin = 0; - int end = num_non_control_inputs; - if (node->op() == "Concat") { - begin = 1; - axis_arg = 0; - } else if (node->op() == "ConcatV2") { - end = num_non_control_inputs - 1; - axis_arg = num_non_control_inputs - 1; - } else { - continue; - } - - const NodeDef* axis_arg_node = - node_map_->GetNode(NodeName(node->input(axis_arg))); - if (axis_arg_node == nullptr || !IsReallyConstant(*axis_arg_node)) { - // We cannot constant fold Concat unless we know the axis. - // Skip node. - continue; - } - - // We search for consecutive runs of constant inputs in the range - // [begin:end[ and push then down into child nodes. - std::vector> constant_input_runs; - int first = begin; - int last = begin; - while (last < end) { - while (first < end && !IsReallyConstant(*node_map_->GetNode( - NodeName(node->input(first))))) { - ++first; - } - // Invariant: node[first] is constant || first >= end. - last = first + 1; - while (last < end && IsReallyConstant(*node_map_->GetNode( - NodeName(node->input(last))))) { - ++last; - } - // Invariant: node[last] is not constant || last >= end - // Discard intervals shorter than 2 elements. - if (first < end && (last - first) > 1) { - constant_input_runs.emplace_back(first, last); - } - first = last; - } - - std::set inputs_to_delete; - for (auto interval : constant_input_runs) { - // Push the constant inputs in the interval to a child node than can be - // constant folded. 
- const string new_node_name = OptimizedNodeName( - *node, strings::StrCat("_partial_split_", interval.first)); - if (node_map_->NodeExists(new_node_name)) { - break; - } - NodeDef* added_node = output->add_node(); - *added_node = *node; - added_node->set_name(new_node_name); - node_map_->AddNode(added_node->name(), added_node); - added_node->clear_input(); - for (int i = interval.first; i < interval.second; ++i) { - added_node->add_input(node->input(i)); - node_map_->UpdateOutput(NodeName(node->input(i)), node->name(), - added_node->name()); - if (i != interval.first) { - inputs_to_delete.insert(i); - } - } - added_node->add_input(node->input(axis_arg)); - (*added_node->mutable_attr())["N"].set_i(interval.second - - interval.first); - node_map_->AddOutput(NodeName(node->input(axis_arg)), - added_node->name()); - - // Overwrite the first constant input with the result of the added - // child node. - node->set_input(interval.first, added_node->name()); - node_map_->AddOutput(added_node->name(), node->name()); - } - if (!constant_input_runs.empty()) { - graph_modified_ = true; - if (!inputs_to_delete.empty()) { - // Fix up the inputs to the original node. 
- std::vector tmp(node->input().begin(), node->input().end()); - node->clear_input(); - for (int i = 0; i < tmp.size(); ++i) { - if (inputs_to_delete.find(i) == inputs_to_delete.end()) { - node->add_input(tmp[i]); - } - } - (*node->mutable_attr())["N"].set_i(node->input_size() - 1); - properties->ClearInputProperties(node->name()); - } - continue; } } } @@ -2141,7 +2030,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, TF_RETURN_IF_ERROR(FoldGraph(output)); node_map_.reset(new NodeMap(output)); - TF_RETURN_IF_ERROR(SimplifyGraph(output, &properties, can_use_shape_info)); + TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info)); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 13ecfcd281..2fd59c7f9c 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -92,7 +92,7 @@ class ConstantFolding : public GraphOptimizer { bool IsSimplifiableReduction(const NodeDef& node) const; bool IsSimplifiableReshape(const NodeDef& node, const GraphProperties& properties) const; - Status SimplifyGraph(GraphDef* output, GraphProperties* properties, + Status SimplifyGraph(GraphDef* output, const GraphProperties& properties, bool use_shape_info); Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 9214695eb6..4b9770889f 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -188,19 +188,20 @@ TEST_F(ConstantFoldingTest, NeutralElement) { Output sub1 = ops::Sub(s.WithOpName("sub1"), x, zeros); Output sub2 = ops::Sub(s.WithOpName("sub2"), zeros, y); Output concat = - ops::Stack(s.WithOpName("stack"), - {mul1, mul2, mul3, mul4, 
mul5, mul6, div1, div2, matmul1, - matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}); + ops::Concat(s.WithOpName("concat"), + {mul1, mul2, mul3, mul4, mul5, mul6, div1, div2, matmul1, + matmul2, add1, add2, bias_add1, bias_add2, sub1, sub2}, + 0); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - item.fetch = {"stack", "matmul3", "matmul4"}; + item.fetch = {"concat", "matmul3", "matmul4"}; ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(27, output.node_size()); + EXPECT_EQ(28, output.node_size()); for (int i = 0; i < output.node_size(); ++i) { const NodeDef& node = output.node(i); const string& name = node.name(); @@ -1625,19 +1626,19 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { Output acc4 = fun(s.WithOpName("acc4"), {c1, y, c2}); Output acc5 = fun(s.WithOpName("acc5"), {x, c1, c2}); Output acc6 = fun(s.WithOpName("acc6"), {x, c1, y, c2}); - Output stack = ops::Stack(s.WithOpName("stack"), - {acc0, acc1, acc2, acc3, acc4, acc5, acc6}); + Output concat = ops::Concat(s.WithOpName("concat"), + {acc0, acc1, acc2, acc3, acc4, acc5, acc6}, 0); GrapplerItem item; TF_CHECK_OK(s.ToGraphDef(&item.graph)); - item.fetch = {"stack"}; + item.fetch = {"concat"}; ConstantFolding optimizer(nullptr /* cpu_device */); GraphDef output; Status status = optimizer.Optimize(nullptr, item, &output); TF_EXPECT_OK(status); - EXPECT_EQ(16, output.node_size()); + EXPECT_EQ(17, output.node_size()); for (const NodeDef& node : output.node()) { if (node.name() == "acc0") { EXPECT_EQ("Const", node.op()); @@ -1695,86 +1696,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_AssociativeAndCommutative) { } } -TEST_F(ConstantFoldingTest, PartialFolding_Concat) { - Scope s = Scope::NewRootScope(); - Output x = ops::Placeholder(s.WithOpName("x"), DT_FLOAT, - ops::Placeholder::Shape(TensorShape({2, 2}))); - Output y = 
ops::Placeholder(s.WithOpName("y"), DT_FLOAT, - ops::Placeholder::Shape(TensorShape({2, 2}))); - Output z = ops::Placeholder(s.WithOpName("z"), DT_FLOAT, - ops::Placeholder::Shape(TensorShape({2, 2}))); - Output axis = ops::Const(s.WithOpName("axis"), 0, {}); - Output c1 = ops::Const(s.WithOpName("c1"), 1.0f, {2, 2}); - Output c2 = ops::Const(s.WithOpName("c2"), 2.0f, {2, 2}); - Output concat0 = ops::Concat(s.WithOpName("concat0"), {c1, c2, c1}, axis); - Output concat1 = ops::Concat(s.WithOpName("concat1"), {x, y, z}, axis); - Output concat2 = ops::Concat(s.WithOpName("concat2"), {c1, x, y}, axis); - Output concat3 = ops::Concat(s.WithOpName("concat3"), {c1, c2, z}, axis); - Output concat4 = ops::Concat(s.WithOpName("concat4"), {c1, y, c2}, axis); - Output concat5 = ops::Concat(s.WithOpName("concat5"), {x, c1, c2}, axis); - Output concat6 = ops::Concat(s.WithOpName("concat6"), {x, c1, y, c2}, axis); - Output concat7 = ops::Concat(s.WithOpName("concat7"), {x, y, c1, c2}, axis); - Output concat8 = ops::Concat(s.WithOpName("concat8"), {x, c1, c2, y}, axis); - Output concat9 = ops::Concat(s.WithOpName("concat9"), {c1, c2, x, y}, axis); - - GrapplerItem item; - TF_CHECK_OK(s.ToGraphDef(&item.graph)); - item.fetch = {"concat0", "concat1", "concat2", "concat3", "concat4", - "concat5", "concat6", "concat7", "concat8", "concat9"}; - - ConstantFolding optimizer(nullptr /* cpu_device */); - GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - - EXPECT_EQ(21, output.node_size()); - for (int i = 0; i < output.node_size(); ++i) { - const NodeDef& node = output.node(i); - if (node.name() == "concat0") { - EXPECT_EQ("Const", node.op()); - } else if (node.name() == "concat3") { - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ("ConstantFolding/concat3_partial_split_0", node.input(0)); - EXPECT_EQ("z", node.input(1)); - EXPECT_EQ("axis", node.input(2)); - } else if (node.name() == "concat5") { - EXPECT_EQ(3, node.input_size()); - 
EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("ConstantFolding/concat5_partial_split_1", node.input(1)); - EXPECT_EQ("axis", node.input(2)); - } else if (node.name() == "concat7") { - EXPECT_EQ(4, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("y", node.input(1)); - EXPECT_EQ("ConstantFolding/concat7_partial_split_2", node.input(2)); - EXPECT_EQ("axis", node.input(3)); - } else if (node.name() == "concat8") { - EXPECT_EQ(4, node.input_size()); - EXPECT_EQ("x", node.input(0)); - EXPECT_EQ("ConstantFolding/concat8_partial_split_1", node.input(1)); - EXPECT_EQ("y", node.input(2)); - EXPECT_EQ("axis", node.input(3)); - } else if (node.name() == "concat9") { - EXPECT_EQ(4, node.input_size()); - EXPECT_EQ("ConstantFolding/concat9_partial_split_0", node.input(0)); - EXPECT_EQ("x", node.input(1)); - EXPECT_EQ("y", node.input(2)); - EXPECT_EQ("axis", node.input(3)); - } else if (StringPiece(node.name()).starts_with("ConstantFolding/")) { - EXPECT_EQ("Const", node.op()); - } else { - EXPECT_EQ(item.graph.node(i).DebugString(), node.DebugString()); - } - } - - auto tensors_expected = EvaluateNodes(item.graph, {"concat0"}); - auto tensors = EvaluateNodes(output, {"concat0"}); - EXPECT_EQ(1, tensors_expected.size()); - EXPECT_EQ(1, tensors.size()); - test::ExpectTensorNear(tensors_expected[0], tensors[0], 1e-6); -} - -TEST_F(ConstantFoldingTest, PartialFolding_IdentityN) { +TEST_F(ConstantFoldingTest, IdenticalN) { tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); Output x = ops::Placeholder(scope.WithOpName("x"), DT_FLOAT, ops::Placeholder::Shape(TensorShape({}))); -- GitLab From 8044288df687b07004624275295b93dca07b267b Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 9 Mar 2018 14:40:18 -0800 Subject: [PATCH 683/884] Part of the update of tf.keras to the Keras 2.1.5 API. 
PiperOrigin-RevId: 188540513 --- .../python/keras/_impl/keras/__init__.py | 2 +- .../python/keras/_impl/keras/backend.py | 9 +- .../keras/_impl/keras/engine/network.py | 15 +- .../python/keras/_impl/keras/engine/saving.py | 243 +++++++++++++++--- .../keras/_impl/keras/engine/saving_test.py | 86 +++++++ .../keras/_impl/keras/engine/training.py | 51 ++-- .../_impl/keras/engine/training_arrays.py | 11 +- .../_impl/keras/engine/training_generator.py | 75 +++--- .../keras/_impl/keras/engine/training_test.py | 83 ++++-- .../python/keras/_impl/keras/optimizers.py | 24 ++ .../keras/_impl/keras/utils/data_utils.py | 23 +- 11 files changed, 479 insertions(+), 143 deletions(-) diff --git a/tensorflow/python/keras/_impl/keras/__init__.py b/tensorflow/python/keras/_impl/keras/__init__.py index b63907b2e6..53f5d31e9c 100644 --- a/tensorflow/python/keras/_impl/keras/__init__.py +++ b/tensorflow/python/keras/_impl/keras/__init__.py @@ -40,4 +40,4 @@ from tensorflow.python.keras._impl.keras.layers import Input from tensorflow.python.keras._impl.keras.models import Model from tensorflow.python.keras._impl.keras.models import Sequential -__version__ = '2.1.4-tf' +__version__ = '2.1.5-tf' diff --git a/tensorflow/python/keras/_impl/keras/backend.py b/tensorflow/python/keras/_impl/keras/backend.py index 688dc070e6..04866fbe0f 100644 --- a/tensorflow/python/keras/_impl/keras/backend.py +++ b/tensorflow/python/keras/_impl/keras/backend.py @@ -423,8 +423,9 @@ def get_session(): A TensorFlow session. 
""" global _SESSION - if ops.get_default_session() is not None: - session = ops.get_default_session() + default_session = ops.get_default_session() + if default_session is not None: + session = default_session else: if _SESSION is None: if not os.environ.get('OMP_NUM_THREADS'): @@ -495,7 +496,7 @@ def _is_current_explicit_device(device_type): """ device_type = device_type.upper() if device_type not in ['CPU', 'GPU']: - raise ValueError('device_type should be either "CPU" or "GPU".') + raise ValueError('`device_type` should be either "CPU" or "GPU".') device = _get_current_tf_device() return device is not None and device.device_type == device_type.upper() @@ -3514,7 +3515,7 @@ def l2_normalize(x, axis=None): Returns: A tensor. """ - return nn.l2_normalize(x, dim=axis) + return nn.l2_normalize(x, axis=axis) @tf_export('keras.backend.in_top_k') diff --git a/tensorflow/python/keras/_impl/keras/engine/network.py b/tensorflow/python/keras/_impl/keras/engine/network.py index bde16cdeb0..bf82390438 100644 --- a/tensorflow/python/keras/_impl/keras/engine/network.py +++ b/tensorflow/python/keras/_impl/keras/engine/network.py @@ -406,6 +406,7 @@ class Network(base_layer.Layer): def get_layer(self, name=None, index=None): """Retrieves a layer based on either its name (unique) or index. + If `name` and `index` are both provided, `index` will take precedence. Indices are based on order of horizontal graph traversal (bottom-up). Arguments: @@ -437,7 +438,7 @@ class Network(base_layer.Layer): @property def updates(self): - """Retrieve the network's updates. + """Retrieves the network's updates. Will only include updates that are either unconditional, or conditional on inputs to this model @@ -517,7 +518,7 @@ class Network(base_layer.Layer): @property def losses(self): - """Retrieve the network's losses. + """Retrieves the network's losses. 
Will only include losses that are either unconditional, or conditional on inputs to this model @@ -600,7 +601,7 @@ class Network(base_layer.Layer): return specs def call(self, inputs, training=None, mask=None): - """Call the model on new inputs. + """Calls the model on new inputs. In this case `call` just reapplies all ops in the graph to the new inputs @@ -1030,7 +1031,7 @@ class Network(base_layer.Layer): layer(input_tensors, **kwargs) def process_layer(layer_data): - """Deserialize a layer, then call it on appropriate inputs. + """Deserializes a layer, then calls it on appropriate inputs. Arguments: layer_data: layer config dict. @@ -1087,7 +1088,7 @@ class Network(base_layer.Layer): return cls(inputs=input_tensors, outputs=output_tensors, name=name) def save(self, filepath, overwrite=True, include_optimizer=True): - """Save the model to a single HDF5 file. + """Saves the model to a single HDF5 file. The savefile includes: - The model architecture, allowing to re-instantiate the model. @@ -1193,7 +1194,7 @@ class Network(base_layer.Layer): saving.load_weights_from_hdf5_group(f, self.layers) def _updated_config(self): - """Util hared between different serialization methods. + """Util shared between different serialization methods. Returns: Model config with Keras version information added. @@ -1333,7 +1334,7 @@ def _make_node_key(layer_name, node_index): def _map_graph_network(inputs, outputs): - """Validate a network's topology and gather its layers and nodes. + """Validates a network's topology and gathers its layers and nodes. Arguments: inputs: List of input tensors.
diff --git a/tensorflow/python/keras/_impl/keras/engine/saving.py b/tensorflow/python/keras/_impl/keras/engine/saving.py index 52522e6935..2ad06ca4fd 100644 --- a/tensorflow/python/keras/_impl/keras/engine/saving.py +++ b/tensorflow/python/keras/_impl/keras/engine/saving.py @@ -35,6 +35,7 @@ from tensorflow.python.util.tf_export import tf_export # pylint: disable=g-import-not-at-top try: import h5py + HDF5_OBJECT_HEADER_LIMIT = 64512 except ImportError: h5py = None @@ -47,7 +48,7 @@ except ImportError: @tf_export('keras.models.save_model') def save_model(model, filepath, overwrite=True, include_optimizer=True): - """Save a model to a HDF5 file. + """Saves a model to a HDF5 file. The saved model contains: - the model's configuration (topology) @@ -74,7 +75,7 @@ def save_model(model, filepath, overwrite=True, include_optimizer=True): raise ImportError('`save_model` requires h5py.') def get_json_type(obj): - """Serialize any object to a JSON-serializable structure. + """Serializes any object to a JSON-serializable structure. 
Arguments: obj: the object to serialize @@ -358,34 +359,6 @@ def model_from_json(json_string, custom_objects=None): return deserialize(config, custom_objects=custom_objects) -def save_weights_to_hdf5_group(f, layers): - from tensorflow.python.keras._impl.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top - - f.attrs['layer_names'] = [layer.name.encode('utf8') for layer in layers] - f.attrs['backend'] = K.backend().encode('utf8') - f.attrs['keras_version'] = str(keras_version).encode('utf8') - - for layer in layers: - g = f.create_group(layer.name) - symbolic_weights = layer.weights - weight_values = K.batch_get_value(symbolic_weights) - weight_names = [] - for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)): - if hasattr(w, 'name') and w.name: - name = str(w.name) - else: - name = 'param_' + str(i) - weight_names.append(name.encode('utf8')) - g.attrs['weight_names'] = weight_names - for name, val in zip(weight_names, weight_values): - param_dset = g.create_dataset(name, val.shape, dtype=val.dtype) - if not val.shape: - # scalar - param_dset[()] = val - else: - param_dset[:] = val - - def preprocess_weights_for_loading(layer, weights, original_keras_version=None, @@ -549,9 +522,140 @@ def preprocess_weights_for_loading(layer, # split the bias into half and merge weights[2] = bias[:units * 4] + bias[units * 4:] + return convert_rnn_weights(layer, weights) + + +def convert_rnn_weights(layer, weights): + """Converts weights for RNN layers between native and CuDNN format. + + Input kernels for each gate are transposed and converted between Fortran + and C layout, recurrent kernels are transposed. For LSTM biases are summed/ + split in half, for GRU biases are reshaped. + + Weights can be converted in both directions between `LSTM` and `CuDNNLSTM` + and between `CuDNNGRU` and `GRU(reset_after=True)`. Default `GRU` is not + compatible with `CuDNNGRU`. 
+ + For missing biases in `LSTM`/`GRU` (`use_bias=False`) no conversion is made. + + Arguments: + layer: Target layer instance. + weights: List of source weights values (input kernels, recurrent + kernels, [biases]) (Numpy arrays). + + Returns: + A list of converted weights values (Numpy arrays). + + Raises: + ValueError: for incompatible GRU layer/weights or incompatible biases + """ + + def transform_kernels(kernels, func, n_gates): + """Transforms kernel for each gate separately using given function. + + Arguments: + kernels: Stacked array of kernels for individual gates. + func: Function applied to kernel of each gate. + n_gates: Number of gates (4 for LSTM, 3 for GRU). + Returns: + Stacked array of transformed kernels. + """ + return np.hstack([func(k) for k in np.hsplit(kernels, n_gates)]) + + def transpose_input(from_cudnn): + """Makes a function that transforms input kernels from/to CuDNN format. + + It keeps the shape, but changes between the layout (Fortran/C). Eg.: + + ``` + Keras CuDNN + [[0, 1, 2], <---> [[0, 2, 4], + [3, 4, 5]] [1, 3, 5]] + ``` + + It can be passed to `transform_kernels()`. + + Arguments: + from_cudnn: `True` if source weights are in CuDNN format, `False` + if they're in plain Keras format. + Returns: + Function that converts input kernel to the other format. 
+ """ + order = 'F' if from_cudnn else 'C' + + def transform(kernel): + return kernel.T.reshape(kernel.shape, order=order) + + return transform + + target_class = layer.__class__.__name__ + + # convert the weights between CuDNNLSTM and LSTM + if target_class in ['LSTM', 'CuDNNLSTM'] and len(weights) == 3: + # determine if we're loading a CuDNNLSTM layer + # from the number of bias weights: + # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) + # if there's no bias weight in the file, skip this conversion + units = weights[1].shape[0] + bias_shape = weights[2].shape + n_gates = 4 + + if bias_shape == (2 * units * n_gates,): + source = 'CuDNNLSTM' + elif bias_shape == (units * n_gates,): + source = 'LSTM' + else: + raise ValueError('Invalid bias shape: ' + str(bias_shape)) + + def convert_lstm_weights(weights, from_cudnn=True): + # Transpose (and reshape) input and recurrent kernels. + kernels = transform_kernels(weights[0], transpose_input(from_cudnn), + n_gates) + recurrent_kernels = transform_kernels(weights[1], lambda k: k.T, n_gates) + if from_cudnn: # Merge input and recurrent biases into a single set. + biases = np.sum(np.split(weights[2], 2, axis=0), axis=0) + else: + # Split single set of biases evenly to two sets. 
+ biases = np.tile(0.5 * weights[2], 2) + return [kernels, recurrent_kernels, biases] + + if source != target_class: + weights = convert_lstm_weights(weights, from_cudnn=source == 'CuDNNLSTM') + + # TODO(fchollet): add feature after GRU is refactored: + # convert the weights between `CuDNNGRU` and `GRU(reset_after=True)` return weights +def save_weights_to_hdf5_group(f, layers): + from tensorflow.python.keras._impl.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top + + save_attributes_to_hdf5_group( + f, 'layer_names', [layer.name.encode('utf8') for layer in layers]) + f.attrs['backend'] = K.backend().encode('utf8') + f.attrs['keras_version'] = str(keras_version).encode('utf8') + + for layer in layers: + g = f.create_group(layer.name) + symbolic_weights = layer.weights + weight_values = K.batch_get_value(symbolic_weights) + weight_names = [] + for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)): + if hasattr(w, 'name') and w.name: + name = str(w.name) + else: + name = 'param_' + str(i) + weight_names.append(name.encode('utf8')) + save_attributes_to_hdf5_group(g, 'weight_names', weight_names) + for name, val in zip(weight_names, weight_values): + param_dset = g.create_dataset(name, val.shape, dtype=val.dtype) + if not val.shape: + # scalar + param_dset[()] = val + else: + param_dset[:] = val + + def load_weights_from_hdf5_group(f, layers): """Implements topological (order-based) weight loading. 
@@ -578,11 +682,11 @@ def load_weights_from_hdf5_group(f, layers): if weights: filtered_layers.append(layer) - layer_names = [n.decode('utf8') for n in f.attrs['layer_names']] + layer_names = load_attributes_from_hdf5_group(f, 'layer_names') filtered_layer_names = [] for name in layer_names: g = f[name] - weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] + weight_names = load_attributes_from_hdf5_group(g, 'weight_names') if weight_names: filtered_layer_names.append(name) layer_names = filtered_layer_names @@ -597,7 +701,7 @@ def load_weights_from_hdf5_group(f, layers): weight_value_tuples = [] for k, name in enumerate(layer_names): g = f[name] - weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] + weight_names = load_attributes_from_hdf5_group(g, 'weight_names') weight_values = [g[weight_name] for weight_name in weight_names] layer = filtered_layers[k] symbolic_weights = layer.weights @@ -640,7 +744,7 @@ def load_weights_from_hdf5_group_by_name(f, layers): original_backend = None # New file format. - layer_names = [n.decode('utf8') for n in f.attrs['layer_names']] + layer_names = load_attributes_from_hdf5_group(f, 'layer_names') # Reverse index of layer name to list of layers with name. index = {} @@ -653,7 +757,7 @@ def load_weights_from_hdf5_group_by_name(f, layers): weight_value_tuples = [] for k, name in enumerate(layer_names): g = f[name] - weight_names = [n.decode('utf8') for n in g.attrs['weight_names']] + weight_names = load_attributes_from_hdf5_group(g, 'weight_names') weight_values = [g[weight_name] for weight_name in weight_names] for layer in index.get(name, []): @@ -669,3 +773,72 @@ def load_weights_from_hdf5_group_by_name(f, layers): for i in range(len(weight_values)): weight_value_tuples.append((symbolic_weights[i], weight_values[i])) K.batch_set_value(weight_value_tuples) + + +def save_attributes_to_hdf5_group(group, name, data): + """Saves attributes (data) of the specified name into the HDF5 group. 
+ + This method deals with an inherent problem of HDF5 file which is not + able to store data larger than HDF5_OBJECT_HEADER_LIMIT bytes. + + Arguments: + group: A pointer to a HDF5 group. + name: A name of the attributes to save. + data: Attributes data to store. + + Raises: + RuntimeError: If any single attribute is too large to be saved. + """ + # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT` + # because in that case even chunking the array would not make the saving + # possible. + bad_attributes = [x for x in data if len(x) > HDF5_OBJECT_HEADER_LIMIT] + + # Expecting this to never be true. + if bad_attributes: + raise RuntimeError('The following attributes cannot be saved to HDF5 ' + 'file because they are larger than %d bytes: %s' % + (HDF5_OBJECT_HEADER_LIMIT, + ', '.join([x for x in bad_attributes]))) + + data_npy = np.asarray(data) + + num_chunks = 1 + chunked_data = np.array_split(data_npy, num_chunks) + + # This will never loop forever thanks to the test above. + while any([x.nbytes > HDF5_OBJECT_HEADER_LIMIT for x in chunked_data]): + num_chunks += 1 + chunked_data = np.array_split(data_npy, num_chunks) + + if num_chunks > 1: + for chunk_id, chunk_data in enumerate(chunked_data): + group.attrs['%s%d' % (name, chunk_id)] = chunk_data + else: + group.attrs[name] = data + + +def load_attributes_from_hdf5_group(group, name): + """Loads attributes of the specified name from the HDF5 group. + + This method deals with an inherent problem + of HDF5 file which is not able to store + data larger than HDF5_OBJECT_HEADER_LIMIT bytes. + + Arguments: + group: A pointer to a HDF5 group. + name: A name of the attributes to load. + + Returns: + data: Attributes data. 
+ """ + if name in group.attrs: + data = [n.decode('utf8') for n in group.attrs[name]] + else: + data = [] + chunk_id = 0 + while '%s%d' % (name, chunk_id) in group.attrs: + data.extend( + [n.decode('utf8') for n in group.attrs['%s%d' % (name, chunk_id)]]) + chunk_id += 1 + return data diff --git a/tensorflow/python/keras/_impl/keras/engine/saving_test.py b/tensorflow/python/keras/_impl/keras/engine/saving_test.py index bdb17641b0..4a18cc2e11 100644 --- a/tensorflow/python/keras/_impl/keras/engine/saving_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/saving_test.py @@ -370,6 +370,92 @@ class TestWholeModelSaving(test.TestCase): self.assertAllClose(mean, model.layers[1].arguments['mu']) self.assertAllClose(std, model.layers[1].arguments['std']) + def test_saving_model_with_long_layer_names(self): + if h5py is None: + return # Skip test if models cannot be saved. + + with self.test_session(): + # This layer name will make the `layer_names` HDF5 attribute blow + # out of proportion. Note that it fits into the internal HDF5 + # attribute memory limit on its own but because h5py converts + # the list of layer names into numpy array, which uses the same + # amount of memory for every item, it increases the memory + # requirements substantially. + x = keras.Input(shape=(2,), name='input_' + ('x' * (2**15))) + f = x + for i in range(4): + f = keras.layers.Dense(2, name='dense_%d' % (i,))(f) + model = keras.Model(inputs=[x], outputs=[f]) + model.compile(loss='mse', optimizer='adam', metrics=['acc']) + + x = np.random.random((1, 2)) + y = np.random.random((1, 2)) + model.train_on_batch(x, y) + out = model.predict(x) + + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + model = keras.models.load_model(fname) + + # Check that the HDF5 file contains chunked array + # of layer names. 
+ with h5py.File(fname, 'r') as h5file: + num_names_arrays = len([attr for attr in h5file['model_weights'].attrs + if attr.startswith('layer_names')]) + # The chunking of layer names array should have happened. + self.assertGreater(num_names_arrays, 0) + out2 = model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + # Cleanup + os.close(fd) + os.remove(fname) + + def test_saving_model_with_long_weights_names(self): + if h5py is None: + return # Skip test if models cannot be saved. + + with self.test_session(): + x = keras.Input(shape=(2,), name='nested_model_input') + f = x + for i in range(4): + f = keras.layers.Dense(2, name='nested_model_dense_%d' % (i,))(f) + # This layer name will make the `weight_names` + # HDF5 attribute blow out of proportion. + f = keras.layers.Dense(2, name='nested_model_output' + ('x' * (2**15)))(f) + nested_model = keras.Model(inputs=[x], outputs=[f], name='nested_model') + + x = keras.Input(shape=(2,), name='outer_model_input') + f = nested_model(x) + f = keras.layers.Dense(2, name='outer_model_output')(f) + + model = keras.Model(inputs=[x], outputs=[f]) + model.compile(loss='mse', optimizer='adam', metrics=['acc']) + + x = np.random.random((1, 2)) + y = np.random.random((1, 2)) + model.train_on_batch(x, y) + out = model.predict(x) + + fd, fname = tempfile.mkstemp('.h5') + keras.models.save_model(model, fname) + model = keras.models.load_model(fname) + + # Check that the HDF5 file contains chunked array + # of weight names. + with h5py.File(fname, 'r') as h5file: + num_weight_arrays = len( + [attr for attr in h5file['model_weights']['nested_model'].attrs + if attr.startswith('weight_names')]) + # The chunking of weight names array should have happened. 
+ self.assertGreater(num_weight_arrays, 0) + out2 = model.predict(x) + self.assertAllClose(out, out2, atol=1e-05) + + # Cleanup + os.close(fd) + os.remove(fname) + if __name__ == '__main__': test.main() diff --git a/tensorflow/python/keras/_impl/keras/engine/training.py b/tensorflow/python/keras/_impl/keras/engine/training.py index 8b82c0b313..57506f9aff 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training.py +++ b/tensorflow/python/keras/_impl/keras/engine/training.py @@ -1542,20 +1542,19 @@ class Model(Network): max_queue_size: Integer. Maximum size for the generator queue. If unspecified, `max_queue_size` will default to 10. workers: Integer. Maximum number of processes to spin up - when using process based threading. + when using process-based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. - use_multiprocessing: Boolean. If True, use process based threading. - If unspecified, `workers` will default to False. - Note that because - this implementation relies on multiprocessing, - you should not pass - non picklable arguments to the generator - as they can't be passed - easily to children processes. - shuffle: Whether to shuffle the order of the batches at + use_multiprocessing: Boolean. + If `True`, use process-based threading. + If unspecified, `use_multiprocessing` will default to `False`. + Note that because this implementation relies on multiprocessing, + you should not pass non-picklable arguments to the generator + as they can't be passed easily to children processes. + shuffle: Boolean. Whether to shuffle the order of the batches at the beginning of each epoch. Only used with instances - of `Sequence` (keras.utils.Sequence). + of `Sequence` (`keras.utils.Sequence`). + Has no effect when `steps_per_epoch` is not `None`. 
initial_epoch: Epoch at which to start training (useful for resuming a previous training run) @@ -1625,16 +1624,15 @@ class Model(Network): the `len(generator)` as a number of steps. max_queue_size: maximum size for the generator queue workers: Integer. Maximum number of processes to spin up - when using process based threading. + when using process-based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. - use_multiprocessing: if True, use process based threading. - Note that because - this implementation relies on multiprocessing, - you should not pass - non picklable arguments to the generator - as they can't be passed - easily to children processes. + use_multiprocessing: Boolean. + If `True`, use process-based threading. + If unspecified, `use_multiprocessing` will default to `False`. + Note that because this implementation relies on multiprocessing, + you should not pass non-picklable arguments to the generator + as they can't be passed easily to children processes. Returns: Scalar test loss (if the model has a single output and no metrics) @@ -1684,16 +1682,15 @@ class Model(Network): the `len(generator)` as a number of steps. max_queue_size: Maximum size for the generator queue. workers: Integer. Maximum number of processes to spin up - when using process based threading. + when using process-based threading. If unspecified, `workers` will default to 1. If 0, will execute the generator on the main thread. - use_multiprocessing: If `True`, use process based threading. - Note that because - this implementation relies on multiprocessing, - you should not pass - non picklable arguments to the generator - as they can't be passed - easily to children processes. + use_multiprocessing: Boolean. + If `True`, use process-based threading. + If unspecified, `use_multiprocessing` will default to `False`. 
+ Note that because this implementation relies on multiprocessing, + you should not pass non-picklable arguments to the generator + as they can't be passed easily to children processes. verbose: verbosity mode, 0 or 1. Returns: diff --git a/tensorflow/python/keras/_impl/keras/engine/training_arrays.py b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py index 9291ef5fe6..18116e3a14 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_arrays.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_arrays.py @@ -298,20 +298,13 @@ def predict_loop(model, inputs, batch_size=32, verbose=0, steps=None): else: ins = inputs - if hasattr(model, 'metrics'): - for m in model.metrics: - if isinstance(m, Layer): - m.reset_states() - num_samples = training_utils.check_num_samples( inputs, batch_size, steps, 'steps') if verbose == 1: if steps is not None: - progbar = Progbar(target=steps, - stateful_metrics=model.stateful_metric_names) + progbar = Progbar(target=steps) else: - progbar = Progbar(target=num_samples, - stateful_metrics=model.stateful_metric_names) + progbar = Progbar(target=num_samples) indices_for_conversion_to_dense = [] for i in range(len(model._feed_inputs)): diff --git a/tensorflow/python/keras/_impl/keras/engine/training_generator.py b/tensorflow/python/keras/_impl/keras/engine/training_generator.py index 4af62c85d5..58b5bc39c1 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_generator.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_generator.py @@ -112,42 +112,25 @@ def fit_generator(model, val_enqueuer = None try: - if do_validation: - if val_gen: - if workers > 0: - if isinstance(validation_data, Sequence): - val_enqueuer = OrderedEnqueuer( - validation_data, use_multiprocessing=use_multiprocessing) - if validation_steps is None: - validation_steps = len(validation_data) - else: - val_enqueuer = GeneratorEnqueuer( - validation_data, - use_multiprocessing=use_multiprocessing, - wait_time=wait_time) - 
val_enqueuer.start(workers=workers, max_queue_size=max_queue_size) - validation_generator = val_enqueuer.get() - else: - validation_generator = validation_data + if do_validation and not val_gen: + # Prepare data for validation + if len(validation_data) == 2: + val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence + val_sample_weight = None + elif len(validation_data) == 3: + val_x, val_y, val_sample_weight = validation_data # pylint: disable=unpacking-non-sequence else: - if len(validation_data) == 2: - val_x, val_y = validation_data # pylint: disable=unpacking-non-sequence - val_sample_weight = None - elif len(validation_data) == 3: - val_x, val_y, val_sample_weight = validation_data # pylint: disable=unpacking-non-sequence - else: - raise ValueError( - '`validation_data` should be a tuple ' - '`(val_x, val_y, val_sample_weight)` ' - 'or `(val_x, val_y)`. Found: ' + str(validation_data)) - val_x, val_y, val_sample_weights = model._standardize_user_data( - val_x, val_y, val_sample_weight) - val_data = val_x + val_y + val_sample_weights - if model.uses_learning_phase and not isinstance( - K.learning_phase(), int): - val_data += [0] - for cbk in callbacks: - cbk.validation_data = val_data + raise ValueError( + '`validation_data` should be a tuple ' + '`(val_x, val_y, val_sample_weight)` ' + 'or `(val_x, val_y)`. Found: ' + str(validation_data)) + val_x, val_y, val_sample_weights = model._standardize_user_data( + val_x, val_y, val_sample_weight) + val_data = val_x + val_y + val_sample_weights + if model.uses_learning_phase and not isinstance(K.learning_phase(), int): + val_data += [0.] 
+ for cbk in callbacks: + cbk.validation_data = val_data if workers > 0: if is_sequence: @@ -163,7 +146,10 @@ def fit_generator(model, enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() else: - output_generator = generator + if is_sequence: + output_generator = iter(generator) + else: + output_generator = generator callback_model.stop_training = False # Construct epoch logs. @@ -218,7 +204,12 @@ def fit_generator(model, if steps_done >= steps_per_epoch and do_validation: if val_gen: val_outs = evaluate_generator( - model, validation_generator, validation_steps, workers=0) + model, + validation_data, + validation_steps, + workers=workers, + use_multiprocessing=use_multiprocessing, + max_queue_size=max_queue_size) else: # No need for try/except because # data has already been validated. @@ -297,7 +288,10 @@ def evaluate_generator(model, enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() else: - output_generator = generator + if is_sequence: + output_generator = iter(generator) + else: + output_generator = generator while steps_done < steps: generator_output = next(output_generator) @@ -387,7 +381,10 @@ def predict_generator(model, enqueuer.start(workers=workers, max_queue_size=max_queue_size) output_generator = enqueuer.get() else: - output_generator = generator + if is_sequence: + output_generator = iter(generator) + else: + output_generator = generator if verbose == 1: progbar = Progbar(target=steps) diff --git a/tensorflow/python/keras/_impl/keras/engine/training_test.py b/tensorflow/python/keras/_impl/keras/engine/training_test.py index 38ba0f0eae..fd91dbba52 100644 --- a/tensorflow/python/keras/_impl/keras/engine/training_test.py +++ b/tensorflow/python/keras/_impl/keras/engine/training_test.py @@ -340,20 +340,21 @@ class TrainingTest(test.TestCase): if scipy_sparse is None: return - test_inputs = [ - scipy_sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)] - 
test_outputs = [ - scipy_sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)] - in1 = keras.layers.Input(shape=(3,)) - in2 = keras.layers.Input(shape=(3,)) - out1 = keras.layers.Dropout(0.5, name='dropout')(in1) - out2 = keras.layers.Dense(4, name='dense_1')(in2) - model = keras.Model([in1, in2], [out1, out2]) - model.predict(test_inputs, batch_size=2) - model.compile('rmsprop', 'mse') - model.fit(test_inputs, test_outputs, - epochs=1, batch_size=2, validation_split=0.5) - model.evaluate(test_inputs, test_outputs, batch_size=2) + with self.test_session(): + test_inputs = [ + scipy_sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)] + test_outputs = [ + scipy_sparse.random(6, i, density=0.25).tocsr() for i in range(3, 5)] + in1 = keras.layers.Input(shape=(3,)) + in2 = keras.layers.Input(shape=(3,)) + out1 = keras.layers.Dropout(0.5, name='dropout')(in1) + out2 = keras.layers.Dense(4, name='dense_1')(in2) + model = keras.Model([in1, in2], [out1, out2]) + model.predict(test_inputs, batch_size=2) + model.compile('rmsprop', 'mse') + model.fit(test_inputs, test_outputs, + epochs=1, batch_size=2, validation_split=0.5) + model.evaluate(test_inputs, test_outputs, batch_size=2) def test_that_trainable_disables_updates(self): val_a = np.random.random((10, 4)) @@ -876,9 +877,9 @@ class TestGeneratorMethods(test.TestCase): def custom_generator(): batch_size = 10 - n_samples = 50 + num_samples = 50 while True: - batch_index = np.random.randint(0, n_samples - batch_size) + batch_index = np.random.randint(0, num_samples - batch_size) start = batch_index end = start + batch_size x = arr_data[start: end] @@ -957,9 +958,9 @@ class TestGeneratorMethods(test.TestCase): def custom_generator(): batch_size = 10 - n_samples = 50 + num_samples = 50 while True: - batch_index = np.random.randint(0, n_samples - batch_size) + batch_index = np.random.randint(0, num_samples - batch_size) start = batch_index end = start + batch_size x = arr_data[start: end] @@ -1033,6 +1034,52 
@@ class TestGeneratorMethods(test.TestCase): max_queue_size=10, use_multiprocessing=False) + def test_training_with_sequences(self): + + class DummySequence(keras.utils.Sequence): + + def __getitem__(self, idx): + return np.zeros([10, 2]), np.ones([10]) + + def __len__(self): + return 10 + + arr_data = np.random.random((50, 2)) + arr_labels = np.random.random((50,)) + arr_sample_weights = np.random.random((50,)) + + def custom_generator(): + batch_size = 10 + num_samples = 50 + while True: + batch_index = np.random.randint(0, num_samples - batch_size) + start = batch_index + end = start + batch_size + x = arr_data[start: end] + y = arr_labels[start: end] + w = arr_sample_weights[start: end] + yield x, y, w + + with self.test_session(): + model = keras.models.Sequential() + model.add(keras.layers.Dense(1, input_shape=(2,))) + model.compile(loss='mse', optimizer='sgd') + + model.fit_generator(DummySequence(), + steps_per_epoch=10, + validation_data=custom_generator(), + validation_steps=1, + max_queue_size=10, + workers=0, + use_multiprocessing=True) + model.fit_generator(DummySequence(), + steps_per_epoch=10, + validation_data=custom_generator(), + validation_steps=1, + max_queue_size=10, + workers=0, + use_multiprocessing=False) + class TestTrainingUtils(test.TestCase): diff --git a/tensorflow/python/keras/_impl/keras/optimizers.py b/tensorflow/python/keras/_impl/keras/optimizers.py index 6520128c5b..b715d722b9 100644 --- a/tensorflow/python/keras/_impl/keras/optimizers.py +++ b/tensorflow/python/keras/_impl/keras/optimizers.py @@ -95,7 +95,26 @@ class Optimizer(object): raise NotImplementedError def get_gradients(self, loss, params): + """Returns gradients of `loss` with respect to `params`. + + Arguments: + loss: Loss tensor. + params: List of variables. + + Returns: + List of gradient tensors. + + Raises: + ValueError: In case any gradient cannot be computed (e.g. if gradient + function not implemented). 
+ """ grads = K.gradients(loss, params) + if None in grads: + raise ValueError('An operation has `None` for gradient. ' + 'Please make sure that all of your ops have a ' + 'gradient defined (i.e. are differentiable). ' + 'Common ops without gradient: ' + 'K.argmax, K.round, K.eval.') if hasattr(self, 'clipnorm') and self.clipnorm > 0: norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads])) grads = [clip_norm(g, self.clipnorm, norm) for g in grads] @@ -120,6 +139,11 @@ class Optimizer(object): ValueError: in case of incompatible weight shapes. """ params = self.weights + if len(params) != len(weights): + raise ValueError( + 'Length of the specified weight list (' + str(len(weights)) + + ') does not match the number of weights ' + 'of the optimizer (' + str(len(params)) + ')') weight_value_tuples = [] param_values = K.batch_get_value(params) for pv, p, w in zip(param_values, params, weights): diff --git a/tensorflow/python/keras/_impl/keras/utils/data_utils.py b/tensorflow/python/keras/_impl/keras/utils/data_utils.py index e87c8f48ef..4c49544c6a 100644 --- a/tensorflow/python/keras/_impl/keras/utils/data_utils.py +++ b/tensorflow/python/keras/_impl/keras/utils/data_utils.py @@ -393,6 +393,16 @@ class Sequence(object): """ pass + def __iter__(self): + """Creates an infinite generator that iterates over the Sequence. + + Yields: + Sequence items. + """ + while True: + for item in (self[i] for i in range(len(self))): + yield item + # Global variables to be shared across processes _SHARED_SEQUENCES = {} @@ -400,6 +410,11 @@ _SHARED_SEQUENCES = {} _SEQUENCE_COUNTER = None +def init_pool(seqs): + global _SHARED_SEQUENCES + _SHARED_SEQUENCES = seqs + + def get_index(uid, i): """Get the value from the Sequence `uid` at index `i`. 
@@ -532,9 +547,11 @@ class OrderedEnqueuer(SequenceEnqueuer): (when full, workers could block on `put()`) """ if self.use_multiprocessing: - self.executor_fn = lambda: multiprocessing.Pool(workers) + self.executor_fn = lambda seqs: multiprocessing.Pool( # pylint: disable=g-long-lambda + workers, initializer=init_pool, initargs=(seqs,)) else: - self.executor_fn = lambda: ThreadPool(workers) + # We do not need the init since it's threads. + self.executor_fn = lambda _: ThreadPool(workers) self.workers = workers self.queue = queue.Queue(max_queue_size) self.stop_signal = threading.Event() @@ -557,7 +574,7 @@ class OrderedEnqueuer(SequenceEnqueuer): if self.shuffle: random.shuffle(sequence) - with closing(self.executor_fn()) as executor: + with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: for i in sequence: if self.stop_signal.is_set(): return -- GitLab From 41fe9b97b90dd67479ac57fda94ce5ee862df960 Mon Sep 17 00:00:00 2001 From: Michael Case Date: Fri, 9 Mar 2018 14:41:06 -0800 Subject: [PATCH 684/884] Internal Change. PiperOrigin-RevId: 188540659 --- tensorflow/tools/ci_build/copy_binary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/copy_binary.py b/tensorflow/tools/ci_build/copy_binary.py index 90fd6a6e71..ff26b052f3 100755 --- a/tensorflow/tools/ci_build/copy_binary.py +++ b/tensorflow/tools/ci_build/copy_binary.py @@ -43,7 +43,7 @@ BINARY_STRING_TEMPLATE = "%s-%s-%s.whl" def check_existence(filename): """Check the existence of file or dir.""" if not os.path.exists(filename): - raise RuntimeError("%s not found.") + raise RuntimeError("%s not found." % filename) def copy_binary(directory, origin_tag, new_tag, version, gpu=False): -- GitLab From 88c75b081fe17f04c58c954a76d673abd15255cb Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Fri, 9 Mar 2018 14:42:51 -0800 Subject: [PATCH 685/884] Eager: Fix a Dimension PyObject leak, test for it. 
PiperOrigin-RevId: 188540944 --- tensorflow/python/eager/pywrap_tfe_src.cc | 1 + tensorflow/python/framework/test_util.py | 15 ++++++++++----- tensorflow/python/layers/core_test.py | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 9146e2bb95..7ccfe9120c 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -444,6 +444,7 @@ bool SetOpAttrScalar( &dims[i])) { return false; } + Py_DECREF(inner_py_value); } TFE_OpSetAttrShape(op, key, dims.get(), num_dims, status); } diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index 9fc1154201..fde9c85891 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -56,6 +56,7 @@ from tensorflow.python.framework import errors from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import random_seed +from tensorflow.python.framework import tensor_shape from tensorflow.python.framework import versions from tensorflow.python.ops import array_ops from tensorflow.python.ops import resource_variable_ops @@ -454,15 +455,19 @@ def assert_no_new_tensors(f): def decorator(self, **kwargs): """Finds existing Tensors, runs the test, checks for new Tensors.""" - def _is_tensor(obj): + def _is_tensorflow_object(obj): try: - return (isinstance(obj, ops.Tensor) or - isinstance(obj, variables.Variable)) + return isinstance(obj, ( + ops.Tensor, + variables.Variable, + tensor_shape.Dimension, + tensor_shape.TensorShape)) except ReferenceError: # If the object no longer exists, we don't care about it. 
return False - tensors_before = set(id(obj) for obj in gc.get_objects() if _is_tensor(obj)) + tensors_before = set(id(obj) for obj in gc.get_objects() + if _is_tensorflow_object(obj)) outside_graph_key = ops.get_default_graph()._graph_key with ops.Graph().as_default(): # Run the test in a new graph so that collections get cleared when it's @@ -477,7 +482,7 @@ def assert_no_new_tensors(f): gc.collect() tensors_after = [ obj for obj in gc.get_objects() - if _is_tensor(obj) and id(obj) not in tensors_before + if _is_tensorflow_object(obj) and id(obj) not in tensors_before ] if tensors_after: raise AssertionError(("%d Tensors not deallocated after test: %s" % ( diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py index ae19866d7a..09287e4906 100644 --- a/tensorflow/python/layers/core_test.py +++ b/tensorflow/python/layers/core_test.py @@ -67,7 +67,7 @@ class DenseTest(test.TestCase): variables.global_variables_initializer().run() self.assertAllEqual(x.eval(), [[0.0]]) - @test_util.run_in_graph_and_eager_modes() + @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True) def testCall(self): dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense') inputs = random_ops.random_uniform((5, 4), seed=1) -- GitLab From 9f06c3e1fd6eb0fe7719549afe01ea8a1a121781 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 9 Mar 2018 15:08:48 -0800 Subject: [PATCH 686/884] Remove merge conflict --- tensorflow/python/BUILD | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tensorflow/python/BUILD b/tensorflow/python/BUILD index 2a7a7197a8..3cbeb34c54 100644 --- a/tensorflow/python/BUILD +++ b/tensorflow/python/BUILD @@ -4012,14 +4012,11 @@ py_test( srcs = ["training/checkpoint_utils_test.py"], srcs_version = "PY2AND3", tags = [ -<<<<<<< HEAD - "no_cuda_on_cpu_tap", -======= "manual", + "no_cuda_on_cpu_tap", "no_oss", "no_windows", "notap", ->>>>>>> 88c75b081fe17f04c58c954a76d673abd15255cb ], deps = [ 
":client", -- GitLab From be51a9fac97d1497f59ecfc3a9aec4b5f84c9b76 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Fri, 9 Mar 2018 15:27:50 -0800 Subject: [PATCH 687/884] Migrate tf.contrib.bayesflow.optimizers to tfp.optimziers. PiperOrigin-RevId: 188547477 --- tensorflow/contrib/bayesflow/BUILD | 44 --- tensorflow/contrib/bayesflow/__init__.py | 2 - .../kernel_tests/sgld_optimizer_test.py | 212 ------------- .../variational_sgd_optimizer_test.py | 268 ----------------- .../bayesflow/python/ops/optimizers.py | 36 --- .../bayesflow/python/ops/sgld_optimizer.py | 220 -------------- .../python/ops/variational_sgd_optimizer.py | 279 ------------------ 7 files changed, 1061 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/variational_sgd_optimizer_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/optimizers.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/variational_sgd_optimizer.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index e1b34d6deb..88956f0512 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -119,50 +119,6 @@ cuda_py_test( tags = ["nomsan"], ) -cuda_py_test( - name = "sgld_optimizer_test", - size = "small", - srcs = ["python/kernel_tests/sgld_optimizer_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/contrib/layers:layers_py", - "//tensorflow/python/ops/distributions", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - 
"//tensorflow/python:platform_test", - "//tensorflow/python:random_seed", - ], - tags = ["notsan"], -) - -cuda_py_test( - name = "variational_sgd_optimizer_test", - size = "small", - srcs = ["python/kernel_tests/variational_sgd_optimizer_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/contrib/distributions:distributions_py", - "//tensorflow/contrib/layers:layers_py", - "//tensorflow/python/ops/distributions", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:math_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:random_seed", - ], - tags = ["notsan"], -) - filegroup( name = "all_files", srcs = glob( diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index bff8ac2cf6..89dfa583a4 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -25,7 +25,6 @@ from tensorflow.contrib.bayesflow.python.ops import custom_grad from tensorflow.contrib.bayesflow.python.ops import hmc from tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo -from tensorflow.contrib.bayesflow.python.ops import optimizers # pylint: enable=unused-import,line-too-long from tensorflow.python.util.all_util import remove_undocumented @@ -37,7 +36,6 @@ _allowed_symbols = [ 'hmc', 'metropolis_hastings', 'monte_carlo', - 'optimizers', 'special_math', 'stochastic_variables', 'variational_inference', diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py deleted file mode 100644 index 756c25683b..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/sgld_optimizer_test.py +++ 
/dev/null @@ -1,212 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functional test for GradientDescent.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import math -from tensorflow.contrib.bayesflow.python.ops.optimizers import SGLDOptimizer -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import test - - -class SGLDOptimizerTest(test.TestCase): - - def testBasic(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - decay_rate = 0.53 - sgd_optimizer = SGLDOptimizer(3.0, preconditioner_decay_rate=decay_rate) - sgd_op = sgd_optimizer.apply_gradients( - zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - # Run 1 
step of sgd - sgd_op.run() - # Validate updated params - grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + - (1 - decay_rate) * 0.1**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) - grads_scaled = (0.5 * 0.01 / math.sqrt( - decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval()) - self.assertAllCloseAccordingToType(1, sgd_optimizer._counter.eval()) - - def testBasicMultiInstance(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - vara = variables.Variable([1.1, 2.1], dtype=dtype) - varb = variables.Variable([3.0, 4.0], dtype=dtype) - gradsa = constant_op.constant([0.1, 0.1], dtype=dtype) - gradsb = constant_op.constant([0.01, 0.01], dtype=dtype) - decay_rate = 0.5 - sgd_optimizer = SGLDOptimizer(3.0, preconditioner_decay_rate=decay_rate) - sgd_op = sgd_optimizer.apply_gradients( - zip([grads0, grads1], [var0, var1])) - sgd_optimizer2 = SGLDOptimizer( - 3.0, preconditioner_decay_rate=decay_rate) - sgd_op2 = sgd_optimizer2.apply_gradients( - zip([gradsa, gradsb], [vara, varb])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - self.assertAllCloseAccordingToType([1.1, 2.1], vara.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], varb.eval()) - - # Run 1 step of sgd - sgd_op.run() - sgd_op2.run() - # Validate updated params - grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + - (1 - decay_rate) * 0.1**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [1.1 - 3.0 * 
grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) - self.assertAllCloseAccordingToType( - [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], vara.eval()) - - grads_scaled = (0.5 * 0.01 / math.sqrt( - decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval()) - self.assertAllCloseAccordingToType( - [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], varb.eval()) - self.assertNotEqual(sgd_optimizer.variable_scope, - sgd_optimizer2.variable_scope) - self.assertNotEqual(sgd_optimizer.variable_scope.name, - sgd_optimizer2.variable_scope.name) - self.assertAllCloseAccordingToType(1, sgd_optimizer._counter.eval()) - self.assertAllCloseAccordingToType(1, sgd_optimizer2._counter.eval()) - - def testTensorLearningRate(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - lrate = constant_op.constant(3.0) - decay_rate = 0.5 - sgd_op = SGLDOptimizer( - lrate, preconditioner_decay_rate=constant_op.constant( - decay_rate)).apply_gradients( - zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - # Run 1 step of sgd - sgd_op.run() - # Validate updated params - grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + - (1 - decay_rate) * 0.1**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) - grads_scaled = (0.5 * 0.01 / math.sqrt( - decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [3.0 - 3.0 * grads_scaled, 4.0 - 
3.0 * grads_scaled], var1.eval()) - - def testGradWrtRef(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - opt = SGLDOptimizer(3.0) - values = [1.0, 3.0] - vars_ = [variables.Variable([v], dtype=dtype) for v in values] - grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_) - variables.global_variables_initializer().run() - for grad, _ in grads_and_vars: - self.assertAllCloseAccordingToType([1.0], grad.eval()) - - def testWithGlobalStep(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - global_step = variables.Variable(0, trainable=False) - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - decay_rate = 0.1 - sgd_op = SGLDOptimizer( - 3.0, preconditioner_decay_rate=decay_rate).apply_gradients( - zip([grads0, grads1], [var0, var1]), global_step=global_step) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - # Run 1 step of sgd - sgd_op.run() - - # Validate updated params and global_step - grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + - (1 - decay_rate) * 0.1**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [1.1 - 3.0 * grads_scaled, 2.1 - 3.0 * grads_scaled], var0.eval()) - grads_scaled = (0.5 * 0.01 / math.sqrt( - decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [3.0 - 3.0 * grads_scaled, 4.0 - 3.0 * grads_scaled], var1.eval()) - self.assertAllCloseAccordingToType(1, global_step.eval()) - - def testSparseBasic(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([[1.1], [2.1]], dtype=dtype) - var1 = 
variables.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = ops.IndexedSlices( - constant_op.constant([0.1], shape=[1, 1], dtype=dtype), - constant_op.constant([0]), constant_op.constant([2, 1])) - grads1 = ops.IndexedSlices( - constant_op.constant([0.01], shape=[1, 1], dtype=dtype), - constant_op.constant([1]), constant_op.constant([2, 1])) - decay_rate = 0.9 - sgd_op = SGLDOptimizer( - 3.0, preconditioner_decay_rate=decay_rate).apply_gradients( - zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.1], [2.1]], var0.eval()) - self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval()) - # Run 1 step of sgd - sgd_op.run() - # Validate updated params - grads_scaled = (0.5 * 0.1 / math.sqrt(decay_rate + - (1 - decay_rate) * 0.1**2 + 1e-8)) - self.assertAllCloseAccordingToType([[1.1 - 3.0 * grads_scaled], [2.1]], - var0.eval()) - grads_scaled = (0.5 * 0.01 / math.sqrt( - decay_rate + (1 - decay_rate) * 0.01**2 + 1e-8)) - self.assertAllCloseAccordingToType( - [[3.0 - 3.0 * 0], [4.0 - 3.0 * grads_scaled]], var1.eval()) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/variational_sgd_optimizer_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/variational_sgd_optimizer_test.py deleted file mode 100644 index 83c64dbe0f..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/variational_sgd_optimizer_test.py +++ /dev/null @@ -1,268 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functional test for GradientDescent.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from tensorflow.contrib.bayesflow.python.ops.optimizers import VariationalSGDOptimizer -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import variables -from tensorflow.python.platform import test - - -class VariationalSGDOptimizerTest(test.TestCase): - - def testBasic(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - decay_rate = 0.53 - sgd_op = VariationalSGDOptimizer( - 1, - 1, - preconditioner_decay_rate=decay_rate, - max_learning_rate=3.0, - burnin_max_learning_rate=3.0, - use_single_learning_rate=True).apply_gradients( - zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - # Run 1 step of sgd - sgd_op.run() - self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1], 
- var0.eval()) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - var1.eval()) - - def testBasicMultiInstance(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - vara = variables.Variable([1.1, 2.1], dtype=dtype) - varb = variables.Variable([3.0, 4.0], dtype=dtype) - gradsa = constant_op.constant([0.1, 0.1], dtype=dtype) - gradsb = constant_op.constant([0.01, 0.01], dtype=dtype) - decay_rate = 0.5 - batch_size = 2 - total_num_examples = 10 - optimizer = VariationalSGDOptimizer( - batch_size, - total_num_examples, - max_learning_rate=1.0, - burnin_max_learning_rate=3.0, - preconditioner_decay_rate=decay_rate) - sgd_op = optimizer.apply_gradients( - zip([grads0, grads1], [var0, var1])) - optimizer2 = VariationalSGDOptimizer( - batch_size, - total_num_examples, - max_learning_rate=1.0, - burnin_max_learning_rate=10.0, - burnin=0, - preconditioner_decay_rate=decay_rate) - sgd_op2 = optimizer2.apply_gradients( - zip([gradsa, gradsb], [vara, varb])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - self.assertAllCloseAccordingToType([1.1, 2.1], vara.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], varb.eval()) - - # Run 1 step of sgd - sgd_op.run() - sgd_op2.run() - # Validate updated params - self.assertAllCloseAccordingToType([1.1 - 3. * 0.1, 2.1 - 3. * 0.1], - var0.eval()) - self.assertAllCloseAccordingToType([1.1 - 0.1, 2.1 - 0.1], vara.eval()) - - self.assertAllCloseAccordingToType([3.0 - 3. * 0.01, 4.0 - 3. 
* 0.01], - var1.eval()) - self.assertAllCloseAccordingToType([3.0 - 0.01, 4.0 - 0.01], - varb.eval()) - self.assertNotEqual(optimizer.variable_scope, - optimizer2.variable_scope) - self.assertNotEqual(optimizer.variable_scope.name, - optimizer2.variable_scope.name) - self.assertAllCloseAccordingToType(1, optimizer._counter.eval()) - self.assertAllCloseAccordingToType(1, optimizer2._counter.eval()) - - def testTensorLearningRate(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - lrate = constant_op.constant(3.0) - decay_rate = 0.5 - batch_size = 2 - total_num_examples = 10 - sgd_op = VariationalSGDOptimizer( - batch_size, - total_num_examples, - max_learning_rate=lrate, - burnin=0, - preconditioner_decay_rate=decay_rate).apply_gradients( - zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - # Run 1 step of sgd - sgd_op.run() - # Validate updated params - self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1], - var0.eval()) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - var1.eval()) - - def testTensorDecayLearningRate(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], dtype=dtype) - lrate = variables.Variable(3.0) - lrate_decay_op = lrate.assign_add(-3.) 
- decay_rate = 0.5 - batch_size = 2 - total_num_examples = 10 - optimizer = VariationalSGDOptimizer( - batch_size, - total_num_examples, - max_learning_rate=lrate, - burnin=0, - preconditioner_decay_rate=decay_rate) - sgd_op = optimizer.apply_gradients(zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - # Run 1 step of sgd - sgd_op.run() - # Validate updated params - self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1], - var0.eval()) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - var1.eval()) - # Update learning rate to 0 - lrate_decay_op.eval() - sgd_op.run() - # Validate params haven't changed - self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1], - var0.eval()) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - var1.eval()) - lrate_decay_op.eval() - - with self.assertRaises(errors.InvalidArgumentError): - sgd_op.run() - - def testGradWrtRef(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - opt = VariationalSGDOptimizer(1, 1, max_learning_rate=1.0) - values = [1.0, 3.0] - vars_ = [variables.Variable([v], dtype=dtype) for v in values] - grads_and_vars = opt.compute_gradients(vars_[0] + vars_[1], vars_) - variables.global_variables_initializer().run() - for grad, _ in grads_and_vars: - self.assertAllCloseAccordingToType([1.0], grad.eval()) - - def testWithGlobalStep(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - global_step = variables.Variable(0, trainable=False) - var0 = variables.Variable([1.1, 2.1], dtype=dtype) - var1 = variables.Variable([3.0, 4.0], dtype=dtype) - grads0 = constant_op.constant([0.1, 0.1], dtype=dtype) - grads1 = constant_op.constant([0.01, 0.01], 
dtype=dtype) - decay_rate = 0.1 - batch_size = 2 - total_num_examples = 10 - sgd_optimizer = VariationalSGDOptimizer( - batch_size, - total_num_examples, - max_learning_rate=3.0, - burnin=0, - preconditioner_decay_rate=decay_rate) - sgd_op = sgd_optimizer.apply_gradients( - zip([grads0, grads1], [var0, var1]), global_step=global_step) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([1.1, 2.1], var0.eval()) - self.assertAllCloseAccordingToType([3.0, 4.0], var1.eval()) - # Run 1 step of sgd - sgd_op.run() - - # Validate updated params and global_step - self.assertAllCloseAccordingToType([1.1 - 3.0 * 0.1, 2.1 - 3.0 * 0.1], - var0.eval()) - self.assertAllCloseAccordingToType([3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01], - var1.eval()) - self.assertAllCloseAccordingToType(1, global_step.eval()) - self.assertAllCloseAccordingToType(1, sgd_optimizer._counter.eval()) - - def testSparseBasic(self): - for dtype in [dtypes.half, dtypes.float32, dtypes.float64]: - with self.test_session(): - var0 = variables.Variable([[1.1], [2.1]], dtype=dtype) - var1 = variables.Variable([[3.0], [4.0]], dtype=dtype) - grads0 = ops.IndexedSlices( - constant_op.constant([0.1], shape=[1, 1], dtype=dtype), - constant_op.constant([0]), constant_op.constant([2, 1])) - grads1 = ops.IndexedSlices( - constant_op.constant([0.01], shape=[1, 1], dtype=dtype), - constant_op.constant([1]), constant_op.constant([2, 1])) - decay_rate = 0.1 - batch_size = 2 - total_num_examples = 10 - sgd_op = VariationalSGDOptimizer( - batch_size, - total_num_examples, - max_learning_rate=3.0, - burnin=0, - preconditioner_decay_rate=decay_rate).apply_gradients( - zip([grads0, grads1], [var0, var1])) - variables.global_variables_initializer().run() - # Fetch params to validate initial values - self.assertAllCloseAccordingToType([[1.1], [2.1]], var0.eval()) - self.assertAllCloseAccordingToType([[3.0], [4.0]], var1.eval()) - # Run 1 step of sgd - 
sgd_op.run() - # Validate updated params - self.assertAllCloseAccordingToType([[1.1 - 3.0 * 0.1], [2.1]], - var0.eval()) - self.assertAllCloseAccordingToType( - [[3.0 - 3.0 * 0], [4.0 - 3.0 * 0.01]], var1.eval()) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/optimizers.py b/tensorflow/contrib/bayesflow/python/ops/optimizers.py deleted file mode 100644 index bff6bb7948..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/optimizers.py +++ /dev/null @@ -1,36 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Probabilistic optimizer modules. - -See @{tf.contrib.bayesflow.optimizers}. 
-""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.contrib.bayesflow.python.ops.sgld_optimizer import * -from tensorflow.contrib.bayesflow.python.ops.variational_sgd_optimizer import * -# pylint: enable=wildcard-import -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - 'SGLDOptimizer', - 'VariationalSGDOptimizer', -] - -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py b/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py deleted file mode 100644 index 7786656398..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/sgld_optimizer.py +++ /dev/null @@ -1,220 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""An optimizer module for stochastic gradient Langevin dynamics.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import random_ops -from tensorflow.python.ops import variable_scope as varscope_ops -from tensorflow.python.training import optimizer -from tensorflow.python.training import training_ops - - -class SGLDOptimizer(optimizer.Optimizer): - """An optimizer module for stochastic gradient Langevin dynamics. - - This implements the preconditioned Stochastic Gradient Langevin Dynamics - optimizer [1]. The optimization variable is regarded as a sample from the - posterior under Stochastic Gradient Langevin Dynamics with noise rescaled in - each dimension according to RMSProp [2]. - - Note: If a prior is included in the loss, it should be scaled by - `1/num_pseudo_batches`, where num_pseudo_batches is the number of minibatches - in the data. I.e., it should be divided by the `num_pseudo_batches` term - described below. - - [1]: "Preconditioned Stochastic Gradient Langevin Dynamics for Deep Neural - Networks." Chunyuan Li, Changyou Chen, David Carlson, Lawrence Carin. - ArXiv:1512.07666, 2015. https://arxiv.org/abs/1512.07666 - [2]: http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf - - Args: - learning_rate: Scalar `float`-like `Tensor`. The base learning rate for the - optimizer. Must be tuned to the specific function being minimized. - preconditioner_decay_rate: Scalar `float`-like `Tensor`. The exponential - decay rate of the rescaling of the preconditioner (RMSprop). (This is - "alpha" in [1]). 
Should be smaller than but nearly `1` to approximate - sampling from the posterior. (Default: `0.95`) - num_pseudo_batches: Scalar `int`-like `Tensor`. The effective number of - minibatches in the data set. Trades off noise and prior with the SGD - likelihood term. Note: Assumes the loss is taken as the mean over a - minibatch. Otherwise if the sum was taken, divide this number by the - batch size. (Default: `1`) - burnin: Scalar `int`-like `Tensor`. The number of iterations to collect - gradient statistics to update the preconditioner before starting to draw - noisy samples. (Default: `25`) - diagonal_bias: Scalar `float`-like `Tensor`. Term added to the diagonal of - the preconditioner to prevent the preconditioner from degenerating. - (Default: `1e-8`) - name: Python `str` describing ops managed by this function. - (Default: `"SGLDOptimizer"`) - variable_scope: Variable scope used for calls to `tf.get_variable`. - If `None`, a new variable scope is created using name - `ops.get_default_graph().unique_name(name or default_name)`. - - Raises: - InvalidArgumentError: If preconditioner_decay_rate is a `Tensor` not in - `(0,1]`. 
- """ - - def __init__(self, - learning_rate, - preconditioner_decay_rate=0.95, - num_pseudo_batches=1, - burnin=25, - diagonal_bias=1e-8, - name=None, - variable_scope=None): - default_name = 'SGLDOptimizer' - with ops.name_scope(name, default_name, [ - learning_rate, preconditioner_decay_rate, num_pseudo_batches, burnin, - diagonal_bias - ]): - if variable_scope is None: - var_scope_name = ops.get_default_graph().unique_name( - name or default_name) - with varscope_ops.variable_scope(var_scope_name) as scope: - self._variable_scope = scope - else: - self._variable_scope = variable_scope - - self._preconditioner_decay_rate = ops.convert_to_tensor( - preconditioner_decay_rate, name='preconditioner_decay_rate') - self._num_pseudo_batches = ops.convert_to_tensor( - num_pseudo_batches, name='num_pseudo_batches') - self._burnin = ops.convert_to_tensor(burnin, name='burnin') - self._diagonal_bias = ops.convert_to_tensor( - diagonal_bias, name='diagonal_bias') - self._learning_rate = ops.convert_to_tensor( - learning_rate, name='learning_rate') - - with varscope_ops.variable_scope(self._variable_scope): - self._counter = varscope_ops.get_variable( - 'counter', initializer=0, trainable=False) - - self._preconditioner_decay_rate = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._preconditioner_decay_rate, - message='`preconditioner_decay_rate` must be non-negative'), - check_ops.assert_less_equal( - self._preconditioner_decay_rate, - 1., - message='`preconditioner_decay_rate` must be at most 1.'), - ], self._preconditioner_decay_rate) - - self._num_pseudo_batches = control_flow_ops.with_dependencies([ - check_ops.assert_greater( - self._num_pseudo_batches, - 0, - message='`num_pseudo_batches` must be greater than zero') - ], self._num_pseudo_batches) - - self._burnin = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._burnin, message='`burnin` must be non-negative'), - check_ops.assert_integer( - self._burnin, 
message='`burnin` must be an integer') - ], self._burnin) - - self._diagonal_bias = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._diagonal_bias, - message='`diagonal_bias` must be non-negative') - ], self._diagonal_bias) - - super(SGLDOptimizer, self).__init__(use_locking=False, - name=name or default_name) - - def _create_slots(self, var_list): - for v in var_list: - init_rms = init_ops.ones_initializer(dtype=v.dtype) - self._get_or_make_slot_with_initializer(v, init_rms, v.get_shape(), - v.dtype, 'rms', self._name) - - def _prepare(self): - # We need to put the conversion and check here because a user will likely - # want to decay the learning rate dynamically. - self._learning_rate_tensor = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._learning_rate, message='`learning_rate` must be non-negative') - ], ops.convert_to_tensor(self._learning_rate, name='learning_rate_tensor')) - self._decay_tensor = ops.convert_to_tensor( - self._preconditioner_decay_rate, name='preconditioner_decay_rate') - - super(SGLDOptimizer, self)._prepare() - - def _apply_dense(self, grad, var): - rms = self.get_slot(var, 'rms') - - with ops.control_dependencies([ - self._update_momentum(rms, grad, math_ops.cast(self._decay_tensor, - var.dtype.base_dtype))]): - new_grad = self._apply_noisy_update(rms, grad) - - return training_ops.apply_gradient_descent( - var, - math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), - new_grad, - use_locking=self._use_locking).op - - def _apply_sparse(self, grad, var): - rms = self.get_slot(var, 'rms') - - with ops.control_dependencies([ - self._update_momentum(rms, grad, math_ops.cast(self._decay_tensor, - var.dtype.base_dtype))]): - new_grad = self._apply_noisy_update(rms, grad) - - return training_ops.apply_gradient_descent( - var, - math_ops.cast(self._learning_rate_tensor, var.dtype.base_dtype), - new_grad, - use_locking=self._use_locking).op - - def _finish(self, update_ops, 
name_scope): - update_ops.append([self._counter.assign_add(1)]) - return control_flow_ops.group(*update_ops, name=name_scope) - - @property - def variable_scope(self): - """Variable scope of all calls to `tf.get_variable`.""" - return self._variable_scope - - def _apply_noisy_update(self, mom, grad): - # Compute and apply the gradient update following - # preconditioned Langevin dynamics - stddev = array_ops.where( - array_ops.squeeze(self._counter > self._burnin), - math_ops.cast(math_ops.rsqrt(self._learning_rate), grad.dtype), - array_ops.zeros([], grad.dtype)) - - preconditioner = math_ops.rsqrt( - mom + math_ops.cast(self._diagonal_bias, grad.dtype)) - return ( - 0.5 * preconditioner * grad * math_ops.cast(self._num_pseudo_batches, - grad.dtype) + - random_ops.random_normal(array_ops.shape(grad), 1.0, dtype=grad.dtype) * - stddev * math_ops.sqrt(preconditioner)) - - def _update_momentum(self, mom, grad, decay): - # Keep an exponentially weighted moving average of squared gradients. - # Not thread safe - return mom.assign_add((1.0 - decay) * (math_ops.square(grad) - mom)) diff --git a/tensorflow/contrib/bayesflow/python/ops/variational_sgd_optimizer.py b/tensorflow/contrib/bayesflow/python/ops/variational_sgd_optimizer.py deleted file mode 100644 index 4d5f0cfe97..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/variational_sgd_optimizer.py +++ /dev/null @@ -1,279 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""An optimizer module for constant stochastic gradient descent.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from tensorflow.python.framework import errors -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import clip_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import state_ops -from tensorflow.python.ops import variable_scope as varscope_ops -from tensorflow.python.training import optimizer -from tensorflow.python.training import training_ops - - -class VariationalSGDOptimizer(optimizer.Optimizer): - """An optimizer module for constant stochastic gradient descent. - - This implements an optimizer module for the constant stochastic gradient - descent algorithm [1]. The optimization variable is regarded as an - approximate sample from the posterior . - - Note: If a prior is included in the loss, it should be scaled by - `1/num_pseudo_batches`, where num_pseudo_batches is the number of minibatches - in the data. I.e., it should be divided by the `num_pseudo_batches` term - described below. - - [1]: "Stochastic Gradient Descent as Approximate Bayesian Inference - Stephan Mandt, Matthew D. Hoffman, David M. Blei. - ArXiv:1704.04289, 2017. https://arxiv.org/abs/1704.04289 - - Args: - batch_size: Scalar `int`-like `Tensor`. The number of examples in a - minibatch in the data set. Note: Assumes the loss is taken as the mean - over a minibatch. Otherwise if the sum was taken set this to 1. - total_num_examples: Scalar `int`-like `Tensor`. 
The total number of examples - in the data set. - max_learning_rate: Scalar `float`-like `Tensor`. A maximum allowable - effective coordinate-wise learning rate. The algorithm scales down any - effective learning rate (i.e. after preconditioning) that is larger than - this. (Default: `1`) - preconditioner_decay_rate: Scalar `float`-like `Tensor`. The exponential - decay rate of the rescaling of the preconditioner (RMSprop). (This is - "alpha" in [1]). Should be smaller than but nearly `1` to approximate - sampling from the posterior. (Default: `0.95`) - burnin: Scalar `int`-like `Tensor`. The number of iterations to collect - gradient statistics to update the preconditioner before starting to draw - noisy samples. (Default: `25`) - burnin_max_learning_rate: Scalar `float`-like `Tensor`. Maximum learning - rate to use during the burnin period. - (Default: `1e-8`) - use_single_learning_rate: Boolean Indicates whether one single learning - rate is used or coordinate_wise learning rates are used. - (Default: `False`) - name: Python `str` describing ops managed by this function. - (Default: `"VariationalSGDOptimizer"`) - variable_scope: Variable scope used for calls to `tf.get_variable`. - If `None`, a new variable scope is created using name - `ops.get_default_graph().unique_name(name or default_name)`. - - Raises: - InvalidArgumentError: If preconditioner_decay_rate is a `Tensor` not in - `(0,1]`. 
- """ - - def __init__(self, - batch_size, - total_num_examples, - max_learning_rate=1.0, - preconditioner_decay_rate=0.95, - burnin=25, - burnin_max_learning_rate=1e-6, - use_single_learning_rate=False, - name=None, - variable_scope=None): - default_name = 'VariationalSGDOptimizer' - with ops.name_scope(name, default_name, [ - max_learning_rate, preconditioner_decay_rate, batch_size, burnin, - burnin_max_learning_rate - ]): - if variable_scope is None: - var_scope_name = ops.get_default_graph().unique_name( - name or default_name) - with varscope_ops.variable_scope(var_scope_name) as scope: - self._variable_scope = scope - else: - self._variable_scope = variable_scope - - self._preconditioner_decay_rate = ops.convert_to_tensor( - preconditioner_decay_rate, name='preconditioner_decay_rate') - self._batch_size = ops.convert_to_tensor(batch_size, name='batch_size') - self._total_num_examples = ops.convert_to_tensor( - total_num_examples, name='total_num_examples') - self._burnin = ops.convert_to_tensor(burnin, name='burnin') - self._burnin_max_learning_rate = ops.convert_to_tensor( - burnin_max_learning_rate, name='burnin_max_learning_rate') - self._max_learning_rate = ops.convert_to_tensor( - max_learning_rate, name='max_learning_rate') - self._use_single_learning_rate = use_single_learning_rate - - with varscope_ops.variable_scope(self._variable_scope): - self._counter = varscope_ops.get_variable( - 'counter', initializer=0, trainable=False) - - self._preconditioner_decay_rate = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._preconditioner_decay_rate, - message='`preconditioner_decay_rate` must be non-negative'), - check_ops.assert_less_equal( - self._preconditioner_decay_rate, - 1., - message='`preconditioner_decay_rate` must be at most 1.'), - ], self._preconditioner_decay_rate) - - self._batch_size = control_flow_ops.with_dependencies([ - check_ops.assert_greater( - self._batch_size, - 0, - message='`batch_size` must be greater 
than zero') - ], self._batch_size) - - self._total_num_examples = control_flow_ops.with_dependencies([ - check_ops.assert_greater( - self._total_num_examples, - 0, - message='`total_num_examples` must be greater than zero') - ], self._total_num_examples) - - self._burnin = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._burnin, message='`burnin` must be non-negative'), - check_ops.assert_integer( - self._burnin, message='`burnin` must be an integer') - ], self._burnin) - - self._burnin_max_learning_rate = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._burnin_max_learning_rate, - message='`burnin_max_learning_rate` must be non-negative') - ], self._burnin_max_learning_rate) - - self._max_learning_rate = control_flow_ops.with_dependencies([ - check_ops.assert_non_negative( - self._max_learning_rate, - message='`max_learning_rate` must be non-negative') - ], self._max_learning_rate) - - super(VariationalSGDOptimizer, self).__init__( - use_locking=False, name=name or default_name) - - def _create_slots(self, var_list): - for v in var_list: - init_moment = init_ops.zeros_initializer(dtype=v.dtype) - self._get_or_make_slot_with_initializer( - v, init_moment, v.get_shape(), v.dtype, 'first_moment', self._name) - self._get_or_make_slot_with_initializer( - v, init_moment, v.get_shape(), v.dtype, 'second_moment', self._name) - - def _prepare(self): - self._decay_tensor = ops.convert_to_tensor( - self._preconditioner_decay_rate, name='preconditioner_decay_rate') - self._batch_size_tensor = ops.convert_to_tensor( - self._batch_size, name='batch_size_tensor') - - super(VariationalSGDOptimizer, self)._prepare() - - def _get_coordinatewise_learning_rate(self, grad, var): - # Compute the learning rate using a moving average for the diagonal of BB^T - avg_first = self.get_slot(var, 'first_moment') - avg_second = self.get_slot(var, 'second_moment') - decay_tensor = math_ops.cast(self._decay_tensor, var.dtype) - batch_size 
= math_ops.cast(self._batch_size_tensor, var.dtype) - - # Create an estimator for the moving average of gradient mean and variance - # via Welford's algorithm - if isinstance(grad, ops.Tensor): - delta = grad - avg_first - first_moment_update = avg_first.assign_add( - array_ops.where(self._counter < 1, math_ops.cast(1, var.dtype), - 1. - decay_tensor) * delta) - - with ops.control_dependencies([first_moment_update]): - second_moment_update = avg_second.assign_add( - math_ops.cast(self._counter < 1, var.dtype) * - -(1. - decay_tensor) * ( - avg_second - decay_tensor * math_ops.square(delta))) - diag_preconditioner = control_flow_ops.with_dependencies( - [second_moment_update], - clip_ops.clip_by_value(avg_second, 1e-12, 1e12)) - elif isinstance(grad, ops.IndexedSlices): - delta = grad.values - array_ops.gather_nd(avg_first, grad.indices) - first_moment_update = state_ops.scatter_add( - avg_first, - grad.indices, - array_ops.where(self._counter < 1, - math_ops.cast(1., var.dtype), - 1. - decay_tensor) * delta) - - with ops.control_dependencies([first_moment_update]): - avg_second = state_ops.scatter_add( - avg_second, - grad.indices, - math_ops.cast(self._counter < 1, var.dtype) * - -(1. - decay_tensor) * ( - array_ops.gather_nd(avg_second, grad.indices) - decay_tensor * - math_ops.square(delta))) - avg_second = array_ops.gather_nd(avg_second, grad.indices) - # TODO(b/70783772) - diag_preconditioner = clip_ops.clip_by_value(avg_second, 1e-12, 1e12) - else: - raise errors.InvalidArgumentError( - None, None, 'grad must of type Tensor or IndexedSlice') - - diag_preconditioner *= batch_size - - if self._use_single_learning_rate: - diag_preconditioner = math_ops.reduce_mean(diag_preconditioner) - - # From Theorem 2 Corollary 1 of Mandt et al. 2017 - return 2. 
* batch_size / ( - math_ops.cast(self._total_num_examples, var.dtype.base_dtype) * - diag_preconditioner) - - def _apply_dense(self, grad, var): - - max_learning_rate = array_ops.where(self._counter < self._burnin, - self._burnin_max_learning_rate, - self._max_learning_rate) - - learn_rates = clip_ops.clip_by_value( - self._get_coordinatewise_learning_rate(grad, var), 0.0, - math_ops.cast(max_learning_rate, var.dtype.base_dtype)) - - newgrad = grad * learn_rates - return training_ops.apply_gradient_descent( - var, - math_ops.cast(1.0, var.dtype), - newgrad, - use_locking=self._use_locking).op - - def _apply_sparse(self, grad, var): - - max_learning_rate = array_ops.where(self._counter < self._burnin, - self._burnin_max_learning_rate, - self._max_learning_rate) - - learn_rate = clip_ops.clip_by_value( - self._get_coordinatewise_learning_rate(grad, var), 0.0, - math_ops.cast(max_learning_rate, var.dtype)) - delta = grad.values * learn_rate - - return state_ops.scatter_sub(var, grad.indices, delta, - use_locking=self._use_locking) - - def _finish(self, update_ops, name_scope): - update_ops.append([self._counter.assign_add(1)]) - return control_flow_ops.group(*update_ops, name=name_scope) - - @property - def variable_scope(self): - """Variable scope of all calls to `tf.get_variable`.""" - return self._variable_scope -- GitLab From 0737f530db779bdec9af1ae87344796a7673c537 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 9 Mar 2018 15:33:56 -0800 Subject: [PATCH 688/884] Disable //tensorflow/core:common_runtime_function_test --- tensorflow/core/BUILD | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index f2b0d542dd..affa71bff3 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3321,6 +3321,10 @@ tf_cc_test( size = "small", srcs = ["common_runtime/function_test.cc"], linkstatic = tf_kernel_tests_linkstatic(), + tags = [ + "manual", + "no_oss", + ], deps = [ ":core", ":core_cpu", -- GitLab From 
faea16caaf84b065ecf5fd6706a597308984df71 Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Fri, 9 Mar 2018 15:28:15 -0800 Subject: [PATCH 689/884] Copy `replicate_model_fn` to core. PiperOrigin-RevId: 188547527 --- tensorflow/python/estimator/BUILD | 65 + .../python/estimator/replicate_model_fn.py | 823 ++++++++ .../estimator/replicate_model_fn_test.py | 1709 +++++++++++++++++ 3 files changed, 2597 insertions(+) create mode 100644 tensorflow/python/estimator/replicate_model_fn.py create mode 100644 tensorflow/python/estimator/replicate_model_fn_test.py diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index e3a6708d67..04fcbb0e87 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -7,6 +7,7 @@ package( licenses(["notice"]) # Apache 2.0 load("//tensorflow:tensorflow.bzl", "py_test") +load("//tensorflow:tensorflow.bzl", "cuda_py_test") filegroup( name = "all_files", @@ -35,6 +36,7 @@ py_library( ":linear", ":model_fn", ":parsing_utils", + ":replicate_model_fn", ":run_config", ":training", "//tensorflow/python:util", @@ -866,3 +868,66 @@ py_test( "//tensorflow/python:training", ], ) + +py_library( + name = "replicate_model_fn", + srcs = [ + "replicate_model_fn.py", + ], + srcs_version = "PY2AND3", + deps = [ + ":export_output", + ":model_fn", + ":util", + "//tensorflow/core:protos_all_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:device", + "//tensorflow/python:device_lib", + "//tensorflow/python:framework_ops", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform", + "//tensorflow/python:sparse_ops", + "//tensorflow/python:sparse_tensor", + "//tensorflow/python:state_ops", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python/ops/losses", + "@six_archive//:six", + ], +) + +cuda_py_test( + name = "replicate_model_fn_test", + size = "medium", + srcs = ["replicate_model_fn_test.py"], + 
additional_deps = [ + "//tensorflow/python/estimator", + ":dnn", + ":export_export", + ":export_output", + ":model_fn", + ":numpy_io", + ":optimizers", + ":prediction_keys", + "//tensorflow/python/feature_column", + "//tensorflow/python/ops/losses", + "//tensorflow/python/saved_model:signature_constants", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:control_flow_ops", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:metrics", + "//tensorflow/python:platform", + "//tensorflow/python:summary", + "//tensorflow/python:training", + "//tensorflow/python:variable_scope", + "//tensorflow/python:variables", + ":replicate_model_fn", + ], + tags = [ + "multi_gpu", + ], +) diff --git a/tensorflow/python/estimator/replicate_model_fn.py b/tensorflow/python/estimator/replicate_model_fn.py new file mode 100644 index 0000000000..7418852096 --- /dev/null +++ b/tensorflow/python/estimator/replicate_model_fn.py @@ -0,0 +1,823 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Utilities to replicate model_fn's over local GPUs. + +This file contains util that allow to replicate `Estimator.model_fn` over +GPUs. 
Replicated version of a `model_fn` is returned that can subsequently +be used with `Estimator`. +""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from collections import defaultdict +from contextlib import contextmanager +import copy + +import six + +from tensorflow.core.framework import node_def_pb2 +from tensorflow.python.client import device_lib +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator import util +from tensorflow.python.estimator.export import export_output as export_output_lib +from tensorflow.python.framework import device as framework_device +from tensorflow.python.framework import ops as ops_lib +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import state_ops +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops.losses import losses +from tensorflow.python.platform import tf_logging +from tensorflow.python.training import device_setter as device_setter_lib +from tensorflow.python.training import optimizer as optimizer_lib + + +def _replicate_model_fn(model_fn, + loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS, + devices=None): + """Replicate `Estimator.model_fn` over GPUs. + + The given `model_fn` specifies a single forward pass of a model. To replicate + such a model over GPUs, each GPU gets its own instance of the forward pass + (a.k.a. a tower). The input features and labels get sharded into the chunks + that correspond to the number of GPUs. Each tower computes a loss based + on its input. For each such loss, gradients are computed. After that, the + available losses are aggregated to form aggregated loss. Available + gradients are summed. 
Then, they update weights using the specified + optimizer. + + If `devices` are `None`, then all available GPUs are going to be used for + replication. If no GPUs are available, then the model is going to be + placed on the CPU. + + Two modes of local replication over available GPUs are supported: + 1) If exactly 1 GPU is detected, then variables and operations are placed + onto the GPU. + 2) If more than 1 GPU is detected, then variables are going to be placed on + the CPU. Replicas of operations are placed on each individual GPU. + + Here is an example of how one might use their `model_fn` to run over GPUs: + ```python + ... + def model_fn(...): # See `model_fn` in `Estimator`. + loss = ... + optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) + optimizer = tf.contrib.estimator._TowerOptimizer(optimizer) + if mode == tf.estimator.ModeKeys.TRAIN: + # See the section below on `EstimatorSpec.train_op`. + return EstimatorSpec(mode=mode, loss=loss, + train_op=optimizer.minimize(loss)) + + # No change for `ModeKeys.EVAL` or `ModeKeys.PREDICT`. + return EstimatorSpec(...) + ... + classifier = tf.estimator.Estimator( + model_fn=tf.contrib.estimator.replicate_model_fn(model_fn)) + ``` + + Please see `DNNClassifierIntegrationTest` for an example with a canned + Estimator. + + On `EstimatorSpec.train_op`: + `model_fn` returns `EstimatorSpec.train_op` for + `tf.estimator.ModeKeys.TRAIN`. It is typically derived using an optimizer. + Towers are expected to populate it in the same way. Gradients from all towers + are reduced and applied in the last tower. To achieve that in the case of + multiple towers, `_TowerOptimizer` needs to be used. See `_TowerOptimizer`. + + On sharding input features and labels: + Input features and labels are split for consumption by each tower. They are + split across the dimension 0. Features and labels need to be batch major.
+ + On reduction algorithms: + Certain algorithms were chosen for aggregating results of computations on + multiple towers: + - Losses from all towers are reduced according to `loss_reduction`. + - Gradients from all towers are reduced according to `loss_reduction` + for each trainable variable. + - `eval_metric_ops` are reduced per metric using `reduce_mean`. + - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are + reduced using concatenation. + - For all other fields of `EstimatorSpec` the values of the first tower + are taken. + + On distribution of variables: + Variables are not duplicated between towers. Instead, they are placed on a + single device as defined above and shared across towers. + + On overhead: + If only one device is specified, then aggregation of loss and gradients + doesn't happen. Replication consists of placing `model_fn` onto the + specified device. + + On current limitations: + - `predictions` are not supported for `ModeKeys.EVAL`. They are required + for `tf.contrib.estimator.add_metrics`. + + Args: + model_fn: `model_fn` as defined in `Estimator`. See the section above about + the train_op argument of `EstimatorSpec`. + loss_reduction: controls whether losses are summed or averaged. + devices: Optional list of devices to replicate the model across. This + argument can be used to replicate only on a subset of available GPUs. + If `None`, then all available GPUs are going to be used for replication. + If no GPUs are available, then the model is going to be placed on the CPU. + + Raises: + ValueError: if there is no `loss_reduction` or if _TowerOptimizer is + mis-used. + + Returns: + A replicated version of the supplied `model_fn`. The returned function + conforms to the requirements of `Estimator`'s `model_fn` and can be used + instead of the supplied `model_fn`.
+ """ + return _replicate_model_fn_with_mode( + model_fn, + loss_reduction, + devices, + # TODO(isaprykin): Query the system configuration to choose modes other + # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often + # appropriate. + mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER) + + +class _VariableDistributionMode(object): + """Modes for variable distribution used for forcing a particular one. + + Forcing a mode is meant for performance experimentation purposes rather than + for general use cases. + """ + + SHARED_LOCAL_PARAMETER_SERVER = 1 + """Variables are placed on a single device and shared across all devices. + + Two ways to achieve this distribution over available GPUs are supported: + 1) If exactly 1 GPU is detected, then variables and operations are placed + onto GPU. + 2) If more than 1 GPU is detected, then variables are going to be placed on + the CPU. Replicas of operations are placed on each individual GPU. + """ + + SHARED_ROUND_ROBIN = 2 + """Variables are placed on all devices in a round-robin fashion. + + Every subsequent variable is placed on the next device. There is only one + copy of each variable that is shared across all devices. 
+ """ + + +def _replicate_model_fn_with_mode( + model_fn, + loss_reduction, + devices=None, + mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER): + """A version of `replicate_model_fn` that allows to specify a `mode`.""" + if loss_reduction == losses.Reduction.NONE: + raise ValueError('Tower losses need to be reduced in some way, yet {} ' + 'reduction is specified.'.format(loss_reduction)) + if not devices: + devices = _get_local_devices('GPU') or _get_local_devices('CPU') + + is_a_single_gpu_case = len(devices) == 1 and 'GPU' in devices[0].upper() + consolidation_device = devices[0] if is_a_single_gpu_case else '/CPU:0' + + ps_devices = [consolidation_device] + if mode == _VariableDistributionMode.SHARED_ROUND_ROBIN: + ps_devices = devices + + tf_logging.info('Replicating the `model_fn` across {}. Variables are going ' + 'to be placed on {}. Consolidation device is going to be {}.' + .format(devices, ps_devices, consolidation_device)) + + def single_device_model_fn(features, labels, mode, params=None, config=None): + """`model_fn` on a single device without reduction overhead.""" + return _get_loss_towers( + model_fn=model_fn, + mode=mode, + features=[features], + labels=[labels], + params=params, + loss_reduction=loss_reduction, + config=config, + devices=devices, + local_ps_devices=ps_devices)[0] # One device, so one spec is out. 
class _TowerOptimizer(optimizer_lib.Optimizer):
  """Gathers gradients from all towers and reduces them in the last one.

  The wrapper holds only the wrapped optimizer (or a factory for it).  All
  state that has to be shared between the per-tower `model_fn` calls lives in
  a `_PerGraphState` object that is stored in a collection of the default
  graph; see `_graph_state()`.
  """

  # Name of the graph collection that holds the `_PerGraphState` object.
  COLLECTION_FOR_GRAPH_STATES = 'replicate_model_fn_graph_states'

  def __init__(self, optimizer_or_optimizer_fn):
    """Wrap an existing optimizer for gathering gradients across towers.

    Each invocation of model_fn has to call the same optimizers in the same
    order.

    Multiple optimizers that use the same or different losses are supported.

    If _TowerOptimizer is used but `replicate_model_fn` isn't, then no
    aggregation will happen. All calls will simply be forwarded to the
    underlying optimizer. The behavior is similar if there is only one tower.

    If _TowerOptimizer is used together with SyncReplicasOptimizer that wraps
    the user's optimizer, then it's the SyncReplicasOptimizer that needs to be
    wrapped with _TowerOptimizer.

    Args:
      optimizer_or_optimizer_fn: an instance of optimizer to wrap. That
        instance is going to be used for optimizer-specific logic. This can
        also be a no-argument function that returns such an optimizer instance.
    """
    # NOTE(review): the base class `__init__` is not invoked here; only the
    # wrapped optimizer (or its factory) is remembered.
    self._optimizer_or_optimizer_fn = optimizer_or_optimizer_fn

  @staticmethod
  def has_been_used():
    """Returns the "tower optimizer has been used" flag of the default graph.

    The flag is set by `_get_optimizer()`, i.e. by any call that needs the
    wrapped optimizer.
    """
    return _TowerOptimizer._graph_state().has_tower_optimizer_been_used

  def get_slot(self, *args, **kwargs):
    """Forwards to `get_slot` of the wrapped optimizer."""
    return self._get_optimizer().get_slot(*args, **kwargs)

  def get_slot_names(self, *args, **kwargs):
    """Forwards to `get_slot_names` of the wrapped optimizer."""
    return self._get_optimizer().get_slot_names(*args, **kwargs)

  def get_name(self, *args, **kwargs):
    """Forwards to `get_name` of the wrapped optimizer."""
    return self._get_optimizer().get_name(*args, **kwargs)

  def variables(self, *args, **kwargs):
    """Forwards to `variables` of the wrapped optimizer."""
    return self._get_optimizer().variables(*args, **kwargs)

  def compute_gradients(self, loss, *args, **kwargs):
    """Compute gradients, but first, if needed, scale the loss.

    The loss goes through `_scale_loss` with the loss reduction and the number
    of towers recorded in the graph state; the result is handed to
    `compute_gradients` of the wrapped optimizer.
    """
    loss = _scale_loss(loss,
                       self._graph_state().loss_reduction,
                       self._graph_state().number_of_towers)
    return self._get_optimizer().compute_gradients(loss, *args, **kwargs)

  def apply_gradients(self, grads_and_vars, global_step=None, **kwargs):
    """Collect gradient updates so that the last tower can apply them.

    Args:
      grads_and_vars: (gradient, variable) pairs of the current tower.
      global_step: forwarded to the wrapped optimizer.
      **kwargs: forwarded to the wrapped optimizer.

    Returns:
      With a single tower: the result of the wrapped `apply_gradients`.
      In a tower that is not the last one: a no-op that has control
      dependencies on this tower's gradient tensors.
      In the last tower: the op that applies the gradients gathered from all
      towers.
    """
    if self._graph_state().number_of_towers == 1:
      # Avoid the overhead of reduction if there's only one tower.
      #
      # There is assumed to be only one tower if aggregation-related methods
      # were not called by `_get_loss_towers`, for example if the model_fn uses
      # TowerEstimator, but `replicate_model_fn` isn't used.
      return self._get_optimizer().apply_gradients(grads_and_vars, global_step,
                                                   **kwargs)

    self._graph_state().collect_gradients(grads_and_vars)

    if not self._graph_state().is_the_last_tower:
      with ops_lib.control_dependencies(_extract_tensors(grads_and_vars)):
        return self._construct_no_op_train_op()
    else:
      # Gradients need to be gathered and applied in the scope of the first
      # tower, so that the tensors are accessible via names without prefixes.
      var_scope, name_scope = self._graph_state().scopes_of_the_first_tower
      with variable_scope.variable_scope(var_scope):
        with ops_lib.name_scope(name_scope):
          return self._apply_gathered_gradients(global_step, **kwargs)

  def _apply_gathered_gradients(self, global_step, **kwargs):
    """Sums the latest gradients per variable and applies the sums."""
    graph_state = self._graph_state()
    optimizer = self._get_optimizer()

    # Group the gradients by variable; `None` gradients are dropped.
    grad_lists = {}
    for grad, var in graph_state.get_latest_gradients_from_all_towers():
      if grad is not None:
        grad_lists.setdefault(var, []).append(grad)

    aggregated_grads = []
    with ops_lib.name_scope('gradient_aggregating'):
      for var, grads in six.iteritems(grad_lists):
        # The sum is computed on the device of the variable it belongs to.
        grad = _compute_sum_on_device(grads, var.device)
        aggregated_grads.append((grad, var))
    return optimizer.apply_gradients(
        aggregated_grads, global_step=global_step, **kwargs)

  def _get_optimizer(self):
    """Returns the wrapped optimizer and marks the tower optimizer as used."""
    if callable(self._optimizer_or_optimizer_fn):
      # If optimizer is given as a function then we need to wait till we are
      # under the right graph context before constructing it. That's why the
      # optimizer is constructed in _get_optimizer() rather than __init__().
      self._optimizer_or_optimizer_fn = self._optimizer_or_optimizer_fn()
    self._graph_state().has_tower_optimizer_been_used = True
    return self._optimizer_or_optimizer_fn

  def _construct_no_op_train_op(self):
    """Returns the no-op that stands in for `train_op` in non-last towers."""
    return control_flow_ops.no_op(name='train_op_placeholder')

  @staticmethod
  def _graph_state():
    """Returns the `_PerGraphState` of the default graph, creating it lazily.

    The state is kept in the `COLLECTION_FOR_GRAPH_STATES` collection; the last
    entry of that collection is returned.
    """
    graph_states = ops_lib.get_default_graph().get_collection_ref(
        _TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)
    if not graph_states:
      graph_states.append(_TowerOptimizer._PerGraphState())
    return graph_states[-1]

  @staticmethod
  def _did_towers_have_same_optimizer_calls():
    """Delegates to the same-named check of the default graph's state."""
    graph_state = _TowerOptimizer._graph_state()
    return graph_state.did_towers_have_same_optimizer_calls()

  @staticmethod
  def _clear_graph_state():
    """Removes the per-graph state from the default graph."""
    # Clearing the Graph collection will prevent _PerGraphState from being
    # serialized.
    ops_lib.get_default_graph().clear_collection(
        _TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)

  class _PerGraphState(object):
    """Gradient reduction related state of a Tensorflow graph."""

    def __init__(self):
      # Maps a tower index to the list of `grads_and_vars` collected in that
      # tower, one entry per `apply_gradients` call.
      self._collected_grads_and_vars = defaultdict(list)
      self._current_tower_index = 0
      # Stays at 1 until `set_reduction_across_towers` is called.
      self._number_of_towers = 1
      self._loss_reduction = None
      # Scopes of the first tower that don't have a prefix:
      self._variable_scope = None
      self._name_scope = None
      # If needed, alert that _TowerOptimizer needs to be used with model_fn.
      self._has_tower_optimizer_been_used = False

    def collect_gradients(self, grads_and_vars):
      """Records `grads_and_vars` under the index of the current tower."""
      self._collected_grads_and_vars[self._current_tower_index].append(
          grads_and_vars)

    def get_latest_gradients_from_all_towers(self):
      """Get gradients across towers for the last called optimizer.

      The position of the most recent entry of the current tower is used to
      pick the matching entry in every tower from 0 up to and including the
      current one.

      Returns:
        A flat list of the (gradient, variable) pairs of those entries.
      """
      grads_and_vars = []
      index_of_last_gradients = len(
          self._collected_grads_and_vars[self._current_tower_index]) - 1
      for tower_id in range(self._current_tower_index + 1):
        grads_and_vars.extend(
            self._collected_grads_and_vars[tower_id][index_of_last_gradients])
      return grads_and_vars

    def set_reduction_across_towers(self, loss_reduction, number_of_towers):
      """Records the loss reduction and the number of towers."""
      self._loss_reduction = loss_reduction
      self._number_of_towers = number_of_towers

    @contextmanager
    def tower(self, tower_id, var_scope, name_scope):
      """Context manager that makes `tower_id` the current tower.

      For tower 0 the given scopes are additionally remembered as the scopes
      of the first tower.  Nothing is restored when the context exits.
      """
      if tower_id == 0:
        self._variable_scope = var_scope
        self._name_scope = name_scope
      self._current_tower_index = tower_id
      yield

    @property
    def scopes_of_the_first_tower(self):
      """The `(variable scope, name scope)` pair recorded for tower 0."""
      return self._variable_scope, self._name_scope

    @property
    def is_the_last_tower(self):
      """Whether the current tower index is `number_of_towers - 1`."""
      return self._current_tower_index == (self._number_of_towers - 1)

    @property
    def number_of_towers(self):
      return self._number_of_towers

    @property
    def loss_reduction(self):
      return self._loss_reduction

    @property
    def has_tower_optimizer_been_used(self):
      return self._has_tower_optimizer_been_used

    @has_tower_optimizer_been_used.setter
    def has_tower_optimizer_been_used(self, value):
      self._has_tower_optimizer_been_used = value

    def did_towers_have_same_optimizer_calls(self):
      """Checks that the collected entries divide evenly among the towers.

      Only divisibility of the total number of collected `grads_and_vars`
      entries by the number of towers is verified.
      """
      total_number_of_grads = sum([
          len(grads)
          for _, grads in six.iteritems(self._collected_grads_and_vars)
      ])
      return total_number_of_grads % self._number_of_towers == 0
def _get_local_devices(device_type):
  """Returns the names of the local devices whose type is `device_type`."""
  names = []
  for local_device in device_lib.list_local_devices():
    if local_device.device_type == device_type:
      names.append(local_device.name)
  return names


def _split_batch(features, labels, number_of_shards, device):
  """Splits input features and labels into `number_of_shards` shards.

  Args:
    features: a tensor-like value or a dictionary of them.  Dictionary values
      may be `SparseTensor`s, which are split with `sparse_split`.
    labels: None, a tensor-like value or a dictionary of them.
    number_of_shards: how many shards to produce.
    device: the device on which the split operations are placed.

  Returns:
    A `(feature_shards, label_shards)` pair.  Each element is a list with one
    entry per shard; `label_shards` is None when `labels` is None.

  Raises:
    ValueError: if the first dimension of a dense input is not divisible by
      `number_of_shards`.
  """

  def check_divisibility(sequence):
    batch_size = ops_lib.convert_to_tensor(sequence).get_shape()[0]
    if batch_size % number_of_shards != 0:
      raise ValueError(
          'Batch size {} needs to be divisible by the number of GPUs, which '
          'is {}.'.format(batch_size, number_of_shards))

  def shard_dense(tensor):
    """Validates the batch size, then splits a dense input."""
    check_divisibility(tensor)
    return array_ops.split(tensor, number_of_shards)

  def shard_dictionary(dictionary):
    """Splits every value of a dictionary; returns one dictionary per shard."""
    shards = [{} for _ in range(number_of_shards)]
    for name, tensor in six.iteritems(dictionary):
      if isinstance(tensor, sparse_tensor.SparseTensor):
        pieces = sparse_ops.sparse_split(
            sp_input=tensor, num_split=number_of_shards, axis=0)
      else:
        pieces = shard_dense(tensor)
      for index, piece in enumerate(pieces):
        shards[index][name] = piece
    return shards

  def shard(value):
    if isinstance(value, dict):
      return shard_dictionary(value)
    return shard_dense(value)

  with ops_lib.name_scope('split_inputs'), ops_lib.device(device):
    feature_shards = shard(features)
    label_shards = None if labels is None else shard(labels)
  return feature_shards, label_shards
_DEFAULT_NAME_SCOPE_PATTERN = 'tower_{}'


def _get_loss_towers(model_fn,
                     mode,
                     features,
                     labels,
                     params,
                     config,
                     devices,
                     local_ps_devices,
                     loss_reduction,
                     name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN):
  """Replicate the loss computation across devices.

  `model_fn` is called once per entry of `devices`.  The first tower is built
  without a name scope prefix and creates the variables; every later tower is
  built under `name_scope_pattern.format(i)` and reuses them.

  Args:
    model_fn: the `model_fn` to call for each tower.
    mode: the mode that is passed on to `model_fn`.
    features: per-tower features, indexed by tower.
    labels: per-tower labels indexed by tower, or a false value for no labels.
    params: passed on as a deep copy if `model_fn` accepts `params`.
    config: passed on as a deep copy if `model_fn` accepts `config`.
    devices: the devices to build towers on, one tower per device.
    local_ps_devices: the devices that variables are distributed over.
    loss_reduction: the reduction used for scaling the tower losses.
    name_scope_pattern: format string for the name scope of towers other than
      the first one.

  Returns:
    The list of specs returned by `model_fn`, one per device and in device
    order, with the loss scaled by `_scale_tower_loss`.

  Raises:
    ValueError: if a tower returns a `train_op` on a multi-device setup
      without having used `_TowerOptimizer`, or if the towers did not make
      the same optimizer calls.
  """
  tower_specs = []

  model_fn_args = util.fn_args(model_fn)
  optional_params = {}
  if 'params' in model_fn_args:
    optional_params['params'] = copy.deepcopy(params)
  if 'config' in model_fn_args:
    optional_params['config'] = copy.deepcopy(config)

  # pylint: disable=protected-access
  round_robin_strategy = device_setter_lib._RoundRobinStrategy(
      num_tasks=len(local_ps_devices))
  _TowerOptimizer._graph_state().set_reduction_across_towers(
      loss_reduction, len(devices))

  # The per-graph state must not outlive this call: it would otherwise be
  # serialized with the graph and, after a failure, leave stale gradients and
  # tower counts behind for the next call on the same graph.  Hence the
  # `finally`, which also covers exceptions raised by `model_fn`.
  try:
    for i, device in enumerate(devices):
      is_the_first_tower = (i == 0)

      device_setter = _local_device_setter(
          worker_device=device,
          ps_devices=local_ps_devices,
          ps_strategy=round_robin_strategy)

      # We would like to preserve the names of the variables and ops that the
      # user might be relying on. Names without a prefix are going to resolve
      # to variables and ops of the first tower.
      name_scope = name_scope_pattern
      if is_the_first_tower:
        name_scope = ''

      with variable_scope.variable_scope(
          '', reuse=not is_the_first_tower) as var_scope:
        with ops_lib.name_scope(name_scope.format(i)) as name_scope:
          with _TowerOptimizer._graph_state().tower(
              tower_id=i, var_scope=var_scope, name_scope=name_scope):
            with ops_lib.device(device_setter):
              labels_shard = None
              if labels:
                labels_shard = labels[i]

              tower_spec = model_fn(
                  mode=mode,
                  features=features[i],
                  labels=labels_shard,
                  **optional_params)

              if (tower_spec.train_op is not None and len(devices) > 1 and
                  not _TowerOptimizer.has_been_used()):
                raise ValueError('Please wrap optimizers with _TowerOptimizer'
                                 ' in order to use replicate_model_fn with'
                                 ' multiple `devices`.')

              # Scaling the loss here doesn't actually affect gradients.
              # Another instance of scaling happens inside the _TowerOptimizer.
              tower_spec = _scale_tower_loss(
                  tower_spec, loss_reduction, number_of_towers=len(devices))
              tower_specs.append(tower_spec)

    if not _TowerOptimizer._did_towers_have_same_optimizer_calls():
      raise ValueError('Each invocation of model_fn was supposed to make the '
                       'same optimizer calls.')
  finally:
    _TowerOptimizer._clear_graph_state()
  # pylint: enable=protected-access
  return tower_specs
+ name_scope = name_scope_pattern + if is_the_first_tower: + name_scope = '' + + with variable_scope.variable_scope( + '', reuse=not is_the_first_tower) as var_scope: + with ops_lib.name_scope(name_scope.format(i)) as name_scope: + with _TowerOptimizer._graph_state().tower( + tower_id=i, var_scope=var_scope, name_scope=name_scope): + with ops_lib.device(device_setter): + labels_shard = None + if labels: + labels_shard = labels[i] + + tower_spec = model_fn( + mode=mode, + features=features[i], + labels=labels_shard, + **optional_params) + + if (tower_spec.train_op is not None and len(devices) > 1 and + not _TowerOptimizer.has_been_used()): + raise ValueError('Please wrap optimizers with _TowerOptimizer' + ' in order to use replicate_model_fn with' + ' multiple `devices`.') + + # Scaling the loss here doesn't actually affect gradients. Another + # instance of scaling happens inside the _TowerOptimizer. + tower_spec = _scale_tower_loss( + tower_spec, loss_reduction, number_of_towers=len(devices)) + tower_specs.append(tower_spec) + + if not _TowerOptimizer._did_towers_have_same_optimizer_calls(): + raise ValueError('Each invocation of model_fn was supposed to make the same' + ' optimizer calls.') + _TowerOptimizer._clear_graph_state() + # pylint: enable=protected-access + return tower_specs + + +def _local_device_setter(worker_device, ps_devices, ps_strategy): + """A device setter that puts distributes Var/Ops to PS/workers.""" + ps_ops = ['Variable', 'VariableV2', 'VarHandleOp'] + + def local_device_chooser(op): + current_device = framework_device.DeviceSpec.from_string(op.device or '') + + node_def = op if isinstance(op, node_def_pb2.NodeDef) else op.node_def + if node_def.op in ps_ops: + ps_device_spec = framework_device.DeviceSpec.from_string( + '{}'.format(ps_devices[ps_strategy(op)])) + + ps_device_spec.merge_from(current_device) + return ps_device_spec.to_string() + else: + worker_device_spec = framework_device.DeviceSpec.from_string( + worker_device or '') + 
worker_device_spec.merge_from(current_device) + return worker_device_spec.to_string() + + return local_device_chooser + + +def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers): + """Produce an EstimatorSpec with approproriately scaled loss.""" + if tower_spec.loss is None: + return tower_spec + + estimator_spec = _asdict(tower_spec) + estimator_spec['loss'] = _scale_loss(tower_spec.loss, loss_reduction, + number_of_towers) + return model_fn_lib.EstimatorSpec(**estimator_spec) + + +def _scale_loss(loss, loss_reduction, number_of_towers): + """If needed, scale down the loss for averaging loss by summing.""" + if loss is None: + return None + if number_of_towers == 1: + return loss + + if loss_reduction != losses.Reduction.SUM: + return math_ops.div(loss, 1.0 * number_of_towers, name='averaged_loss') + else: + return loss + + +def _minimize_towers(tower_specs): + """`train_op` of the last tower applies aggregated gradients.""" + return tower_specs[-1].train_op + + +def _compute_sum_on_device(values, device, name=None): + with ops_lib.device(device): + if isinstance(values[0], ops_lib.IndexedSlices): + if name: + raise ValueError('The name {} is not expected to be given to ' + 'IndexedSlices {}'.format(name, values)) + + values_concat = array_ops.concat([v.values for v in values], axis=0) + indices_concat = array_ops.concat([v.indices for v in values], axis=0) + return ops_lib.IndexedSlices(values_concat, indices_concat, + values[0].dense_shape) + else: + return math_ops.add_n(values, name=name) + + +def _train_spec(tower_specs, + train_op, + aggregation_device, + aggregated_loss_name='loss'): + """Populate replicated EstimatorSpec for `GraphKeys.TRAIN`.""" + # Spec of the last tower is used as the template for the final spec, because + # some `EstimatorSpec.training_hooks` rely on calls made in model_fn. For + # example, `SyncReplicasOptimizerHook` validates the + # `SyncReplicasOptimizer.apply_gradients` call. 
def _eval_spec(tower_specs, aggregation_device, aggregated_loss_name='loss'):
  """Populate replicated EstimatorSpec for `GraphKeys.EVAL`.

  The spec of the first tower serves as the template.  Its loss is replaced by
  the sum of all tower losses computed on `aggregation_device`.  Every metric
  keeps the value tensor of the first tower, but its update op is replaced by
  one shared op that runs after the update ops of all towers and folds the
  metric variables of the other towers into those of the first tower.
  """
  estimator_spec = _asdict(tower_specs[0])
  estimator_spec['mode'] = model_fn_lib.ModeKeys.EVAL
  estimator_spec['loss'] = _compute_sum_on_device(
      [spec.loss for spec in tower_specs], aggregation_device,
      aggregated_loss_name)

  # Update ops of all metrics in all towers.
  update_ops = []
  for tower_spec in tower_specs:
    for name, (_, update_op) in six.iteritems(tower_spec.eval_metric_ops):
      update_ops.append(update_op)

  # The reduction may only run once every tower has updated its variables.
  with ops_lib.control_dependencies(update_ops):
    reduced_update_op = _reduce_metric_variables(len(tower_specs))

  eval_metric_ops = {}
  for name, (metric_tensor, _) in six.iteritems(tower_specs[0].eval_metric_ops):
    eval_metric_ops[name] = (metric_tensor, reduced_update_op)
  estimator_spec['eval_metric_ops'] = eval_metric_ops
  return model_fn_lib.EstimatorSpec(**estimator_spec)


def _reduce_metric_variables(number_of_towers):
  """Aggregate local variables used in metrics into the first tower.

  Args:
    number_of_towers: the number of towers that created metric variables.

  Returns:
    A no-op for a single tower.  Otherwise an op that groups the operations
    which zero out the replica variables; each of them runs after the
    replicas were added to the corresponding first-tower variable.

  Raises:
    ValueError: if the number of variables in the `METRIC_VARIABLES`
      collection is not divisible by `number_of_towers`.
  """
  if number_of_towers == 1:
    return control_flow_ops.no_op(name='no_eval_metric_reduction')

  metric_variables = ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)
  variables_per_tower = len(metric_variables) // number_of_towers

  if len(metric_variables) % number_of_towers != 0:
    raise ValueError(
        'Different `EstimatorSpec.eval_metric_ops` across `model_fn()` calls.'
        ' Expected {} local variables, but got {} instead.'.format(
            variables_per_tower * number_of_towers, len(metric_variables)))

  # `metric_variables` has the size of `variables_per_tower` x
  # number_of_towers. Each tower is produced by calling the same model_fn.
  # The first `variables_per_tower` entries belong to the first tower. The
  # variable at position `i` has a replica at every position
  # `i + variables_per_tower * k`, where `k` is in `[1, number_of_towers)`.
  # We are going to add values from replicas to each variable of the first
  # tower. We then zero out replica values, so that the
  # `_reduce_metric_variables` operation is idempotent. If a metric is then
  # computed based on local variables from the first tower, then the
  # resulting metric is an estimate for all `number_of_towers` towers.
  ops = []
  for i in range(0, variables_per_tower):
    next_replica_id = i + variables_per_tower
    replicas = [
        metric_variables[replica_id]
        for replica_id in range(next_replica_id, len(metric_variables),
                                variables_per_tower)
    ]  # `replicas` doesn't contain the first-tower variable.

    reduce_op = state_ops.assign_add(metric_variables[i],
                                     math_ops.add_n(replicas))

    # Replicas are reset only after their values have been added.
    with ops_lib.control_dependencies([reduce_op]):
      for replica in replicas:
        zeros_for_replica = array_ops.zeros(
            array_ops.shape(replica), dtype=replica.dtype)
        zero_out_replica_op = state_ops.assign(replica, zeros_for_replica)
        ops.append(zero_out_replica_op)

  return control_flow_ops.group(*ops)
def _predict_spec(tower_specs, aggregation_device):
  """Populate replicated EstimatorSpec for `GraphKeys.PREDICT`.

  The spec of the first tower serves as the template.  Predictions and export
  outputs of all towers are merged along the first (batch) axis on
  `aggregation_device`.

  Args:
    tower_specs: the specs returned by the towers, in tower order.
    aggregation_device: the device on which the merge operations are placed.

  Returns:
    An `EstimatorSpec` for the `PREDICT` mode.  Export outputs that are not a
    `PredictOutput`, `RegressionOutput` or `ClassificationOutput` are left
    out.
  """
  estimator_spec = _asdict(tower_specs[0])
  estimator_spec['mode'] = model_fn_lib.ModeKeys.PREDICT

  with ops_lib.device(aggregation_device):
    estimator_spec['predictions'] = _concat_tensor_dicts(
        *[tower_spec.predictions for tower_spec in tower_specs])

    export_outputs_dict = _dict_concat(
        *[tower_spec.export_outputs for tower_spec in tower_specs])

    export_outputs = {}
    for name, export_output_list in six.iteritems(export_outputs_dict):
      if isinstance(export_output_list[0], export_output_lib.PredictOutput):
        export_outputs[name] = export_output_lib.PredictOutput(
            outputs=_concat_tensor_dicts(*[
                export_output.outputs for export_output in export_output_list
            ]))
      elif isinstance(export_output_list[0],
                      export_output_lib.RegressionOutput):
        export_outputs[name] = export_output_lib.RegressionOutput(
            value=array_ops.concat(
                [export_output.value for export_output in export_output_list],
                axis=0))
      elif isinstance(export_output_list[0],
                      export_output_lib.ClassificationOutput):
        scores = None
        if export_output_list[0].scores is not None:
          scores = array_ops.concat(
              [export_output.scores for export_output in export_output_list],
              axis=0)

        classes = None
        if export_output_list[0].classes is not None:
          # `classes` is merged along the batch axis exactly like `scores`.
          # Stacking would add a leading per-tower axis, so `classes` would no
          # longer line up element-wise with the concatenated `scores`.
          classes = array_ops.concat(
              [export_output.classes for export_output in export_output_list],
              axis=0)

        export_outputs[name] = export_output_lib.ClassificationOutput(
            scores=scores, classes=classes)

  estimator_spec['export_outputs'] = export_outputs
  return model_fn_lib.EstimatorSpec(**estimator_spec)
estimator_spec['export_outputs'] = export_outputs + return model_fn_lib.EstimatorSpec(**estimator_spec) + + +def _concat_tensor_dicts(*tensor_dicts): + return { + name: array_ops.concat(tensors, axis=0, name=name) + for name, tensors in six.iteritems(_dict_concat(*tensor_dicts)) + } + + +def _extract_tensors(tensors_and_vars): + tensors = [] + for tensor_and_var in tensors_and_vars: + tensor, _ = tensor_and_var + if isinstance(tensor, ops_lib.IndexedSlices): + tensors.append(tensor.values) + elif tensor is not None: + tensors.append(tensor) + return tensors + + +def _dict_concat(*dicts): + list_dict = {} + for d in dicts: + if d is None: + continue + + for k, v in six.iteritems(d): + list_dict.setdefault(k, []).append(v) + return list_dict + + +def _asdict(namedtuple): + """Returns a namedtuple as a dictionary. + + This is required because `_asdict()` in Python 3.x.x is broken in classes + that inherit from `collections.namedtuple`. See + https://bugs.python.org/issue24931 for more details. + + Args: + namedtuple: An object that inherits from `collections.namedtuple`. + + Returns: + A dictionary version of the tuple. + """ + return {k: getattr(namedtuple, k) for k in namedtuple._fields} diff --git a/tensorflow/python/estimator/replicate_model_fn_test.py b/tensorflow/python/estimator/replicate_model_fn_test.py new file mode 100644 index 0000000000..b6dd4e981f --- /dev/null +++ b/tensorflow/python/estimator/replicate_model_fn_test.py @@ -0,0 +1,1709 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for utilities that replicate `Estimator.model_fn` over GPUs.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import re +import shutil +import tempfile +import numpy as np +import six + +from tensorflow.python.estimator import estimator as estimator_lib +from tensorflow.python.estimator import model_fn as model_fn_lib +from tensorflow.python.estimator import replicate_model_fn +from tensorflow.python.estimator.canned import dnn +from tensorflow.python.estimator.canned import optimizers +from tensorflow.python.estimator.canned import prediction_keys +from tensorflow.python.estimator.export import export +from tensorflow.python.estimator.export import export_output +from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.feature_column import feature_column +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops as ops_lib +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.framework import test_util +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import losses +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import metrics as metrics_lib +from tensorflow.python.ops import variable_scope +from tensorflow.python.ops import variables +from tensorflow.python.ops.losses import losses +from tensorflow.python.platform import gfile +from tensorflow.python.platform import test +from tensorflow.python.saved_model import signature_constants +from tensorflow.python.summary.writer import writer_cache +from tensorflow.python.training import adam +from tensorflow.python.training 
# TODO(isaprykin): Parametrize all the tests on
# replicate_model_fn._VariableDistributionMode when it's supported.
class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase):
  """Trains, evaluates, predicts and exports a replicated `DNNClassifier`."""

  def setUp(self):
    self._model_dir = tempfile.mkdtemp()

  def test_complete_flow_with_public_version(self):
    return self._complete_flow_with_mode(mode=None)

  def test_complete_flow_with_mode_local_ps_server(self):
    return self._complete_flow_with_mode(
        replicate_model_fn._VariableDistributionMode.
        SHARED_LOCAL_PARAMETER_SERVER)

  def test_complete_flow_with_mode_round_robin(self):
    return self._complete_flow_with_mode(
        replicate_model_fn._VariableDistributionMode.SHARED_ROUND_ROBIN)

  def _complete_flow_with_mode(self, mode):
    """Runs the whole estimator flow over three devices.

    Args:
      mode: a `_VariableDistributionMode` value, or None to go through
        `_replicate_model_fn` instead of `_replicate_model_fn_with_mode`.
    """
    n_classes = 3
    input_dimension = 2
    batch_size = 12

    data = np.linspace(
        0., n_classes - 1., batch_size * input_dimension, dtype=np.float32)
    x_data = data.reshape(batch_size, input_dimension)
    # `random_integers` is deprecated.  It sampled from the closed interval
    # [0, len(x_data)], hence the `+ 1` on the exclusive bound of `randint`.
    categorical_data = np.random.randint(
        0, len(x_data) + 1, size=len(x_data))
    y_data = np.reshape(self._as_label(data[:batch_size]), (batch_size, 1))
    train_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        y=y_data,
        batch_size=batch_size,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        y=y_data,
        batch_size=batch_size,
        shuffle=False)
    predict_input_fn = numpy_io.numpy_input_fn(
        x={'x': x_data,
           'categories': categorical_data},
        batch_size=batch_size,
        shuffle=False)

    feature_columns = [
        feature_column.numeric_column('x', shape=(input_dimension,)),
        feature_column.embedding_column(
            feature_column.categorical_column_with_vocabulary_list(
                'categories',
                vocabulary_list=np.linspace(
                    0., len(x_data), len(x_data), dtype=np.int64)), 1)
    ]

    def optimizer_fn():
      return optimizers.get_optimizer_instance('Adagrad', learning_rate=0.05)

    estimator = dnn.DNNClassifier(
        hidden_units=(2, 2),
        # Adagrad is configured with `get_optimizer_instance`, so the function
        # form of `_TowerOptimizer.__init__` is used.
        optimizer=replicate_model_fn._TowerOptimizer(optimizer_fn),
        feature_columns=feature_columns,
        n_classes=n_classes,
        model_dir=self._model_dir)

    if not mode:  # Use the public `replicate_model_fn`.
      model_fn = replicate_model_fn._replicate_model_fn(
          estimator.model_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2'])
    else:
      model_fn = replicate_model_fn._replicate_model_fn_with_mode(
          estimator.model_fn,
          devices=['/gpu:0', '/gpu:1', '/gpu:2'],
          loss_reduction=losses.Reduction.SUM,
          mode=mode)

    estimator = estimator_lib.Estimator(
        model_fn=model_fn,
        model_dir=estimator.model_dir,
        config=estimator.config,
        params=estimator.params)

    num_steps = 10
    estimator.train(train_input_fn, steps=num_steps)

    scores = estimator.evaluate(eval_input_fn)
    self.assertEqual(num_steps, scores[ops_lib.GraphKeys.GLOBAL_STEP])
    self.assertIn('loss', six.iterkeys(scores))

    predicted_proba = np.array([
        x[prediction_keys.PredictionKeys.PROBABILITIES]
        for x in estimator.predict(predict_input_fn)
    ])
    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)

    feature_spec = feature_column.make_parse_example_spec(feature_columns)
    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
        feature_spec)
    # The export goes to its own temporary directory, which has to be removed
    # separately from the model directory.
    export_base_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, export_base_dir, ignore_errors=True)
    export_dir = estimator.export_savedmodel(export_base_dir,
                                             serving_input_receiver_fn)
    self.assertTrue(gfile.Exists(export_dir))

    # Nothing should be left in the graph so that it doesn't get serialized.
    self.assertFalse(ops_lib.get_default_graph().get_collection_ref(
        replicate_model_fn._TowerOptimizer.COLLECTION_FOR_GRAPH_STATES))

  def _as_label(self, data_in_float):
    """Rounds float data to the nearest integer and returns it as int64."""
    return np.rint(data_in_float).astype(np.int64)

  def tearDown(self):
    if self._model_dir:
      writer_cache.FileWriterCache.clear()
      shutil.rmtree(self._model_dir)
+ self.assertFalse(ops_lib.get_default_graph().get_collection_ref( + replicate_model_fn._TowerOptimizer.COLLECTION_FOR_GRAPH_STATES)) + + def _as_label(self, data_in_float): + return np.rint(data_in_float).astype(np.int64) + + def tearDown(self): + if self._model_dir: + writer_cache.FileWriterCache.clear() + shutil.rmtree(self._model_dir) + + +class ReplicateModelTest(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.multiply(features, c) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + + optimizer = replicate_model_fn._TowerOptimizer( + gradient_descent.GradientDescentOptimizer(params['learning_rate'])) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=loss, + eval_metric_ops=metrics, + predictions={'probabilities': predictions}, + train_op=optimizer.minimize(loss)) + + @property + def params(self): + params = {} + params['learning_rate'] = 1.0 + return params + + def test_train(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, + loss_reduction=losses.Reduction.SUM, + devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + session.run(variables.global_variables_initializer()) + + # loss = feature * c - label + total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) + self.assertEqual(total_loss, session.run(estimator_spec.loss)) + + # derivative of loss = (1*c - 1) + (2*c - 2) is 3. 
+ # new value of c = 10 - learning rate * 3 = 7.0. + session.run(estimator_spec.train_op) + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual(7.0, session.run(c)) + + def test_train_with_mean_reduction(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session() as session: + # Add another trainable variable that doesn't produce a gradient to + # verify that None gradients are supported. + _ = variable_scope.get_variable( + 'another_variable', + initializer=constant_op.constant(1, dtype=dtypes.float64), + dtype=dtypes.float64) + + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + session.run(variables.global_variables_initializer()) + + # loss = feature * c - label + total_loss = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0)) / 2.0 + self.assertEqual(total_loss, session.run(estimator_spec.loss)) + + # derivative of loss = (1*c - 1)/2 + (2*c - 2)/2 is 1.5. + # It's the same computation as without mean reduction, but the + # loss from every tower is scaled by 1/. 
+ # new value of c = 10 - learning rate * 1.5 = 8.5 + session.run(estimator_spec.train_op) + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual(8.5, session.run(c)) + + def test_train_two_steps_collected_gradients_are_reset_between_steps(self): + with ops_lib.Graph().as_default(): + features = array_ops.placeholder(dtypes.float64) + labels = array_ops.placeholder(dtypes.float64) + + feature_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]]) + label_inputs = np.array([[1.0], [2.0]]), np.array([[1.5], [2.5]]) + + # loss = feature * c - label + expected_losses = ((1.0 * 10 - 1.0) + (2.0 * 10 - 2.0), + (1.5 * 7.0 - 1.5) + (2.5 * 7.0 - 2.5)) + # Derivative of the loss is 1.0 + 2.0 for the first step and 1.5 + 2.5 + # for the second. + expected_c = 10.0 - 3.0, 7.0 - 4.0 + + with self.test_session() as session, variable_scope.variable_scope( + '', reuse=variable_scope.AUTO_REUSE): + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, + loss_reduction=losses.Reduction.SUM, + devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + session.run(variables.global_variables_initializer()) + + for feature_input, label_input, loss, weight in zip( + feature_inputs, label_inputs, expected_losses, expected_c): + feeds = {features: feature_input, labels: label_input} + + self.assertEqual(loss, session.run(estimator_spec.loss, feeds)) + + session.run(estimator_spec.train_op, feeds) + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual(weight, session.run(c, feeds)) + + def test_eval(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, + loss_reduction=losses.Reduction.SUM, + devices=['/gpu:0', '/gpu:1']) + 
estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.EVAL, self.params) + session.run(variables.local_variables_initializer()) + session.run(variables.global_variables_initializer()) + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + session.run([a, b]) + accuracy = session.run(accuracy) + auc = session.run(auc) + + # loss[i] = features[i] * 10 - labels[i]. + # Accuracy is 0.0 (no match) in the first tower. + # Accuracy is 1.0 (match) in the second tower, since the feature + # times weight "c" happened to be equal to the label. + total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) + + self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) + + def test_eval_with_mean_reduction(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.EVAL, self.params) + session.run(variables.local_variables_initializer()) + session.run(variables.global_variables_initializer()) + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + session.run([a, b]) + accuracy = session.run(accuracy) + auc = session.run(auc) + + # loss[i] = features[i] * 10 - labels[i]. + # Accuracy is 0.0 (no match) in the first tower. + # Accuracy is 1.0 (match) in the second tower, since the feature + # times weight "c" happened to be equal to the label. 
+ total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) / 2.0 + + self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) + + def test_predict(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) + session.run(variables.global_variables_initializer()) + + self.assertAllClose({ + 'probabilities': np.array([[0.1], [0.02]]) + }, session.run(estimator_spec.predictions)) + + def test_train_single_tower(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + session.run(variables.global_variables_initializer()) + + # loss = feature * c - label + total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) + self.assertEqual(total_loss, session.run(estimator_spec.loss)) + + # loss' of c is 3. + # new value of c = 10 - learning rate * 3 = 7.0. 
+ session.run(estimator_spec.train_op) + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual(7.0, session.run(c)) + + def test_eval_single_tower(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.EVAL, self.params) + session.run(variables.local_variables_initializer()) + session.run(variables.global_variables_initializer()) + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + session.run([a, b]) + accuracy = session.run(accuracy) + auc = session.run(auc) + + # Accuracy is 0.0 (no match) in the first tower. + # Accuracy is 1.0 (match) in the second tower, since the feature + # times weight "c" happened to be equal to the label. 
+ total_loss = ((0.01 * 10 - 0.01) + (0.002 * 10 - 0.02)) + + self.assertNear((0.0 + 1.0) / 2.0, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertNear(total_loss, session.run(estimator_spec.loss), 0.01) + + def test_predict_single_tower(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) + session.run(variables.global_variables_initializer()) + + self.assertAllClose({ + 'probabilities': np.array([[0.1], [0.02]]) + }, session.run(estimator_spec.predictions)) + + def test_batch_size_that_is_not_divisible_by_the_number_of_gpus(self): + features = np.array([[1.0], [2.0], [3.0]]) + labels = np.array([[1.0], [2.0], [3.0]]) + + with self.assertRaisesRegexp( + ValueError, '.*Batch.+size.+needs.+to.+be.+divisible.+by.+GPUs.+'): + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0', '/gpu:1']) + _ = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + + def test_unsupported_loss_reduction(self): + with self.assertRaisesRegexp(ValueError, + '.+none.+reduction.+is.+specified.+'): + _ = replicate_model_fn._replicate_model_fn(self.model_fn, + losses.Reduction.NONE) + + def test_places_on_gpu_with_upper_case_spelling(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session(): + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/GPU:0']) + _ = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual('/device:GPU:0', c.device) + + def 
test_places_on_gpu_with_lower_case_spelling(self): + features = np.array([[0.01], [0.002]]) + labels = np.array([[0.01], [0.02]]) + + with self.test_session(): + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0']) + _ = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual('/device:GPU:0', c.device) + + +class ReplicateAcrossASingleDeviceWithoutTowerOptimizer( + test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.multiply(features, c) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + + optimizer = gradient_descent.GradientDescentOptimizer( + params['learning_rate']) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=loss, + eval_metric_ops=metrics, + predictions={'probabilities': predictions}, + train_op=optimizer.minimize(loss)) + + @property + def params(self): + params = {} + params['learning_rate'] = 1.0 + return params + + def test_train_single_tower(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0']) + estimator_spec = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) + session.run(variables.global_variables_initializer()) + + # loss = feature * c - label + total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) + self.assertEqual(total_loss, 
session.run(estimator_spec.loss)) + + # loss' of c is 3. + # new value of c = 10 - learning rate * 3 = 7.0. + session.run(estimator_spec.train_op) + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual(7.0, session.run(c)) + + +class UseTowerEstimatorWithoutReplication(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + + features = features['features'] + predictions = math_ops.multiply(features, c) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + + optimizer = replicate_model_fn._TowerOptimizer( + gradient_descent.GradientDescentOptimizer(params['learning_rate'])) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=loss, + eval_metric_ops=metrics, + predictions={'probabilities': predictions}, + train_op=optimizer.minimize(loss)) + + @property + def params(self): + params = {} + params['learning_rate'] = 1.0 + return params + + def test_train_single_tower(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + train_input_fn = numpy_io.numpy_input_fn( + x={'features': features}, y=labels, batch_size=2, shuffle=False) + + with self.test_session(): + estimator = estimator_lib.Estimator( + model_fn=self.model_fn, + model_dir=tempfile.mkdtemp(), + params=self.params) + estimator.train(train_input_fn, steps=1) + + self.assertEqual(7.0, estimator.get_variable_value('c')) + + +class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + 
initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + + features = features['features'] + predictions = math_ops.multiply(features, c) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + + optimizer = gradient_descent.GradientDescentOptimizer( + params['learning_rate']) + optimizer = training.SyncReplicasOptimizer( + optimizer, replicas_to_aggregate=1) + sync_hook = optimizer.make_session_run_hook(True) + optimizer = replicate_model_fn._TowerOptimizer(optimizer) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=loss, + eval_metric_ops=metrics, + training_hooks=[sync_hook], + predictions={'probabilities': predictions}, + train_op=optimizer.minimize( + loss, global_step=training.get_global_step())) + + @property + def params(self): + params = {} + params['learning_rate'] = 1.0 + return params + + def test_train_multiple_towers(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + train_input_fn = numpy_io.numpy_input_fn( + x={'features': features}, y=labels, batch_size=2, shuffle=False) + + model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, + loss_reduction=losses.Reduction.SUM, + devices=['/gpu:0', '/gpu:1']) + + estimator = estimator_lib.Estimator( + model_fn=model_fn, model_dir=tempfile.mkdtemp(), params=self.params) + estimator.train(train_input_fn, steps=1) + + self.assertEqual(7.0, estimator.get_variable_value('c')) + + +class ReplicateWithTwoOptimizersTest(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + + side_effects = variable_scope.get_variable( + 'side_effects', + 
initializer=constant_op.constant(0, dtype=dtypes.float64), + dtype=dtypes.float64, + use_resource=True, + trainable=False) + + predictions = math_ops.multiply(features, c) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + + first_optimizer = replicate_model_fn._TowerOptimizer( + gradient_descent.GradientDescentOptimizer(1.0)) + second_optimizer = replicate_model_fn._TowerOptimizer( + adam.AdamOptimizer(1.0)) + + with ops_lib.control_dependencies([side_effects.assign_add(1.0)]): + first_grads_and_vars = first_optimizer.compute_gradients(loss) + + train_op = control_flow_ops.group( + [first_optimizer.apply_gradients(first_grads_and_vars), + second_optimizer.minimize(loss)]) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=loss, + eval_metric_ops=metrics, + predictions={'probabilities': predictions}, + train_op=train_op) + + def test_train(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, + loss_reduction=losses.Reduction.SUM, + devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn(features, labels, + model_fn_lib.ModeKeys.TRAIN, {}) + session.run(variables.global_variables_initializer()) + + # loss = feature * c - label + total_loss = (1.0 * 10 - 1.0) + (2.0 * 10 - 2.0) + self.assertEqual(total_loss, session.run(estimator_spec.loss)) + + # loss' of c is 3. + # new value of c = 10 - learning rate * 3 = 7.0. + # Adam subtracts another ~1. 
+ session.run(estimator_spec.train_op) + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertNear(6.0, session.run(c), 0.000001) + + side_effects = variable_scope.get_variable( + 'side_effects', dtype=dtypes.float64) + self.assertNear(2.0, session.run(side_effects), 0.000001) + + +class ReplicateWithTwoLossesAndOneOptimizer(test_util.TensorFlowTestCase): + + def setUp(self): + self._should_skip_optimizer = False + self._towers_left_before_skipping_optimizer = -1 + + def incorrectly_skip_optimizer_for_tower(self, tower_number): + self._should_skip_optimizer = True + self._towers_left_before_skipping_optimizer = tower_number + + def should_skip_optimizer(self): + if not self._should_skip_optimizer: + return False + if self._towers_left_before_skipping_optimizer == 0: + return True + else: + self._towers_left_before_skipping_optimizer -= 1 + return False + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + d = variable_scope.get_variable( + 'd', + initializer=constant_op.constant(2, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.multiply(features, c) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) + + another_predictions = math_ops.multiply(features, d) + another_loss = losses.absolute_difference( + labels=labels, + predictions=another_predictions, + reduction=losses.Reduction.SUM) + another_loss = math_ops.reduce_sum(another_loss) + + total_loss = math_ops.add(loss, another_loss) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + + train_ops = [] + + optimizer = replicate_model_fn._TowerOptimizer( + gradient_descent.GradientDescentOptimizer(1.0)) + 
train_ops.append(optimizer.minimize(loss, var_list=[c])) + if not self.should_skip_optimizer(): + another_optimizer = replicate_model_fn._TowerOptimizer( + gradient_descent.GradientDescentOptimizer(1.0)) + train_ops.append(another_optimizer.minimize(another_loss, var_list=[d])) + + train_op = control_flow_ops.group(train_ops) + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=total_loss, + eval_metric_ops=metrics, + predictions={'probabilities': predictions}, + train_op=train_op) + + def test_train(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session() as session: + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, + loss_reduction=losses.Reduction.SUM, + devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn(features, labels, + model_fn_lib.ModeKeys.TRAIN, {}) + session.run(variables.global_variables_initializer()) + + # For each tower, loss = (feature * c - label) + (feature * d - label). + total_loss = (1.0 * 10 - 1.0 + 1.0 * 2.0 - 1.0) + ( + 2.0 * 10 - 2.0 + 2.0 * 2.0 - 2.0) + self.assertEqual(total_loss, session.run(estimator_spec.loss)) + + session.run(estimator_spec.train_op) + + # loss' of c or loss' of d is 3. + # new value of c = 10 - learning rate * 3 = 7.0. + # new value of d = 2 - learning rate * 3 = -1.0. 
+ with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertNear(7.0, session.run(c), 0.000001) + d = variable_scope.get_variable('d', dtype=dtypes.float64) + self.assertNear(-1.0, session.run(d), 0.000001) + + def test_different_optimizer_calls_within_towers(self): + self.incorrectly_skip_optimizer_for_tower(1) + + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session(), ops_lib.Graph().as_default(): + with self.assertRaisesRegexp( + ValueError, '.+was.+supposed.+to.+make.+same.+optimizer.+calls.+'): + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0', '/gpu:1']) + _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, + {}) + + +class FailToWrapOptimizerInTheModelFn(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.multiply(features, c) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + + optimizer = gradient_descent.GradientDescentOptimizer(1.0) + train_op = optimizer.minimize(loss) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=loss, + eval_metric_ops=metrics, + predictions={'probabilities': predictions}, + train_op=train_op) + + def test_train(self): + features = np.array([[1.0], [2.0]]) + labels = np.array([[1.0], [2.0]]) + + with self.test_session(): + with self.assertRaisesRegexp(ValueError, + 'Please.+wrap.+with.+_TowerOptimizer'): + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0', '/gpu:1']) + _ = 
replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, + {}) + + +class GetLossTowersTest(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(0.25, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c) + labels = np.array([0.1, 0.2, 0.3, labels[0]]) + + loss = losses.absolute_difference( + labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + + return model_fn_lib.EstimatorSpec(mode=mode, loss=math_ops.reduce_sum(loss)) + + def test_gradients_are_computed(self): + with self.test_session() as session: + tower_specs = replicate_model_fn._get_loss_towers( + self.model_fn, + mode=None, + features=[[0.6], [1.6]], + labels=[[0.6], [0.6]], + params=None, + config=None, + loss_reduction=losses.Reduction.SUM, + devices=['/gpu:0', '/gpu:1'], + local_ps_devices=['/gpu:0'], + name_scope_pattern='test_tower_{}') + session.run(variables.global_variables_initializer()) + + self.assertEqual(len(tower_specs), 2) + + self.assertEqual('/device:GPU:0', tower_specs[0].loss.device) + self.assertEqual('Sum:0', tower_specs[0].loss.name) + self.assertEqual(1.0, session.run(tower_specs[0].loss)) + + self.assertEqual('/device:GPU:1', tower_specs[1].loss.device) + self.assertEqual('test_tower_1/Sum:0', tower_specs[1].loss.name) + # The input batch for the second tower had a loss that is 1.0 + # bigger: 0.6 vs 1.6. 
+ self.assertEqual(2.0, session.run(tower_specs[1].loss)) + + self.assertEqual(1, len(variables.global_variables())) + self.assertEqual(1, len(variables.trainable_variables())) + + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual(0.25, session.run(c)) + + def test_gradients_are_computed_with_mean_reduction(self): + with self.test_session() as session: + tower_specs = replicate_model_fn._get_loss_towers( + self.model_fn, + mode=model_fn_lib.ModeKeys.EVAL, + features=[[0.6], [1.6]], + labels=[[0.6], [0.6]], + params=None, + loss_reduction=losses.Reduction.MEAN, + config=None, + devices=['/gpu:0', '/gpu:1'], + local_ps_devices=['/gpu:0'], + name_scope_pattern='test_tower_{}') + session.run(variables.global_variables_initializer()) + + self.assertEqual(len(tower_specs), 2) + + self.assertEqual('/device:GPU:0', tower_specs[0].loss.device) + self.assertEqual('averaged_loss:0', tower_specs[0].loss.name) + self.assertEqual(0.5, session.run(tower_specs[0].loss)) + + self.assertEqual('/device:GPU:1', tower_specs[1].loss.device) + self.assertEqual('test_tower_1/averaged_loss:0', tower_specs[1].loss.name) + # The input batch for the second tower had a loss that is 1.0 + # bigger: 0.6 vs 1.6. 
+ self.assertEqual(1.0, session.run(tower_specs[1].loss)) + + self.assertEqual(1, len(variables.global_variables())) + self.assertEqual(1, len(variables.trainable_variables())) + + with variable_scope.variable_scope('', reuse=True): + c = variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual(0.25, session.run(c)) + + def test_variables_are_round_robined_correctly(self): + """Test that creates multiple variables and tests round-robin placement.""" + + def model_fn(mode, features, labels, params): + del params + for variable_name in ['a', 'b', 'c', 'd']: + c = variable_scope.get_variable( + variable_name, + initializer=constant_op.constant(0.25, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c) + labels = np.array([0.1, 0.2, 0.3, labels[0]]) + loss = losses.absolute_difference( + labels=labels, + predictions=predictions, + reduction=losses.Reduction.SUM) + return model_fn_lib.EstimatorSpec( + mode=mode, loss=math_ops.reduce_sum(loss)) + + with self.test_session() as session: + tower_specs = replicate_model_fn._get_loss_towers( + model_fn, + mode=None, + features=[[0.6], [1.6], [2.6]], + labels=[[0.6], [0.6], [2.6]], + params=None, + loss_reduction=losses.Reduction.SUM, + config=None, + devices=['/gpu:0', '/gpu:1', '/gpu:3'], + local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'], + name_scope_pattern='test_tower_{}') + session.run(variables.global_variables_initializer()) + + self.assertEqual(len(tower_specs), 3) + self.assertEqual('/device:GPU:0', tower_specs[0].loss.device) + self.assertEqual('/device:GPU:1', tower_specs[1].loss.device) + self.assertEqual('/device:GPU:3', tower_specs[2].loss.device) + + with variable_scope.variable_scope('', reuse=True): + a = variable_scope.get_variable('a', dtype=dtypes.float64) + self.assertEqual('/device:GPU:0', a.device) + b = variable_scope.get_variable('b', dtype=dtypes.float64) + self.assertEqual('/device:GPU:1', b.device) + c = 
variable_scope.get_variable('c', dtype=dtypes.float64) + self.assertEqual('/device:GPU:3', c.device) + d = variable_scope.get_variable('d', dtype=dtypes.float64) + self.assertEqual('/device:GPU:0', d.device) + + +class SplitBatchTest(test_util.TensorFlowTestCase): + + def evaluate_shards(self, first_list, second_list): + evaluate_items = lambda x: x.eval() + return list(map(evaluate_items, first_list)), list( + map(evaluate_items, second_list)) + + def assertSparseValuesEqual(self, a, b): + self.assertAllEqual(a.indices, b.indices) + self.assertAllEqual(a.values, b.values) + self.assertAllEqual(a.dense_shape, b.dense_shape) + + def test_simple_half_split(self): + with self.test_session(): + features = [0.0, 1.0, 2.0, 3.0] + labels = [10.0, 11.0, 12.0, 13.0] + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 2, device='/gpu:0') + + feature_shards, label_shards = self.evaluate_shards( + feature_shards, label_shards) + + self.assertAllEqual([[0.0, 1.0], [2.0, 3.0]], feature_shards) + self.assertAllEqual([[10.0, 11.0], [12.0, 13.0]], label_shards) + + def test_to_each_their_own(self): + with self.test_session(): + features = [0.0, 1.0, 2.0, 3.0] + labels = [10.0, 11.0, 12.0, 13.0] + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 4, device='/gpu:0') + + feature_shards, label_shards = self.evaluate_shards( + feature_shards, label_shards) + + self.assertAllEqual([[0.0], [1.0], [2.0], [3.0]], feature_shards) + self.assertAllEqual([[10.0], [11.0], [12.0], [13.0]], label_shards) + + def test_one_batch(self): + with self.test_session(): + features = [0.0, 1.0, 2.0, 3.0] + labels = [10.0, 11.0, 12.0, 13.0] + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 1, device='/gpu:0') + + feature_shards, label_shards = self.evaluate_shards( + feature_shards, label_shards) + + self.assertAllEqual([[0.0, 1.0, 2.0, 3.0]], feature_shards) + self.assertAllEqual([[10.0, 11.0, 12.0, 
13.0]], label_shards) + + def test_half_split_in_dictionary(self): + with self.test_session(): + features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]} + labels = [10.0, 11.0, 12.0, 13.0] + + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 2, device='/gpu:0') + + self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval()) + self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval()) + self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval()) + self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval()) + self.assertAllEqual([10.0, 11.0], label_shards[0].eval()) + self.assertAllEqual([12.0, 13.0], label_shards[1].eval()) + + def test_sparse_tensor_can_be_split_unevenly(self): + with self.test_session(): + features = { + 'x': + sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 2], [2, 2]], + values=[1.0, 2.0, 3.0], + dense_shape=[3, 4]) + } + labels = np.array([[1.0], [2.0]]) + + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 2, device='/gpu:0') + + self.assertSparseValuesEqual( + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [1, 2]], values=[1., 2.], dense_shape=[2, 4]), + feature_shards[0]['x'].eval()) + self.assertSparseValuesEqual( + sparse_tensor.SparseTensorValue( + indices=[[0, 2]], values=[3.], dense_shape=[1, 4]), + feature_shards[1]['x'].eval()) + self.assertAllEqual([[1.0]], label_shards[0].eval()) + self.assertAllEqual([[2.0]], label_shards[1].eval()) + + def test_sparse_tensor_can_be_split_unevenly_repeated_row(self): + with self.test_session(): + features = { + 'x': + sparse_tensor.SparseTensor( + indices=[[0, 0], [1, 0], [1, 1]], + values=[1.0, 2.0, 3.0], + dense_shape=[3, 4]) + } + labels = np.array([[1.0], [2.0]]) + + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 2, device='/gpu:0') + + self.assertSparseValuesEqual( + sparse_tensor.SparseTensorValue( + indices=[[0, 0], [1, 0], [1, 
1]], + values=[1., 2., 3.], + dense_shape=[2, 4]), feature_shards[0]['x'].eval()) + + second_batch = feature_shards[1]['x'].eval() + self.assertFalse(len(second_batch.indices)) + self.assertFalse(len(second_batch.values)) + self.assertAllEqual([1, 4], second_batch.dense_shape) + self.assertAllEqual([[1.0]], label_shards[0].eval()) + self.assertAllEqual([[2.0]], label_shards[1].eval()) + + def test_one_batch_in_dictionary(self): + with self.test_session() as session: # pylint: disable=unused-variable + features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]} + labels = [10.0, 11.0, 12.0, 13.0] + + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 1, device='/gpu:0') + + self.assertAllEqual([0.0, 1.0, 2.0, 3.0], + feature_shards[0]['first'].eval()) + self.assertAllEqual([4.0, 5.0, 6.0, 7.0], + feature_shards[0]['second'].eval()) + self.assertAllEqual([10.0, 11.0, 12.0, 13.0], label_shards[0].eval()) + + def test_feature_and_label_dictionaries(self): + with self.test_session() as session: # pylint: disable=unused-variable + features = {'first': [0.0, 1.0, 2.0, 3.0], 'second': [4.0, 5.0, 6.0, 7.0]} + labels = {'first': [10.0, 11.0], 'second': [12.0, 13.0]} + + feature_shards, label_shards = replicate_model_fn._split_batch( + features, labels, 2, device='/gpu:0') + + self.assertAllEqual([0.0, 1.0], feature_shards[0]['first'].eval()) + self.assertAllEqual([4.0, 5.0], feature_shards[0]['second'].eval()) + self.assertAllEqual([2.0, 3.0], feature_shards[1]['first'].eval()) + self.assertAllEqual([6.0, 7.0], feature_shards[1]['second'].eval()) + self.assertAllEqual([10.0], label_shards[0]['first'].eval()) + self.assertAllEqual([12.0], label_shards[0]['second'].eval()) + self.assertAllEqual([11], label_shards[1]['first'].eval()) + self.assertAllEqual([13.0], label_shards[1]['second'].eval()) + + +class TrainSpecTest(test_util.TensorFlowTestCase): + + expected_predictions = {} + + def create_estimator_spec(self, loss): + 
return model_fn_lib.EstimatorSpec( + mode=model_fn_lib.ModeKeys.TRAIN, + loss=loss, + train_op=loss, # Not used; currently required. + predictions=self.expected_predictions) + + def create_constant_loss(self, loss_value): + return constant_op.constant(loss_value, dtype=dtypes.float64) + + def test_example(self): + with self.test_session() as session: + tower_losses = list(map(self.create_constant_loss, [2, 4, 6])) + tower_specs = list(map(self.create_estimator_spec, tower_losses)) + + expected_train_op = tower_losses[1] + + estimator_spec = replicate_model_fn._train_spec( + tower_specs, expected_train_op, aggregation_device='/gpu:0') + + self.assertEqual(expected_train_op, estimator_spec.train_op) + self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) + self.assertEqual(self.expected_predictions, estimator_spec.predictions) + + +class EvalSpecTest(test_util.TensorFlowTestCase): + + def create_estimator_spec(self, loss, metrics): + return model_fn_lib.EstimatorSpec( + mode=model_fn_lib.ModeKeys.EVAL, loss=loss, eval_metric_ops=metrics) + + def create_constant_loss(self, loss_value): + return constant_op.constant(loss_value, dtype=dtypes.float64) + + def create_eval_metrics(self, noise): + predictions = np.array([0.1, 0.2, 0.3, 0.6 + noise]) + labels = np.array([0.1, 0.2, 0.3, 0.6]) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } + return metrics + + def test_example(self): + with self.test_session() as session: + tower_losses = map(self.create_constant_loss, [2, 4, 6]) + tower_metrics = map(self.create_eval_metrics, [0, 0.2, 0.3]) + tower_specs = [ + self.create_estimator_spec(l, m) + for l, m in zip(tower_losses, tower_metrics) + ] + session.run(variables.local_variables_initializer()) + + estimator_spec = replicate_model_fn._eval_spec( + tower_specs, aggregation_device='/device:GPU:0') + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = 
estimator_spec.eval_metric_ops['auc'] + + self.assertEqual('/device:CPU:0', accuracy.device) + self.assertEqual('/device:CPU:0', auc.device) + + session.run([a, b]) + accuracy, auc = session.run([accuracy, auc]) + + self.assertNear((12 - 2) / 12, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertEqual(2 + 4 + 6, session.run(estimator_spec.loss)) + + def test_handles_single_tower(self): + with self.test_session() as session: + tower_losses = map(self.create_constant_loss, [5]) + tower_metrics = map(self.create_eval_metrics, [0.2]) + tower_specs = [ + self.create_estimator_spec(l, m) + for l, m in zip(tower_losses, tower_metrics) + ] + session.run(variables.local_variables_initializer()) + + estimator_spec = replicate_model_fn._eval_spec( + tower_specs, aggregation_device='/device:GPU:0') + + accuracy, a = estimator_spec.eval_metric_ops['accuracy'] + auc, b = estimator_spec.eval_metric_ops['auc'] + + self.assertEqual('/device:CPU:0', accuracy.device) + self.assertEqual('/device:CPU:0', auc.device) + + session.run([a, b]) + accuracy = session.run(accuracy) + auc = session.run(auc) + + self.assertNear((4 - 1) / 4, accuracy, 0.01) + self.assertEqual(0, auc) + self.assertEqual(5, session.run(estimator_spec.loss)) + + +class PredictSpecTest(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(0.25, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = math_ops.add(np.array([features[0], features[0]]), c) + + return model_fn_lib.EstimatorSpec( + mode=model_fn_lib.ModeKeys.PREDICT, + predictions={ + 'probabilities': predictions + }) + + def test_example(self): + with self.test_session() as session: + tower_specs = replicate_model_fn._get_loss_towers( + self.model_fn, + mode=None, + features=[[0.1], [0.2]], + loss_reduction=losses.Reduction.SUM, + labels=[[], []], + params=None, + config=None, + devices=['/gpu:0', '/gpu:1'], + 
local_ps_devices=['/gpu:0'], + ) + session.run(variables.global_variables_initializer()) + + estimator_spec = replicate_model_fn._predict_spec( + tower_specs, aggregation_device='/gpu:0') + + self.assertEqual('/device:GPU:0', + estimator_spec.predictions['probabilities'].device) + self.assertAllClose({ + 'probabilities': np.array([0.35, 0.35, 0.45, 0.45]) + }, session.run(estimator_spec.predictions)) + + +class ReduceMetricVariablesTest(test_util.TensorFlowTestCase): + + def create_metric_variable(self, initial_value, name): + return variable_scope.variable( + initial_value, + trainable=False, + collections=[ops_lib.GraphKeys.METRIC_VARIABLES], + validate_shape=True, + name=name) + + def create_tower_metrics(self, tower_id): + with variable_scope.variable_scope('', reuse=(tower_id != 0)): + self.create_metric_variable(1.3 * (tower_id + 1), 'total') + self.create_metric_variable(2.3 * (tower_id + 1), 'count') + self.create_metric_variable( + np.array([3.3, 3.5, 3.7]) * (tower_id + 1), 'total') + + def test_example(self): + with self.test_session() as session: + for tower_id in range(3): + self.create_tower_metrics(tower_id) + + session.run( + variables.variables_initializer( + ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) + + session.run( + replicate_model_fn._reduce_metric_variables(number_of_towers=3)) + + # 1st tower = 1.3, 2.3, [3.3, 3.5, 3.7] + # 2nd tower = 2.6, 4.6, [6.6, 7.0, 7.4] + # 3rd tower = 3.9, 6.9, [9.9, 10.5, 11.1] + # Reduced = 7.8, 13.8, [19.8, 21.0, 22.2] + # Towers are accumulated in the first tower. 
+ local_metrics = session.run( + ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)) + + self.assertNear(7.8, local_metrics[0], 0.01) + self.assertNear(13.8, local_metrics[1], 0.01) + self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01) + self.assertNear(0.0, local_metrics[3], 0.01) + self.assertNear(0.0, local_metrics[4], 0.01) + self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01) + self.assertNear(0.0, local_metrics[6], 0.01) + self.assertNear(0.0, local_metrics[7], 0.01) + self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01) + + def test_reduce_is_idempotent(self): + with self.test_session() as session: + for tower_id in range(3): + self.create_tower_metrics(tower_id) + + session.run( + variables.variables_initializer( + ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) + + for _ in range(20): + session.run( + replicate_model_fn._reduce_metric_variables(number_of_towers=3)) + + local_metrics = session.run( + ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)) + + self.assertNear(7.8, local_metrics[0], 0.01) + self.assertNear(13.8, local_metrics[1], 0.01) + self.assertAllClose([19.8, 21., 22.1], local_metrics[2], 0.01) + self.assertNear(0.0, local_metrics[3], 0.01) + self.assertNear(0.0, local_metrics[4], 0.01) + self.assertAllClose([0.0, 0.0, 0.0], local_metrics[5], 0.01) + self.assertNear(0.0, local_metrics[6], 0.01) + self.assertNear(0.0, local_metrics[7], 0.01) + self.assertAllClose([0.0, 0.0, 0.0], local_metrics[8], 0.01) + + def test_handles_single_tower(self): + with self.test_session() as session: + self.create_tower_metrics(0) + session.run( + variables.variables_initializer( + ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) + + session.run( + replicate_model_fn._reduce_metric_variables(number_of_towers=1)) + + local_metrics = session.run( + ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES)) + + self.assertNear(1.3, local_metrics[0], 0.01) + self.assertNear(2.3, local_metrics[1], 
0.01) + self.assertAllClose([3.3, 3.5, 3.7], local_metrics[2], 0.01) + + def test_doesnt_accept_uneven_number_of_variables(self): + with self.test_session() as session: + for tower_id in range(3): + self.create_tower_metrics(tower_id) + self.create_metric_variable(-1.0, 'oddball') + + session.run( + variables.variables_initializer( + ops_lib.get_collection(ops_lib.GraphKeys.METRIC_VARIABLES))) + + with self.assertRaisesRegexp( + ValueError, '.+Expected.+local.+variables.+but.+got.+instead.+'): + session.run( + replicate_model_fn._reduce_metric_variables(number_of_towers=3)) + + +class MergeExportOutputsTest(test_util.TensorFlowTestCase): + + def model_fn(self, mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) + + predictions = {'probabilities': math_ops.multiply(features, c)} + loss = losses.absolute_difference( + labels=labels, + predictions=predictions['probabilities'], + reduction=losses.Reduction.SUM) + + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions['probabilities']), + 'auc': metrics_lib.auc(labels, predictions['probabilities']) + } + tensor_string_repr = str(features) + classes = constant_op.constant( + re.search('(split_inputs/split:[0-9])', tensor_string_repr).group(1), + dtype=dtypes.string) + + export_outputs = { + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: + export_output.PredictOutput(predictions), + 'classification_output': + export_output.ClassificationOutput(predictions['probabilities'], + classes), + 'classification_scores': + export_output.ClassificationOutput( + scores=predictions['probabilities']), + 'classification_classes': + export_output.ClassificationOutput(classes=classes), + 'regression_output': + export_output.RegressionOutput(predictions['probabilities']), + } + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=math_ops.reduce_sum(loss), + eval_metric_ops=metrics, + 
predictions=predictions, + export_outputs=export_outputs) + + def replicate_estimator_spec(self, session): + features = np.array([0.01, 0.002]) + labels = np.array([0.01, 0.02]) + + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.model_fn, devices=['/gpu:0', '/gpu:1']) + estimator_spec = replicated_model_fn(features, labels, + model_fn_lib.ModeKeys.PREDICT, {}) + session.run(variables.global_variables_initializer()) + return estimator_spec + + def test_merge_predict_output(self): + with self.test_session() as session: + estimator_spec = self.replicate_estimator_spec(session) + self.assertAllClose( + { + 'probabilities': np.array([0.1, 0.02]) + }, + session.run(estimator_spec.export_outputs[ + signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY].outputs)) + + def test_merge_classification_output_scores_classes(self): + with self.test_session() as session: + estimator_spec = self.replicate_estimator_spec(session) + self.assertAllClose( + [0.1, 0.02], + session.run( + estimator_spec.export_outputs['classification_output'].scores)) + self.assertAllEqual( + [b'split_inputs/split:0', b'split_inputs/split:1'], + session.run( + estimator_spec.export_outputs['classification_output'].classes)) + + def test_merge_classification_output_scores(self): + with self.test_session() as session: + estimator_spec = self.replicate_estimator_spec(session) + self.assertAllClose( + [0.1, 0.02], + session.run( + estimator_spec.export_outputs['classification_scores'].scores)) + self.assertEqual( + None, estimator_spec.export_outputs['classification_scores'].classes) + + def test_merge_classification_output_classes(self): + with self.test_session() as session: + estimator_spec = self.replicate_estimator_spec(session) + self.assertAllEqual( + [b'split_inputs/split:0', b'split_inputs/split:1'], + session.run( + estimator_spec.export_outputs['classification_classes'].classes)) + self.assertEqual( + None, estimator_spec.export_outputs['classification_classes'].scores) + + 
def test_merge_regression_output(self): + with self.test_session() as session: + estimator_spec = self.replicate_estimator_spec(session) + self.assertAllClose( + [0.1, 0.02], + session.run(estimator_spec.export_outputs['regression_output'].value)) + + +class GetLocalDevicesTest(test_util.TensorFlowTestCase): + + def test_there_is_at_least_a_cpu(self): + self.assertTrue(replicate_model_fn._get_local_devices('CPU')) + + def test_there_is_no_xpu(self): + self.assertFalse( + replicate_model_fn._get_local_devices('XPU')) # XPU doesn't exist. + + def test_whether_there_is_a_gpu(self): + if test.is_gpu_available(): + self.assertTrue(len(replicate_model_fn._get_local_devices('GPU'))) + + +class LocalDeviceSetterTest(test_util.TensorFlowTestCase): + + def test_vars_are_on_ps_but_ops_are_on_workers(self): + ps_devices = ['/device:GPU:3'] + round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices)) + + local_device_setter = replicate_model_fn._local_device_setter( + ps_devices=ps_devices, + ps_strategy=round_robin, + worker_device='/device:GPU:2') + + with ops_lib.device(local_device_setter): + a = variables.Variable(0.01) + self.assertEqual('/device:GPU:3', a.device) + + b = variables.Variable(0.02) + self.assertEqual('/device:GPU:3', b.device) + + c = variables.Variable(0.03) + self.assertEqual('/device:GPU:3', c.device) + + a_op = array_ops.concat(a, axis=0) + self.assertEqual('/device:GPU:2', a_op.device) + + b_op = array_ops.concat(b, axis=0) + self.assertEqual('/device:GPU:2', b_op.device) + + def test_round_robin_placement(self): + ps_devices = [ + '/device:GPU:0', '/device:GPU:1', '/device:GPU:3', '/device:GPU:4' + ] + round_robin = device_setter._RoundRobinStrategy(num_tasks=len(ps_devices)) + + local_device_setter = replicate_model_fn._local_device_setter( + ps_devices=ps_devices, + ps_strategy=round_robin, + worker_device='/device:GPU:2') + + with ops_lib.device(local_device_setter): + a = variables.Variable(0.01) + self.assertEqual('/device:GPU:0', 
a.device) + + b = variables.Variable(0.02) + self.assertEqual('/device:GPU:1', b.device) + + c = variables.Variable(0.03) + self.assertEqual('/device:GPU:3', c.device) + + a_op = array_ops.concat(a, axis=0) + self.assertEqual('/device:GPU:2', a_op.device) + + b_op = array_ops.concat(b, axis=0) + self.assertEqual('/device:GPU:2', b_op.device) + + c = variables.Variable(0.03) + self.assertEqual('/device:GPU:4', c.device) + + d = variables.Variable(0.03) + self.assertEqual('/device:GPU:0', d.device) + + c_op = array_ops.concat(c, axis=0) + self.assertEqual('/device:GPU:2', c_op.device) + + +class ComputeSumWithDevicePlacementTest(test_util.TensorFlowTestCase): + + def test_vectors(self): + with self.test_session() as session: + total = replicate_model_fn._compute_sum_on_device( + [1.0, 2.0, 3.0, 4.0], device='/device:GPU:0', name='test_sum') + + self.assertEqual('/device:GPU:0', total.device) + self.assertEqual('test_sum', total.op.name) + self.assertEqual(10.0, session.run(total)) + + def test_tensors(self): + with self.test_session() as session: + total = replicate_model_fn._compute_sum_on_device( + [[1.0, 2.0], [3.0, 4.0]], device='/device:GPU:0', name='test_sum') + + self.assertEqual('/device:GPU:0', total.device) + self.assertEqual('test_sum', total.op.name) + self.assertAllEqual([4.0, 6.0], session.run(total)) + + def test_indexedslices(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 1], + dense_shape=constant_op.constant([2])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([4.0, 6.0], + session.run(ops_lib.convert_to_tensor(total))) + + def test_indexedslices_higher_dimensions(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([[1.0, 5.0], [2.0, 6.0]]), [0, 1], + 
dense_shape=constant_op.constant([2, 4])) + b = ops_lib.IndexedSlices( + constant_op.constant([[3.0, 7.0], [4.0, 8.0]]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([[4.0, 12.0], [6.0, 14.0]], + session.run(ops_lib.convert_to_tensor(total))) + + def test_indexedslices_some_dont_overlap(self): + with self.test_session() as session: + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 3], + dense_shape=constant_op.constant([4])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + total = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0') + + self.assertEqual('/device:GPU:0', total.device) + self.assertAllEqual([4.0, 4.0, 0.0, 2.0], + session.run(ops_lib.convert_to_tensor(total))) + + def test_no_name_for_indexslices(self): + a = ops_lib.IndexedSlices( + constant_op.constant([1.0, 2.0]), [0, 1], + dense_shape=constant_op.constant([2])) + b = ops_lib.IndexedSlices(constant_op.constant([3.0, 4.0]), [0, 1]) + + with self.assertRaisesRegexp(ValueError, '.+name.+not.+expected.+'): + _ = replicate_model_fn._compute_sum_on_device( + [a, b], device='/device:GPU:0', name='cant_name_indexslices') + + +class ConcatTensorDictsTest(test_util.TensorFlowTestCase): + + def test_example(self): + tensor_dicts = [ + { + 'a': np.array([1.0, 2.0]), + 'b': np.array([11.0]), + 'c': np.array([21.0]), + }, + { + 'a': np.array([3.0]), + 'b': np.array([12.0, 13.0]), + }, + { + 'b': np.array([14.0]), + }, + ] + + with self.test_session() as session: + self.assertAllClose({ + 'a': np.array([1.0, 2.0, 3.0]), + 'b': np.array([11.0, 12.0, 13.0, 14.0]), + 'c': np.array([21.0]), + }, session.run(replicate_model_fn._concat_tensor_dicts(*tensor_dicts))) + + +if __name__ == '__main__': + test.main() -- GitLab From d4f927707174595461b2b068a31a751772b91ba1 Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 9 Mar 
2018 15:34:44 -0800 Subject: [PATCH 690/884] Move a utility function into the eager C internal API PiperOrigin-RevId: 188548393 --- tensorflow/c/eager/c_api.cc | 71 +++++++++++++++++++++++ tensorflow/c/eager/c_api_internal.h | 7 +++ tensorflow/python/eager/pywrap_tfe_src.cc | 65 ++------------------- 3 files changed, 82 insertions(+), 61 deletions(-) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index 6793bb548c..b9a47ea244 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -1037,3 +1037,74 @@ void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, status->status = MessageToBuffer(ctx->run_metadata, buf); ctx->run_metadata.Clear(); } + +namespace { +TFE_Op* GetFunc(TFE_Context* ctx, const tensorflow::NameAttrList& func, + TF_Status* status) { + TFE_Op* func_op = TFE_NewOp(ctx, func.name().data(), status); + for (const auto& attr : func.attr()) { + if (TF_GetCode(status) != TF_OK) return nullptr; + SetOpAttrValueScalar(ctx, func_op, attr.second, attr.first.data(), status); + if (TF_GetCode(status) != TF_OK) return nullptr; + } + return func_op; +} +} // namespace + +namespace tensorflow { +void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, + const tensorflow::AttrValue& default_value, + const char* attr_name, TF_Status* status) { + switch (default_value.value_case()) { + case tensorflow::AttrValue::kS: + TFE_OpSetAttrString(op, attr_name, default_value.s().data()); + break; + case tensorflow::AttrValue::kI: + TFE_OpSetAttrInt(op, attr_name, static_cast(default_value.i())); + break; + case tensorflow::AttrValue::kF: + TFE_OpSetAttrFloat(op, attr_name, default_value.f()); + break; + case tensorflow::AttrValue::kB: + TFE_OpSetAttrBool(op, attr_name, default_value.b()); + break; + case tensorflow::AttrValue::kType: + TFE_OpSetAttrType(op, attr_name, + static_cast(default_value.type())); + break; + case tensorflow::AttrValue::kShape: { + const auto& tensor_shape = default_value.shape(); + if 
(tensor_shape.unknown_rank()) { + TFE_OpSetAttrShape(op, attr_name, nullptr, -1, status); + } else { + const auto num_dims = tensor_shape.dim_size(); + std::unique_ptr dims(new int64_t[num_dims]); + for (int i = 0; i < num_dims; ++i) { + dims[i] = tensor_shape.dim(i).size(); + } + TFE_OpSetAttrShape(op, attr_name, dims.get(), num_dims, status); + } + } break; + case tensorflow::AttrValue::kFunc: { + const auto func_op = GetFunc(ctx, default_value.func(), status); + if (TF_GetCode(status) != TF_OK) return; + // TODO(nareshmodi): TFE_OpSetAttrFunction and TFE_OpSetAttrFunctionList + // require TFE_Op* and just convert it internally a NameAttrValue, so + // consider adding an overload to the C API to make this case easier. + TFE_OpSetAttrFunction(op, attr_name, func_op); + } break; + case tensorflow::AttrValue::kList: + TF_FALLTHROUGH_INTENDED; + case tensorflow::AttrValue::kTensor: + TF_FALLTHROUGH_INTENDED; + case tensorflow::AttrValue::kPlaceholder: + TF_FALLTHROUGH_INTENDED; + case tensorflow::AttrValue::VALUE_NOT_SET: + TF_SetStatus( + status, TF_UNIMPLEMENTED, + tensorflow::strings::StrCat("Unable to get setfor default value: ", + default_value.DebugString()) + .data()); + } +} +} // namespace tensorflow diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 5bbfd577b4..49b9434457 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -142,4 +142,11 @@ struct TFE_Op { bool use_xla = false; }; +namespace tensorflow { +// Set an AttrValue on the op. Doesn't handle the list types. 
+void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, + const tensorflow::AttrValue& default_value, + const char* attr_name, TF_Status* status); +} // namespace tensorflow + #endif // TENSORFLOW_C_EAGER_C_API_INTERNAL_H_ diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 7ccfe9120c..7b674807f5 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -277,21 +277,12 @@ bool SetOpAttrList( return true; } -// This is only declared here since GetFunc makes a recursive call to -// SetOpAttrScalarDefault. -void SetOpAttrScalarDefault( - TFE_Context* ctx, TFE_Op* op, const tensorflow::AttrValue& default_value, - const char* attr_name, - tensorflow::gtl::FlatMap* attr_list_sizes, - TF_Status* status); - TFE_Op* GetFunc(TFE_Context* ctx, const tensorflow::NameAttrList& func, TF_Status* status) { TFE_Op* func_op = TFE_NewOp(ctx, func.name().data(), status); for (const auto& attr : func.attr()) { if (TF_GetCode(status) != TF_OK) return nullptr; - SetOpAttrScalarDefault(ctx, func_op, attr.second, attr.first.data(), - nullptr, status); + SetOpAttrValueScalar(ctx, func_op, attr.second, attr.first.data(), status); if (TF_GetCode(status) != TF_OK) return nullptr; } return func_op; @@ -493,57 +484,9 @@ void SetOpAttrScalarDefault( const char* attr_name, tensorflow::gtl::FlatMap* attr_list_sizes, TF_Status* status) { - switch (default_value.value_case()) { - case tensorflow::AttrValue::kS: - TFE_OpSetAttrString(op, attr_name, default_value.s().data()); - break; - case tensorflow::AttrValue::kI: - TFE_OpSetAttrInt(op, attr_name, static_cast(default_value.i())); - (*attr_list_sizes)[attr_name] = default_value.i(); - break; - case tensorflow::AttrValue::kF: - TFE_OpSetAttrFloat(op, attr_name, default_value.f()); - break; - case tensorflow::AttrValue::kB: - TFE_OpSetAttrBool(op, attr_name, default_value.b()); - break; - case tensorflow::AttrValue::kType: - TFE_OpSetAttrType(op, 
attr_name, - static_cast(default_value.type())); - break; - case tensorflow::AttrValue::kShape: { - const auto& tensor_shape = default_value.shape(); - if (tensor_shape.unknown_rank()) { - TFE_OpSetAttrShape(op, attr_name, nullptr, -1, status); - } else { - const auto num_dims = tensor_shape.dim_size(); - std::unique_ptr dims(new int64_t[num_dims]); - for (int i = 0; i < num_dims; ++i) { - dims[i] = tensor_shape.dim(i).size(); - } - TFE_OpSetAttrShape(op, attr_name, dims.get(), num_dims, status); - } - } break; - case tensorflow::AttrValue::kFunc: { - const auto func_op = GetFunc(ctx, default_value.func(), status); - if (TF_GetCode(status) != TF_OK) return; - // TODO(nareshmodi): TFE_OpSetAttrFunction and TFE_OpSetAttrFunctionList - // require TFE_Op* and just convert it internally a NameAttrValue, so - // consider adding an overload to the C API to make this case easier. - TFE_OpSetAttrFunction(op, attr_name, func_op); - } break; - case tensorflow::AttrValue::kList: - TF_FALLTHROUGH_INTENDED; - case tensorflow::AttrValue::kTensor: - TF_FALLTHROUGH_INTENDED; - case tensorflow::AttrValue::kPlaceholder: - TF_FALLTHROUGH_INTENDED; - case tensorflow::AttrValue::VALUE_NOT_SET: - TF_SetStatus( - status, TF_UNIMPLEMENTED, - tensorflow::strings::StrCat("Unable to get setfor default value: ", - default_value.DebugString()) - .data()); + SetOpAttrValueScalar(ctx, op, default_value, attr_name, status); + if (default_value.value_case() == tensorflow::AttrValue::kI) { + (*attr_list_sizes)[attr_name] = default_value.i(); } } -- GitLab From c0d44a74b67cde9d8c8583365c8f20678933dfea Mon Sep 17 00:00:00 2001 From: Akshay Modi Date: Fri, 9 Mar 2018 15:42:30 -0800 Subject: [PATCH 691/884] Remove dupe tag --- tensorflow/contrib/eager/python/BUILD | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/eager/python/BUILD b/tensorflow/contrib/eager/python/BUILD index 8253c024fd..384ef7f963 100644 --- a/tensorflow/contrib/eager/python/BUILD +++ 
b/tensorflow/contrib/eager/python/BUILD @@ -266,8 +266,10 @@ cuda_py_test( "//tensorflow/python/eager:test", "//tensorflow/python/keras", ], - tags = ["no_windows"], # TODO: needs investigation on Windows - tags = ["notsan"], + tags = [ + "no_windows", # TODO: needs investigation on Windows + "notsan", + ], ) filegroup( -- GitLab From c5e8d4819a897a5701470ae291e09811f5b4762f Mon Sep 17 00:00:00 2001 From: Brennan Saeta Date: Fri, 9 Mar 2018 15:39:11 -0800 Subject: [PATCH 692/884] ProcFLR: Include the remote function target in the function_key PiperOrigin-RevId: 188548941 --- .../process_function_library_runtime.cc | 9 +- .../cluster_function_library_runtime.cc | 5 + tensorflow/core/kernels/function_ops.cc | 5 + .../data/kernel_tests/iterator_ops_test.py | 188 ++++++++++++------ 4 files changed, 145 insertions(+), 62 deletions(-) diff --git a/tensorflow/core/common_runtime/process_function_library_runtime.cc b/tensorflow/core/common_runtime/process_function_library_runtime.cc index 07c657a741..92fdcb404e 100644 --- a/tensorflow/core/common_runtime/process_function_library_runtime.cc +++ b/tensorflow/core/common_runtime/process_function_library_runtime.cc @@ -243,8 +243,9 @@ Status ProcessFunctionLibraryRuntime::Instantiate( "Currently don't support instantiating functions on device: ", options.target); } - - string function_key = Canonicalize(function_name, attrs); + VLOG(1) << "ProcessFLR Instantiate: " << function_name + << " on: " << options.target; + string function_key = Canonicalize(function_name, attrs, options); FunctionData* f; { mutex_lock l(mu_); @@ -262,7 +263,9 @@ Status ProcessFunctionLibraryRuntime::Instantiate( } TF_RETURN_IF_ERROR( f->DistributedInit(parent_, function_name, *lib_def_, attrs, options)); - + VLOG(1) << "ProcessFLR Instantiate [success]: " << function_name + << " on: " << options.target << " with handle: " << *handle + << " (this: " << this << ")"; return Status::OK(); } diff --git 
a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc index 0c5c4d59ed..000a03da5d 100644 --- a/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc +++ b/tensorflow/core/distributed_runtime/cluster_function_library_runtime.cc @@ -121,6 +121,8 @@ Status ClusterFunctionLibraryRuntime::Instantiate( const string& function_name, const FunctionLibraryDefinition& lib_def, AttrSlice attrs, const FunctionLibraryRuntime::InstantiateOptions& options, FunctionLibraryRuntime::LocalHandle* handle) { + VLOG(1) << "CFLR::Instantiate: " << function_name << " on " << options.target + << " (this: " << this << ")"; WorkerInterface* wi = worker_session_->worker_cache->CreateWorker(options.target); @@ -154,6 +156,9 @@ Status ClusterFunctionLibraryRuntime::Instantiate( *handle = function_data_.size(); function_data_.push_back(FunctionData(resp.graph_handle(), options.target, wi, send_keys, recv_keys)); + VLOG(1) << "CFLR::Instantiate: [Success] " << function_name << " on " + << options.target << " (this: " << this << ")" + << " with handle: " << *handle; return Status::OK(); } diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc index 7c302e2fc2..351aad7213 100644 --- a/tensorflow/core/kernels/function_ops.cc +++ b/tensorflow/core/kernels/function_ops.cc @@ -318,6 +318,7 @@ class RemoteCallOp : public AsyncOpKernel { if (cached_entry != handle_cache_.end()) { handle = cached_entry->second; } else { + VLOG(1) << "Instantiating " << func_.name() << " on " << target_device; port::Tracing::TraceMe activity(strings::StrCat( "RemoteCall: Instantiate: ", func_.name(), " on ", target_device)); OP_REQUIRES_OK_ASYNC( @@ -327,6 +328,8 @@ class RemoteCallOp : public AsyncOpKernel { done); auto insert_result = handle_cache_.insert({function_target, handle}); CHECK(insert_result.second) << "Insert unsuccessful."; + VLOG(1) << "Instantiated " << 
func_.name() << " on " << target_device + << ", resulting in handle: " << handle << " flr: " << lib; } } @@ -349,6 +352,8 @@ class RemoteCallOp : public AsyncOpKernel { auto* rets = new std::vector; auto* trace = new port::Tracing::TraceMe(strings::StrCat( "RemoteCall: Run: ", func_.name(), " on ", target_device)); + VLOG(1) << "Running " << func_.name() << " on " << target_device + << " with handle: " << handle; lib->Run(opts, handle, args, rets, [rets, trace, done, ctx](const Status& status) { if (!status.ok()) { diff --git a/tensorflow/python/data/kernel_tests/iterator_ops_test.py b/tensorflow/python/data/kernel_tests/iterator_ops_test.py index 23c6d7385f..4a14a915bd 100644 --- a/tensorflow/python/data/kernel_tests/iterator_ops_test.py +++ b/tensorflow/python/data/kernel_tests/iterator_ops_test.py @@ -22,6 +22,7 @@ import warnings import numpy as np +from tensorflow.core.protobuf import cluster_pb2 from tensorflow.core.protobuf import config_pb2 from tensorflow.python.client import session from tensorflow.python.data.ops import dataset_ops @@ -44,6 +45,7 @@ from tensorflow.python.ops import script_ops from tensorflow.python.ops import variables from tensorflow.python.platform import test from tensorflow.python.training import server_lib +from tensorflow.python.util import compat class IteratorTest(test.TestCase): @@ -63,8 +65,9 @@ class IteratorTest(test.TestCase): def testCapturingStateInOneShotRaisesException(self): var = variables.Variable(37.0, name="myvar") - dataset = (dataset_ops.Dataset.from_tensor_slices([0.0, 1.0, 2.0]) - .map(lambda x: x + var)) + dataset = ( + dataset_ops.Dataset.from_tensor_slices([0.0, 1.0, 2.0]) + .map(lambda x: x + var)) with self.assertRaisesRegexp( ValueError, r"`Dataset.make_one_shot_iterator\(\)` does not support " "datasets that capture stateful objects.+myvar"): @@ -78,8 +81,9 @@ class IteratorTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = 
(dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) - .repeat(14).make_one_shot_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components).map(_map_fn) + .repeat(14).make_one_shot_iterator()) get_next = iterator.get_next() self.assertEqual([c.shape[1:] for c in components], @@ -103,8 +107,9 @@ class IteratorTest(test.TestCase): def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = (dataset_ops.Dataset.from_tensor_slices(tensor_components) - .map(_map_fn).repeat(14).make_one_shot_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensor_slices(tensor_components) + .map(_map_fn).repeat(14).make_one_shot_iterator()) get_next = iterator.get_next() self.assertEqual([c.shape[1:] for c in components], @@ -125,10 +130,13 @@ class IteratorTest(test.TestCase): np.array(37.0) * np.arange(7)) def within_container(): + def _map_fn(x, y, z): return math_ops.square(x), math_ops.square(y), math_ops.square(z) - iterator = (dataset_ops.Dataset.from_tensor_slices(components) - .map(_map_fn).repeat(14).make_one_shot_iterator()) + + iterator = ( + dataset_ops.Dataset.from_tensor_slices(components) + .map(_map_fn).repeat(14).make_one_shot_iterator()) return iterator.get_next() server = server_lib.Server.create_local_server() @@ -159,8 +167,8 @@ class IteratorTest(test.TestCase): # Create a session with a single thread to ensure that the # one-shot iterator initializer does not deadlock. - config = config_pb2.ConfigProto(inter_op_parallelism_threads=1, - use_per_session_threads=True) + config = config_pb2.ConfigProto( + inter_op_parallelism_threads=1, use_per_session_threads=True) with session.Session(config=config) as sess: self.assertAllEqual([1, 4, 9], sess.run(next_element)) with self.assertRaises(errors.OutOfRangeError): @@ -169,6 +177,7 @@ class IteratorTest(test.TestCase): # Test with multiple threads invoking the one-shot iterator concurrently. 
with session.Session(config=config) as sess: results = [] + def consumer_thread(): try: results.append(sess.run(next_element)) @@ -177,7 +186,8 @@ class IteratorTest(test.TestCase): num_threads = 8 threads = [ - self.checkedThread(consumer_thread) for _ in range(num_threads)] + self.checkedThread(consumer_thread) for _ in range(num_threads) + ] for t in threads: t.start() for t in threads: @@ -205,24 +215,24 @@ class IteratorTest(test.TestCase): sess.run(next_element) with self.test_session() as sess: + def consumer_thread(): with self.assertRaisesRegexp(errors.InvalidArgumentError, "oops"): sess.run(next_element) num_threads = 8 threads = [ - self.checkedThread(consumer_thread) for _ in range(num_threads)] + self.checkedThread(consumer_thread) for _ in range(num_threads) + ] for t in threads: t.start() for t in threads: t.join() def testSimpleSharedResource(self): - components = ( - np.array(1, dtype=np.int64), - np.array([1, 2, 3], dtype=np.int64), - np.array(37.0, dtype=np.float64) - ) + components = (np.array(1, dtype=np.int64), + np.array([1, 2, 3], dtype=np.int64), + np.array(37.0, dtype=np.float64)) server = server_lib.Server.create_local_server() @@ -231,9 +241,10 @@ class IteratorTest(test.TestCase): # first session (initializing the iterator) is visible in the # second session. 
with ops.Graph().as_default(): - iterator = (dataset_ops.Dataset.from_tensors(components) - .map(lambda x, y, z: (x, y, z)).make_initializable_iterator( - shared_name="shared_iterator")) + iterator = ( + dataset_ops.Dataset.from_tensors(components) + .map(lambda x, y, z: (x, y, z)).make_initializable_iterator( + shared_name="shared_iterator")) init_op = iterator.initializer get_next = iterator.get_next() @@ -269,8 +280,9 @@ class IteratorTest(test.TestCase): def testNotInitializedError(self): components = (np.array(1), np.array([1, 2, 3]), np.array(37.0)) - iterator = (dataset_ops.Dataset.from_tensors(components) - .make_initializable_iterator()) + iterator = ( + dataset_ops.Dataset.from_tensors(components) + .make_initializable_iterator()) get_next = iterator.get_next() with self.test_session() as sess: @@ -320,8 +332,8 @@ class IteratorTest(test.TestCase): def testReinitializableIteratorStaticErrors(self): # Non-matching structure for types and shapes. with self.assertRaises(TypeError): - iterator = iterator_ops.Iterator.from_structure((dtypes.int64, - dtypes.float64), [None]) + iterator = iterator_ops.Iterator.from_structure( + (dtypes.int64, dtypes.float64), [None]) # Test validation of dataset argument. iterator = iterator_ops.Iterator.from_structure((dtypes.int64, @@ -337,18 +349,18 @@ class IteratorTest(test.TestCase): # Incompatible types. with self.assertRaises(TypeError): iterator.make_initializer( - dataset_ops.Dataset.from_tensors((constant_op.constant( - [1, 2, 3], dtype=dtypes.int32), constant_op.constant( - [4., 5., 6., 7.], dtype=dtypes.float32)))) + dataset_ops.Dataset.from_tensors( + (constant_op.constant([1, 2, 3], dtype=dtypes.int32), + constant_op.constant([4., 5., 6., 7.], dtype=dtypes.float32)))) # Incompatible shapes. 
iterator = iterator_ops.Iterator.from_structure( (dtypes.int64, dtypes.float64), ([None], [])) with self.assertRaises(TypeError): iterator.make_initializer( - dataset_ops.Dataset.from_tensors((constant_op.constant( - [1, 2, 3], dtype=dtypes.int64), constant_op.constant( - [4., 5., 6., 7.], dtype=dtypes.float64)))) + dataset_ops.Dataset.from_tensors( + (constant_op.constant([1, 2, 3], dtype=dtypes.int64), + constant_op.constant([4., 5., 6., 7.], dtype=dtypes.float64)))) def testIteratorStringHandle(self): dataset_3 = dataset_ops.Dataset.from_tensor_slices([1, 2, 3]) @@ -370,33 +382,40 @@ class IteratorTest(test.TestCase): iterator_3_handle = sess.run(iterator_3.string_handle()) iterator_4_handle = sess.run(iterator_4.string_handle()) - self.assertEqual( - 10, sess.run(next_element, - feed_dict={handle_placeholder: iterator_4_handle})) - self.assertEqual( - 1, sess.run(next_element, - feed_dict={handle_placeholder: iterator_3_handle})) - self.assertEqual( - 20, sess.run(next_element, - feed_dict={handle_placeholder: iterator_4_handle})) - self.assertEqual( - 2, sess.run(next_element, - feed_dict={handle_placeholder: iterator_3_handle})) - self.assertEqual( - 30, sess.run(next_element, - feed_dict={handle_placeholder: iterator_4_handle})) - self.assertEqual( - 3, sess.run(next_element, - feed_dict={handle_placeholder: iterator_3_handle})) - self.assertEqual( - 40, sess.run(next_element, - feed_dict={handle_placeholder: iterator_4_handle})) + self.assertEqual(10, + sess.run( + next_element, + feed_dict={handle_placeholder: iterator_4_handle})) + self.assertEqual(1, + sess.run( + next_element, + feed_dict={handle_placeholder: iterator_3_handle})) + self.assertEqual(20, + sess.run( + next_element, + feed_dict={handle_placeholder: iterator_4_handle})) + self.assertEqual(2, + sess.run( + next_element, + feed_dict={handle_placeholder: iterator_3_handle})) + self.assertEqual(30, + sess.run( + next_element, + feed_dict={handle_placeholder: iterator_4_handle})) + 
self.assertEqual(3, + sess.run( + next_element, + feed_dict={handle_placeholder: iterator_3_handle})) + self.assertEqual(40, + sess.run( + next_element, + feed_dict={handle_placeholder: iterator_4_handle})) with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element, - feed_dict={handle_placeholder: iterator_3_handle}) + sess.run( + next_element, feed_dict={handle_placeholder: iterator_3_handle}) with self.assertRaises(errors.OutOfRangeError): - sess.run(next_element, - feed_dict={handle_placeholder: iterator_4_handle}) + sess.run( + next_element, feed_dict={handle_placeholder: iterator_4_handle}) def testIteratorStringHandleReuseTensorObject(self): dataset = dataset_ops.Dataset.from_tensor_slices([1, 2, 3]) @@ -427,8 +446,8 @@ class IteratorTest(test.TestCase): self.assertIsNot(handle_with_name, handle_with_same_name) def testIteratorStringHandleError(self): - dataset_int_scalar = (dataset_ops.Dataset.from_tensor_slices([1, 2, - 3]).repeat()) + dataset_int_scalar = ( + dataset_ops.Dataset.from_tensor_slices([1, 2, 3]).repeat()) dataset_float_vector = (dataset_ops.Dataset.from_tensors([1.0, 2.0, 3.0])) handle_placeholder = array_ops.placeholder(dtypes.string, shape=[]) @@ -522,6 +541,58 @@ class IteratorTest(test.TestCase): target_placeholder: "/job:localhost/replica:0/task:0/cpu:1" }) + def testRemoteIteratorUsingRemoteCallOpMultiWorkers(self): + s1 = server_lib.Server.create_local_server() + s2 = server_lib.Server.create_local_server() + s3 = server_lib.Server.create_local_server() + + cluster_def = cluster_pb2.ClusterDef() + workers = cluster_def.job.add() + workers.name = "worker" + workers.tasks[0] = s1.target[len("grpc://"):] + workers.tasks[1] = s2.target[len("grpc://"):] + client = cluster_def.job.add() + client.name = "client" + client.tasks[0] = s3.target[len("grpc://"):] + config = config_pb2.ConfigProto(cluster_def=cluster_def) + + worker_devices = [ + "/job:worker/replica:0/task:%d/cpu:0" % i for i in range(2) + ] + itr_handles = [] + for 
device in worker_devices: + with ops.device(device): + src = dataset_ops.Dataset.from_tensor_slices([device]) + itr = src.make_one_shot_iterator() + itr_handles.append(itr.string_handle()) + + targets = dataset_ops.Dataset.from_tensor_slices(worker_devices) + handles = dataset_ops.Dataset.from_tensor_slices(itr_handles) + + @function.Defun(dtypes.string) + def loading_func(h): + remote_itr = iterator_ops.Iterator.from_string_handle( + h, itr.output_types, itr.output_shapes) + return remote_itr.get_next() + + def map_fn(target, handle): + return functional_ops.remote_call( + args=[handle], Tout=[dtypes.string], f=loading_func, target=target) + + with ops.device("/job:client"): + client_dataset = dataset_ops.Dataset.zip((targets, handles)).map(map_fn) + itr = client_dataset.make_initializable_iterator() + n = itr.get_next() + + with session.Session(s3.target, config=config) as sess: + sess.run(itr.initializer) + expected_values = worker_devices + for expected in expected_values: + self.assertEqual((compat.as_bytes(expected),), sess.run(n)) + + with self.assertRaises(errors.OutOfRangeError): + sess.run(n) + def testRemoteIteratorUsingRemoteCallOpDirectSessionGPUCPU(self): if not test_util.is_gpu_available(): self.skipTest("No GPU available") @@ -641,8 +712,7 @@ class IteratorTest(test.TestCase): with warnings.catch_warnings(record=True) as w: for _ in range(100): iterator.get_next() - self.assertEqual(100 - iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD, - len(w)) + self.assertEqual(100 - iterator_ops.GET_NEXT_CALL_WARNING_THRESHOLD, len(w)) for warning in w: self.assertTrue( iterator_ops.GET_NEXT_CALL_WARNING_MESSAGE in str(warning.message)) -- GitLab From 4f333b63f7b46a3122f91b5358f2763e6c2e8206 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 16:17:08 -0800 Subject: [PATCH 693/884] [XLA] Add a whole graph execution interface. 
PiperOrigin-RevId: 188554206 --- tensorflow/compiler/xla/BUILD | 1 + tensorflow/compiler/xla/service/service.cc | 5 +++++ tensorflow/compiler/xla/service/service.h | 6 ++++++ tensorflow/compiler/xla/service_interface.h | 3 +++ tensorflow/compiler/xla/xla.proto | 9 +++++++++ 5 files changed, 24 insertions(+) diff --git a/tensorflow/compiler/xla/BUILD b/tensorflow/compiler/xla/BUILD index c7cb69215f..cd13db4d30 100644 --- a/tensorflow/compiler/xla/BUILD +++ b/tensorflow/compiler/xla/BUILD @@ -52,6 +52,7 @@ xla_proto_library( visibility = ["//visibility:public"], deps = [ ":xla_data_proto", + "//tensorflow/compiler/xla/service:hlo_proto", "//tensorflow/compiler/xla/service:session_proto", ], ) diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 25c2fe97e4..8edd457281 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -937,6 +937,11 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg, return tensorflow::Status::OK(); } +tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* /*arg*/, + ExecuteResponse* /*result*/) { + return Unimplemented("execute-graph is not yet implemented"); +} + tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg, ExecuteAsyncResponse* result) { VLOG(1) << "running execute-async request: " << arg->ShortDebugString(); diff --git a/tensorflow/compiler/xla/service/service.h b/tensorflow/compiler/xla/service/service.h index e047df2648..96352d9096 100644 --- a/tensorflow/compiler/xla/service/service.h +++ b/tensorflow/compiler/xla/service/service.h @@ -112,6 +112,12 @@ class Service : public ServiceInterface { tensorflow::Status Execute(const ExecuteRequest* arg, ExecuteResponse* result) override; + // Executes a computation with the provided global data passed as + // immutable arguments. The request contains the whole computation graph. + // Returns global data output and execution timing. 
+ tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* arg, + ExecuteResponse* result) override; + // Executes one or more computations in parallel with the provided global data // passed as immutable arguments. Returns global data output for each // computation. diff --git a/tensorflow/compiler/xla/service_interface.h b/tensorflow/compiler/xla/service_interface.h index 809941d8fe..d8235113dd 100644 --- a/tensorflow/compiler/xla/service_interface.h +++ b/tensorflow/compiler/xla/service_interface.h @@ -54,6 +54,9 @@ class ServiceInterface { virtual tensorflow::Status Execute(const ExecuteRequest* arg, ExecuteResponse* result) = 0; + virtual tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* arg, + ExecuteResponse* result) = 0; + virtual tensorflow::Status ExecuteParallel( const ExecuteParallelRequest* arg, ExecuteParallelResponse* result) = 0; diff --git a/tensorflow/compiler/xla/xla.proto b/tensorflow/compiler/xla/xla.proto index 56162ab44e..edf1b07af8 100644 --- a/tensorflow/compiler/xla/xla.proto +++ b/tensorflow/compiler/xla/xla.proto @@ -16,6 +16,7 @@ limitations under the License. syntax = "proto3"; import "tensorflow/compiler/xla/xla_data.proto"; +import "tensorflow/compiler/xla/service/hlo.proto"; import "tensorflow/compiler/xla/service/session.proto"; package xla; @@ -342,6 +343,14 @@ message ExecuteRequest { ExecutionOptions execution_options = 5; } +message ExecuteGraphRequest { + HloModuleProto computation = 1; + repeated GlobalDataHandle arguments = 2; + + // Options that affect how XLA compiles and runs code to service this request. + ExecutionOptions execution_options = 3; +} + message ExecuteParallelRequest { repeated ExecuteRequest requests = 1; } -- GitLab From 2d6b626c16430d922b7ce89f0890636037856aac Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 9 Mar 2018 16:34:42 -0800 Subject: [PATCH 694/884] Allowing ReorderActivationFunctions to reorder output arrays and adding support for propagating activation functions into Gather ops and as constant ops. PiperOrigin-RevId: 188556574 --- tensorflow/contrib/lite/toco/BUILD | 1 + .../graph_transformations.h | 1 + ...gate_activation_function_into_constants.cc | 121 ++++++++++++++++++ .../remove_trivial_reshape.cc | 4 +- .../reorder_activation_functions.cc | 90 +++++++++---- .../resolve_constant_unary.cc | 36 +++++- tensorflow/contrib/lite/toco/toco_tooling.cc | 1 + 7 files changed, 223 insertions(+), 31 deletions(-) create mode 100644 tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc diff --git a/tensorflow/contrib/lite/toco/BUILD b/tensorflow/contrib/lite/toco/BUILD index bf4396486e..395abc5326 100644 --- a/tensorflow/contrib/lite/toco/BUILD +++ b/tensorflow/contrib/lite/toco/BUILD @@ -196,6 +196,7 @@ cc_library( "graph_transformations/identify_relu1.cc", "graph_transformations/lstm_utils.cc", "graph_transformations/make_initial_dequantize_operator.cc", + "graph_transformations/propagate_activation_function_into_constants.cc", "graph_transformations/propagate_array_data_types.cc", "graph_transformations/propagate_fixed_sizes.cc", "graph_transformations/quantize.cc", diff --git a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h index 1447de1220..11e5e19f50 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h +++ b/tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h @@ -131,6 +131,7 @@ DECLARE_GRAPH_TRANSFORMATION(MergeLstmCellInputs) DECLARE_GRAPH_TRANSFORMATION(IdentifyRelu1) DECLARE_GRAPH_TRANSFORMATION(IdentifyDilatedConv) DECLARE_GRAPH_TRANSFORMATION(MakeInitialDequantizeOperator) 
+DECLARE_GRAPH_TRANSFORMATION(PropagateActivationFunctionIntoConstants) DECLARE_GRAPH_TRANSFORMATION(PropagateArrayDataTypes) DECLARE_GRAPH_TRANSFORMATION(PropagateFixedSizes) DECLARE_GRAPH_TRANSFORMATION(HardcodeMinMax) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc new file mode 100644 index 0000000000..cf17c49b10 --- /dev/null +++ b/tensorflow/contrib/lite/toco/graph_transformations/propagate_activation_function_into_constants.cc @@ -0,0 +1,121 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ +#include +#include +#include +#include + +#include "tensorflow/contrib/lite/toco/graph_transformations/graph_transformations.h" +#include "tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_passthrough.h" +#include "tensorflow/contrib/lite/toco/model.h" +#include "tensorflow/contrib/lite/toco/runtime/types.h" +#include "tensorflow/contrib/lite/toco/tooling_util.h" +#include "tensorflow/core/platform/logging.h" + +namespace toco { + +bool PropagateActivationFunctionIntoConstants::Run(Model* model, + std::size_t op_index) { + const auto ac_it = model->operators.begin() + op_index; + const auto* ac_op = ac_it->get(); + if (ac_op->type != OperatorType::kRelu6 && + ac_op->type != OperatorType::kRelu1 && + ac_op->type != OperatorType::kRelu) { + return false; + } + + // Find the op producing the array passed to this activation function. + auto* src_op = GetOpWithOutput(*model, ac_op->inputs[0]); + if (!src_op) { + return false; + } + + // Ensure the src_op is not used without the activation function applied. + if (CountTrueOutputs(*model, *src_op) > 1) { + AddMessageF( + "Not propagating activation function %s into %s because it has more " + "than one consumed output", + LogName(*ac_op), LogName(*src_op)); + } + + // Filter to the list of supported ops. + string src_op_input; + switch (src_op->type) { + case OperatorType::kGather: + src_op_input = src_op->inputs[0]; + break; + default: + return false; + } + CHECK_EQ(src_op->outputs[0], ac_op->inputs[0]); + + // Ensure the input is constant as otherwise this needs to happen at runtime. + // If we bail here, it's still possible that FuseActivationFunctions will fuse + // the activation if it's supported by the op. 
+ if (!IsConstantParameterArray(*model, src_op_input)) { + AddMessageF( + "Not propagating activation function %s into %s:%s because it is not " + "constant", + LogName(*ac_op), LogName(*src_op), src_op_input); + return false; + } + + // Get the array we'll be working with and ensure it's a compatible type. + auto& const_array = model->GetArray(src_op_input); + if (const_array.data_type != ArrayDataType::kFloat) { + AddMessageF( + "Not propagating activation function %s into %s:%s because it is " + "non-float data", + LogName(*ac_op), LogName(*src_op), src_op_input); + return false; + } + auto& const_array_data = + const_array.GetMutableBuffer().data; + + // Perform the activation function directly into the constant data array. + for (size_t i = 0; i < const_array_data.size(); ++i) { + const float value = const_array_data[i]; + float new_value = value; + switch (ac_op->type) { + case OperatorType::kRelu: { + static constexpr float kLower = 0; + new_value = value < kLower ? kLower : value; + break; + } + case OperatorType::kRelu1: { + static constexpr float kUpper = 1; + static constexpr float kLower = -1; + new_value = value > kUpper ? kUpper : value < kLower ? kLower : value; + break; + } + case OperatorType::kRelu6: { + static constexpr float kUpper = 6; + static constexpr float kLower = 0; + new_value = value > kUpper ? kUpper : value < kLower ? 
kLower : value; + break; + } + default: + LOG(FATAL) << "Unsupported activation function " << LogName(*ac_op); + return false; + } + const_array_data[i] = new_value; + } + + AddMessageF("Propagated activation function %s into %s:%s", LogName(*ac_op), + LogName(*src_op), src_op_input); + return RemoveTrivialPassthroughOp(this, model, op_index); +} + +} // namespace toco diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc index 90f9381ec1..61477d59ae 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_reshape.cc @@ -61,8 +61,8 @@ bool IsReshapeTrivial(const Model& model, const Operator& op, if (next_op->type == OperatorType::kTensorFlowReshape) { transformation->AddMessageF( "%s is trivial because its output is only consumed by another " - "Reshape op", - LogName(op)); + "Reshape op %s", + LogName(op), LogName(*next_op)); return true; } } diff --git a/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc b/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc index 30a005c789..9852c86c21 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/reorder_activation_functions.cc @@ -42,14 +42,22 @@ bool ReorderActivationFunctions::Run(Model* model, std::size_t op_index) { std::unique_ptr& exchange_op = *exchange_it; DCHECK(exchange_op); - if (exchange_op->type != OperatorType::kTensorFlowReshape) { - return false; + // Allow activation functions to move up over any operator that does not + // change the values. 
+ switch (exchange_op->type) { + case OperatorType::kExpandDims: + case OperatorType::kSqueeze: + case OperatorType::kTensorFlowReshape: + case OperatorType::kTranspose: + break; + default: + return false; } DCHECK_EQ(exchange_op->outputs[0], ac_op->inputs[0]); - const auto& exchange_op_input = exchange_op->inputs[0]; - const auto& intermediate_array = exchange_op->outputs[0]; - const auto& ac_op_output = ac_op->outputs[0]; + const auto exchange_op_input = exchange_op->inputs[0]; + const auto intermediate_array = exchange_op->outputs[0]; + const auto ac_op_output = ac_op->outputs[0]; int count_ops_consuming_output = CountOpsWithInput(*model, intermediate_array); @@ -62,32 +70,58 @@ bool ReorderActivationFunctions::Run(Model* model, std::size_t op_index) { return false; } - // If the ac_op was originally producing an output_array we can't reorder as - // otherwise the output array would change. It'd be nice to still be able to - // reorder but if code is relying on the fetch names instead of array indices - // this won't work. - for (int i = 0; i < model->flags.output_arrays_size(); ++i) { - if (model->flags.output_arrays(i) == ac_op->outputs[0]) { - AddMessageF( - "Not exchanging activation function with %s to preserve output array " - "name %s", - LogName(*exchange_op), ac_op->outputs[0]); - return false; - } - } - - // Rewire by changing inputs, including all consumers. - Operator* consumer = GetFirstOpWithInput(*model, ac_op_output); - while (consumer) { - for (int i = 0; i < consumer->inputs.size(); ++i) { - if (consumer->inputs[i] == ac_op_output) { - consumer->inputs[i] = intermediate_array; + // If the ac_op was originally producing an output_array we can't trivially + // reorder as otherwise the output array name would change and break + // downstream assumptions. To work around that we perform some renaming below + // in that case at the cost of a bit more confusing array names in this rare + // case. 
+ bool is_ac_op_output = + std::find(model->flags.output_arrays().begin(), + model->flags.output_arrays().end(), + ac_op_output) != model->flags.output_arrays().end(); + if (is_ac_op_output) { + // To preserve the output array name of the activation function we need to + // create a temporary to use to pass between ac->ex. + // + // Original: + // (a) -> EX -> (b) -> AC -> (c) + // Now: + // (a) -> AC -> (c') -> EX -> (c) + AddMessageF( + "Exchanging activation function %s with %s but renaming to preserve " + "output array %s", + LogName(*ac_op), LogName(*exchange_op), ac_op->outputs[0]); + + auto renamed_ac_op_output = + AvailableArrayName(*model, ac_op_output + "_exchange"); + ac_op->inputs[0] = exchange_op_input; + ac_op->outputs[0] = renamed_ac_op_output; + model->EraseArray(exchange_op->outputs[0]); + exchange_op->inputs[0] = renamed_ac_op_output; + exchange_op->outputs[0] = ac_op_output; + } else { + // Simply swap the order and update consumers to use the exchange_op output + // array (b). + // + // Original: + // (a) -> EX -> (b) -> AC -> (c) + // Now: + // (a) -> AC -> (c) -> EX -> (b) + AddMessageF("Exchanging activation function %s with %s", LogName(*ac_op), + LogName(*exchange_op)); + + Operator* consumer = GetFirstOpWithInput(*model, ac_op_output); + while (consumer) { + for (int i = 0; i < consumer->inputs.size(); ++i) { + if (consumer->inputs[i] == ac_op_output) { + consumer->inputs[i] = intermediate_array; + } } + consumer = GetFirstOpWithInput(*model, ac_op_output); } - consumer = GetFirstOpWithInput(*model, ac_op_output); + ac_op->inputs[0] = exchange_op_input; + exchange_op->inputs[0] = ac_op_output; } - ac_op->inputs[0] = exchange_op_input; - exchange_op->inputs[0] = ac_op_output; // Clear shapes; this will allow shape propagation to fix the sizes for us. 
model->GetOrCreateArray(ac_op->outputs[0]).clear_shape(); diff --git a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc index d96b3d522d..6d5636d744 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/resolve_constant_unary.cc @@ -40,7 +40,10 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { unary_op->type != OperatorType::kTensorFlowSum && unary_op->type != OperatorType::kTensorFlowMin && unary_op->type != OperatorType::kTensorFlowMax && - unary_op->type != OperatorType::kTensorFlowReshape) { + unary_op->type != OperatorType::kTensorFlowReshape && + unary_op->type != OperatorType::kRelu6 && + unary_op->type != OperatorType::kRelu1 && + unary_op->type != OperatorType::kRelu) { return false; } // Check if the input is a constant parameter. @@ -213,6 +216,37 @@ bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) { } output_float_data[i] = outval; } + } else if (unary_op->type == OperatorType::kRelu6 && + unary_op->type == OperatorType::kRelu1 && + unary_op->type == OperatorType::kRelu) { + for (size_t i = 0; i < output_buffer_size; ++i) { + const float value = (*input_float_data)[i]; + float new_value = 0.0f; + switch (unary_op->type) { + case OperatorType::kRelu: { + static constexpr float kLower = 0; + new_value = value < kLower ? kLower : value; + break; + } + case OperatorType::kRelu1: { + static constexpr float kUpper = 1; + static constexpr float kLower = -1; + new_value = value > kUpper ? kUpper : value < kLower ? kLower : value; + break; + } + case OperatorType::kRelu6: { + static constexpr float kUpper = 6; + static constexpr float kLower = 0; + new_value = value > kUpper ? kUpper : value < kLower ? 
kLower : value; + break; + } + default: + LOG(FATAL) << "Unsupported activation function " + << LogName(*unary_op); + return false; + } + output_float_data[i] = new_value; + } } else { LOG(FATAL) << "should not get here."; } diff --git a/tensorflow/contrib/lite/toco/toco_tooling.cc b/tensorflow/contrib/lite/toco/toco_tooling.cc index ee3f7ab846..024335b5e4 100644 --- a/tensorflow/contrib/lite/toco/toco_tooling.cc +++ b/tensorflow/contrib/lite/toco/toco_tooling.cc @@ -59,6 +59,7 @@ void MakeGeneralGraphTransformationsSet( transformations->Add(new ConvertReorderAxes); transformations->Add(new ResolveReshapeAttributes); transformations->Add(new ResolveTransposeAttributes); + transformations->Add(new PropagateActivationFunctionIntoConstants); transformations->Add(new PropagateArrayDataTypes); transformations->Add(new PropagateFixedSizes); transformations->Add(new RemoveTensorFlowAssert); -- GitLab From 05aa4e58c88d037868b24a1557a58bc8dd357106 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 9 Mar 2018 18:00:26 -0800 Subject: [PATCH 695/884] Fix flakiness in common_runtime/function_test.cc. The flakiness was due to nondeterministic names being chosen for folded constants; the fix was to split out the source of the nondetermism into a separate test. 
PiperOrigin-RevId: 188565362 --- tensorflow/core/BUILD | 33 +++ .../core/common_runtime/function_test.cc | 59 +--- .../function_threadpool_test.cc | 258 ++++++++++++++++++ 3 files changed, 294 insertions(+), 56 deletions(-) create mode 100644 tensorflow/core/common_runtime/function_threadpool_test.cc diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index f2b0d542dd..e9ed5c4910 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3349,6 +3349,39 @@ tf_cc_test( ], ) +tf_cc_test( + name = "common_runtime_function_threadpool_test", + size = "small", + srcs = ["common_runtime/function_threadpool_test.cc"], + linkstatic = tf_kernel_tests_linkstatic(), + deps = [ + ":core", + ":core_cpu", + ":core_cpu_internal", + ":direct_session_internal", + ":framework", + ":framework_internal", + ":lib", + ":lib_internal", + ":ops", + ":protos_all_cc", + ":test", + ":test_main", + ":testlib", + "//tensorflow/cc:cc_ops", + "//tensorflow/cc:cc_ops_internal", + "//tensorflow/cc:function_ops", + "//tensorflow/cc:functional_ops", + "//tensorflow/core/kernels:cast_op", + "//tensorflow/core/kernels:cwise_op", + "//tensorflow/core/kernels:function_ops", + "//tensorflow/core/kernels:matmul_op", + "//tensorflow/core/kernels:random_ops", + "//tensorflow/core/kernels:shape_ops", + "//third_party/eigen3", + ], +) + tf_cc_test_gpu( name = "gpu_allocator_retry_test", size = "medium", diff --git a/tensorflow/core/common_runtime/function_test.cc b/tensorflow/core/common_runtime/function_test.cc index d7e5f0018e..d17ef4d459 100644 --- a/tensorflow/core/common_runtime/function_test.cc +++ b/tensorflow/core/common_runtime/function_test.cc @@ -530,59 +530,6 @@ TEST_F(FunctionLibraryRuntimeTest, StateHandle) { } } -TEST_F(FunctionLibraryRuntimeTest, DefaultThreadpool) { - using test::function::blocking_op_state; - using test::function::BlockingOpState; - - thread::ThreadPool* tp = new thread::ThreadPool(Env::Default(), "FLRTest", 1); - Init({test::function::BlockingOpFn(), 
test::function::XTimesTwo()}, tp); - - auto x = test::AsScalar(1.3); - Tensor y; - blocking_op_state = new BlockingOpState(); - - thread::ThreadPool* tp1 = new thread::ThreadPool(Env::Default(), "tp1", 5); - bool finished_running = false; - tp1->Schedule([&x, &y, &finished_running, this]() { - TF_CHECK_OK(InstantiateAndRun(flr0_, "BlockingOpFn", {}, {x}, {&y}, - false /* add_runner */)); - finished_running = true; - }); - - // InstantiateAndRun shouldn't finish because BlockingOpFn should be blocked. - EXPECT_FALSE(finished_running); - - FunctionLibraryRuntime::Handle h; - TF_CHECK_OK(Instantiate(flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, &h)); - - auto x1 = test::AsTensor({1, 2, 3, 4}); - Tensor y1; - std::atomic num_done(0); - FunctionLibraryRuntime::Options opts; - for (int i = 0; i < 4; ++i) { - tp1->Schedule([&h, &x1, &y1, &opts, &num_done, this]() { - TF_CHECK_OK(Run(flr0_, h, opts, {x1}, {&y1}, false /* add_runner */)); - num_done.fetch_add(1); - }); - } - // All the 4 Run() calls should be blocked because the runner is occupied. - EXPECT_EQ(0, num_done.load()); - - blocking_op_state->AwaitState(1); - blocking_op_state->MoveToState(1, 2); - // Now the runner should be unblocked and all the other Run() calls should - // proceed. 
- blocking_op_state->AwaitState(3); - blocking_op_state->MoveToState(3, 0); - delete tp1; - EXPECT_TRUE(finished_running); - EXPECT_EQ(4, num_done.load()); - - delete blocking_op_state; - blocking_op_state = nullptr; - delete tp; -} - TEST_F(FunctionLibraryRuntimeTest, ExpandInlineFunctions) { Init({test::function::XTimesTwo(), test::function::XTimesFour(), test::function::XTimes16()}); @@ -855,7 +802,7 @@ TEST_F(FunctionLibraryRuntimeTest, OptimizeGraph) { Scope s = Scope::NewRootScope(); auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); auto x4_x2_scale = ops::Const( - s.WithOpName("x4/x2/scale/_12__cf__10") + s.WithOpName("x4/x2/scale/_12__cf__6") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 2.0f); auto x4_x2_y = ops::Mul(s.WithOpName("x4/x2/y"), x, x4_x2_scale); @@ -1061,13 +1008,13 @@ TEST_F(FunctionLibraryRuntimeTest, Gradient_XTimesTwo) { auto x = ops::_Arg(s.WithOpName("x"), DT_FLOAT, 0); auto func0 = ops::_Arg(s.WithOpName("Func/_0"), DT_FLOAT, 1); auto scale = ops::Const( - s.WithOpName("scale/_6__cf__15") + s.WithOpName("scale/_6__cf__11") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 2.0f); auto func1_gx = ops::Mul(s.WithOpName("Func/_1/gx"), func0, scale); auto func1_sx = ops::Shape(s.WithOpName("Func/_1/sx"), x); auto const0 = ops::Const( - s.WithOpName("Func/_1/sy/_5__cf__14") + s.WithOpName("Func/_1/sy/_5__cf__10") .WithDevice("/job:localhost/replica:0/task:0/device:CPU:0"), 0, {0}); auto func1_rx = ops::internal::BroadcastGradientArgs( diff --git a/tensorflow/core/common_runtime/function_threadpool_test.cc b/tensorflow/core/common_runtime/function_threadpool_test.cc new file mode 100644 index 0000000000..6223a4e648 --- /dev/null +++ b/tensorflow/core/common_runtime/function_threadpool_test.cc @@ -0,0 +1,258 @@ +/* Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/common_runtime/function.h" + +#include +#include + +#include "tensorflow/cc/ops/array_ops_internal.h" +#include "tensorflow/cc/ops/function_ops.h" +#include "tensorflow/cc/ops/functional_ops.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/common_runtime/device.h" +#include "tensorflow/core/common_runtime/device_factory.h" +#include "tensorflow/core/common_runtime/executor.h" +#include "tensorflow/core/common_runtime/function_testlib.h" +#include "tensorflow/core/common_runtime/rendezvous_mgr.h" +#include "tensorflow/core/common_runtime/step_stats_collector.h" +#include "tensorflow/core/framework/function.h" +#include "tensorflow/core/framework/function_testlib.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_testutil.h" +#include "tensorflow/core/framework/versions.pb.h" +#include "tensorflow/core/graph/graph_constructor.h" +#include "tensorflow/core/lib/core/notification.h" +#include "tensorflow/core/lib/core/status.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/lib/core/threadpool.h" +#include "tensorflow/core/platform/test.h" +#include "tensorflow/core/public/session_options.h" +#include "tensorflow/core/public/version.h" +#include "tensorflow/core/util/equal_graph_def.h" + +namespace tensorflow { +namespace { + +class FunctionLibraryRuntimeTest : public ::testing::Test { + protected: + void Init(const 
std::vector& flib, + thread::ThreadPool* default_thread_pool = nullptr) { + SessionOptions options; + auto* device_count = options.config.mutable_device_count(); + device_count->insert({"CPU", 3}); + TF_CHECK_OK(DeviceFactory::AddDevices( + options, "/job:localhost/replica:0/task:0", &devices_)); + + FunctionDefLibrary proto; + for (const auto& fdef : flib) *(proto.add_function()) = fdef; + lib_def_.reset(new FunctionLibraryDefinition(OpRegistry::Global(), proto)); + OptimizerOptions opts; + device_mgr_.reset(new DeviceMgr(devices_)); + pflr_.reset(new ProcessFunctionLibraryRuntime( + device_mgr_.get(), Env::Default(), TF_GRAPH_DEF_VERSION, lib_def_.get(), + opts, default_thread_pool, nullptr /* cluster_flr */)); + flr0_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:0"); + flr1_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:1"); + flr2_ = pflr_->GetFLR("/job:localhost/replica:0/task:0/cpu:2"); + fdef_lib_ = lib_def_->ToProto(); + } + + Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle, + FunctionLibraryRuntime::Options opts, + const std::vector& args, std::vector rets, + bool add_runner = true) { + std::atomic call_count(0); + std::function)> runner = + [&call_count](std::function fn) { + ++call_count; + test::function::FunctionTestSchedClosure(fn); + }; + if (add_runner) { + opts.runner = &runner; + } else { + opts.runner = nullptr; + } + Notification done; + std::vector out; + Status status; + flr->Run(opts, handle, args, &out, [&status, &done](const Status& s) { + status = s; + done.Notify(); + }); + done.WaitForNotification(); + if (!status.ok()) { + return status; + } + CHECK_EQ(rets.size(), out.size()); + for (size_t i = 0; i < rets.size(); ++i) { + *rets[i] = out[i]; + } + + if (add_runner) { + EXPECT_GE(call_count, 1); // Test runner is used. 
+ } + + return Status::OK(); + } + + Status Instantiate(FunctionLibraryRuntime* flr, const string& name, + test::function::Attrs attrs, + FunctionLibraryRuntime::Handle* handle) { + return flr->Instantiate(name, attrs, handle); + } + + Status Instantiate(FunctionLibraryRuntime* flr, const string& name, + test::function::Attrs attrs, + const FunctionLibraryRuntime::InstantiateOptions& options, + FunctionLibraryRuntime::Handle* handle) { + return flr->Instantiate(name, attrs, options, handle); + } + + Status InstantiateAndRun(FunctionLibraryRuntime* flr, const string& name, + test::function::Attrs attrs, + const std::vector& args, + std::vector rets, bool add_runner = true) { + return InstantiateAndRun(flr, name, attrs, + FunctionLibraryRuntime::InstantiateOptions(), args, + std::move(rets), add_runner); + } + + Status InstantiateAndRun( + FunctionLibraryRuntime* flr, const string& name, + test::function::Attrs attrs, + const FunctionLibraryRuntime::InstantiateOptions& options, + const std::vector& args, std::vector rets, + bool add_runner = true) { + FunctionLibraryRuntime::Handle handle; + Status status = flr->Instantiate(name, attrs, options, &handle); + if (!status.ok()) { + return status; + } + FunctionLibraryRuntime::Options opts; + status = Run(flr, handle, opts, args, rets, add_runner); + if (!status.ok()) return status; + + // Release the handle and try running again. It should not succeed. 
+ status = flr->ReleaseHandle(handle); + if (!status.ok()) return status; + + Status status2 = Run(flr, handle, opts, args, std::move(rets)); + EXPECT_TRUE(errors::IsInvalidArgument(status2)); + EXPECT_TRUE( + StringPiece(status2.error_message()).contains("remote execution.")); + + return status; + } + + Status Run(FunctionLibraryRuntime* flr, FunctionLibraryRuntime::Handle handle, + FunctionLibraryRuntime::Options opts, CallFrameInterface* frame, + bool add_runner = true) { + std::atomic call_count(0); + std::function)> runner = + [&call_count](std::function fn) { + ++call_count; + test::function::FunctionTestSchedClosure(fn); + }; + if (add_runner) { + opts.runner = &runner; + } else { + opts.runner = nullptr; + } + Notification done; + std::vector out; + Status status; + flr->Run(opts, handle, frame, [&status, &done](const Status& s) { + status = s; + done.Notify(); + }); + done.WaitForNotification(); + if (!status.ok()) { + return status; + } + + if (add_runner) { + EXPECT_GE(call_count, 1); // Test runner is used. 
+ } + + return Status::OK(); + } + + FunctionLibraryRuntime* flr0_; + FunctionLibraryRuntime* flr1_; + FunctionLibraryRuntime* flr2_; + std::vector devices_; + std::unique_ptr device_mgr_; + std::unique_ptr lib_def_; + std::unique_ptr pflr_; + FunctionDefLibrary fdef_lib_; +}; + +TEST_F(FunctionLibraryRuntimeTest, DefaultThreadpool) { + using test::function::blocking_op_state; + using test::function::BlockingOpState; + + thread::ThreadPool* tp = new thread::ThreadPool(Env::Default(), "FLRTest", 1); + Init({test::function::BlockingOpFn(), test::function::XTimesTwo()}, tp); + + auto x = test::AsScalar(1.3); + Tensor y; + blocking_op_state = new BlockingOpState(); + + thread::ThreadPool* tp1 = new thread::ThreadPool(Env::Default(), "tp1", 5); + bool finished_running = false; + tp1->Schedule([&x, &y, &finished_running, this]() { + TF_CHECK_OK(InstantiateAndRun(flr0_, "BlockingOpFn", {}, {x}, {&y}, + false /* add_runner */)); + finished_running = true; + }); + + // InstantiateAndRun shouldn't finish because BlockingOpFn should be blocked. + EXPECT_FALSE(finished_running); + + FunctionLibraryRuntime::Handle h; + TF_CHECK_OK(Instantiate(flr0_, "XTimesTwo", {{"T", DT_FLOAT}}, &h)); + + auto x1 = test::AsTensor({1, 2, 3, 4}); + std::atomic num_done(0); + FunctionLibraryRuntime::Options opts; + for (int i = 0; i < 4; ++i) { + tp1->Schedule([&h, &x1, &opts, &num_done, this]() { + Tensor y1; + TF_CHECK_OK(Run(flr0_, h, opts, {x1}, {&y1}, false /* add_runner */)); + num_done.fetch_add(1); + }); + } + // All the 4 Run() calls should be blocked because the runner is occupied. + EXPECT_EQ(0, num_done.load()); + + blocking_op_state->AwaitState(1); + blocking_op_state->MoveToState(1, 2); + // Now the runner should be unblocked and all the other Run() calls should + // proceed. 
+ blocking_op_state->AwaitState(3); + blocking_op_state->MoveToState(3, 0); + delete tp1; + EXPECT_TRUE(finished_running); + EXPECT_EQ(4, num_done.load()); + + delete blocking_op_state; + blocking_op_state = nullptr; + delete tp; +} + +} // namespace +} // namespace tensorflow -- GitLab From 2426308fa58ebf473092918cc8ffa215325c4079 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Fri, 9 Mar 2018 18:12:02 -0800 Subject: [PATCH 696/884] Add experimental Session::MakeCallable() API and implement it for DirectSession. The intent of this new API matches the Python `tf.Session.make_callable()` method: it splits the two roles of the `Session::Run()` method into separate methods: 1. `Session::MakeCallable()` takes information about a subgraph (such as the names of nodes to feed and fetch), and prunes and optimizes that graph, returning a simple handle. 2. `Session::RunCallable()` takes that handle, plus any values to be fed, and executes the graph, returning whatever outputs are produced. This split moves string processing off the critical path of running a step. We also add a new method `Session::ReleaseCallable()` that makes it possible to free the resources associated with a cached subgraph, and could be useful for seldom-executed graphs such as initializers. 
PiperOrigin-RevId: 188566635 --- .../core/common_runtime/direct_session.cc | 640 ++++++++++++------ .../core/common_runtime/direct_session.h | 48 +- .../common_runtime/direct_session_test.cc | 367 +++++++++- tensorflow/core/framework/session_state.h | 6 + tensorflow/core/protobuf/config.proto | 23 + tensorflow/core/public/session.h | 35 + 6 files changed, 880 insertions(+), 239 deletions(-) diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 9def58cb9c..1fbc314e2e 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -318,6 +318,7 @@ DirectSession::~DirectSession() { for (auto& it : executors_) { it.second.reset(); } + callables_.clear(); for (auto d : device_mgr_->ListDevices()) { d->op_segment()->RemoveHold(session_handle_); } @@ -409,16 +410,21 @@ Status DirectSession::Run(const NamedTensorList& inputs, } Status DirectSession::CreateDebuggerState( - const DebugOptions& debug_options, int64 session_run_index, - int64 executor_step_index, const std::vector& input_names, - const std::vector& output_names, - const std::vector& target_names, + const CallableOptions& callable_options, int64 global_step, + int64 session_run_index, int64 executor_step_index, std::unique_ptr* debugger_state) { - TF_RETURN_IF_ERROR( - DebuggerStateRegistry::CreateState(debug_options, debugger_state)); + TF_RETURN_IF_ERROR(DebuggerStateRegistry::CreateState( + callable_options.run_options().debug_options(), debugger_state)); + std::vector input_names(callable_options.feed().begin(), + callable_options.feed().end()); + std::vector output_names(callable_options.fetch().begin(), + callable_options.fetch().end()); + std::vector target_names(callable_options.target().begin(), + callable_options.target().end()); + TF_RETURN_IF_ERROR(debugger_state->get()->PublishDebugMetadata( - debug_options.global_step(), session_run_index, executor_step_index, - input_names, 
output_names, target_names)); + global_step, session_run_index, executor_step_index, input_names, + output_names, target_names)); return Status::OK(); } @@ -433,84 +439,23 @@ Status DirectSession::DecorateAndPublishGraphForDebug( return Status::OK(); } -Status DirectSession::Run(const RunOptions& run_options, - const NamedTensorList& inputs, - const std::vector& output_names, - const std::vector& target_nodes, - std::vector* outputs, - RunMetadata* run_metadata) { - TF_RETURN_IF_ERROR(CheckNotClosed()); - direct_session_runs->GetCell()->IncrementBy(1); - { - mutex_lock l(graph_def_lock_); - if (!graph_created_) { - return errors::InvalidArgument( - "Session was not created with a graph before Run()!"); - } - } - - // Extract the inputs names for this run of the session. - std::vector input_tensor_names; - input_tensor_names.reserve(inputs.size()); - for (const auto& it : inputs) { - input_tensor_names.push_back(it.first); - } - - if (run_options.inter_op_thread_pool() < 0 || - run_options.inter_op_thread_pool() >= thread_pools_.size()) { - return errors::InvalidArgument("Invalid inter_op_thread_pool: ", - run_options.inter_op_thread_pool()); - } - thread::ThreadPool* pool = - thread_pools_[run_options.inter_op_thread_pool()].first; - - // Check if we already have an executor for these arguments. 
- ExecutorsAndKeys* executors_and_keys; - RunStateArgs run_state_args(run_options.debug_options()); - - Executor::Args args; - args.step_id = step_id_counter_.fetch_add(1); - - TF_RETURN_IF_ERROR(GetOrCreateExecutors(input_tensor_names, output_names, - target_nodes, &executors_and_keys, - &run_state_args)); +Status DirectSession::RunInternal(int64 step_id, const RunOptions& run_options, + CallFrameInterface* call_frame, + ExecutorsAndKeys* executors_and_keys, + RunMetadata* run_metadata) { const int64 executor_step_count = executors_and_keys->step_count.fetch_add(1); std::unique_ptr debugger_state; if (!run_options.debug_options().debug_tensor_watch_opts().empty()) { - TF_RETURN_IF_ERROR(CreateDebuggerState( - run_options.debug_options(), args.step_id, executor_step_count, - input_tensor_names, output_names, target_nodes, &debugger_state)); - } - - // Configure a call frame for the step, which we use to feed and - // fetch values to and from the executors. - FunctionCallFrame call_frame(executors_and_keys->input_types, - executors_and_keys->output_types); - gtl::InlinedVector feed_args(inputs.size()); - for (const auto& it : inputs) { - if (it.second.dtype() == DT_RESOURCE) { - Tensor tensor_from_handle; - TF_RETURN_IF_ERROR( - ResourceHandleToInputTensor(it.second, &tensor_from_handle)); - feed_args[executors_and_keys->input_name_to_index[it.first]] = - tensor_from_handle; - } else { - feed_args[executors_and_keys->input_name_to_index[it.first]] = it.second; - } - } - const Status s = call_frame.SetArgs(feed_args); - if (errors::IsInternal(s)) { - return errors::InvalidArgument(s.error_message()); - } else if (!s.ok()) { - return s; + TF_RETURN_IF_ERROR( + CreateDebuggerState(executors_and_keys->callable_options, + run_options.debug_options().global_step(), step_id, + executor_step_count, &debugger_state)); } // Create a run state and start execution. 
- RunState run_state(args.step_id, &devices_); + RunState run_state(step_id, &devices_); run_state.rendez = new IntraProcessRendezvous(device_mgr_.get()); - CancellationManager step_cancellation_manager; - args.call_frame = &call_frame; // Start parallel Executors. const size_t num_executors = executors_and_keys->items.size(); @@ -523,15 +468,15 @@ Status DirectSession::Run(const RunOptions& run_options, run_state.executors_done.Notify(); }); + Executor::Args args; + args.step_id = step_id; + args.call_frame = call_frame; args.rendezvous = run_state.rendez; + CancellationManager step_cancellation_manager; args.cancellation_manager = &step_cancellation_manager; - args.session_state = &session_state_; args.tensor_store = &run_state.tensor_store; args.step_container = &run_state.step_container; - if (LogMemory::IsEnabled()) { - LogMemory::RecordStep(args.step_id, run_state_args.handle); - } args.sync_on_finish = sync_on_finish_; const bool do_trace = (run_options.trace_level() > RunOptions::NO_TRACE); @@ -569,6 +514,14 @@ Status DirectSession::Run(const RunOptions& run_options, } } + if (run_options.inter_op_thread_pool() < 0 || + run_options.inter_op_thread_pool() >= thread_pools_.size()) { + run_state.executors_done.Notify(); + delete barrier; + return errors::InvalidArgument("Invalid inter_op_thread_pool: ", + run_options.inter_op_thread_pool()); + } + // Register this step with session's cancellation manager, so that // `Session::Close()` will cancel the step. 
const CancellationToken cancellation_token = @@ -586,6 +539,9 @@ Status DirectSession::Run(const RunOptions& run_options, return errors::Cancelled("Run call was cancelled"); } + thread::ThreadPool* pool = + thread_pools_[run_options.inter_op_thread_pool()].first; + Executor::Args::Runner default_runner = [this, pool](Executor::Args::Closure c) { SchedClosure(pool, std::move(c)); @@ -628,6 +584,111 @@ Status DirectSession::Run(const RunOptions& run_options, TF_RETURN_IF_ERROR(run_state.status); } + // Save the output tensors of this run we choose to keep. + if (!run_state.tensor_store.empty()) { + TF_RETURN_IF_ERROR(run_state.tensor_store.SaveTensors( + {executors_and_keys->callable_options.fetch().begin(), + executors_and_keys->callable_options.fetch().end()}, + &session_state_)); + } + + if (args.stats_collector) { + args.stats_collector->Finalize(); + } + + // Build and return the cost model as instructed. + if (update_cost_model) { + // Build the cost model + std::unordered_map device_to_graph; + for (const PerPartitionExecutorsAndLib& partition : + executors_and_keys->items) { + const Graph* graph = partition.graph; + const string device = partition.flib->device()->name(); + device_to_graph[device] = graph; + } + + mutex_lock l(executor_lock_); + args.stats_collector->BuildCostModel(&cost_model_manager_, device_to_graph); + + // annotate stats onto cost graph. + CostGraphDef* cost_graph = run_metadata->mutable_cost_graph(); + for (const auto& item : executors_and_keys->items) { + TF_RETURN_IF_ERROR( + cost_model_manager_.AddToCostGraphDef(item.graph, cost_graph)); + } + } + + // If requested via RunOptions, output the partition graphs. 
+ if (run_options.output_partition_graphs()) { + protobuf::RepeatedPtrField* partition_graph_defs = + run_metadata->mutable_partition_graphs(); + for (const PerPartitionExecutorsAndLib& exec_and_lib : + executors_and_keys->items) { + GraphDef* partition_graph_def = partition_graph_defs->Add(); + exec_and_lib.graph->ToGraphDef(partition_graph_def); + } + } + + return Status::OK(); +} + +Status DirectSession::Run(const RunOptions& run_options, + const NamedTensorList& inputs, + const std::vector& output_names, + const std::vector& target_nodes, + std::vector* outputs, + RunMetadata* run_metadata) { + TF_RETURN_IF_ERROR(CheckNotClosed()); + TF_RETURN_IF_ERROR(CheckGraphCreated("Run()")); + direct_session_runs->GetCell()->IncrementBy(1); + + // Extract the inputs names for this run of the session. + std::vector input_tensor_names; + input_tensor_names.reserve(inputs.size()); + for (const auto& it : inputs) { + input_tensor_names.push_back(it.first); + } + + // Check if we already have an executor for these arguments. + ExecutorsAndKeys* executors_and_keys; + RunStateArgs run_state_args(run_options.debug_options()); + + TF_RETURN_IF_ERROR(GetOrCreateExecutors(input_tensor_names, output_names, + target_nodes, &executors_and_keys, + &run_state_args)); + + // Configure a call frame for the step, which we use to feed and + // fetch values to and from the executors. 
+ FunctionCallFrame call_frame(executors_and_keys->input_types, + executors_and_keys->output_types); + gtl::InlinedVector feed_args(inputs.size()); + for (const auto& it : inputs) { + if (it.second.dtype() == DT_RESOURCE) { + Tensor tensor_from_handle; + TF_RETURN_IF_ERROR( + ResourceHandleToInputTensor(it.second, &tensor_from_handle)); + feed_args[executors_and_keys->input_name_to_index[it.first]] = + tensor_from_handle; + } else { + feed_args[executors_and_keys->input_name_to_index[it.first]] = it.second; + } + } + const Status s = call_frame.SetArgs(feed_args); + if (errors::IsInternal(s)) { + return errors::InvalidArgument(s.error_message()); + } else if (!s.ok()) { + return s; + } + + const int64 step_id = step_id_counter_.fetch_add(1); + + if (LogMemory::IsEnabled()) { + LogMemory::RecordStep(step_id, run_state_args.handle); + } + + TF_RETURN_IF_ERROR(RunInternal(step_id, run_options, &call_frame, + executors_and_keys, run_metadata)); + // Receive outputs. if (outputs) { std::vector sorted_outputs; @@ -667,45 +728,6 @@ Status DirectSession::Run(const RunOptions& run_options, } } - // Save the output tensors of this run we choose to keep. - TF_RETURN_IF_ERROR( - run_state.tensor_store.SaveTensors(output_names, &session_state_)); - if (args.stats_collector) { - args.stats_collector->Finalize(); - } - - // Build and return the cost model as instructed. - mutex_lock l(executor_lock_); - if (update_cost_model) { - // Build the cost model - std::unordered_map device_to_graph; - for (const PerPartitionExecutorsAndLib& partition : - executors_and_keys->items) { - const Graph* graph = partition.graph; - const string device = partition.flib->device()->name(); - device_to_graph[device] = graph; - } - args.stats_collector->BuildCostModel(&cost_model_manager_, device_to_graph); - - // annotate stats onto cost graph. 
- CostGraphDef* cost_graph = run_metadata->mutable_cost_graph(); - for (const auto& item : executors_and_keys->items) { - TF_RETURN_IF_ERROR( - cost_model_manager_.AddToCostGraphDef(item.graph, cost_graph)); - } - } - - // If requested via RunOptions, output the partition graphs. - if (run_options.output_partition_graphs()) { - protobuf::RepeatedPtrField* partition_graph_defs = - run_metadata->mutable_partition_graphs(); - for (const PerPartitionExecutorsAndLib& exec_and_lib : - executors_and_keys->items) { - GraphDef* partition_graph_def = partition_graph_defs->Add(); - exec_and_lib.graph->ToGraphDef(partition_graph_def); - } - } - return Status::OK(); } @@ -714,13 +736,7 @@ Status DirectSession::PRunSetup(const std::vector& input_names, const std::vector& target_nodes, string* handle) { TF_RETURN_IF_ERROR(CheckNotClosed()); - { - mutex_lock l(graph_def_lock_); - if (!graph_created_) { - return errors::InvalidArgument( - "Session was not created with a graph before PRunSetup()!"); - } - } + TF_RETURN_IF_ERROR(CheckGraphCreated("PRunSetup()")); // RunOptions is not available in PRunSetup, so use thread pool 0. 
thread::ThreadPool* pool = thread_pools_[0].first; @@ -1061,92 +1077,31 @@ Status DirectSession::CheckFetch(const NamedTensorList& feeds, return Status::OK(); } -Status DirectSession::GetOrCreateExecutors( - gtl::ArraySlice inputs, gtl::ArraySlice outputs, - gtl::ArraySlice target_nodes, ExecutorsAndKeys** executors_and_keys, +Status DirectSession::CreateExecutors( + const CallableOptions& callable_options, + std::unique_ptr* out_executors_and_keys, + std::unique_ptr* out_func_info, RunStateArgs* run_state_args) { - int64 handle_name_counter_value = -1; - if (LogMemory::IsEnabled() || run_state_args->is_partial_run) { - handle_name_counter_value = handle_name_counter_.fetch_add(1); - } - - string debug_tensor_watches_summary; - if (!run_state_args->debug_options.debug_tensor_watch_opts().empty()) { - debug_tensor_watches_summary = SummarizeDebugTensorWatches( - run_state_args->debug_options.debug_tensor_watch_opts()); - } - - // Fast lookup path, no sorting. - const string key = strings::StrCat( - str_util::Join(inputs, ","), "->", str_util::Join(outputs, ","), "/", - str_util::Join(target_nodes, ","), "/", run_state_args->is_partial_run, - "/", debug_tensor_watches_summary); - // Set the handle, if it's needed to log memory or for partial run. - if (handle_name_counter_value >= 0) { - run_state_args->handle = - strings::StrCat(key, ";", handle_name_counter_value); - } - - // See if we already have the executors for this run. - { - mutex_lock l(executor_lock_); // could use reader lock - auto it = executors_.find(key); - if (it != executors_.end()) { - *executors_and_keys = it->second.get(); - return Status::OK(); - } - } - - // Slow lookup path, the unsorted key missed the cache. - // Sort the inputs and outputs, and look up with the sorted key in case an - // earlier call used a different order of inputs and outputs. - // - // We could consider some other signature instead of sorting that - // preserves the same property to avoid the sort in the future. 
- std::vector inputs_sorted(inputs.begin(), inputs.end()); - std::sort(inputs_sorted.begin(), inputs_sorted.end()); - std::vector outputs_sorted(outputs.begin(), outputs.end()); - std::sort(outputs_sorted.begin(), outputs_sorted.end()); - std::vector tn_sorted(target_nodes.begin(), target_nodes.end()); - std::sort(tn_sorted.begin(), tn_sorted.end()); - - const string sorted_key = strings::StrCat( - str_util::Join(inputs_sorted, ","), "->", - str_util::Join(outputs_sorted, ","), "/", str_util::Join(tn_sorted, ","), - "/", run_state_args->is_partial_run, "/", debug_tensor_watches_summary); - // Set the handle, if its needed to log memory or for partial run. - if (handle_name_counter_value >= 0) { - run_state_args->handle = - strings::StrCat(sorted_key, ";", handle_name_counter_value); - } - - // See if we already have the executors for this run. - { - mutex_lock l(executor_lock_); - auto it = executors_.find(sorted_key); - if (it != executors_.end()) { - *executors_and_keys = it->second.get(); - // Insert this under the original key. - executors_.emplace(key, it->second); - return Status::OK(); - } - } - - // Nothing found, so create the executors and store in the cache. 
BuildGraphOptions options; - options.feed_endpoints = inputs_sorted; - options.fetch_endpoints = outputs_sorted; - options.target_nodes = tn_sorted; + options.feed_endpoints = std::vector(callable_options.feed().begin(), + callable_options.feed().end()); + options.fetch_endpoints = std::vector( + callable_options.fetch().begin(), callable_options.fetch().end()); + options.target_nodes = std::vector(callable_options.target().begin(), + callable_options.target().end()); options.use_function_convention = !run_state_args->is_partial_run; - if (!run_state_args->debug_options.debug_tensor_watch_opts().empty()) { - options.debug_options = run_state_args->debug_options; + if (!callable_options.run_options() + .debug_options() + .debug_tensor_watch_opts() + .empty()) { + options.debug_options = callable_options.run_options().debug_options(); } std::unique_ptr func_info(new FunctionInfo); - std::shared_ptr ek(new ExecutorsAndKeys); + std::unique_ptr ek(new ExecutorsAndKeys); + + ek->callable_options = callable_options; - // The executor_lock_ is intentionally released while executor is - // being created. std::unordered_map> graphs; TF_RETURN_IF_ERROR(CreateGraphs(options, &graphs, &func_info->flib_def, run_state_args, &ek->input_types, @@ -1155,11 +1110,11 @@ Status DirectSession::GetOrCreateExecutors( if (run_state_args->is_partial_run) { ek->graph = std::move(run_state_args->graph); std::unordered_set names; - for (const string& input : inputs) { + for (const string& input : callable_options.feed()) { TensorId id(ParseTensorName(input)); names.emplace(id.first); } - for (const string& output : outputs) { + for (const string& output : callable_options.fetch()) { TensorId id(ParseTensorName(output)); names.emplace(id.first); } @@ -1260,12 +1215,12 @@ Status DirectSession::GetOrCreateExecutors( // For regular `Run()`, we use the function calling convention, and so // maintain a mapping from input/output names to // argument/return-value ordinal index. 
- for (size_t i = 0; i < inputs_sorted.size(); ++i) { - const string& input = inputs_sorted[i]; + for (int i = 0; i < callable_options.feed().size(); ++i) { + const string& input = callable_options.feed(i); ek->input_name_to_index[input] = i; } - for (size_t i = 0; i < outputs_sorted.size(); ++i) { - const string& output = outputs_sorted[i]; + for (int i = 0; i < callable_options.fetch().size(); ++i) { + const string& output = callable_options.fetch(i); ek->output_name_to_index[output] = i; } } else { @@ -1274,26 +1229,123 @@ Status DirectSession::GetOrCreateExecutors( // // We always use the first device as the device name portion of the // key, even if we're feeding another graph. - for (size_t i = 0; i < inputs_sorted.size(); ++i) { - const string& input = inputs_sorted[i]; + for (int i = 0; i < callable_options.feed().size(); ++i) { + const string& input = callable_options.feed(i); ek->input_name_to_rendezvous_key[input] = GetRendezvousKey( input, device_set_.client_device()->attributes(), FrameAndIter(0, 0)); } - for (size_t i = 0; i < outputs_sorted.size(); ++i) { - const string& output = outputs_sorted[i]; + for (int i = 0; i < callable_options.fetch().size(); ++i) { + const string& output = callable_options.fetch(i); ek->output_name_to_rendezvous_key[output] = GetRendezvousKey(output, device_set_.client_device()->attributes(), FrameAndIter(0, 0)); } } + *out_executors_and_keys = std::move(ek); + *out_func_info = std::move(func_info); + return Status::OK(); +} + +Status DirectSession::GetOrCreateExecutors( + gtl::ArraySlice inputs, gtl::ArraySlice outputs, + gtl::ArraySlice target_nodes, ExecutorsAndKeys** executors_and_keys, + RunStateArgs* run_state_args) { + int64 handle_name_counter_value = -1; + if (LogMemory::IsEnabled() || run_state_args->is_partial_run) { + handle_name_counter_value = handle_name_counter_.fetch_add(1); + } + + string debug_tensor_watches_summary; + if (!run_state_args->debug_options.debug_tensor_watch_opts().empty()) { + 
debug_tensor_watches_summary = SummarizeDebugTensorWatches( + run_state_args->debug_options.debug_tensor_watch_opts()); + } + + // Fast lookup path, no sorting. + const string key = strings::StrCat( + str_util::Join(inputs, ","), "->", str_util::Join(outputs, ","), "/", + str_util::Join(target_nodes, ","), "/", run_state_args->is_partial_run, + "/", debug_tensor_watches_summary); + // Set the handle, if it's needed to log memory or for partial run. + if (handle_name_counter_value >= 0) { + run_state_args->handle = + strings::StrCat(key, ";", handle_name_counter_value); + } + + // See if we already have the executors for this run. + { + mutex_lock l(executor_lock_); // could use reader lock + auto it = executors_.find(key); + if (it != executors_.end()) { + *executors_and_keys = it->second.get(); + return Status::OK(); + } + } + + // Slow lookup path, the unsorted key missed the cache. + // Sort the inputs and outputs, and look up with the sorted key in case an + // earlier call used a different order of inputs and outputs. + // + // We could consider some other signature instead of sorting that + // preserves the same property to avoid the sort in the future. + std::vector inputs_sorted(inputs.begin(), inputs.end()); + std::sort(inputs_sorted.begin(), inputs_sorted.end()); + std::vector outputs_sorted(outputs.begin(), outputs.end()); + std::sort(outputs_sorted.begin(), outputs_sorted.end()); + std::vector tn_sorted(target_nodes.begin(), target_nodes.end()); + std::sort(tn_sorted.begin(), tn_sorted.end()); + + const string sorted_key = strings::StrCat( + str_util::Join(inputs_sorted, ","), "->", + str_util::Join(outputs_sorted, ","), "/", str_util::Join(tn_sorted, ","), + "/", run_state_args->is_partial_run, "/", debug_tensor_watches_summary); + // Set the handle, if its needed to log memory or for partial run. 
+ if (handle_name_counter_value >= 0) { + run_state_args->handle = + strings::StrCat(sorted_key, ";", handle_name_counter_value); + } + + // See if we already have the executors for this run. + { + mutex_lock l(executor_lock_); + auto it = executors_.find(sorted_key); + if (it != executors_.end()) { + *executors_and_keys = it->second.get(); + // Insert this under the original key. + executors_.emplace(key, it->second); + return Status::OK(); + } + } + + // Nothing found, so create the executors and store in the cache. + // The executor_lock_ is intentionally released while executors are + // being created. + CallableOptions callable_options; + for (const string& input : inputs_sorted) { + callable_options.add_feed(input); + } + for (const string& output : outputs_sorted) { + callable_options.add_fetch(output); + } + for (const string& target : tn_sorted) { + callable_options.add_target(target); + } + *callable_options.mutable_run_options()->mutable_debug_options() = + run_state_args->debug_options; + std::unique_ptr ek; + std::unique_ptr func_info; + TF_RETURN_IF_ERROR( + CreateExecutors(callable_options, &ek, &func_info, run_state_args)); + // Reacquire the lock, try to insert into the map. mutex_lock l(executor_lock_); functions_.push_back(std::move(func_info)); // Another thread may have created the entry before us, in which case we will // reuse the already created one. - auto insert_result = executors_.emplace(sorted_key, ek); + auto insert_result = executors_.emplace( + sorted_key, std::shared_ptr(std::move(ek))); // Insert the value under the original key, so the fast path lookup will work // if the user uses the same order of inputs, outputs, and targets again. 
executors_.emplace(key, insert_result.first->second); @@ -1562,4 +1614,156 @@ void DirectSession::WaitForNotification(RunState* run_state, return Status::OK(); } +Status DirectSession::MakeCallable(const CallableOptions& callable_options, + CallableHandle* out_handle) { + TF_RETURN_IF_ERROR(CheckNotClosed()); + TF_RETURN_IF_ERROR(CheckGraphCreated("MakeCallable()")); + + if (!callable_options.run_options() + .debug_options() + .debug_tensor_watch_opts() + .empty()) { + return errors::Unimplemented( + "Debug options are not currently supported via the C++ MakeCallable " + "interface."); + } + + std::unique_ptr ek; + std::unique_ptr func_info; + RunStateArgs run_state_args(callable_options.run_options().debug_options()); + TF_RETURN_IF_ERROR( + CreateExecutors(callable_options, &ek, &func_info, &run_state_args)); + { + mutex_lock l(callables_lock_); + *out_handle = next_callable_handle_++; + callables_[*out_handle] = {std::move(ek), std::move(func_info)}; + } + return Status::OK(); +} + +class DirectSession::RunCallableCallFrame : public CallFrameInterface { + public: + RunCallableCallFrame(DirectSession* session, + ExecutorsAndKeys* executors_and_keys, + const std::vector* feed_tensors, + std::vector* fetch_tensors) + : session_(session), + executors_and_keys_(executors_and_keys), + feed_tensors_(feed_tensors), + fetch_tensors_(fetch_tensors) {} + + size_t num_args() const override { + return executors_and_keys_->input_types.size(); + } + size_t num_retvals() const override { + return executors_and_keys_->output_types.size(); + } + + Status GetArg(int index, Tensor* val) const override { + if (index > feed_tensors_->size()) { + return errors::Internal("Args index out of bounds: ", index); + } else if (executors_and_keys_->input_types[index] == DT_RESOURCE) { + TF_RETURN_IF_ERROR( + session_->ResourceHandleToInputTensor((*feed_tensors_)[index], val)); + } else { + *val = (*feed_tensors_)[index]; + } + return Status::OK(); + } + + Status SetRetval(int index, const 
Tensor& val) override { + if (index > fetch_tensors_->size()) { + return errors::Internal("RetVal index out of bounds: ", index); + } + (*fetch_tensors_)[index] = val; + return Status::OK(); + } + + private: + DirectSession* const session_; // Not owned. + ExecutorsAndKeys* const executors_and_keys_; // Not owned. + const std::vector* const feed_tensors_; // Not owned. + std::vector* const fetch_tensors_; // Not owned. +}; + +::tensorflow::Status DirectSession::RunCallable( + CallableHandle handle, const std::vector& feed_tensors, + std::vector* fetch_tensors, RunMetadata* run_metadata) { + TF_RETURN_IF_ERROR(CheckNotClosed()); + TF_RETURN_IF_ERROR(CheckGraphCreated("RunCallable()")); + direct_session_runs->GetCell()->IncrementBy(1); + + // Check if we already have an executor for these arguments. + std::shared_ptr executors_and_keys; + const int64 step_id = step_id_counter_.fetch_add(1); + + { + tf_shared_lock l(callables_lock_); + if (handle >= next_callable_handle_) { + return errors::InvalidArgument("No such callable handle: ", handle); + } + executors_and_keys = callables_[handle].executors_and_keys; + } + + if (!executors_and_keys) { + return errors::InvalidArgument( + "Attempted to run callable after handle was released: ", handle); + } + + // NOTE(mrry): Debug options are not currently supported in the + // callable interface. + DebugOptions debug_options; + RunStateArgs run_state_args(debug_options); + + // Configure a call frame for the step, which we use to feed and + // fetch values to and from the executors. 
+ if (feed_tensors.size() != executors_and_keys->input_types.size()) { + return errors::InvalidArgument( + "Expected ", executors_and_keys->input_types.size(), + " feed tensors, but got ", feed_tensors.size()); + } + if (fetch_tensors != nullptr) { + fetch_tensors->resize(executors_and_keys->output_types.size()); + } else if (!executors_and_keys->output_types.empty()) { + return errors::InvalidArgument( + "`fetch_tensors` must be provided when the callable has one or more " + "outputs."); + } + + // A specialized CallFrame implementation that takes advantage of the + // optimized RunCallable interface. + + RunCallableCallFrame call_frame(this, executors_and_keys.get(), &feed_tensors, + fetch_tensors); + + if (LogMemory::IsEnabled()) { + LogMemory::RecordStep(step_id, run_state_args.handle); + } + + TF_RETURN_IF_ERROR( + RunInternal(step_id, executors_and_keys->callable_options.run_options(), + &call_frame, executors_and_keys.get(), run_metadata)); + + return Status::OK(); +} + +::tensorflow::Status DirectSession::ReleaseCallable(CallableHandle handle) { + mutex_lock l(callables_lock_); + if (handle >= next_callable_handle_) { + return errors::InvalidArgument("No such callable handle: ", handle); + } + callables_.erase(handle); + return Status::OK(); +} + +DirectSession::Callable::~Callable() { + // We must delete the fields in this order, because the destructor + // of `executors_and_keys` will call into an object owned by + // `function_info` (in particular, when deleting a kernel, it relies + // on the `FunctionLibraryRuntime` to know if the kernel is stateful + // or not). 
+ executors_and_keys.reset(); + function_info.reset(); +} + } // namespace tensorflow diff --git a/tensorflow/core/common_runtime/direct_session.h b/tensorflow/core/common_runtime/direct_session.h index 45d765f849..6f9c1b980b 100644 --- a/tensorflow/core/common_runtime/direct_session.h +++ b/tensorflow/core/common_runtime/direct_session.h @@ -107,6 +107,14 @@ class DirectSession : public Session { cost_model_manager_.ExportCostModels(cost_models); } + ::tensorflow::Status MakeCallable(const CallableOptions& callable_options, + CallableHandle* out_handle) override; + ::tensorflow::Status RunCallable(CallableHandle handle, + const std::vector& feed_tensors, + std::vector* fetch_tensors, + RunMetadata* run_metadata) override; + ::tensorflow::Status ReleaseCallable(CallableHandle handle) override; + private: // We create one executor and its dependent library runtime for // every partition. @@ -139,6 +147,8 @@ class DirectSession : public Session { DataTypeVector input_types; DataTypeVector output_types; + + CallableOptions callable_options; }; // A FunctionInfo object is created for every unique set of feeds/fetches. @@ -206,6 +216,14 @@ class DirectSession : public Session { gtl::ArraySlice target_nodes, ExecutorsAndKeys** executors_and_keys, RunStateArgs* run_state_args); + // Creates a set of executors to run the subgraph defined by + // `callable_options`. + ::tensorflow::Status CreateExecutors( + const CallableOptions& callable_options, + std::unique_ptr* out_executors_and_keys, + std::unique_ptr* out_func_info, + RunStateArgs* run_state_args); + // Creates several graphs given the existing graph_def_ and the // input feeds and fetches, given 'devices'. The graphs share a common // function library 'flib_def'. 
@@ -216,6 +234,11 @@ class DirectSession : public Session { RunStateArgs* run_state_args, DataTypeVector* input_types, DataTypeVector* output_types); + ::tensorflow::Status RunInternal(int64 step_id, const RunOptions& run_options, + CallFrameInterface* call_frame, + ExecutorsAndKeys* executors_and_keys, + RunMetadata* run_metadata); + ::tensorflow::Status ExtendLocked(const GraphDef& graph) EXCLUSIVE_LOCKS_REQUIRED(graph_def_lock_); @@ -257,11 +280,18 @@ class DirectSession : public Session { return ::tensorflow::Status::OK(); } + ::tensorflow::Status CheckGraphCreated(const char* method) { + mutex_lock l(graph_def_lock_); + if (!graph_created_) { + return errors::InvalidArgument( + "Session was not created with a graph before ", method, "!"); + } + return ::tensorflow::Status::OK(); + } + ::tensorflow::Status CreateDebuggerState( - const DebugOptions& debug_options, int64 session_run_index, - int64 executor_step_index, const std::vector& input_names, - const std::vector& output_names, - const std::vector& target_names, + const CallableOptions& options, int64 global_step, + int64 session_run_index, int64 executor_step_index, std::unique_ptr* debugger_state); ::tensorflow::Status DecorateAndPublishGraphForDebug( @@ -303,6 +333,16 @@ class DirectSession : public Session { std::unordered_map> executors_ GUARDED_BY(executor_lock_); + class RunCallableCallFrame; + struct Callable { + std::shared_ptr executors_and_keys; + std::shared_ptr function_info; + ~Callable(); + }; + mutex callables_lock_; + int64 next_callable_handle_ GUARDED_BY(callables_lock_) = 0; + std::unordered_map callables_ GUARDED_BY(callables_lock_); + // Holds mappings from handle to partial run state. 
std::unordered_map> partial_runs_ GUARDED_BY(executor_lock_); diff --git a/tensorflow/core/common_runtime/direct_session_test.cc b/tensorflow/core/common_runtime/direct_session_test.cc index 6fe0cba1e5..ee38960618 100644 --- a/tensorflow/core/common_runtime/direct_session_test.cc +++ b/tensorflow/core/common_runtime/direct_session_test.cc @@ -49,6 +49,22 @@ limitations under the License. namespace tensorflow { namespace { +CallableOptions MakeCallableOptions(gtl::ArraySlice feeds, + gtl::ArraySlice fetches, + gtl::ArraySlice targets) { + CallableOptions ret; + for (const string& feed : feeds) { + ret.add_feed(feed); + } + for (const string& fetch : fetches) { + ret.add_fetch(fetch); + } + for (const string& target : targets) { + ret.add_target(target); + } + return ret; +} + std::unique_ptr CreateSession() { SessionOptions options; (*options.config.mutable_device_count())["CPU"] = 2; @@ -111,6 +127,53 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetwork) { EXPECT_FLOAT_EQ(5.0, mat(0, 0)); } +TEST_F(DirectSessionMinusAXTest, RunSimpleNetwork_Callable) { + Initialize({3, 2, -1, 0}); + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def_)); + std::vector> inputs; + + // Run the test twice to ensure that the Make/Run/Release cycle is hermetic. + for (int i = 0; i < 2; ++i) { + // Request two targets: one fetch output and one non-fetched output. + Session::CallableHandle handle; + TF_ASSERT_OK(session->MakeCallable( + MakeCallableOptions({}, {y_ + ":0"}, {y_neg_}), &handle)); + + for (int i = 0; i < 2; ++i) { + std::vector outputs; + TF_ASSERT_OK(session->RunCallable(handle, {}, &outputs, nullptr)); + + ASSERT_EQ(1, outputs.size()); + // The first output should be initialized and have the correct + // output. 
+ auto mat = outputs[0].matrix(); + ASSERT_TRUE(outputs[0].IsInitialized()); + EXPECT_FLOAT_EQ(5.0, mat(0, 0)); + } + + Status s = session->RunCallable(handle, {}, nullptr, nullptr); + EXPECT_TRUE(errors::IsInvalidArgument(s)); + EXPECT_TRUE(StringPiece(s.error_message()) + .contains("`fetch_tensors` must be provided")); + + TF_ASSERT_OK(session->ReleaseCallable(handle)); + + std::vector outputs; + s = session->RunCallable(handle, {}, &outputs, nullptr); + EXPECT_TRUE(errors::IsInvalidArgument(s)); + EXPECT_TRUE( + StringPiece(s.error_message()) + .contains("Attempted to run callable after handle was released")); + + s = session->RunCallable(handle + 1, {}, &outputs, nullptr); + EXPECT_TRUE(errors::IsInvalidArgument(s)); + EXPECT_TRUE( + StringPiece(s.error_message()).contains("No such callable handle")); + } +} + TEST_F(DirectSessionMinusAXTest, TestFeed) { Initialize({1, 2, 3, 4}); auto session = CreateSession(); @@ -140,6 +203,39 @@ TEST_F(DirectSessionMinusAXTest, TestFeed) { EXPECT_FLOAT_EQ(39.0, mat(1, 0)); } +TEST_F(DirectSessionMinusAXTest, TestFeed_Callable) { + Initialize({1, 2, 3, 4}); + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + + TF_ASSERT_OK(session->Create(def_)); + + // Fill in the input and ask for the output + // + // Note that the input being fed is on the second device. 
+ CallableOptions callable_options; + callable_options.add_feed(x_); + callable_options.add_fetch(y_ + ":0"); + Session::CallableHandle handle; + TF_ASSERT_OK(session->MakeCallable(MakeCallableOptions({x_}, {y_ + ":0"}, {}), + &handle)); + Tensor t(DT_FLOAT, TensorShape({2, 1})); + t.matrix()(0, 0) = 5; + t.matrix()(1, 0) = 6; + std::vector inputs = {t}; + std::vector outputs; + + // Run the callable + TF_ASSERT_OK(session->RunCallable(handle, inputs, &outputs, nullptr)); + + ASSERT_EQ(1, outputs.size()); + auto mat = outputs[0].matrix(); + + // Expect outputs to be; 1*5 + 2*6, 3*5 + 4*6 + EXPECT_FLOAT_EQ(17.0, mat(0, 0)); + EXPECT_FLOAT_EQ(39.0, mat(1, 0)); +} + TEST_F(DirectSessionMinusAXTest, TestConcurrency) { Initialize({1, 2, 3, 4}); auto session = CreateSession(); @@ -172,6 +268,39 @@ TEST_F(DirectSessionMinusAXTest, TestConcurrency) { delete tp; } +TEST_F(DirectSessionMinusAXTest, TestConcurrency_Callable) { + Initialize({1, 2, 3, 4}); + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def_)); + + // Fill in the input and ask for the output + thread::ThreadPool* tp = new thread::ThreadPool(Env::Default(), "test", 4); + + Session::CallableHandle handle; + TF_ASSERT_OK( + session->MakeCallable(MakeCallableOptions({}, {y_ + ":0"}, {}), &handle)); + + // Run the callable 1000 times in 4 different threads concurrently. + auto fn = [&session, handle]() { + for (int i = 0; i < 1000; ++i) { + std::vector outputs; + // Run the graph + TF_ASSERT_OK(session->RunCallable(handle, {}, &outputs, nullptr)); + ASSERT_EQ(1, outputs.size()); + auto mat = outputs[0].matrix(); + EXPECT_FLOAT_EQ(3.0, mat(0, 0)); + } + }; + + for (int i = 0; i < 4; ++i) { + tp->Schedule(fn); + } + + // Wait for the functions to finish. 
+ delete tp; +} + TEST_F(DirectSessionMinusAXTest, TestPerSessionThreads) { Initialize({1, 2, 3, 4}); @@ -297,6 +426,38 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts) { EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2); } +TEST_F(DirectSessionMinusAXTest, RunSimpleNetworkWithOpts_Callable) { + Initialize({3, 2, -1, 0}); + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def_)); + + // Request two targets: one fetch output and one non-fetched output. + Session::CallableHandle handle; + CallableOptions callable_options = + MakeCallableOptions({}, {y_ + ":0"}, {y_neg_}); + callable_options.mutable_run_options()->set_trace_level( + RunOptions::FULL_TRACE); + TF_ASSERT_OK(session->MakeCallable(callable_options, &handle)); + + RunMetadata run_metadata; + EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 0); + + std::vector outputs; + TF_ASSERT_OK(session->RunCallable(handle, {}, &outputs, &run_metadata)); + + ASSERT_EQ(1, outputs.size()); + // The first output should be initialized and have the correct + // output. 
+ auto mat = outputs[0].matrix(); + ASSERT_TRUE(outputs[0].IsInitialized()); + EXPECT_FLOAT_EQ(5.0, mat(0, 0)); + + // Checks RunMetadata is well-formed + ASSERT_TRUE(run_metadata.has_step_stats()); + EXPECT_EQ(run_metadata.step_stats().dev_stats_size(), 2); +} + TEST(DirectSessionTest, KeepsStateAcrossRunsOfSession) { GraphDef def; Graph g(OpRegistry::Global()); @@ -409,6 +570,89 @@ TEST(DirectSessionTest, MultipleFeedTest) { EXPECT_TRUE(StringPiece(s.error_message()).contains("fed more than once")); } +TEST(DirectSessionTest, MultipleFeedTest_Callable) { + GraphDef def; + Graph g(OpRegistry::Global()); + + Tensor first_value(DT_FLOAT, TensorShape({})); + first_value.scalar()() = 1.0; + Node* first_const = test::graph::Constant(&g, first_value); + Node* first_identity = test::graph::Identity(&g, first_const); + + Tensor second_value(DT_FLOAT, TensorShape({})); + second_value.scalar()() = 2.0; + Node* second_const = test::graph::Constant(&g, second_value); + Node* second_identity = test::graph::Identity(&g, second_const); + + test::graph::ToGraphDef(&g, &def); + + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def)); + + Session::CallableHandle handle; + std::vector outputs; + + // Fetch without feeding. 
+ TF_ASSERT_OK(session->MakeCallable( + MakeCallableOptions( + {}, {first_identity->name() + ":0", second_identity->name() + ":0"}, + {}), + &handle)); + TF_ASSERT_OK(session->RunCallable(handle, {}, &outputs, nullptr)); + ASSERT_EQ(2, outputs.size()); + ASSERT_EQ(1.0, outputs[0].flat()(0)); + ASSERT_EQ(2.0, outputs[1].flat()(0)); + + TF_ASSERT_OK(session->MakeCallable( + MakeCallableOptions( + {}, {second_identity->name() + ":0", first_identity->name() + ":0"}, + {}), + &handle)); + TF_ASSERT_OK(session->RunCallable(handle, {}, &outputs, nullptr)); + ASSERT_EQ(2, outputs.size()); + ASSERT_EQ(2.0, outputs[0].flat()(0)); + ASSERT_EQ(1.0, outputs[1].flat()(0)); + + Tensor value_11(DT_FLOAT, TensorShape({})); + value_11.scalar()() = 11.0; + Tensor value_22(DT_FLOAT, TensorShape({})); + value_22.scalar()() = 22.0; + + // Feed [first_const, second_const] + TF_ASSERT_OK(session->MakeCallable( + MakeCallableOptions( + {first_const->name(), second_const->name()}, + {first_identity->name() + ":0", second_identity->name() + ":0"}, {}), + &handle)); + TF_ASSERT_OK( + session->RunCallable(handle, {value_11, value_22}, &outputs, nullptr)); + ASSERT_EQ(2, outputs.size()); + ASSERT_EQ(11.0, outputs[0].flat()(0)); + ASSERT_EQ(22.0, outputs[1].flat()(0)); + + // Feed [second_const, first_const] + TF_ASSERT_OK(session->MakeCallable( + MakeCallableOptions( + {second_const->name(), first_const->name()}, + {first_identity->name() + ":0", second_identity->name() + ":0"}, {}), + &handle)); + TF_ASSERT_OK( + session->RunCallable(handle, {value_22, value_11}, &outputs, nullptr)); + ASSERT_EQ(2, outputs.size()); + ASSERT_EQ(11.0, outputs[0].flat()(0)); + ASSERT_EQ(22.0, outputs[1].flat()(0)); + + // Feed [first_const, first_const] + Status s = session->MakeCallable( + MakeCallableOptions( + {first_const->name(), first_const->name()}, + {first_identity->name() + ":0", second_identity->name() + ":0"}, {}), + &handle); + EXPECT_TRUE(errors::IsInvalidArgument(s)); + 
EXPECT_TRUE(StringPiece(s.error_message()).contains("fed more than once")); +} + TEST(DirectSessionTest, FetchMultipleTimes) { Graph g(OpRegistry::Global()); Tensor seven_tensor(DT_INT32, TensorShape()); @@ -695,6 +939,59 @@ TEST(DirectSessionTest, RunHandleTest) { ASSERT_TRUE(s.ok()); } +TEST(DirectSessionTest, RunHandleTest_Callable) { + GraphDef def; + Graph g(OpRegistry::Global()); + + Tensor value0(DT_FLOAT, TensorShape({})); + value0.scalar()() = 1.0; + Node* const0 = test::graph::Constant(&g, value0); + Node* identity0 = test::graph::Identity(&g, const0); + + Tensor value1(DT_FLOAT, TensorShape({})); + value1.scalar()() = 2.0; + Node* const1 = test::graph::Constant(&g, value1); + Node* node3 = test::graph::Add(&g, identity0, const1); + Node* node4 = test::graph::Unary(&g, "GetSessionHandleV2", node3); + + Tensor value2(DT_STRING, TensorShape({})); + Node* const2 = test::graph::Constant(&g, value2); + Node* node5 = test::graph::GetSessionTensor(&g, const2); + Node* node6 = test::graph::Add(&g, node5, const1); + + Node* node7 = test::graph::Unary(&g, "DeleteSessionTensor", const2); + + test::graph::ToGraphDef(&g, &def); + + auto session = CreateSession(); + ASSERT_TRUE(session != nullptr); + TF_ASSERT_OK(session->Create(def)); + + // First run call: Create a handle. + std::vector outputs; + Status s = session->Run({}, {node4->name() + ":0"}, {}, &outputs); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(1, outputs.size()); + + ResourceHandle resource_handle = outputs[0].scalar()(); + Tensor string_handle(DT_STRING, {}); + string_handle.flat().setConstant(resource_handle.name()); + + // Second run call: Use a handle. + std::vector outputs1; + s = session->Run({{const2->name(), string_handle}}, {node6->name() + ":0"}, + {}, &outputs1); + ASSERT_TRUE(s.ok()); + ASSERT_EQ(1, outputs1.size()); + ASSERT_EQ(5.0, outputs1[0].flat()(0)); + + // Third run call: Delete a handle. 
+ std::vector outputs2; + s = session->Run({{const2->name(), string_handle}}, {}, {node7->name()}, + &outputs2); + ASSERT_TRUE(s.ok()); +} + TEST(DirectSessionTest, CreateGraphFailsWhenAssigningAFedVar) { Graph graph(OpRegistry::Global()); @@ -1109,6 +1406,11 @@ TEST(DirectSessionTest, TestDirectSessionRunClose) { EXPECT_EQ(t.scalar()(), outputs[0].scalar()()); outputs.clear(); + // Make a callable handle before closing the session. + Session::CallableHandle handle; + TF_ASSERT_OK(session->MakeCallable( + MakeCallableOptions({}, {}, {var_assign->name()}), &handle)); + // Close the session. TF_ASSERT_OK(session->Close()); @@ -1116,6 +1418,10 @@ TEST(DirectSessionTest, TestDirectSessionRunClose) { Status s = session->Run({} /* inputs */, {}, {var_assign->name()} /* target_nodes */, nullptr); EXPECT_EQ("Cancelled: Session has been closed.", s.ToString()); + + // Run the read as a callable to verify that we get the same error. + s = session->RunCallable(handle, {}, {}, nullptr); + EXPECT_EQ("Cancelled: Session has been closed.", s.ToString()); } TEST(DirectSessionTest, TestDirectSessionPRunClose) { @@ -1217,7 +1523,8 @@ TEST(DirectSessionTest, LocalDeviceManager) { // A simple benchmark for the overhead of `DirectSession::Run()` calls // with varying numbers of feeds/fetches. -void FeedFetchBenchmarkHelper(int iters, int num_feeds) { +void FeedFetchBenchmarkHelper(int iters, int num_feeds, + bool use_make_callable) { testing::StopTiming(); Tensor value(DT_FLOAT, TensorShape()); @@ -1253,29 +1560,55 @@ void FeedFetchBenchmarkHelper(int iters, int num_feeds) { SessionOptions opts; std::unique_ptr session(NewSession(opts)); TF_CHECK_OK(session->Create(gd)); - { - // NOTE(mrry): Ignore the first run, which will incur the graph - // partitioning/pruning overhead and skew the results. 
- // - // Note that we should also optimize and monitor the overhead on - // the first run, which will impact application startup times, but - // that is not the object of study in this benchmark. - std::vector output_values; - TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values)); - } - testing::StartTiming(); - for (int i = 0; i < iters; ++i) { - std::vector output_values; - TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values)); + if (use_make_callable) { + Session::CallableHandle handle; + CallableOptions callable_options; + std::vector input_tensors; + for (const auto& input : inputs) { + callable_options.add_feed(input.first); + input_tensors.push_back(input.second); + } + for (const string& output : outputs) { + callable_options.add_fetch(output); + } + TF_CHECK_OK(session->MakeCallable(callable_options, &handle)); + + testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + std::vector output_values; + TF_CHECK_OK( + session->RunCallable(handle, input_tensors, &output_values, nullptr)); + } + testing::StopTiming(); + } else { + { + // NOTE(mrry): Ignore the first run, which will incur the graph + // partitioning/pruning overhead and skew the results. + // + // Note that we should also optimize and monitor the overhead on + // the first run, which will impact application startup times, but + // that is not the object of study in this benchmark. 
+ std::vector output_values; + TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values)); + } + testing::StartTiming(); + for (int i = 0; i < iters; ++i) { + std::vector output_values; + TF_CHECK_OK(session->Run(inputs, outputs, {}, &output_values)); + } + testing::StopTiming(); } - testing::StopTiming(); } void BM_FeedFetch(int iters, int num_feeds) { - FeedFetchBenchmarkHelper(iters, num_feeds); + FeedFetchBenchmarkHelper(iters, num_feeds, /* use_make_callable */ false); +} +void BM_FeedFetchCallable(int iters, int num_feeds) { + FeedFetchBenchmarkHelper(iters, num_feeds, /* use_make_callable */ true); } BENCHMARK(BM_FeedFetch)->Arg(1)->Arg(2)->Arg(5)->Arg(10); +BENCHMARK(BM_FeedFetchCallable)->Arg(1)->Arg(2)->Arg(5)->Arg(10); } // namespace } // namespace tensorflow diff --git a/tensorflow/core/framework/session_state.h b/tensorflow/core/framework/session_state.h index 8fbe940f6a..653a661dd2 100644 --- a/tensorflow/core/framework/session_state.h +++ b/tensorflow/core/framework/session_state.h @@ -74,6 +74,12 @@ class TensorStore { Status SaveTensors(const std::vector& output_names, SessionState* session_state); + // Returns true if no tensors have been added to this store. + bool empty() { + mutex_lock l(lock_); + return tensors_.empty(); + } + private: mutex lock_; diff --git a/tensorflow/core/protobuf/config.proto b/tensorflow/core/protobuf/config.proto index 3606c5f127..abbbe392aa 100644 --- a/tensorflow/core/protobuf/config.proto +++ b/tensorflow/core/protobuf/config.proto @@ -410,3 +410,26 @@ message RunMetadata { // Graphs of the partitions executed by executors. repeated GraphDef partition_graphs = 3; } + +// Defines a subgraph in another `GraphDef` as a set of feed points and nodes +// to be fetched or executed. +// +// Compare with the arguments to `Session::Run()`. +message CallableOptions { + // Tensors to be fed in the callable. Each feed is the name of a tensor. + repeated string feed = 1; + + // Fetches. A list of tensor names. 
The caller of the callable expects a + // tensor to be returned for each fetch[i] (see RunStepResponse.tensor). The + // order of specified fetches does not change the execution order. + repeated string fetch = 2; + + // Target Nodes. A list of node names. The named nodes will be run by the + // callable but their outputs will not be returned. + repeated string target = 3; + + // Options that will be applied to each run. + RunOptions run_options = 4; + + // Next: 5 +} diff --git a/tensorflow/core/public/session.h b/tensorflow/core/public/session.h index 75ad50f6f2..d58c877cfd 100644 --- a/tensorflow/core/public/session.h +++ b/tensorflow/core/public/session.h @@ -195,6 +195,41 @@ class Session { return errors::Unimplemented( "LocalDeviceManager is not supported for this session."); } + + /// \brief A handle to a subgraph, created with `Session::MakeCallable()`. + typedef int64 CallableHandle; + + /// \brief Creates a `handle` for invoking the subgraph defined by + /// `callable_options`. + /// NOTE: This API is still experimental and may change. + virtual Status MakeCallable(const CallableOptions& callable_options, + CallableHandle* out_handle) { + return errors::Unimplemented( + "MakeCallable is not supported for this session."); + } + + /// \brief Invokes the subgraph named by `handle` with the given options and + /// input tensors. + /// + /// The order of tensors in `feed_tensors` must and `fetch_tensors` will + /// match the order of names in `CallableOptions::feed()` and + /// `CallableOptions::fetch()` when this subgraph was created. + /// NOTE: This API is still experimental and may change. + virtual Status RunCallable(CallableHandle handle, + const std::vector& feed_tensors, + std::vector* fetch_tensors, + RunMetadata* run_metadata) { + return errors::Unimplemented( + "RunCallable is not supported for this session."); + } + + /// \brief Releases resources associated with the given `handle` in this + /// session. 
+ /// NOTE: This API is still experimental and may change. + virtual Status ReleaseCallable(CallableHandle handle) { + return errors::Unimplemented( + "ReleaseCallable is not supported for this session."); + } }; /// \brief Create a new session with the given options. -- GitLab From 9d1d5057b9d3fb335a4b20193bb364737e2b5140 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 18:50:06 -0800 Subject: [PATCH 697/884] Move optimizations to arithmetic optimizer stages 1) Redundant Bitcast 2) Redundant Cast 3) Remove inverse transpose PiperOrigin-RevId: 188569367 --- tensorflow/core/grappler/op_types.cc | 4 + tensorflow/core/grappler/op_types.h | 2 + tensorflow/core/grappler/optimizers/BUILD | 1 + .../optimizers/arithmetic_optimizer.cc | 207 ++++++++++++------ .../optimizers/arithmetic_optimizer.h | 8 +- .../optimizers/arithmetic_optimizer_test.cc | 140 +++++++----- tensorflow/core/grappler/utils/BUILD | 16 ++ .../core/grappler/utils/grappler_test.cc | 15 ++ .../core/grappler/utils/grappler_test.h | 8 + .../core/grappler/utils/grappler_test_test.cc | 100 +++++++++ 10 files changed, 370 insertions(+), 131 deletions(-) create mode 100644 tensorflow/core/grappler/utils/grappler_test_test.cc diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index 8cf1402ae8..ca56833ef6 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -78,6 +78,10 @@ bool IsConstant(const NodeDef& node) { return node.op() == "Const"; } bool IsConj(const NodeDef& node) { return node.op() == "Conj"; } +bool IsConjugateTranspose(const NodeDef& node) { + return node.op() == "ConjugateTranspose"; +} + bool IsConv2D(const NodeDef& node) { return node.op() == "Conv2D"; } bool IsConv2DBackpropFilter(const NodeDef& node) { diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index a7c33ef97b..a0946ee1ad 100644 --- a/tensorflow/core/grappler/op_types.h +++ 
b/tensorflow/core/grappler/op_types.h @@ -40,6 +40,8 @@ bool IsCast(const NodeDef& node); bool IsComplex(const NodeDef& node); bool IsComplexAbs(const NodeDef& node); bool IsConj(const NodeDef& node); +bool IsConjugateTranspose(const NodeDef& node); +bool IsConcat(const NodeDef& node); bool IsConcatOffset(const NodeDef& node); bool IsConstant(const NodeDef& node); bool IsConv2D(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/BUILD b/tensorflow/core/grappler/optimizers/BUILD index 7ec137373b..6ded261c7d 100644 --- a/tensorflow/core/grappler/optimizers/BUILD +++ b/tensorflow/core/grappler/optimizers/BUILD @@ -248,6 +248,7 @@ tf_cc_test( "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core/grappler/inputs:trivial_test_graph_input_yielder", + "//tensorflow/core/grappler/utils:grappler_test", ], ) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 3cf42fde41..177b0735e9 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -45,19 +45,6 @@ namespace tensorflow { namespace grappler { namespace { -template -bool AreInversePermutations(const std::vector& a, const std::vector& b) { - if (a.size() != b.size()) { - return false; - } - for (int i = 0; i < a.size(); ++i) { - if (a[b[i]] != i) { - return false; - } - } - return true; -} - // Extract values from a Const op to `values`. Returns true if succeeds. 
template bool ValuesFromConstNode(const NodeDef& node, std::vector* values) { @@ -431,9 +418,7 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { Status TrySimplify(const NodeDef* node, string* simplified_node_name) override { - CHECK(IsSupported(node)) - << "Node " << node->name() - << " is not supported by add ops group optimizer step"; + CHECK(IsSupported(node)); AddOpsGroup group; TF_RETURN_IF_ERROR(CreateAddOpsGroup(node, &group)); @@ -650,6 +635,130 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { std::unordered_set rewritten_nodes_; }; +// Removes inverse transpose nodes +class RemoveInverseTranspose : public ArithmeticOptimizerStage { + public: + explicit RemoveInverseTranspose(ArithmeticOptimizerContext ctx) + : ArithmeticOptimizerStage(ctx) {} + ~RemoveInverseTranspose() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsTranspose(*node) || IsConjugateTranspose(*node); + } + + Status TrySimplify(const NodeDef* node, + string* simplified_node_name) override { + CHECK(IsSupported(node)); + + NodeDef* input; + TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &input)); + + if (input->op() == node->op()) { + NodeDef* node_perm; + NodeDef* input_perm; + + TF_RETURN_IF_ERROR(GetInputNode(node->input(1), &node_perm)); + TF_RETURN_IF_ERROR(GetInputNode(input->input(1), &input_perm)); + + // Try 32-bit indices. + std::vector node_perm_values; + std::vector input_perm_values; + if (ValuesFromConstNode(*node_perm, &node_perm_values) && + ValuesFromConstNode(*input_perm, &input_perm_values) && + AreInversePermutations(node_perm_values, input_perm_values)) { + *simplified_node_name = input->input(0); + } + // Try 64-bit indices. 
+ std::vector node_perm_values64; + std::vector input_perm_values64; + if (ValuesFromConstNode(*node_perm, &node_perm_values64) && + ValuesFromConstNode(*input_perm, &input_perm_values64) && + AreInversePermutations(node_perm_values64, input_perm_values64)) { + *simplified_node_name = input->input(0); + } + } + + return Status::OK(); + } + + private: + template + bool AreInversePermutations(const std::vector& a, + const std::vector& b) { + if (a.size() != b.size()) { + return false; + } + for (int i = 0; i < a.size(); ++i) { + if (a[b[i]] != i) { + return false; + } + } + return true; + } +}; + +// Remove redundant Bitcasts. +// 1) Remove Bitcast whose source type and destination type are equal +// 2) Rewrite Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) +class RemoveRedundantBitcastStage : public ArithmeticOptimizerStage { + public: + explicit RemoveRedundantBitcastStage(ArithmeticOptimizerContext ctx) + : ArithmeticOptimizerStage(ctx) {} + ~RemoveRedundantBitcastStage() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsBitcast(*node); + } + + Status TrySimplify(const NodeDef* node, + string* simplified_node_name) override { + CHECK(IsSupported(node)); + + // Bypass Bitcast whose source type and destination type are equal. 
+ if (GetSourceDataType(*node) == GetDestinationDataType(*node)) { + *simplified_node_name = node->input(0); + return Status::OK(); + } + + NodeDef* bitcast; + TF_RETURN_IF_ERROR(GetInputNode(node->name(), &bitcast)); + NodeDef* operand; + TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &operand)); + + if (IsBitcast(*operand)) { + // Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) + bitcast->set_input(0, operand->input(0)); + SetSourceDataType(GetSourceDataType(*operand), bitcast); + ctx_.node_map->UpdateInput(bitcast->name(), bitcast->input(0), + operand->input(0)); + AddToOptimizationQueue(bitcast); + *simplified_node_name = bitcast->name(); + } + + return Status::OK(); + } +}; + +// Remove Casts whose source type and destination type are equal. +class RemoveRedundantCastStage : public ArithmeticOptimizerStage { + public: + explicit RemoveRedundantCastStage(ArithmeticOptimizerContext ctx) + : ArithmeticOptimizerStage(ctx) {} + ~RemoveRedundantCastStage() override = default; + + bool IsSupported(const NodeDef* node) const override { return IsCast(*node); } + + Status TrySimplify(const NodeDef* node, + string* simplified_node_name) override { + CHECK(IsSupported(node)); + // Bypass Cast whose source type and destination type are equal. + if (GetSourceDataType(*node) == GetDestinationDataType(*node)) { + *simplified_node_name = node->input(0); + } + return Status::OK(); + } +}; + } // namespace class UniqueNodes { @@ -903,31 +1012,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } - // Remove inverse transposes. - if (node->op() == "Transpose" || node->op() == "ConjugateTranspose") { - NodeDef* input = node_map_->GetNode(node->input(0)); - if (input->op() == node->op()) { - const NodeDef* node_perm = node_map_->GetNode(node->input(1)); - const NodeDef* input_perm = node_map_->GetNode(input->input(1)); - // Try 32-bit indices. 
- std::vector node_perm_values; - std::vector input_perm_values; - if (ValuesFromConstNode(*node_perm, &node_perm_values) && - ValuesFromConstNode(*input_perm, &input_perm_values) && - AreInversePermutations(node_perm_values, input_perm_values)) { - return input->input(0); - } - // Try 64-bit indices. - std::vector node_perm_values64; - std::vector input_perm_values64; - if (ValuesFromConstNode(*node_perm, &node_perm_values64) && - ValuesFromConstNode(*input_perm, &input_perm_values64) && - AreInversePermutations(node_perm_values64, input_perm_values64)) { - return input->input(0); - } - } - } - if (node->op() == "Reshape") { // Reshape // ^ @@ -1024,32 +1108,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } - if (node->op() == "Bitcast") { - NodeDef* bitcast = node_map_->GetNode(node->name()); - // Bypass bitcasts whose source type and destination type are equal. - if (GetSourceDataType(*bitcast) == GetDestinationDataType(*bitcast)) { - return bitcast->input(0); - } - - const NodeDef* operand = node_map_->GetNode(bitcast->input(0)); - if (operand->op() == bitcast->op()) { - // Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) - bitcast->set_input(0, operand->input(0)); - SetSourceDataType(GetSourceDataType(*operand), bitcast); - node_map_->UpdateInput(bitcast->name(), bitcast->input(0), - operand->input(0)); - nodes_to_simplify->PushBack(bitcast); - return bitcast->name(); - } - } - - if (node->op() == "Cast") { - // Bypass casts whose source type and destination type are equal. - if (GetSourceDataType(*node) == GetDestinationDataType(*node)) { - return node->input(0); - } - } - // Fold a multiply of a scalar into the following convolution. This folding // can jump across nodes that merely reorders data (such as reshape and // transpose). 
For example, we can optimize @@ -1391,11 +1449,22 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { std::vector> stages; - // Add/AddN tree rewrites - if (options_.enable_add_to_addn_combining) { + if (options_.combine_add_to_addn) { stages.push_back( std::unique_ptr(new AddOpsRewriteStage(ctx))); } + if (options_.remove_inverse_transpose) { + stages.push_back(std::unique_ptr( + new RemoveInverseTranspose(ctx))); + } + if (options_.remove_redundant_bitcast) { + stages.push_back(std::unique_ptr( + new RemoveRedundantBitcastStage(ctx))); + } + if (options_.remove_redundant_cast) { + stages.push_back(std::unique_ptr( + new RemoveRedundantCastStage(ctx))); + } VLOG(1) << "Simplify arithmetic ops using " << stages.size() << " arithmetic optimization stages"; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 9cff8ca9d0..787084454d 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -55,14 +55,16 @@ class ArithmeticOptimizer : public GraphOptimizer { // Granular control for arithmetic optimizer stages struct ArithmeticOptimizerOptions { - // rewrite a tree of Add/AddN ops with a single AddN - bool enable_add_to_addn_combining; + bool combine_add_to_addn = true; + bool remove_inverse_transpose = true; + bool remove_redundant_bitcast = true; + bool remove_redundant_cast = true; // Choose which arithmetic optimizer stages will be enabled for a given // optimization level by default. 
static ArithmeticOptimizerOptions Default( RewriterConfig::Toggle opt_level) { - return {/*enable_add_to_addn_combining*/ true}; + return ArithmeticOptimizerOptions(); } }; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index a56351c18a..98842b29f1 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -21,6 +21,7 @@ limitations under the License. #include "tensorflow/core/grappler/optimizers/constant_folding.h" #include "tensorflow/core/grappler/optimizers/model_pruner.h" #include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/grappler/utils/grappler_test.h" #include "tensorflow/core/lib/core/status_test_util.h" #include "tensorflow/core/platform/test.h" @@ -49,7 +50,7 @@ void VerifyGraphsMatch(const GraphDef& original_graph, } } // namespace -class ArithmeticOptimizerTest : public ::testing::Test { +class ArithmeticOptimizerTest : public GrapplerTest { protected: // Optimize a graph using ArithmeticOptimizer and prune all the nodes that no // longer have any output consumers. @@ -63,14 +64,32 @@ class ArithmeticOptimizerTest : public ::testing::Test { // TODO(ezhulenev): Make private. 
After migration to stages each test // should explicitly enable required optimization for tests isolation void DisableAllStages(ArithmeticOptimizer* optimizer) { - ArithmeticOptimizer::ArithmeticOptimizerOptions options{ - /*enable_add_to_addn_combining*/ false}; + ArithmeticOptimizer::ArithmeticOptimizerOptions options; + options.combine_add_to_addn = false; + options.remove_inverse_transpose = false; + options.remove_redundant_bitcast = false; + options.remove_redundant_cast = false; optimizer->options_ = options; } - void EnableAddToAddNCombining(ArithmeticOptimizer* optimizer) { + void EnableOnlyAddToAddNCombining(ArithmeticOptimizer* optimizer) { DisableAllStages(optimizer); - optimizer->options_.enable_add_to_addn_combining = true; + optimizer->options_.combine_add_to_addn = true; + } + + void EnableOnlyRemoveInverseTranspose(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_inverse_transpose = true; + } + + void EnableOnlyRemoveRedundantBitcast(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_redundant_bitcast = true; + } + + void EnableOnlyRemoveRedundantCast(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.remove_redundant_cast = true; } }; @@ -658,9 +677,7 @@ TEST_F(ArithmeticOptimizerTest, IdentityReshape) { item.graph.Swap(&output); TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); - EXPECT_EQ(0, std::count_if( - output.node().begin(), output.node().end(), - [](const NodeDef& node) { return node.op() == "Reshape"; })); + EXPECT_EQ(0, CountOpNodes(output, "Reshape")); } TEST_F(ArithmeticOptimizerTest, NotIdentityReshape) { @@ -682,9 +699,7 @@ TEST_F(ArithmeticOptimizerTest, NotIdentityReshape) { item.graph.Swap(&output); TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); - EXPECT_EQ(1, std::count_if( - output.node().begin(), output.node().end(), - [](const NodeDef& node) { return node.op() == "Reshape"; 
})); + EXPECT_EQ(1, CountOpNodes(output, "Reshape")); } TEST_F(ArithmeticOptimizerTest, NotIdentityReshapeTooManyUnknownDimSizes) { @@ -704,9 +719,7 @@ TEST_F(ArithmeticOptimizerTest, NotIdentityReshapeTooManyUnknownDimSizes) { item.graph.Swap(&output); TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); - EXPECT_EQ(1, std::count_if( - output.node().begin(), output.node().end(), - [](const NodeDef& node) { return node.op() == "Reshape"; })); + EXPECT_EQ(1, CountOpNodes(output, "Reshape")); } TEST_F(ArithmeticOptimizerTest, CombineReshapes) { @@ -737,9 +750,7 @@ TEST_F(ArithmeticOptimizerTest, CombineReshapes) { item.graph.Swap(&output); TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); - EXPECT_EQ(1, std::count_if( - output.node().begin(), output.node().end(), - [](const NodeDef& node) { return node.op() == "Reshape"; })); + EXPECT_EQ(1, CountOpNodes(output, "Reshape")); } TEST_F(ArithmeticOptimizerTest, ReorderTransposeCast) { @@ -826,10 +837,9 @@ TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposes) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyRemoveInverseTranspose(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); std::set nodes_after_optimization; for (const NodeDef& node : output.node()) { @@ -859,10 +869,9 @@ TEST_F(ArithmeticOptimizerTest, RemoveInverseTransposesMultipleOutputs) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyRemoveInverseTranspose(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); for (const NodeDef& node : output.node()) { if (node.op() == "Concat") { @@ 
-886,10 +895,11 @@ TEST_F(ArithmeticOptimizerTest, RemoveTransposesWithControlDependency) { GrapplerItem item; item.fetch = {"outputs"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyRemoveInverseTranspose(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); NodeMap node_map(&output); const NodeDef* outputs_node = node_map.GetNode("outputs"); @@ -915,10 +925,9 @@ TEST_F(ArithmeticOptimizerTest, NotRemoveTransposes) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyRemoveInverseTranspose(&optimizer); + OptimizeAndPrune(&optimizer, &item, &output); EXPECT_EQ(6, output.node_size()); } @@ -1133,10 +1142,10 @@ TEST_F(ArithmeticOptimizerTest, OptimizeMultipleMulTransposeConv) { TEST_F(ArithmeticOptimizerTest, CombineBitcasts) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output inputs = - ops::Placeholder(s, DT_UINT8, ops::Placeholder::Shape({2, 3})); - Output bc1 = ops::Bitcast(s, inputs, DT_QINT8); - Output bc2 = ops::Bitcast(s, bc1, DT_INT8); + Output inputs = ops::Placeholder(s.WithOpName("inputs"), DT_UINT8, + ops::Placeholder::Shape({2, 3})); + Output bc1 = ops::Bitcast(s.WithOpName("bc1"), inputs, DT_QINT8); + Output bc2 = ops::Bitcast(s.WithOpName("bc2"), bc1, DT_INT8); Output outputs = ops::Identity(s.WithOpName("outputs"), bc2); GrapplerItem item; @@ -1144,18 +1153,22 @@ TEST_F(ArithmeticOptimizerTest, CombineBitcasts) { TF_CHECK_OK(s.ToGraphDef(&item.graph)); GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - item.graph.Swap(&output); - 
TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyRemoveRedundantBitcast(&optimizer); + + OptimizeAndPrune(&optimizer, &item, &output); + NodeMap node_map(&output); - EXPECT_EQ(1, std::count_if( - output.node().begin(), output.node().end(), - [](const NodeDef& node) { return node.op() == "Bitcast"; })); + // Bitcasts combined into a single op and inputs redirected to updated Bitcast + EXPECT_EQ(3, output.node_size()); + EXPECT_EQ(1, CountOpNodes(output, "Bitcast")); + EXPECT_TRUE(IsNodesDirectlyConnected(node_map, "inputs", "bc2")); } TEST_F(ArithmeticOptimizerTest, CombineAndRemoveBitcasts) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); - Output inputs = ops::Placeholder(s, DT_INT8, ops::Placeholder::Shape({2, 3})); + Output inputs = ops::Placeholder(s.WithOpName("inputs"), DT_INT8, + ops::Placeholder::Shape({2, 3})); Output bc1 = ops::Bitcast(s, inputs, DT_QINT8); Output bc2 = ops::Bitcast(s, bc1, DT_INT8); Output outputs = ops::Identity(s.WithOpName("outputs"), bc2); @@ -1163,33 +1176,42 @@ TEST_F(ArithmeticOptimizerTest, CombineAndRemoveBitcasts) { GrapplerItem item; item.fetch = {"outputs"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyRemoveRedundantBitcast(&optimizer); + + OptimizeAndPrune(&optimizer, &item, &output); + NodeMap node_map(&output); - EXPECT_EQ(0, std::count_if( - output.node().begin(), output.node().end(), - [](const NodeDef& node) { return node.op() == "Bitcast"; })); + // Bitcasts removed and inputs redirected to outputs + EXPECT_EQ(2, output.node_size()); + EXPECT_EQ(0, CountOpNodes(output, "Bitcast")); + EXPECT_TRUE(IsNodesDirectlyConnected(node_map, "inputs", "outputs")); } TEST_F(ArithmeticOptimizerTest, RemoveRedundantCast) { tensorflow::Scope s = 
tensorflow::Scope::NewRootScope(); - Output inputs = ops::Placeholder(s, DT_INT8, ops::Placeholder::Shape({2, 3})); + Output inputs = ops::Placeholder(s.WithOpName("inputs"), DT_INT8, + ops::Placeholder::Shape({2, 3})); Output cast = ops::Cast(s, inputs, DT_INT8); Output outputs = ops::Identity(s.WithOpName("outputs"), cast); GrapplerItem item; item.fetch = {"outputs"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + GraphDef output; - TF_EXPECT_OK(ArithmeticOptimizer().Optimize(nullptr, item, &output)); - item.graph.Swap(&output); - TF_EXPECT_OK(ModelPruner().Optimize(nullptr, item, &output)); + ArithmeticOptimizer optimizer; + EnableOnlyRemoveRedundantCast(&optimizer); - EXPECT_EQ(0, std::count_if( - output.node().begin(), output.node().end(), - [](const NodeDef& node) { return node.op() == "Cast"; })); + OptimizeAndPrune(&optimizer, &item, &output); + NodeMap node_map(&output); + + // Cast removed and inputs redirected to outputs + EXPECT_EQ(2, output.node_size()); + EXPECT_EQ(0, CountOpNodes(output, "Cast")); + EXPECT_TRUE(IsNodesDirectlyConnected(node_map, "inputs", "outputs")); } TEST_F(ArithmeticOptimizerTest, AddOpsRewriteCollapseAddsOfIdenticalShape) { @@ -1211,7 +1233,7 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteCollapseAddsOfIdenticalShape) { GraphDef output; ArithmeticOptimizer optimizer; - EnableAddToAddNCombining(&optimizer); + EnableOnlyAddToAddNCombining(&optimizer); OptimizeAndPrune(&optimizer, &item, &output); @@ -1266,7 +1288,7 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteMultiplePasses) { GraphDef output; ArithmeticOptimizer optimizer; - EnableAddToAddNCombining(&optimizer); + EnableOnlyAddToAddNCombining(&optimizer); OptimizeAndPrune(&optimizer, &item, &output); @@ -1329,7 +1351,7 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteAddInputThroughMultiplePaths) { GraphDef output; ArithmeticOptimizer optimizer; - EnableAddToAddNCombining(&optimizer); + EnableOnlyAddToAddNCombining(&optimizer); OptimizeAndPrune(&optimizer, &item, &output); diff 
--git a/tensorflow/core/grappler/utils/BUILD b/tensorflow/core/grappler/utils/BUILD index 3dbad40cae..939031c44b 100644 --- a/tensorflow/core/grappler/utils/BUILD +++ b/tensorflow/core/grappler/utils/BUILD @@ -147,6 +147,22 @@ cc_library( ], ) +tf_cc_test( + name = "grappler_test_test", + size = "small", + srcs = ["grappler_test_test.cc"], + deps = [ + ":grappler_test", + "//tensorflow/cc:cc_ops", + "//tensorflow/core:core_cpu", + "//tensorflow/core:direct_session", + "//tensorflow/core:lib", + "//tensorflow/core:test", + "//tensorflow/core:test_main", + "//tensorflow/core/grappler:utils", + ], +) + cc_library( name = "functions", srcs = [ diff --git a/tensorflow/core/grappler/utils/grappler_test.cc b/tensorflow/core/grappler/utils/grappler_test.cc index 79b2aa2808..89c3aa82bf 100644 --- a/tensorflow/core/grappler/utils/grappler_test.cc +++ b/tensorflow/core/grappler/utils/grappler_test.cc @@ -90,5 +90,20 @@ void GrapplerTest::CompareGraphs(GraphDef want, GraphDef got) { } } +bool GrapplerTest::IsNodesDirectlyConnected(const NodeMap& node_map, + const string& src, + const string& dst, int position) { + const NodeDef* src_node = node_map.GetNode(src); + const NodeDef* dst_node = node_map.GetNode(dst); + EXPECT_TRUE(src_node != nullptr) << src << " node not found"; + EXPECT_TRUE(dst_node != nullptr) << dst << " node not found"; + return src_node && dst_node && dst_node->input(position) == src_node->name(); +} + +int GrapplerTest::CountOpNodes(const GraphDef& graph, const string& op) { + return std::count_if(graph.node().begin(), graph.node().end(), + [&op](const NodeDef& node) { return node.op() == op; }); +} + } // namespace grappler } // namespace tensorflow diff --git a/tensorflow/core/grappler/utils/grappler_test.h b/tensorflow/core/grappler/utils/grappler_test.h index fd6809b6e2..3df6625d5c 100644 --- a/tensorflow/core/grappler/utils/grappler_test.h +++ b/tensorflow/core/grappler/utils/grappler_test.h @@ -21,6 +21,7 @@ limitations under the License. 
#include "tensorflow/core/framework/graph.pb.h" #include "tensorflow/core/framework/types.h" #include "tensorflow/core/grappler/grappler_item.h" +#include "tensorflow/core/grappler/utils.h" #include "tensorflow/core/platform/test.h" namespace tensorflow { @@ -37,6 +38,13 @@ class GrapplerTest : public ::testing::Test { const std::vector& inputs, GraphDef* graph); void CompareGraphs(GraphDef want, GraphDef got); + + // Check if node 'src' is directly connected to the input($position) of 'dst'. + bool IsNodesDirectlyConnected(const NodeMap& node_map, const string& src, + const string& dst, int position = 0); + + // Count nodes of the given op-type in a graph. + int CountOpNodes(const GraphDef& graph, const string& op); }; } // end namespace grappler diff --git a/tensorflow/core/grappler/utils/grappler_test_test.cc b/tensorflow/core/grappler/utils/grappler_test_test.cc new file mode 100644 index 0000000000..677fa5a798 --- /dev/null +++ b/tensorflow/core/grappler/utils/grappler_test_test.cc @@ -0,0 +1,100 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/grappler/utils/grappler_test.h" +#include "tensorflow/cc/ops/standard_ops.h" +#include "tensorflow/core/grappler/utils.h" +#include "tensorflow/core/lib/core/status_test_util.h" +#include "tensorflow/core/platform/test.h" + +namespace tensorflow { +namespace grappler { +namespace { + +// TODO(ezhulenev): add tests for all methods in GrapplerTest +class GrapplerTestTest : public GrapplerTest {}; + +TEST_F(GrapplerTestTest, CompareIdenticalGraphs) { + tensorflow::Scope s1 = tensorflow::Scope::NewRootScope(); + auto s1_a = ops::Variable(s1.WithOpName("a"), {2, 2}, DT_FLOAT); + auto s1_b = ops::Variable(s1.WithOpName("b"), {2, 2}, DT_FLOAT); + auto s1_add = ops::Add(s1.WithOpName("Add_1"), s1_a, s1_b); + + tensorflow::Scope s2 = tensorflow::Scope::NewRootScope(); + auto s2_a = ops::Variable(s2.WithOpName("a"), {2, 2}, DT_FLOAT); + auto s2_b = ops::Variable(s2.WithOpName("b"), {2, 2}, DT_FLOAT); + auto s2_add = ops::Add(s2.WithOpName("Add_1"), s2_a, s2_b); + + GraphDef graph1; + TF_ASSERT_OK(s1.ToGraphDef(&graph1)); + + GraphDef graph2; + TF_ASSERT_OK(s2.ToGraphDef(&graph2)); + + CompareGraphs(graph1, graph2); +} + +TEST_F(GrapplerTestTest, CheckNodesConnectivity) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {2, 2}, DT_FLOAT); + auto add_1 = ops::Add(s.WithOpName("Add_1"), a, b); + auto add_2 = ops::Add(s.WithOpName("Add_2"), add_1, b); + + GraphDef graph; + TF_ASSERT_OK(s.ToGraphDef(&graph)); + + NodeMap node_map(&graph); + + EXPECT_TRUE(IsNodesDirectlyConnected(node_map, "a", "Add_1", 0)); + EXPECT_TRUE(IsNodesDirectlyConnected(node_map, "b", "Add_1", 1)); + EXPECT_FALSE(IsNodesDirectlyConnected(node_map, "a", "Add_2", 0)); + EXPECT_TRUE(IsNodesDirectlyConnected(node_map, "b", "Add_2", 1)); +} + +TEST_F(GrapplerTestTest, CountOpNodes) { 
+ tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + + auto a = ops::Variable(s.WithOpName("a"), {2, 2}, DT_FLOAT); + auto b = ops::Variable(s.WithOpName("b"), {2, 2}, DT_FLOAT); + auto c = ops::Variable(s.WithOpName("c"), {2, 2}, DT_FLOAT); + + auto add_ab = ops::Add(s.WithOpName("Add_ab"), a, b); + auto add_bc = ops::Add(s.WithOpName("Add_bc"), b, c); + + auto mul_ab = ops::Mul(s.WithOpName("Mull_ab"), a, b); + auto mul_bc = ops::Mul(s.WithOpName("Mull_bc"), a, b); + + InputList inputs{ + Output(add_ab), + Output(add_bc), + Output(mul_ab), + Output(mul_bc), + }; + auto add_all = ops::AddN(s.WithOpName("Add_all"), inputs); + + GraphDef graph; + TF_ASSERT_OK(s.ToGraphDef(&graph)); + + EXPECT_EQ(2, CountOpNodes(graph, "Add")); + EXPECT_EQ(2, CountOpNodes(graph, "Mul")); + EXPECT_EQ(1, CountOpNodes(graph, "AddN")); + EXPECT_EQ(0, CountOpNodes(graph, "Transpose")); +} + +} // namespace +} // namespace grappler +} // namespace tensorflow \ No newline at end of file -- GitLab From 54d785dfdcb4eb6758741e20a6d111fda577dc99 Mon Sep 17 00:00:00 2001 From: "A. 
Unique TensorFlower" Date: Fri, 9 Mar 2018 18:54:41 -0800 Subject: [PATCH 698/884] Propagate min/max for StridedSlice PiperOrigin-RevId: 188569611 --- .../contrib/lite/toco/graph_transformations/hardcode_min_max.cc | 1 + tensorflow/contrib/lite/toco/graph_transformations/quantize.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc index 938d76386d..48a67cabec 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -326,6 +326,7 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { changed = HardcodeMinMaxForAverageOrMaxPool(model, op); break; + case OperatorType::kStridedSlice: case OperatorType::kSqueeze: case OperatorType::kTensorFlowReshape: case OperatorType::kPad: diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index 4fd26e4325..05686ce9a0 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -49,6 +49,7 @@ bool SupportsQuantization(const Operator& op) { type == OperatorType::kTensorFlowReshape || type == OperatorType::kTanh || type == OperatorType::kMul || type == OperatorType::kSpaceToDepth || + type == OperatorType::kStridedSlice || type == OperatorType::kDepthToSpace || type == OperatorType::kLstmCell; } -- GitLab From a9bb191793e8e8c924b6a19f645610809b1dae62 Mon Sep 17 00:00:00 2001 From: Mingsheng Hong Date: Fri, 9 Mar 2018 19:07:44 -0800 Subject: [PATCH 699/884] Unified test util PlaceholderFloat() into Placeholder(), and extended the latter to take a TF_DataType param.
PiperOrigin-RevId: 188570493 --- tensorflow/c/c_test_util.cc | 12 +++--------- tensorflow/c/c_test_util.h | 7 ++----- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/tensorflow/c/c_test_util.cc b/tensorflow/c/c_test_util.cc index 53346a8cdf..22f77e7b87 100644 --- a/tensorflow/c/c_test_util.cc +++ b/tensorflow/c/c_test_util.cc @@ -102,16 +102,10 @@ void PlaceholderHelper(TF_Graph* graph, TF_Status* s, const char* name, ASSERT_NE(*op, nullptr); } -TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name) { +TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, const char* name, + TF_DataType dtype) { TF_Operation* op; - PlaceholderHelper(graph, s, name, TF_INT32, &op); - return op; -} - -TF_Operation* PlaceholderFloat(TF_Graph* graph, TF_Status* s, - const char* name) { - TF_Operation* op; - PlaceholderHelper(graph, s, name, TF_FLOAT, &op); + PlaceholderHelper(graph, s, name, dtype, &op); return op; } diff --git a/tensorflow/c/c_test_util.h b/tensorflow/c/c_test_util.h index 8cf060f73f..d87c57fd51 100644 --- a/tensorflow/c/c_test_util.h +++ b/tensorflow/c/c_test_util.h @@ -46,12 +46,9 @@ TF_Tensor* DoubleTensor(double v); TF_Tensor* FloatTensor(float v); -// TODO(hongm): Change Placeholder() to take in a TF_DataType parameter, and -// unify with PlaceholderFloat. TF_Operation* Placeholder(TF_Graph* graph, TF_Status* s, - const char* name = "feed"); -TF_Operation* PlaceholderFloat(TF_Graph* graph, TF_Status* s, - const char* name = "feed"); + const char* name = "feed", + TF_DataType dtype = TF_INT32); TF_Operation* Const(TF_Tensor* t, TF_Graph* graph, TF_Status* s, const char* name = "const"); -- GitLab From 40c96d70bd71d483324e7328958f61f723986dcb Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 20:50:32 -0800 Subject: [PATCH 700/884] Fix docstring for `embedding_lookup_sparse`. Example with weighted mean combiner implies that single-key embeddings are not normalized (the weighted sum answer).
However, the code and test show normalization regardless of number of keys. PiperOrigin-RevId: 188575982 --- tensorflow/python/ops/embedding_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/embedding_ops.py b/tensorflow/python/ops/embedding_ops.py index 3826585f59..20e4a28b9c 100644 --- a/tensorflow/python/ops/embedding_ops.py +++ b/tensorflow/python/ops/embedding_ops.py @@ -396,8 +396,8 @@ def embedding_lookup_sparse(params, with `combiner`="mean", then the output will be a 3x20 matrix where output[0, :] = (params[1, :] * 2.0 + params[3, :] * 0.5) / (2.0 + 0.5) - output[1, :] = params[0, :] * 1.0 - output[2, :] = params[1, :] * 3.0 + output[1, :] = (params[0, :] * 1.0) / 1.0 + output[2, :] = (params[1, :] * 3.0) / 3.0 Raises: TypeError: If sp_ids is not a SparseTensor, or if sp_weights is neither -- GitLab From 3b0a27549dd2f1a32526cb77ec7ff407d0fc315f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Fri, 9 Mar 2018 22:43:51 -0800 Subject: [PATCH 701/884] Fix DepthToSpace and SpaceToDepth to silently return instead of failing when the input tensor is empty.
PiperOrigin-RevId: 188580972 --- .../core/kernels/depthtospace_op_gpu.cu.cc | 9 +++++++++ .../core/kernels/spacetodepth_op_gpu.cu.cc | 9 +++++++++ .../kernel_tests/depthtospace_op_test.py | 18 ++++++++++++++++++ .../kernel_tests/spacetodepth_op_test.py | 18 ++++++++++++++++++ 4 files changed, 54 insertions(+) diff --git a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc index 7a66285383..184c703599 100644 --- a/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc +++ b/tensorflow/core/kernels/depthtospace_op_gpu.cu.cc @@ -158,6 +158,9 @@ struct DepthToSpaceOpFunctor { const int total_count = batch_size * output_height * output_width * output_depth; + if (total_count == 0) { + return; + } CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); D2S_NHWC<<>>( config.virtual_thread_count, input.data(), block_size, batch_size, @@ -188,6 +191,9 @@ struct DepthToSpaceOpFunctor { const int output_width = output.dimension(3); const int output_depth_by_input_area = output_depth * input_area; const int total_count = batch_size * output_depth_by_input_area; + if (total_count == 0) { + return; + } CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); switch (block_size) { case 2: @@ -213,6 +219,9 @@ struct DepthToSpaceOpFunctor { // Other block sizes are processed by the generic kernel. 
const int total_count = batch_size * input_depth_by_input_area; + if (total_count == 0) { + return; + } auto config = GetCudaLaunchConfig(total_count, d); D2S_NCHW<<>>( config.virtual_thread_count, input.data(), block_size, input_width, diff --git a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc index a1a01e8813..db05ca1ed2 100644 --- a/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc +++ b/tensorflow/core/kernels/spacetodepth_op_gpu.cu.cc @@ -154,6 +154,9 @@ struct SpaceToDepthOpFunctor { const int total_count = batch_size * input_height * input_width * input_depth; + if (total_count == 0) { + return; + } CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); S2D_NHWC<<>>( config.virtual_thread_count, input.data(), block_size, batch_size, @@ -184,6 +187,9 @@ struct SpaceToDepthOpFunctor { const int input_width = input.dimension(3); const int input_depth_by_output_area = input_depth * output_area; const int total_count = batch_size * input_depth_by_output_area; + if (total_count == 0) { + return; + } CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); switch (block_size) { case 2: @@ -209,6 +215,9 @@ struct SpaceToDepthOpFunctor { // Other block sizes are processed by the generic kernel. 
const int total_count = batch_size * output_depth_by_output_area; + if (total_count == 0) { + return; + } CudaLaunchConfig config = GetCudaLaunchConfig(total_count, d); S2D_NCHW<<>>( config.virtual_thread_count, input.data(), block_size, output_width, diff --git a/tensorflow/python/kernel_tests/depthtospace_op_test.py b/tensorflow/python/kernel_tests/depthtospace_op_test.py index 7df2366954..96c9718b83 100644 --- a/tensorflow/python/kernel_tests/depthtospace_op_test.py +++ b/tensorflow/python/kernel_tests/depthtospace_op_test.py @@ -90,6 +90,24 @@ class DepthToSpaceTest(test.TestCase): x_out = [batch_output_elt(i) for i in range(batch_size)] self._testOne(x_np, block_size, x_out) + def testBatchSize0(self): + block_size = 2 + batch_size = 0 + input_nhwc = array_ops.ones([batch_size, 2, 3, 12]) + x_out = array_ops.ones([batch_size, 4, 6, 3]) + + with self.test_session(use_gpu=False): + # test NHWC (default) on CPU + x_tf = array_ops.depth_to_space(input_nhwc, block_size) + self.assertAllEqual(x_tf.shape, x_out.shape) + x_tf.eval() + if test.is_gpu_available(): + with self.test_session(use_gpu=True): + # test NHWC (default) on GPU + x_tf = array_ops.depth_to_space(input_nhwc, block_size) + self.assertAllEqual(x_tf.shape, x_out.shape) + x_tf.eval() + # Tests for different width and height. 
def testNonSquare(self): x_np = [[[[1, 10, 2, 20, 3, 30, 4, 40]], diff --git a/tensorflow/python/kernel_tests/spacetodepth_op_test.py b/tensorflow/python/kernel_tests/spacetodepth_op_test.py index 3c98a685e0..b76135764f 100644 --- a/tensorflow/python/kernel_tests/spacetodepth_op_test.py +++ b/tensorflow/python/kernel_tests/spacetodepth_op_test.py @@ -126,6 +126,24 @@ class SpaceToDepthTest(test.TestCase): x_out = [batch_output_elt(i) for i in range(batch_size)] self._testOne(x_np, block_size, x_out) + def testBatchSize0(self): + block_size = 2 + batch_size = 0 + input_nhwc = array_ops.ones([batch_size, 4, 6, 3]) + x_out = array_ops.ones([batch_size, 2, 3, 12]) + + with self.test_session(use_gpu=False): + # test NHWC (default) on CPU + x_tf = array_ops.space_to_depth(input_nhwc, block_size) + self.assertAllEqual(x_tf.shape, x_out.shape) + x_tf.eval() + if test.is_gpu_available(): + with self.test_session(use_gpu=True): + # test NHWC (default) on GPU + x_tf = array_ops.space_to_depth(input_nhwc, block_size) + self.assertAllEqual(x_tf.shape, x_out.shape) + x_tf.eval() + # Tests for different width and height. def testNonSquare(self): x_np = [[[[1, 10], [2, 20]], [[3, 30], [4, 40]], [[5, 50], [6, 60]], -- GitLab From 2cd50a9fd2900c2bf7e74a7795823254d5383fb4 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Fri, 9 Mar 2018 22:49:30 -0800 Subject: [PATCH 702/884] [XLA] Speed up colocated buffer merging. 
PiperOrigin-RevId: 188581202 --- .../compiler/xla/service/buffer_assignment.cc | 33 ++++++++++++------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/tensorflow/compiler/xla/service/buffer_assignment.cc b/tensorflow/compiler/xla/service/buffer_assignment.cc index fb18c9d828..dbe45e932c 100644 --- a/tensorflow/compiler/xla/service/buffer_assignment.cc +++ b/tensorflow/compiler/xla/service/buffer_assignment.cc @@ -1339,26 +1339,35 @@ BufferAssigner::MergeColocatedBufferSets( auto cannot_merge_buffer_sets = [&colocated_buffer_sets, &buffer_liveness, &buffer_size, &is_entry_parameter](int64 i, int64 j) { - for (auto& buffer_a : colocated_buffer_sets[i]) { - for (auto& buffer_b : colocated_buffer_sets[j]) { - // Do not merge if the set includes live outs or entry parameters. - if (buffer_liveness.MaybeLiveOut(*buffer_a) || - is_entry_parameter(*buffer_a) || - buffer_liveness.MaybeLiveOut(*buffer_b) || - is_entry_parameter(*buffer_b)) { + // Do not merge if one of the sets includes live outs or entry parameters. + for (int64 key : {i, j}) { + for (auto& buffer : colocated_buffer_sets[key]) { + if (buffer_liveness.MaybeLiveOut(*buffer) || + is_entry_parameter(*buffer)) { return true; } - // Do not merge if the buffers interfere with each other. + } + } + + // Colocated sets satisfy the invariant that all buffers within a set have + // the same size. That means we need to check whether the size is the same + // between the two sets, but also that it's enough to look at just one + // buffer within each set. + if (buffer_size(**colocated_buffer_sets[i].begin()) != + buffer_size(**colocated_buffer_sets[j].begin())) { + return true; + } + + // Do not merge if some pair of buffers interferes with each other. 
+ for (auto& buffer_a : colocated_buffer_sets[i]) { + for (auto& buffer_b : colocated_buffer_sets[j]) { if (buffer_a->id() != buffer_b->id() && buffer_liveness.MayInterfere(*buffer_a, *buffer_b)) { return true; } - // Do not merge if the buffer sizes are different. - if (buffer_size(*buffer_a) != buffer_size(*buffer_b)) { - return true; - } } } + return false; }; -- GitLab From 754dd339c141babf5aeee9495479ff0da380da52 Mon Sep 17 00:00:00 2001 From: Rui Zhao Date: Sat, 10 Mar 2018 00:29:37 -0800 Subject: [PATCH 703/884] Increment node_ids when merging CostGraphDef. PiperOrigin-RevId: 188586552 --- .../core/grappler/clusters/single_machine.cc | 35 +++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/clusters/single_machine.cc b/tensorflow/core/grappler/clusters/single_machine.cc index 8e236c9ee8..313ef90d81 100644 --- a/tensorflow/core/grappler/clusters/single_machine.cc +++ b/tensorflow/core/grappler/clusters/single_machine.cc @@ -378,10 +378,15 @@ void SingleMachine::MergeCosts(CostGraphDef* graph_costs, init_costs.node_size() + queue_costs.node_size()); std::unordered_set nodes_seen; + int queue_costs_id_offset = graph_costs->node_size(); for (const auto& node : graph_costs->node()) { nodes_seen.insert(node.name()); + if (node.id() >= queue_costs_id_offset) { + queue_costs_id_offset = node.id() + 1; + } } + int init_costs_id_offset = queue_costs_id_offset + queue_costs.node_size(); // The costs obtained by running the main graph could be more stable than // the one we get from the queue runners since the queue runners run // asynchronously. 
@@ -389,7 +394,22 @@ void SingleMachine::MergeCosts(CostGraphDef* graph_costs, if (nodes_seen.find(node.name()) != nodes_seen.end()) { continue; } - graph_costs->add_node()->MergeFrom(node); + + auto* new_node = graph_costs->add_node(); + new_node->MergeFrom(node); + + new_node->set_id(node.id() + queue_costs_id_offset); + if (new_node->id() >= init_costs_id_offset) { + init_costs_id_offset = new_node->id() + 1; + } + + for (auto& input_info : *new_node->mutable_input_info()) { + input_info.set_preceding_node(input_info.preceding_node() + + queue_costs_id_offset); + } + for (auto& control_input : *new_node->mutable_control_input()) { + control_input += queue_costs_id_offset; + } } // Don't overwrite the costs with that generated during initialization since @@ -398,7 +418,18 @@ void SingleMachine::MergeCosts(CostGraphDef* graph_costs, if (nodes_seen.find(node.name()) != nodes_seen.end()) { continue; } - graph_costs->add_node()->MergeFrom(node); + + auto* new_node = graph_costs->add_node(); + new_node->MergeFrom(node); + + new_node->set_id(node.id() + init_costs_id_offset); + for (auto& input_info : *new_node->mutable_input_info()) { + input_info.set_preceding_node(input_info.preceding_node() + + init_costs_id_offset); + } + for (auto& control_input : *new_node->mutable_control_input()) { + control_input += init_costs_id_offset; + } } } -- GitLab From 43ccbdbcda0501c823ecc3938c928175b9d964a2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sat, 10 Mar 2018 12:03:19 -0800 Subject: [PATCH 704/884] Turn the following ops into Identity. 
* Slice when the Size input matches the size of the input tensor * Tile when the multiples input is a tensor of '1' * Pad/PadV2 when the paddings input is a tensor of 0 * Squeeze when the squeeze dimensions are known to be > 1 PiperOrigin-RevId: 188609800 --- .../grappler/optimizers/constant_folding.cc | 113 ++++++++++- .../optimizers/constant_folding_test.cc | 181 ++++++++++++++++++ 2 files changed, 292 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 31dc1b73e1..39cc4a9629 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1524,7 +1524,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, // The node is replaceable iff // unknown_rank == false && (dim_size == 0 || all dims have size 1) bool replaceable = !shape.unknown_rank(); - for (int j = 0; j < shape.dim_size(); ++j) { + for (int j = 0; replaceable && j < shape.dim_size(); ++j) { replaceable &= shape.dim(j).size() == 1; } if (replaceable) { @@ -1532,6 +1532,116 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } } + if (use_shape_info && IsSlice(*node) && + properties.GetInputProperties(node->name()).size() == 3) { + const auto& input = properties.GetInputProperties(node->name())[0]; + const auto& b = properties.GetInputProperties(node->name())[1]; + const auto& s = properties.GetInputProperties(node->name())[2]; + if (TensorShape::IsValid(b.shape()) && b.has_value() && + TensorShape::IsValid(s.shape()) && s.has_value()) { + Tensor begin(b.dtype(), b.shape()); + if (!begin.FromProto(b.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + b.value().DebugString()); + } + Tensor size(s.dtype(), s.shape()); + if (!size.FromProto(s.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + s.value().DebugString()); + } + // The node is replaceable iff 
unknown_rank == false && + // begin == 0 && (size == -1 || size == input_shape) for all dimensions + bool replaceable = !input.shape().unknown_rank(); + for (int j = 0; replaceable && j < input.shape().dim_size(); ++j) { + if (begin.dtype() == DT_INT32) { + replaceable &= begin.vec()(j) == 0; + } else { + replaceable &= begin.vec()(j) == 0; + } + if (size.dtype() == DT_INT32) { + replaceable &= (size.vec()(j) == -1 || + size.vec()(j) == input.shape().dim(j).size()); + } else { + replaceable &= + (size.vec()(j) == -1 || + size.vec()(j) == input.shape().dim(j).size()); + } + } + if (replaceable) { + ReplaceOperationWithIdentity(0, node, output); + } + } + } + + if (IsTile(*node) && + properties.GetInputProperties(node->name()).size() == 2) { + const auto& m = properties.GetInputProperties(node->name())[1]; + if (TensorShape::IsValid(m.shape()) && m.has_value()) { + Tensor multiplies(m.dtype(), m.shape()); + if (!multiplies.FromProto(m.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + m.value().DebugString()); + } + // The node is replaceable iff all values in multiplies are 1. + bool replaceable = true; + if (multiplies.dtype() == DT_INT32) { + for (int j = 0; replaceable && j < multiplies.vec().size(); + ++j) { + replaceable &= multiplies.vec()(j) == 1; + } + } else { + for (int j = 0; replaceable && j < multiplies.vec().size(); + ++j) { + replaceable &= multiplies.vec()(j) == 1; + } + } + if (replaceable) { + ReplaceOperationWithIdentity(0, node, output); + } + } + } + + if (IsPad(*node) && + properties.GetInputProperties(node->name()).size() >= 2) { + const auto& p = properties.GetInputProperties(node->name())[1]; + if (TensorShape::IsValid(p.shape()) && p.has_value()) { + Tensor paddings(p.dtype(), p.shape()); + if (!paddings.FromProto(p.value())) { + return errors::InvalidArgument("Cannot parse tensor from proto: ", + p.value().DebugString()); + } + // The node is replaceable iff all values in paddings are 0. 
+ bool replaceable = true; + // The operation requires it to be int32 value so we don't check for + // int64. + const auto flatten = paddings.flat(); + for (int j = 0; replaceable && j < flatten.size(); ++j) { + replaceable &= flatten(j) == 0; + } + if (replaceable) { + ReplaceOperationWithIdentity(0, node, output); + } + } + } + + if (use_shape_info && IsSqueeze(*node) && + !properties.GetInputProperties(node->name()).empty()) { + // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's + // error to squeeze a dimension that is not 1, so we only need to check + // whether the input has > 1 size for each dimension. + const auto& shape = + properties.GetInputProperties(node->name())[0].shape(); + // The node is replaceable iff + // unknown_rank == false && (dim_size == 0 || all dims have size > 1) + bool replaceable = !shape.unknown_rank(); + for (int j = 0; replaceable && j < shape.dim_size(); ++j) { + replaceable &= shape.dim(j).size() > 1; + } + if (replaceable) { + ReplaceOperationWithIdentity(0, node, output); + } + } + // Switch(x, x) will always feed false to its false branch and true to // its true branch. 
By rewriting the graph a bit, we can propagate these // constants down the two output branches, and just use control dependencies @@ -2027,7 +2137,6 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, TF_RETURN_IF_ERROR(MaterializeShapes(properties)); TF_RETURN_IF_ERROR(MaterializeConstants(properties)); } - TF_RETURN_IF_ERROR(FoldGraph(output)); node_map_.reset(new NodeMap(output)); TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info)); diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index 4b9770889f..f421a59989 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -1261,6 +1261,187 @@ TEST_F(ConstantFoldingTest, ShuffleReverseOnScalarRemoval) { CompareGraphs(want, got); } +TEST_F(ConstantFoldingTest, SliceWithSameDimensionRemoval) { + { // size = {3, 5} + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + auto in1 = ops::Variable(scope.WithOpName("in1"), {3, 5}, DT_FLOAT); + auto begin = ops::Const(scope.WithOpName("begin"), {0, 0}, {2}); + auto size = ops::Const(scope.WithOpName("size"), {3, 5}, {2}); + Output in2 = ops::Variable(scope.WithOpName("in2"), {4, 6}, DT_FLOAT); + ops::Slice s1(scope.WithOpName("s1"), in1, begin, size); + ops::Slice s2(scope.WithOpName("s2"), in2, begin, size); + + ops::Add out(scope.WithOpName("out"), s1, s2); + + GrapplerItem item; + item.fetch = {"out"}; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef got; + Status status = fold.Optimize(nullptr, item, &got); + TF_EXPECT_OK(status); + + GraphDef want; + AddNode("in1", "VariableV2", {}, &want); + AddNode("in2", "VariableV2", {}, &want); + AddNode("begin", "Const", {}, &want); + AddNode("size", "Const", {}, &want); + AddNode("s1", "Identity", + {"in1", AsControlDependency("begin"), 
AsControlDependency("size")}, + &want); + AddNode("s2", "Slice", {"in2", "begin", "size"}, &want); + AddNode("out", "Add", {"s1", "s2"}, &want); + + CompareGraphs(want, got); + } + { // size = {-1, -1} + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + auto in1 = + ops::Variable(scope.WithOpName("in1"), {3, 5}, DataType::DT_FLOAT); + auto begin1 = ops::Const(scope.WithOpName("begin1"), {0, 0}, {2}); + auto begin2 = ops::Const(scope.WithOpName("begin2"), {1, 1}, {2}); + auto size = ops::Const(scope.WithOpName("size"), {-1, -1}, {2}); + Output in2 = + ops::Variable(scope.WithOpName("in2"), {4, 6}, DataType::DT_FLOAT); + ops::Slice s1(scope.WithOpName("s1"), in1, begin1, size); + ops::Slice s2(scope.WithOpName("s2"), in2, begin2, size); + + ops::Add out(scope.WithOpName("out"), s1, s2); + + GrapplerItem item; + item.fetch = {"out"}; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef got; + Status status = fold.Optimize(nullptr, item, &got); + TF_EXPECT_OK(status); + + GraphDef want; + AddNode("in1", "VariableV2", {}, &want); + AddNode("in2", "VariableV2", {}, &want); + AddNode("begin1", "Const", {}, &want); + AddNode("begin2", "Const", {}, &want); + AddNode("size", "Const", {}, &want); + AddNode("s1", "Identity", + {"in1", AsControlDependency("begin1"), AsControlDependency("size")}, + &want); + AddNode("s2", "Slice", {"in2", "begin2", "size"}, &want); + AddNode("out", "Add", {"s1", "s2"}, &want); + + CompareGraphs(want, got); + } +} + +TEST_F(ConstantFoldingTest, TileWithMultipliesBeingOne) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + auto in1 = ops::Variable(scope.WithOpName("in1"), {4, 6}, DT_FLOAT); + auto in2 = ops::Variable(scope.WithOpName("in2"), {4, 3}, DT_FLOAT); + auto multiplies1 = ops::Const(scope.WithOpName("multiplies1"), {1, 1}, {2}); + auto multiplies2 = ops::Const(scope.WithOpName("multiplies2"), {1, 2}, {2}); + + ops::Tile t1(scope.WithOpName("t1"), in1, 
multiplies1); + ops::Tile t2(scope.WithOpName("t2"), in2, multiplies2); + + ops::Add out(scope.WithOpName("out"), t1, t2); + + GrapplerItem item; + item.fetch = {"out"}; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef got; + Status status = fold.Optimize(nullptr, item, &got); + TF_EXPECT_OK(status); + + GraphDef want; + AddNode("in1", "VariableV2", {}, &want); + AddNode("in2", "VariableV2", {}, &want); + AddNode("multiplies1", "Const", {}, &want); + AddNode("multiplies2", "Const", {}, &want); + AddNode("t1", "Identity", {"in1", AsControlDependency("multiplies1")}, &want); + AddNode("t2", "Tile", {"in2", "multiplies2"}, &want); + AddNode("out", "Add", {"t1", "t2"}, &want); + + CompareGraphs(want, got); +} + +TEST_F(ConstantFoldingTest, PaddingWithZeroSize) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + auto in1 = ops::Variable(scope.WithOpName("in1"), {4, 6}, DT_INT32); + auto in2 = ops::Variable(scope.WithOpName("in2"), {2, 2}, DT_INT32); + auto paddings1 = + ops::Const(scope.WithOpName("paddings1"), {0, 0, 0, 0}, {2, 2}); + auto paddings2 = + ops::Const(scope.WithOpName("paddings2"), {1, 1, 2, 2}, {2, 2}); + auto c1 = ops::Const(scope.WithOpName("c1"), 1); + auto c2 = ops::Const(scope.WithOpName("c2"), 1); + + ops::PadV2 p1(scope.WithOpName("p1"), in1, paddings1, c1); + ops::PadV2 p2(scope.WithOpName("p2"), in2, paddings2, c2); + + ops::Add out(scope.WithOpName("out"), p1, p2); + + GrapplerItem item; + item.fetch = {"out"}; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef got; + Status status = fold.Optimize(nullptr, item, &got); + TF_EXPECT_OK(status); + + GraphDef want; + AddNode("in1", "VariableV2", {}, &want); + AddNode("in2", "VariableV2", {}, &want); + AddNode("paddings1", "Const", {}, &want); + AddNode("paddings2", "Const", {}, &want); + AddNode("c1", "Const", {}, &want); + AddNode("c2", "Const", {}, &want); + 
AddNode("p1", "Identity", + {"in1", AsControlDependency("paddings1"), AsControlDependency("c1")}, + &want); + AddNode("p2", "PadV2", {"in2", "paddings2", "c2"}, &want); + AddNode("out", "Add", {"p1", "p2"}, &want); + + CompareGraphs(want, got); +} + +TEST_F(ConstantFoldingTest, SqueezeWithAllDimesionsGreaterThanOne) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + + auto in1 = ops::Variable(scope.WithOpName("in1"), {2, 3}, DT_INT32); + auto in2 = ops::Variable(scope.WithOpName("in2"), {1, 2, 3, 1}, DT_INT32); + + ops::Squeeze s1(scope.WithOpName("s1"), in1); + ops::Squeeze s2(scope.WithOpName("s2"), in2); + + ops::Add out(scope.WithOpName("out"), s1, s2); + + GrapplerItem item; + item.fetch = {"out"}; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef got; + Status status = fold.Optimize(nullptr, item, &got); + TF_EXPECT_OK(status); + + GraphDef want; + AddNode("in1", "VariableV2", {}, &want); + AddNode("in2", "VariableV2", {}, &want); + AddNode("s1", "Identity", {"in1"}, &want); + AddNode("s2", "Squeeze", {"in2"}, &want); + AddNode("out", "Add", {"s1", "s2"}, &want); + + CompareGraphs(want, got); +} + TEST_F(ConstantFoldingTest, NoOpReduction) { // Build a simple graph with a reduction that can be reduced to the // identity. -- GitLab From 067f14c6e1cf23d69008cf62507fc3b41aba882f Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Sat, 10 Mar 2018 19:44:13 -0800 Subject: [PATCH 705/884] Adds a warning to help user to debug the TPU program hanging for predict() call. 
PiperOrigin-RevId: 188624174 --- tensorflow/contrib/tpu/python/tpu/tpu_estimator.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py index d918b0f198..b3a7a4bd8d 100644 --- a/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py +++ b/tensorflow/contrib/tpu/python/tpu/tpu_estimator.py @@ -2096,6 +2096,13 @@ class TPUEstimator(estimator_lib.Estimator): host_ops), ] + input_hooks + # TODO(b/73813593): Delete this logging once the bug is resolved. + logging.info( + 'If the Tensors in TPUEstimatorSpec.predictions dict are large, ' + 'you might observe the TPU program getting stuck (b/73813593). ' + 'Consider using small Tensors in the predictions dict to verify ' + 'the issue and report on the bug.') + return model_fn_lib.EstimatorSpec( mode, prediction_hooks=hooks, -- GitLab From ef235c77b6a00538b72c6053e427a77d0fea0b57 Mon Sep 17 00:00:00 2001 From: Edd Wilder-James Date: Sat, 10 Mar 2018 21:39:43 -0800 Subject: [PATCH 706/884] Include links to new announce@ list (#17606) * Add ewilderj as CoC contact * Add named contacts to code of conduct * Update language to match multiple stewards * Include links to new announce@ list. --- README.md | 4 ++++ tensorflow/docs_src/community/welcome.md | 2 ++ 2 files changed, 6 insertions(+) diff --git a/README.md b/README.md index ef5bdc66ef..3cdb6e478d 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,10 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. +Keep up to date with release announcements and security updates by +subscribing to +[announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). 
+ ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* diff --git a/tensorflow/docs_src/community/welcome.md b/tensorflow/docs_src/community/welcome.md index d2d3f9edae..6d0458e678 100644 --- a/tensorflow/docs_src/community/welcome.md +++ b/tensorflow/docs_src/community/welcome.md @@ -51,6 +51,8 @@ Europe: TensorFlow provides multiple communication paths. To pick the right path, please read the following list carefully: + * For new release announcements and security updates, subscribe to + [announce@tensorflow.org](https://groups.google.com/a/tensorflow.org/forum/#!forum/announce). * To ask or answer technical questions about TensorFlow, use [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow). For example, ask or search Stack Overflow about a particular error message -- GitLab From df2b8447dc026d1402e3c0cbf7c0071ad5c67178 Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 10 Mar 2018 21:40:25 -0800 Subject: [PATCH 707/884] Fix mac installation documentation error (#17617) This fix tries to address 17614 where installation for python 2 was incorrectly pointing to python3. The error was fixed by f4e70be, but later it has been overridden by 9dae88d. This fix fixes 17614. Signed-off-by: Yong Tang --- tensorflow/docs_src/install/install_mac.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/docs_src/install/install_mac.md b/tensorflow/docs_src/install/install_mac.md index 94defcd18c..205db8e6bd 100644 --- a/tensorflow/docs_src/install/install_mac.md +++ b/tensorflow/docs_src/install/install_mac.md @@ -118,8 +118,8 @@ Take the following steps to install TensorFlow with Virtualenv: Python 2.7, the command to install TensorFlow in the active Virtualenv is as follows: -
 $ pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl
+
 $ pip install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl
If you encounter installation problems, see [Common Installation Problems](#common-installation-problems). @@ -241,8 +241,8 @@ take the following steps: you are installing TensorFlow for Mac OS and Python 2.7 issue the following command: -
 $ sudo pip3 install --upgrade \
-     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py3-none-any.whl 
+
 $ sudo pip install --upgrade \
+     https://storage.googleapis.com/tensorflow/mac/cpu/tensorflow-1.6.0-py2-none-any.whl 
If the preceding command fails, see [installation problems](#common-installation-problems). -- GitLab From 0a5945eda820f04148ea7c7c0670e49066292d8b Mon Sep 17 00:00:00 2001 From: brett koonce Date: Sat, 10 Mar 2018 21:41:36 -0800 Subject: [PATCH 708/884] SECURITY.md: minor sp, permisisons->permissions (#17597) --- SECURITY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SECURITY.md b/SECURITY.md index 9f252e6818..665a480ba7 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -113,7 +113,7 @@ use have been carefully audited to be safe**. Similar to best practices for other servers, we recommend running any `ModelServer` with appropriate privileges (i.e., using a separate user with -reduced permisisons). In the spirit of defense in depth, we recommend +reduced permissions). In the spirit of defense in depth, we recommend authenticating requests to any TensorFlow server connected to an untrusted network, as well as sandboxing the server to minimize the adverse effects of any breach. -- GitLab From bdc3ab88cff9cbbefb7076e9f18afe628ca7d68c Mon Sep 17 00:00:00 2001 From: Yong Tang Date: Sat, 10 Mar 2018 21:42:09 -0800 Subject: [PATCH 709/884] Fix broken graphviz download link and change to https (#17604) The graphviz download link has been changed to https://www.graphviz.org/download/ This fix fixes the broken link in jit.md. Signed-off-by: Yong Tang --- tensorflow/docs_src/performance/xla/jit.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/docs_src/performance/xla/jit.md b/tensorflow/docs_src/performance/xla/jit.md index d4dc3e57c8..d9a979ccbd 100644 --- a/tensorflow/docs_src/performance/xla/jit.md +++ b/tensorflow/docs_src/performance/xla/jit.md @@ -157,7 +157,7 @@ to fuse Ops is visible by starting at `hlo_graph_0.dot` and viewing each diagram in succession. 
To Render the .dot file into a png, install -[GraphViz](http://www.graphviz.org/Download..php) and run: +[GraphViz](https://www.graphviz.org/download/) and run: ```shell dot -Tpng hlo_graph_80.dot -o hlo_graph_80.png -- GitLab From bac9bc1abc21a6ba718beee88c2657402b813333 Mon Sep 17 00:00:00 2001 From: Panos Ipeirotis Date: Sun, 11 Mar 2018 00:47:59 -0500 Subject: [PATCH 710/884] Fix bug 17175 (#17283) * Fix bug 17175 * Added support for uppercase letters for axes in einsum equation * Incorrect test * Extra character removed from regex --- tensorflow/python/ops/special_math_ops.py | 4 ++-- tensorflow/python/ops/special_math_ops_test.py | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/python/ops/special_math_ops.py b/tensorflow/python/ops/special_math_ops.py index 6d7eaababc..5e2146b79f 100644 --- a/tensorflow/python/ops/special_math_ops.py +++ b/tensorflow/python/ops/special_math_ops.py @@ -163,7 +163,7 @@ def einsum(equation, *inputs, **kwargs): if '...' in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('([a-z,]+)(->[a-z]*)?', equation) + match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) @@ -402,7 +402,7 @@ def _exponential_space_einsum(equation, *inputs): if '...' 
in equation: raise ValueError('Subscripts with ellipses are not yet supported.') - match = re.match('([a-z,]+)(->[a-z]*)?', equation) + match = re.match('^([a-zA-Z,]+)(->[a-zA-Z]*)?$', equation) if not match: raise ValueError('Indices have incorrect format: %s' % equation) diff --git a/tensorflow/python/ops/special_math_ops_test.py b/tensorflow/python/ops/special_math_ops_test.py index 2c212f4548..d7c3a7e8dc 100644 --- a/tensorflow/python/ops/special_math_ops_test.py +++ b/tensorflow/python/ops/special_math_ops_test.py @@ -192,6 +192,9 @@ class EinsumTest(test.TestCase): 'abc,cba', 'dba,ead,cad->bce', 'aef,fbc,dca->bde', + 'iJ,Jk->ik', + 'iJ,Ki->JK', + 'iJk,Jklm->Jk' ] long_cases = [ @@ -208,6 +211,8 @@ class EinsumTest(test.TestCase): 'ijk ijk', 'ij.jk->ik', 'ij...,jk...->ik...', + 'ij,k ->kji', + 'ij,k-> kji', # axis in output that does not exist 'ij,jk->im', -- GitLab From 3b7fcd7f938ee883dbd35a480b74c3b62d35161d Mon Sep 17 00:00:00 2001 From: "Yuan (Terry) Tang" Date: Sun, 11 Mar 2018 00:48:54 -0500 Subject: [PATCH 711/884] Minor improvements to `estimator.predict()` docs (#17100) * Minor improvements to `estimator.predict()` docs * Update estimator.py * Fix line length * Added quotes around code --- tensorflow/python/estimator/estimator.py | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tensorflow/python/estimator/estimator.py b/tensorflow/python/estimator/estimator.py index 41a13587d1..4d4d8e25a0 100644 --- a/tensorflow/python/estimator/estimator.py +++ b/tensorflow/python/estimator/estimator.py @@ -139,8 +139,8 @@ class Estimator(object): to configure Estimators from hyper parameter tuning. * `config`: Optional configuration object. Will receive what is passed to Estimator in `config` parameter, or the default `config`. - Allows updating things in your model_fn based on configuration - such as `num_ps_replicas`, or `model_dir`. 
+ Allows updating things in your `model_fn` based on + configuration such as `num_ps_replicas`, or `model_dir`. * Returns: `EstimatorSpec` @@ -301,11 +301,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a + * A tuple (features, labels): Where `features` is a `Tensor` or a + dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. Both - features and labels are consumed by `model_fn`. They should satisfy - the expectation of `model_fn` from inputs. + `features` and `labels` are consumed by `model_fn`. They should + satisfy the expectation of `model_fn` from inputs. hooks: List of `SessionRunHook` subclass instances. Used for callbacks inside the training loop. @@ -381,11 +381,11 @@ class Estimator(object): * A 'tf.data.Dataset' object: Outputs of `Dataset` object must be a tuple (features, labels) with same constraints as below. - * A tuple (features, labels): Where features is a `Tensor` or a - dictionary of string feature name to `Tensor` and labels is a + * A tuple (features, labels): Where `features` is a `Tensor` or a + dictionary of string feature name to `Tensor` and `labels` is a `Tensor` or a dictionary of string label name to `Tensor`. Both - features and labels are consumed by `model_fn`. They should satisfy - the expectation of `model_fn` from inputs. + `features` and `labels` are consumed by `model_fn`. They should + satisfy the expectation of `model_fn` from inputs. steps: Number of steps for which to evaluate model. If `None`, evaluates until `input_fn` raises an end-of-input exception. @@ -457,17 +457,17 @@ class Estimator(object): checkpoint_path: Path of a specific checkpoint to predict. If `None`, the latest checkpoint in `model_dir` is used. 
yield_single_examples: If False, yield the whole batch as returned by the - model_fn instead of decomposing the batch into individual elements. This - is useful if model_fn return some tensor with first dimension not - equal to the batch size + `model_fn` instead of decomposing the batch into individual elements. + This is useful if `model_fn` returns some tensors whose first dimension + is not equal to the batch size. Yields: Evaluated values of `predictions` tensors. Raises: - ValueError: Could not find a trained model in model_dir. - ValueError: if batch length of predictions are not same and - yield_single_examples is True. + ValueError: Could not find a trained model in `model_dir`. + ValueError: If batch length of predictions is not the same and + `yield_single_examples` is True. ValueError: If there is a conflict between `predict_keys` and `predictions`. For example if `predict_keys` is not `None` but `EstimatorSpec.predictions` is not a `dict`. -- GitLab From ae3badb08c7a081c9683a2c85f33ccc969e5c1f6 Mon Sep 17 00:00:00 2001 From: Paul Van Eck Date: Sat, 10 Mar 2018 21:51:05 -0800 Subject: [PATCH 712/884] Add wheel dependency to cmake README (#17063) In order to run to create the pip package after compilation, wheel needs to be installed. This explicitly lists wheel as a prereq in the README. --- tensorflow/contrib/cmake/README.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tensorflow/contrib/cmake/README.md b/tensorflow/contrib/cmake/README.md index 8f85a75ee4..fe83bb3204 100644 --- a/tensorflow/contrib/cmake/README.md +++ b/tensorflow/contrib/cmake/README.md @@ -26,7 +26,7 @@ The CMake files in this directory can build the core TensorFlow runtime, an example C++ binary, and a PIP package containing the runtime and Python bindings. -### Pre-requisites +### Prerequisites * CMake version 3.5 or later. @@ -34,14 +34,16 @@ bindings. 
* [SWIG](http://www.swig.org/download.html) -* Additional pre-requisites for Microsoft Windows: +* Additional prerequisites for Microsoft Windows: - Visual Studio 2015 - Python 3.5 - - NumPy 1.11.0 or later -* Additional pre-requisites for Linux: +* Additional prerequisites for Linux: - Python 2.7 or later - [Docker](https://www.docker.com/) (for automated testing) + +* Python dependencies: + - wheel - NumPy 1.11.0 or later ### Known-good configurations @@ -102,7 +104,7 @@ ops or APIs. Step-by-step Windows build ========================== -1. Install the pre-requisites detailed above, and set up your environment. +1. Install the prerequisites detailed above, and set up your environment. * The following commands assume that you are using the Windows Command Prompt (`cmd.exe`). You will need to set up your environment to use the -- GitLab From 0c0ee52e7841f7d14b4c8465a5825aaa2fef0fdb Mon Sep 17 00:00:00 2001 From: Gunhan Gulsoy Date: Sat, 10 Mar 2018 22:18:23 -0800 Subject: [PATCH 713/884] Fix windows GPU build scripts. PiperOrigin-RevId: 188629017 --- tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat index b87e4a9bec..4656afe025 100644 --- a/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat +++ b/tensorflow/tools/ci_build/windows/gpu/cmake/run_build.bat @@ -37,7 +37,7 @@ SET CMAKE_DIR=%REPO_ROOT%\tensorflow\contrib\cmake SET MSBUILD_EXE="C:\Program Files (x86)\MSBuild\14.0\Bin\msbuild.exe" :: Run cmake to create Visual Studio Project files. 
-%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX +%CMAKE_EXE% %CMAKE_DIR% -A x64 -DSWIG_EXECUTABLE=%SWIG_EXE% -DPYTHON_EXECUTABLE=%PY_EXE% -DCMAKE_BUILD_TYPE=Release -DPYTHON_LIBRARIES=%PY_LIB% -Dtensorflow_BUILD_PYTHON_TESTS=%BUILD_PYTHON_TESTS% -Dtensorflow_BUILD_CC_TESTS=%BUILD_CC_TESTS% -Dtensorflow_ENABLE_GPU=ON -DCUDNN_HOME=%CUDNN_HOME% -Dtensorflow_TF_NIGHTLY=%TF_NIGHTLY% -Dtensorflow_DISABLE_EIGEN_FORCEINLINE=%DISABLE_FORCEINLINE% -Dtensorflow_WIN_CPU_SIMD_OPTIONS=/arch:AVX -G"Visual Studio 14" :: Run msbuild in the resulting VS project files to build a pip package. %MSBUILD_EXE% /p:Configuration=Release /maxcpucount:32 tf_python_build_pip_package.vcxproj -- GitLab From d58f2b50b66d555790de51d5036320949101afa1 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 11 Mar 2018 10:00:02 -0700 Subject: [PATCH 714/884] Improve errors raised when an object does not match the RNNCell interface. 
PiperOrigin-RevId: 188651070 --- .../rnn/python/kernel_tests/rnn_cell_test.py | 8 ++-- .../contrib/rnn/python/ops/core_rnn_cell.py | 10 ++--- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 3 +- .../seq2seq/python/ops/attention_wrapper.py | 4 +- .../seq2seq/python/ops/basic_decoder.py | 3 +- .../seq2seq/python/ops/beam_search_decoder.py | 3 +- tensorflow/python/ops/rnn.py | 25 +++-------- tensorflow/python/ops/rnn_cell_impl.py | 45 ++++++++++++++++++- 8 files changed, 61 insertions(+), 40 deletions(-) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index 7de55a0bb3..69f7b8e107 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -455,8 +455,8 @@ class RNNCellTest(test.TestCase): self.assertAllClose(np.concatenate(res[1], axis=1), expected_state) def testAttentionCellWrapperFailures(self): - with self.assertRaisesRegexp(TypeError, - "The parameter cell is not RNNCell."): + with self.assertRaisesRegexp( + TypeError, rnn_cell_impl.ASSERT_LIKE_RNNCELL_ERROR_REGEXP): contrib_rnn_cell.AttentionCellWrapper(None, 0) num_units = 8 @@ -1203,7 +1203,7 @@ class LayerNormBasicLSTMCellTest(test.TestCase): h1 = array_ops.zeros([1, 2]) state1 = rnn_cell.LSTMStateTuple(c1, h1) state = (state0, state1) - single_cell = lambda: contrib_rnn_cell.LayerNormBasicLSTMCell(2, layer_norm=False) + single_cell = lambda: contrib_rnn_cell.LayerNormBasicLSTMCell(2, layer_norm=False) # pylint: disable=line-too-long cell = rnn_cell.MultiRNNCell([single_cell() for _ in range(2)]) g, out_m = cell(x, state) sess.run([variables.global_variables_initializer()]) @@ -1235,7 +1235,7 @@ class LayerNormBasicLSTMCellTest(test.TestCase): self.assertAllClose(expected_state1_h, actual_state1_h, 1e-5) with variable_scope.variable_scope( - "other", initializer=init_ops.constant_initializer(0.5)) as vs: + "other", 
initializer=init_ops.constant_initializer(0.5)): x = array_ops.zeros( [1, 3]) # Test BasicLSTMCell with input_size != num_units. c = array_ops.zeros([1, 2]) diff --git a/tensorflow/contrib/rnn/python/ops/core_rnn_cell.py b/tensorflow/contrib/rnn/python/ops/core_rnn_cell.py index 8109ebc718..645f82624b 100644 --- a/tensorflow/contrib/rnn/python/ops/core_rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/core_rnn_cell.py @@ -40,7 +40,6 @@ from tensorflow.python.util import nest # pylint: disable=protected-access,invalid-name RNNCell = rnn_cell_impl.RNNCell -_like_rnncell = rnn_cell_impl._like_rnncell _WEIGHTS_VARIABLE_NAME = rnn_cell_impl._WEIGHTS_VARIABLE_NAME _BIAS_VARIABLE_NAME = rnn_cell_impl._BIAS_VARIABLE_NAME # pylint: enable=protected-access,invalid-name @@ -221,8 +220,7 @@ class EmbeddingWrapper(RNNCell): ValueError: if embedding_classes is not positive. """ super(EmbeddingWrapper, self).__init__(_reuse=reuse) - if not _like_rnncell(cell): - raise TypeError("The parameter cell is not RNNCell.") + rnn_cell_impl.assert_like_rnncell("cell", cell) if embedding_classes <= 0 or embedding_size <= 0: raise ValueError("Both embedding_classes and embedding_size must be > 0: " "%d, %d." % (embedding_classes, embedding_size)) @@ -301,8 +299,7 @@ class InputProjectionWrapper(RNNCell): super(InputProjectionWrapper, self).__init__(_reuse=reuse) if input_size is not None: logging.warn("%s: The input_size parameter is deprecated.", self) - if not _like_rnncell(cell): - raise TypeError("The parameter cell is not RNNCell.") + rnn_cell_impl.assert_like_rnncell("cell", cell) self._cell = cell self._num_proj = num_proj self._activation = activation @@ -356,8 +353,7 @@ class OutputProjectionWrapper(RNNCell): ValueError: if output_size is not positive. 
""" super(OutputProjectionWrapper, self).__init__(_reuse=reuse) - if not _like_rnncell(cell): - raise TypeError("The parameter cell is not RNNCell.") + rnn_cell_impl.assert_like_rnncell("cell", cell) if output_size < 1: raise ValueError("Parameter output_size must be > 0: %d." % output_size) self._cell = cell diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 6bea8d4a21..3028edad1b 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -1143,8 +1143,7 @@ class AttentionCellWrapper(rnn_cell_impl.RNNCell): `state_is_tuple` is `False` or if attn_length is zero or less. """ super(AttentionCellWrapper, self).__init__(_reuse=reuse) - if not rnn_cell_impl._like_rnncell(cell): # pylint: disable=protected-access - raise TypeError("The parameter cell is not RNNCell.") + rnn_cell_impl.assert_like_rnncell("cell", cell) if nest.is_sequence(cell.state_size) and not state_is_tuple: raise ValueError( "Cell returns tuple of states, but the flag " diff --git a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py index 0a53fd66db..f8da5a3e17 100644 --- a/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py +++ b/tensorflow/contrib/seq2seq/python/ops/attention_wrapper.py @@ -1152,9 +1152,7 @@ class AttentionWrapper(rnn_cell_impl.RNNCell): is a list, and its length does not match that of `attention_layer_size`. 
""" super(AttentionWrapper, self).__init__(name=name) - if not rnn_cell_impl._like_rnncell(cell): # pylint: disable=protected-access - raise TypeError( - "cell must be an RNNCell, saw type: %s" % type(cell).__name__) + rnn_cell_impl.assert_like_rnncell("cell", cell) if isinstance(attention_mechanism, (list, tuple)): self._is_multi = True attention_mechanisms = attention_mechanism diff --git a/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py b/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py index ed226239b8..7eb95e5a70 100644 --- a/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/basic_decoder.py @@ -59,8 +59,7 @@ class BasicDecoder(decoder.Decoder): Raises: TypeError: if `cell`, `helper` or `output_layer` have an incorrect type. """ - if not rnn_cell_impl._like_rnncell(cell): # pylint: disable=protected-access - raise TypeError("cell must be an RNNCell, received: %s" % type(cell)) + rnn_cell_impl.assert_like_rnncell("cell", cell) if not isinstance(helper, helper_py.Helper): raise TypeError("helper must be a Helper, received: %s" % type(helper)) if (output_layer is not None diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index d6184d6109..22dc7f2eda 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -195,8 +195,7 @@ class BeamSearchDecoder(decoder.Decoder): ValueError: If `start_tokens` is not a vector or `end_token` is not a scalar. 
""" - if not rnn_cell_impl._like_rnncell(cell): # pylint: disable=protected-access - raise TypeError("cell must be an RNNCell, received: %s" % type(cell)) + rnn_cell_impl.assert_like_rnncell("cell", cell) # pylint: disable=protected-access if (output_layer is not None and not isinstance(output_layer, layers_base.Layer)): raise TypeError( diff --git a/tensorflow/python/ops/rnn.py b/tensorflow/python/ops/rnn.py index 625d433b1f..c59eccc174 100644 --- a/tensorflow/python/ops/rnn.py +++ b/tensorflow/python/ops/rnn.py @@ -45,7 +45,6 @@ from tensorflow.python.util.tf_export import tf_export # pylint: disable=protected-access _concat = rnn_cell_impl._concat -_like_rnncell = rnn_cell_impl._like_rnncell # pylint: enable=protected-access @@ -403,11 +402,8 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None, Raises: TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. """ - - if not _like_rnncell(cell_fw): - raise TypeError("cell_fw must be an instance of RNNCell") - if not _like_rnncell(cell_bw): - raise TypeError("cell_bw must be an instance of RNNCell") + rnn_cell_impl.assert_like_rnncell("cell_fw", cell_fw) + rnn_cell_impl.assert_like_rnncell("cell_bw", cell_bw) with vs.variable_scope(scope or "bidirectional_rnn"): # Forward direction @@ -568,8 +564,7 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None, TypeError: If `cell` is not an instance of RNNCell. ValueError: If inputs is None or an empty list. """ - if not _like_rnncell(cell): - raise TypeError("cell must be an instance of RNNCell") + rnn_cell_impl.assert_like_rnncell("cell", cell) with vs.variable_scope(scope or "rnn") as varscope: # Create a new scope in which the caching device is either @@ -1015,9 +1010,8 @@ def raw_rnn(cell, loop_fn, TypeError: If `cell` is not an instance of RNNCell, or `loop_fn` is not a `callable`. 
""" + rnn_cell_impl.assert_like_rnncell("cell", cell) - if not _like_rnncell(cell): - raise TypeError("cell must be an instance of RNNCell") if not callable(loop_fn): raise TypeError("loop_fn must be a callable") @@ -1229,9 +1223,7 @@ def static_rnn(cell, ValueError: If `inputs` is `None` or an empty list, or if the input depth (column size) cannot be inferred from inputs via shape inference. """ - - if not _like_rnncell(cell): - raise TypeError("cell must be an instance of RNNCell") + rnn_cell_impl.assert_like_rnncell("cell", cell) if not nest.is_sequence(inputs): raise TypeError("inputs must be a sequence") if not inputs: @@ -1469,11 +1461,8 @@ def static_bidirectional_rnn(cell_fw, TypeError: If `cell_fw` or `cell_bw` is not an instance of `RNNCell`. ValueError: If inputs is None or an empty list. """ - - if not _like_rnncell(cell_fw): - raise TypeError("cell_fw must be an instance of RNNCell") - if not _like_rnncell(cell_bw): - raise TypeError("cell_bw must be an instance of RNNCell") + rnn_cell_impl.assert_like_rnncell("cell_fw", cell_fw) + rnn_cell_impl.assert_like_rnncell("cell_bw", cell_bw) if not nest.is_sequence(inputs): raise TypeError("inputs must be a sequence") if not inputs: diff --git a/tensorflow/python/ops/rnn_cell_impl.py b/tensorflow/python/ops/rnn_cell_impl.py index e61d10835f..fe380c44da 100644 --- a/tensorflow/python/ops/rnn_cell_impl.py +++ b/tensorflow/python/ops/rnn_cell_impl.py @@ -55,6 +55,8 @@ _BIAS_VARIABLE_NAME = "bias" _WEIGHTS_VARIABLE_NAME = "kernel" +# TODO(jblespiau): Remove this function when we are sure there are no longer +# any usage (even if protected, it is being used). Prefer assert_like_rnncell. def _like_rnncell(cell): """Checks that a given object is an RNNCell by using duck typing.""" conditions = [hasattr(cell, "output_size"), hasattr(cell, "state_size"), @@ -62,6 +64,45 @@ def _like_rnncell(cell): return all(conditions) +# This can be used with self.assertRaisesRegexp for assert_like_rnncell. 
+ASSERT_LIKE_RNNCELL_ERROR_REGEXP = "is not an RNNCell" + + +def assert_like_rnncell(cell_name, cell): + """Raises a TypeError if cell is not like an RNNCell. + + NOTE: Do not rely on the error message (in particular in tests) which can be + subject to change to increase readability. Use + ASSERT_LIKE_RNNCELL_ERROR_REGEXP. + + Args: + cell_name: A string to give a meaningful error referencing to the name + of the functionargument. + cell: The object which should behave like an RNNCell. + + Raises: + TypeError: A human-friendly exception. + """ + conditions = [ + hasattr(cell, "output_size"), + hasattr(cell, "state_size"), + hasattr(cell, "zero_state"), + callable(cell), + ] + errors = [ + "'output_size' property is missing", + "'state_size' property is missing", + "'zero_state' method is missing", + "is not callable" + ] + + if not all(conditions): + + errors = [error for error, cond in zip(errors, conditions) if not cond] + raise TypeError("The argument {!r} ({}) is not an RNNCell: {}.".format( + cell_name, cell, ", ".join(errors))) + + def _concat(prefix, suffix, static=False): """Concat that enables int, Tensor, or TensorShape values. @@ -914,8 +955,8 @@ class DropoutWrapper(RNNCell): but not `callable`. ValueError: if any of the keep_probs are not between 0 and 1. """ - if not _like_rnncell(cell): - raise TypeError("The parameter cell is not a RNNCell.") + assert_like_rnncell("cell", cell) + if (dropout_state_filter_visitor is not None and not callable(dropout_state_filter_visitor)): raise TypeError("dropout_state_filter_visitor must be callable") -- GitLab From 84967d4aba3fd7dc72c9bb16ea1453ff634ebeb8 Mon Sep 17 00:00:00 2001 From: Bixia Zheng Date: Sun, 11 Mar 2018 10:15:18 -0700 Subject: [PATCH 715/884] Selectively re-enable bfloat16 tests for the GPU backend. 
PiperOrigin-RevId: 188651655 --- tensorflow/compiler/xla/tests/reduce_window_test.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tensorflow/compiler/xla/tests/reduce_window_test.cc b/tensorflow/compiler/xla/tests/reduce_window_test.cc index 8e976e8a31..6f3b8ea9b6 100644 --- a/tensorflow/compiler/xla/tests/reduce_window_test.cc +++ b/tensorflow/compiler/xla/tests/reduce_window_test.cc @@ -41,9 +41,7 @@ limitations under the License. namespace xla { namespace { -// TODO(b/74260408): This test is timing out if bfloat16 is enabled on -// GPU. Last timed out on 2018-03-06. -#if defined(XLA_BACKEND_SUPPORTS_BFLOAT16) && !defined(XLA_TEST_BACKEND_GPU) +#ifdef XLA_BACKEND_SUPPORTS_BFLOAT16 // Tests both F32 and BF16. static std::array use_bfloat16_params{false, true}; #else @@ -978,9 +976,13 @@ struct R2ReduceWindowTestData { {/*base_bounds=*/{3, 129}, /*window_bounds=*/{1, 100}, /*strides=*/{2, 99}, /*layout=*/{0, 1}, /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd}, +// TODO(b/74260408): This test last failed on GPU on 2018-03-08, likely due to a +// ptxas bug. +#ifndef XLA_TEST_BACKEND_GPU {/*base_bounds=*/{6, 152}, /*window_bounds=*/{2, 25}, /*strides=*/{5, 4}, /*layout=*/{0, 1}, /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd}, +#endif {/*base_bounds=*/{6, 4}, /*window_bounds=*/{4, 2}, /*strides=*/{3, 3}, /*layout=*/{0, 1}, /*padding=*/Padding::kSame, /*reducer=*/Reducer::kAdd}, -- GitLab From fcd8162a1ebb31b7e6847caa051652bc9217c9ad Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 11 Mar 2018 10:44:02 -0700 Subject: [PATCH 716/884] Specify the `maximum_iterations` to tf.while_loop in tf.scan to be compatible with XLA. 
PiperOrigin-RevId: 188652533 --- tensorflow/python/ops/functional_ops.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tensorflow/python/ops/functional_ops.py b/tensorflow/python/ops/functional_ops.py index 8f5673597e..a840b1eddf 100644 --- a/tensorflow/python/ops/functional_ops.py +++ b/tensorflow/python/ops/functional_ops.py @@ -364,8 +364,8 @@ def map_fn(fn, elems, dtype=None, parallel_iterations=10, back_prop=True, dtype = dtype or input_pack([elem.dtype for elem in elems_flat]) dtype_flat = output_flatten(dtype) - # Convert elems to tensor array. - n = array_ops.shape(elems_flat[0])[0] + # Convert elems to tensor array. n may be known statically. + n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0] # TensorArrays are always flat elems_ta = [ @@ -555,7 +555,8 @@ def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, elems_flat = [ ops.convert_to_tensor(elem, name="elem") for elem in elems_flat] - n = array_ops.shape(elems_flat[0])[0] + # Convert elems to tensor array. n may be known statically. + n = elems_flat[0].shape[0].value or array_ops.shape(elems_flat[0])[0] # TensorArrays are always flat elems_ta = [ @@ -615,7 +616,8 @@ def scan(fn, elems, initializer=None, parallel_iterations=10, back_prop=True, _, _, r_a = control_flow_ops.while_loop( lambda i, _1, _2: i < n, compute, (i, a_flat, accs_ta), parallel_iterations=parallel_iterations, - back_prop=back_prop, swap_memory=swap_memory) + back_prop=back_prop, swap_memory=swap_memory, + maximum_iterations=n) results_flat = [r.stack() for r in r_a] -- GitLab From 94e4ea20d9c1c780208d54d415cf3c318442ca18 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Sun, 11 Mar 2018 15:38:16 -0700 Subject: [PATCH 717/884] Fixes a race condition in function instantiation. Previously, if the same function was being concurrently instantiated and released: 1. 
Thread one could begin to instantiate the function, determine that it already existed in the runtime, then be preempted. 2. Thread two could release the handle on the function, causing it to be freed and removed from the `FunctionLibraryRuntime::items_` map. 3. Thread one could then incorrectly assume that the function still existed, and fail to find it in the `FunctionLibraryRuntime::items_` map, causing a segfault when it attempted to increment the refcount on an uninitialized object. PiperOrigin-RevId: 188661500 --- tensorflow/core/common_runtime/function.cc | 24 +++++++++++++++---- .../kernel_tests/filter_dataset_op_test.py | 8 +++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/common_runtime/function.cc b/tensorflow/core/common_runtime/function.cc index effe53c961..37c59a16f5 100644 --- a/tensorflow/core/common_runtime/function.cc +++ b/tensorflow/core/common_runtime/function.cc @@ -496,11 +496,26 @@ Status FunctionLibraryRuntimeImpl::Instantiate( InstantiateOptions options_copy(options); options_copy.target = device_name_; const string key = Canonicalize(function_name, attrs, options_copy); - *handle = parent_->GetHandle(key); - if (*handle != kInvalidHandle) { + + { mutex_lock l(mu_); - items_[parent_->GetHandleOnDevice(device_name_, *handle)]->Ref(); - return Status::OK(); + *handle = parent_->GetHandle(key); + if (*handle != kInvalidHandle) { + FunctionLibraryRuntime::LocalHandle handle_on_device = + parent_->GetHandleOnDevice(device_name_, *handle); + if (handle_on_device == kInvalidLocalHandle) { + return errors::Internal("LocalHandle not found for handle ", *handle, + "."); + } + auto item_handle = items_.find(handle_on_device); + if (item_handle == items_.end()) { + return errors::Internal("LocalHandle ", handle_on_device, + " for handle ", *handle, + " not found in items."); + } + item_handle->second->Ref(); + return Status::OK(); + } } Status s; @@ -553,6 +568,7 @@ Status 
FunctionLibraryRuntimeImpl::ReleaseHandle(Handle handle) { } LocalHandle h = parent_->GetHandleOnDevice(device_name_, handle); + CHECK_NE(h, kInvalidLocalHandle); mutex_lock l(mu_); CHECK_EQ(1, items_.count(h)); Item* item = items_[h]; diff --git a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py index 2c71723167..4f2216f0a3 100644 --- a/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py +++ b/tensorflow/python/data/kernel_tests/filter_dataset_op_test.py @@ -176,6 +176,14 @@ class FilterDatasetTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): sess.run(get_next) + def testParallelFilters(self): + dataset = dataset_ops.Dataset.range(10).filter( + lambda x: math_ops.equal(x % 2, 0)) + iterators = [dataset.make_one_shot_iterator() for _ in range(10)] + next_elements = [iterator.get_next() for iterator in iterators] + with self.test_session() as sess: + self.assertEqual([0 for _ in range(10)], sess.run(next_elements)) + class FilterDatasetBenchmark(test.Benchmark): -- GitLab From 4c2d2872f9ac45d0f68d48d19df9d87289dd7248 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Sun, 11 Mar 2018 16:22:47 -0700 Subject: [PATCH 718/884] Removed duplicate statement. PiperOrigin-RevId: 188663018 --- tensorflow/contrib/lite/interpreter.cc | 3 --- 1 file changed, 3 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 819782a3c6..831cfafeae 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -171,9 +171,6 @@ TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( // Annotate the registration as DELEGATE op. registration.builtin_code = BuiltinOperator_DELEGATE; - // Annotate the registration as DELEGATE op. 
- registration.builtin_code = BuiltinOperator_DELEGATE; - // Analyze the graph to find all independent subgraphs that are either // fully not-this-delegate or this-delegate computation. InterpreterInfo info(this); -- GitLab From f75d332b599641c522d39950428c5fc9e4444ce7 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Sun, 11 Mar 2018 17:49:34 -0700 Subject: [PATCH 719/884] Fix typo in description of INTERNAL error code. PiperOrigin-RevId: 188666142 --- tensorflow/core/lib/core/error_codes.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/core/lib/core/error_codes.proto b/tensorflow/core/lib/core/error_codes.proto index a7306c8cc1..b82d389146 100644 --- a/tensorflow/core/lib/core/error_codes.proto +++ b/tensorflow/core/lib/core/error_codes.proto @@ -119,7 +119,7 @@ enum Code { // Operation is not implemented or not supported/enabled in this service. UNIMPLEMENTED = 12; - // Internal errors. Means some invariants expected by underlying + // Internal errors. Means some invariant expected by the underlying // system has been broken. If you see one of these errors, // something is very broken. 
INTERNAL = 13; -- GitLab From 8f0c30b88017e883c09da640422588804546b8fc Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Sun, 11 Mar 2018 19:35:34 -0700 Subject: [PATCH 720/884] disable flaky asan test PiperOrigin-RevId: 188670616 --- tensorflow/contrib/learn/BUILD | 1 + 1 file changed, 1 insertion(+) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index abf6e393bb..f837ca3265 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -426,6 +426,7 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", + tags = ["noasan"], deps = [ ":learn", "//tensorflow/python:array_ops", -- GitLab From 4b7511f4ecd6d0bd491ec557fe05fdfe731ecdae Mon Sep 17 00:00:00 2001 From: Shashi Shekhar Date: Sun, 11 Mar 2018 20:20:45 -0700 Subject: [PATCH 721/884] Fix assets for the TF camera example. Mobile net model is downloaded from tf_http_archive("tf_mobilenet") rule and renaming the asset file in assets folder has no effect. 
PiperOrigin-RevId: 188672531 --- .../tflitecamerademo/ImageClassifierQuantizedMobileNet.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java index 5f341f0f5b..ee89dbd375 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java @@ -46,7 +46,7 @@ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { @Override protected String getLabelPath() { - return "labels_mobilenet_quant_v1_224.txt"; + return "labels.txt"; } @Override -- GitLab From 76f8fbf1b94de81a90bc8adf441a644024033c65 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Sun, 11 Mar 2018 21:23:15 -0700 Subject: [PATCH 722/884] propagate fix from tensorflow/models#3561 PiperOrigin-RevId: 188675327 --- tensorflow/docs_src/get_started/premade_estimators.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/docs_src/get_started/premade_estimators.md b/tensorflow/docs_src/get_started/premade_estimators.md index 6bffd2e065..e50d2f5420 100644 --- a/tensorflow/docs_src/get_started/premade_estimators.md +++ b/tensorflow/docs_src/get_started/premade_estimators.md @@ -397,9 +397,9 @@ predictions and their probabilities: ``` python -for pred_dict, expec in zip(predictions, expected): - template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"') +template = ('\nPrediction is "{}" ({:.1f}%), expected "{}"') +for pred_dict, expec in zip(predictions, expected): class_id = pred_dict['class_ids'][0] probability = pred_dict['probabilities'][class_id] -- GitLab From 
107e0904233c35791917654a82631ce2fca7bd37 Mon Sep 17 00:00:00 2001 From: Olivia Nordquist Date: Mon, 12 Mar 2018 00:30:38 -0700 Subject: [PATCH 723/884] this test is also timing out in cuda so disabling for now PiperOrigin-RevId: 188685611 --- tensorflow/contrib/distributions/BUILD | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 84f74ce79c..203fbf9931 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -486,7 +486,11 @@ cuda_py_test( "//third_party/py/numpy", "//tensorflow/python:client_testlib", ], - tags = ["noasan"], + tags = [ + "manual", + "noasan", + "noguitar", + ], ) cuda_py_test( -- GitLab From cd67e8eb088537874b53b4fa52d02ff50c4a66fa Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 03:13:15 -0700 Subject: [PATCH 724/884] Lint some files. PiperOrigin-RevId: 188698275 --- .../rnn/python/kernel_tests/rnn_cell_test.py | 14 ++++------- tensorflow/contrib/rnn/python/ops/rnn_cell.py | 25 +++++++++++++------ .../seq2seq/python/ops/beam_search_decoder.py | 1 - 3 files changed, 22 insertions(+), 18 deletions(-) diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index 69f7b8e107..f21915ffbc 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -878,7 +878,6 @@ class RNNCellTest(test.TestCase): shape = [2, 1] filter_size = [3] num_features = 1 - batch_size = 2 expected_state_c = np.array( [[[1.4375670191], [1.4375670191]], [[2.7542609292], [2.7542609292]]], dtype=np.float32) @@ -912,7 +911,6 @@ class RNNCellTest(test.TestCase): shape = [2, 2, 1] filter_size = [3, 3] num_features = 1 - batch_size = 2 expected_state_c = np.array( [[[[1.4375670191], [1.4375670191]], [[1.4375670191], [1.4375670191]]], [[[2.7542609292], 
[2.7542609292]], [[2.7542609292], [2.7542609292]] @@ -954,7 +952,6 @@ class RNNCellTest(test.TestCase): shape = [2, 2, 2, 1] filter_size = [3, 3, 3] num_features = 1 - batch_size = 2 expected_state_c = np.array( [[[[[1.4375670191], [1.4375670191]], [[1.4375670191], [1.4375670191]] ], [[[1.4375670191], [1.4375670191]], [[1.4375670191], @@ -1584,7 +1581,7 @@ class WeightNormLSTMCellTest(test.TestCase): """Compared cell output with pre-calculated values.""" def _cell_output(self, cell): - """Calculate cell output""" + """Calculates cell output.""" with self.test_session() as sess: init = init_ops.constant_initializer(0.5) @@ -1611,7 +1608,7 @@ class WeightNormLSTMCellTest(test.TestCase): return actual_state_c, actual_state_h def testBasicCell(self): - """Tests cell w/o peepholes and w/o normalisation""" + """Tests cell w/o peepholes and w/o normalisation.""" def cell(): return contrib_rnn_cell.WeightNormLSTMCell( @@ -1626,7 +1623,7 @@ class WeightNormLSTMCellTest(test.TestCase): self.assertAllClose(expected_h, actual_h, 1e-5) def testNonbasicCell(self): - """Tests cell with peepholes and w/o normalisation""" + """Tests cell with peepholes and w/o normalisation.""" def cell(): return contrib_rnn_cell.WeightNormLSTMCell( @@ -1640,9 +1637,8 @@ class WeightNormLSTMCellTest(test.TestCase): self.assertAllClose(expected_c, actual_c, 1e-5) self.assertAllClose(expected_h, actual_h, 1e-5) - def testBasicCellWithNorm(self): - """Tests cell w/o peepholes and with normalisation""" + """Tests cell w/o peepholes and with normalisation.""" def cell(): return contrib_rnn_cell.WeightNormLSTMCell( @@ -1657,7 +1653,7 @@ class WeightNormLSTMCellTest(test.TestCase): self.assertAllClose(expected_h, actual_h, 1e-5) def testNonBasicCellWithNorm(self): - """Tests cell with peepholes and with normalisation""" + """Tests cell with peepholes and with normalisation.""" def cell(): return contrib_rnn_cell.WeightNormLSTMCell( diff --git a/tensorflow/contrib/rnn/python/ops/rnn_cell.py 
b/tensorflow/contrib/rnn/python/ops/rnn_cell.py index 3028edad1b..73f2607d84 100644 --- a/tensorflow/contrib/rnn/python/ops/rnn_cell.py +++ b/tensorflow/contrib/rnn/python/ops/rnn_cell.py @@ -2058,16 +2058,19 @@ class ConvLSTMCell(rnn_cell_impl.RNNCell): initializers=None, name="conv_lstm_cell"): """Construct ConvLSTMCell. + Args: conv_ndims: Convolution dimensionality (1, 2 or 3). input_shape: Shape of the input as int tuple, excluding the batch size. output_channels: int, number of output channels of the conv LSTM. kernel_shape: Shape of kernel as in tuple (of size 1,2 or 3). - use_bias: Use bias in convolutions. + use_bias: (bool) Use bias in convolutions. skip_connection: If set to `True`, concatenate the input to the - output of the conv LSTM. Default: `False`. + output of the conv LSTM. Default: `False`. forget_bias: Forget bias. + initializers: Unused. name: Name of the module. + Raises: ValueError: If `skip_connection` is `True` and stride is different from 1 or if `input_shape` is incompatible with `conv_ndims`. @@ -2156,15 +2159,19 @@ class Conv3DLSTMCell(ConvLSTMCell): def _conv(args, filter_size, num_features, bias, bias_start=0.0): - """convolution: + """Convolution. + Args: args: a Tensor or a list of Tensors of dimension 3D, 4D or 5D, batch x n, Tensors. filter_size: int tuple of filter height and width. num_features: int, number of features. + bias: Whether to use biases in the convolution layer. bias_start: starting value to initialize the bias; 0 by default. + Returns: A 3D, 4D, or 5D Tensor with shape [batch ... num_features] + Raises: ValueError: if some of the arguments has unspecified or wrong shape. """ @@ -2304,7 +2311,7 @@ class GLSTMCell(rnn_cell_impl.RNNCell): return self._output_size def _get_input_for_group(self, inputs, group_id, group_size): - """Slices inputs into groups to prepare for processing by cell's groups + """Slices inputs into groups to prepare for processing by cell's groups. 
Args: inputs: cell input or it's previous state, @@ -2705,7 +2712,7 @@ class LayerNormLSTMCell(rnn_cell_impl.RNNCell): class SRUCell(rnn_cell_impl.LayerRNNCell): - """SRU, Simple Recurrent Unit + """SRU, Simple Recurrent Unit. Implementation based on Training RNNs as Fast as CNNs (cf. https://arxiv.org/abs/1709.02755). @@ -2753,12 +2760,13 @@ class SRUCell(rnn_cell_impl.LayerRNNCell): input_depth = inputs_shape[1].value + # pylint: disable=protected-access self._kernel = self.add_variable( rnn_cell_impl._WEIGHTS_VARIABLE_NAME, shape=[input_depth, 4 * self._num_units]) - + # pylint: enable=protected-access self._bias = self.add_variable( - rnn_cell_impl._BIAS_VARIABLE_NAME, + rnn_cell_impl._BIAS_VARIABLE_NAME, # pylint: disable=protected-access shape=[2 * self._num_units], initializer=init_ops.constant_initializer(0.0, dtype=self.dtype)) @@ -2767,7 +2775,7 @@ class SRUCell(rnn_cell_impl.LayerRNNCell): def call(self, inputs, state): """Simple recurrent unit (SRU) with num_units cells.""" - U = math_ops.matmul(inputs, self._kernel) + U = math_ops.matmul(inputs, self._kernel) # pylint: disable=invalid-name x_bar, f_intermediate, r_intermediate, x_tx = array_ops.split( value=U, num_or_size_splits=4, axis=1) @@ -2897,6 +2905,7 @@ class WeightNormLSTMCell(rnn_cell_impl.RNNCell): Args: args: a 2D Tensor or a list of 2D, batch x n, Tensors. output_size: int, second dimension of W[i]. + norm: bool, whether to normalize the weights. bias: boolean, whether to add a bias term or not. bias_initializer: starting value to initialize the bias (default is all zeros). 
diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 22dc7f2eda..6e57ccd6dd 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -569,7 +569,6 @@ def _beam_search_step(time, logits, next_cell_state, beam_state, batch_size, time = ops.convert_to_tensor(time, name="time") # During the first time step we only consider the initial beam - scores_shape = array_ops.shape(scores) scores_flat = array_ops.reshape(scores, [batch_size, -1]) # Pick the next beams according to the specified successors function -- GitLab From 12496b26049384b78f63940907078f9269c9866f Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 07:26:13 -0700 Subject: [PATCH 725/884] Reuse the linear index when broadcasting a contiguous range of dimensions. This potentially allows us to get rid of additional mod and div operations. PiperOrigin-RevId: 188719238 --- .../xla/service/elemental_ir_emitter.cc | 11 ++-- .../compiler/xla/service/llvm_ir/ir_array.cc | 63 +++++++++++++++++++ .../compiler/xla/service/llvm_ir/ir_array.h | 9 ++- 3 files changed, 75 insertions(+), 8 deletions(-) diff --git a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc index 111c29593e..b6a0903b0e 100644 --- a/tensorflow/compiler/xla/service/elemental_ir_emitter.cc +++ b/tensorflow/compiler/xla/service/elemental_ir_emitter.cc @@ -1522,15 +1522,12 @@ llvm_ir::ElementGenerator ElementalIrEmitter::MakeElementGenerator( case HloOpcode::kBroadcast: return [this, hlo, &operand_to_generator]( const IrArray::Index& target_index) -> StatusOr { + const HloInstruction* operand = hlo->operand(0); // The `dimensions` member of the broadcast instruction maps from // input dimensions to output dimensions. 
- const HloInstruction* operand = hlo->operand(0); - int64 rank = ShapeUtil::Rank(operand->shape()); - IrArray::Index source_index(rank); - for (int64 i = 0; i < rank; ++i) { - source_index[i] = target_index[hlo->dimensions(i)]; - } - return operand_to_generator.at(operand)(source_index); + return operand_to_generator.at( + operand)(target_index.SourceIndexOfBroadcast( + hlo->shape(), operand->shape(), hlo->dimensions(), ir_builder_)); }; case HloOpcode::kSlice: return [this, hlo, &operand_to_generator]( diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc index d444c1d49d..3312a88844 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.cc @@ -241,6 +241,69 @@ IrArray::Index IrArray::Index::SourceIndexOfBitcast( return Index(multi_index, linear_index, operand_shape); } +IrArray::Index IrArray::Index::SourceIndexOfBroadcast( + const Shape& shape, const Shape& operand_shape, + tensorflow::gtl::ArraySlice dimension_mapping, + llvm::IRBuilder<>* builder) const { + int64 rank = ShapeUtil::Rank(operand_shape); + std::vector source_index(rank); + for (int64 i = 0; i < rank; ++i) { + source_index[i] = multidim_[dimension_mapping[i]]; + } + if (linear_ == nullptr || !LayoutUtil::HasLayout(operand_shape) || + !LayoutUtil::HasLayout(shape)) { + return Index(source_index); + } + // High-level idea: we can reuse the linear index if the broadcasted + // dimensions are contiguous, and this part of the operation is a bitcast. + // The other dimensions can be masked out with a div and a mod operation. + std::vector logical_to_physical = + LayoutUtil::MakeLogicalToPhysical(shape.layout()); + int64 output_rank = ShapeUtil::Rank(shape); + // The minimum physical dimension that is broadcasted. + int64 min_broadcasted_dimension = output_rank; + // The maximum physical dimension that is broadcasted. 
+ int64 max_broadcasted_dimension = -1; + for (int64 i = 0; i < rank; ++i) { + int64 physical_dim = logical_to_physical[dimension_mapping[i]]; + min_broadcasted_dimension = + std::min(min_broadcasted_dimension, physical_dim); + max_broadcasted_dimension = + std::max(max_broadcasted_dimension, physical_dim); + } + bool contiguous_broadcast_dimensions = + max_broadcasted_dimension - min_broadcasted_dimension == rank - 1; + if (!contiguous_broadcast_dimensions) { + return Index(source_index); + } + // Check if the mapped dimensions are a bitcast. + std::vector operand_logical_to_physical = + LayoutUtil::MakeLogicalToPhysical(operand_shape.layout()); + for (int64 i = 0; i < rank; ++i) { + if (operand_logical_to_physical[i] != + logical_to_physical[dimension_mapping[i]] - min_broadcasted_dimension) { + return Index(source_index); + } + } + llvm::Value* linear = linear_; + int64 divisor = 1; + for (int64 i = max_broadcasted_dimension + 1; i < output_rank; ++i) { + divisor *= shape.dimensions(LayoutUtil::Major(shape.layout(), i)); + } + if (divisor > 1) { + linear = builder->CreateUDiv(linear, builder->getInt64(divisor)); + } + if (min_broadcasted_dimension > 0) { + int64 mod = 1; + for (int64 i = min_broadcasted_dimension; i <= max_broadcasted_dimension; + ++i) { + mod *= shape.dimensions(LayoutUtil::Major(shape.layout(), i)); + } + linear = builder->CreateURem(linear, builder->getInt64(mod)); + } + return Index(source_index, linear, operand_shape); +} + llvm::Value* IrArray::Index::Linearize( tensorflow::gtl::ArraySlice dimensions, llvm::IRBuilder<>* builder) const { diff --git a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h index faa92d608c..06cfb2a36c 100644 --- a/tensorflow/compiler/xla/service/llvm_ir/ir_array.h +++ b/tensorflow/compiler/xla/service/llvm_ir/ir_array.h @@ -134,10 +134,17 @@ class IrArray { llvm::IRBuilder<>* builder) const; // Given that "this" is the target index of a bitcast from 
`operand_shape` - // to `shape` with the given dimension mapping, returns the source index. + // to `shape`, returns the source index. Index SourceIndexOfBitcast(const Shape& shape, const Shape& operand_shape, llvm::IRBuilder<>* builder) const; + // Given that "this" is the target index of a broadcast from `operand_shape` + // to `shape` with the given dimension mapping, returns the source index. + Index SourceIndexOfBroadcast( + const Shape& shape, const Shape& operand_shape, + tensorflow::gtl::ArraySlice dimension_mapping, + llvm::IRBuilder<>* builder) const; + // Linearizes the index into the given shape, i.e. reshapes it to rank-1 and // returns the index into the sole dimension 0 of the new shape. llvm::Value* Linearize(tensorflow::gtl::ArraySlice dimensions, -- GitLab From 974bec95f781fbc2c91d40f13457c0953271c160 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 12 Mar 2018 08:03:54 -0700 Subject: [PATCH 726/884] Turn on function optimization by default PiperOrigin-RevId: 188722505 --- tensorflow/core/BUILD | 2 ++ tensorflow/core/grappler/optimizers/function_optimizer.cc | 5 +++++ tensorflow/core/grappler/optimizers/meta_optimizer.cc | 4 ++-- tensorflow/core/protobuf/rewriter_config.proto | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index e9ed5c4910..98a18e4305 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -3205,6 +3205,7 @@ tf_cc_test( "//tensorflow/core/kernels:dense_update_ops", "//tensorflow/core/kernels:fifo_queue_op", "//tensorflow/core/kernels:function_ops", + "//tensorflow/core/kernels:identity_n_op", "//tensorflow/core/kernels:identity_op", "//tensorflow/core/kernels:matmul_op", "//tensorflow/core/kernels:ops_util", @@ -3247,6 +3248,7 @@ tf_cc_test( "//tensorflow/core/kernels:fifo_queue_op", "//tensorflow/core/kernels:function_ops", "//tensorflow/core/kernels:identity_op", + "//tensorflow/core/kernels:identity_n_op", 
"//tensorflow/core/kernels:matmul_op", "//tensorflow/core/kernels:ops_util", "//tensorflow/core/kernels:queue_ops", diff --git a/tensorflow/core/grappler/optimizers/function_optimizer.cc b/tensorflow/core/grappler/optimizers/function_optimizer.cc index d8a237c297..87160f6b83 100644 --- a/tensorflow/core/grappler/optimizers/function_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/function_optimizer.cc @@ -136,6 +136,11 @@ Status FunctionOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, if (func.attr().count("_noinline") != 0) { continue; } + // Don't touch anything marked XLA to prevent XLA failures further down the + // road. + if (func.attr().count("_XlaCompile") != 0) { + continue; + } // Can't create IdentityN nodes with no input or output: skip these // functions for now. if (func.signature().input_arg_size() == 0 || diff --git a/tensorflow/core/grappler/optimizers/meta_optimizer.cc b/tensorflow/core/grappler/optimizers/meta_optimizer.cc index 6fa8c03548..3a764937fd 100644 --- a/tensorflow/core/grappler/optimizers/meta_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/meta_optimizer.cc @@ -94,7 +94,7 @@ Status MetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item, if (!cfg_.disable_model_pruning()) { optimizers.push_back(std::unique_ptr(new ModelPruner())); } - if (cfg_.function_optimization() == RewriterConfig::ON) { + if (cfg_.function_optimization() != RewriterConfig::OFF) { optimizers.push_back( std::unique_ptr(new FunctionOptimizer())); } @@ -231,7 +231,7 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item, bool MetaOptimizerEnabled(const RewriterConfig& cfg) { return !cfg.disable_model_pruning() || cfg.layout_optimizer() != RewriterConfig::OFF || - cfg.function_optimization() == RewriterConfig::ON || + cfg.function_optimization() != RewriterConfig::OFF || cfg.constant_folding() != RewriterConfig::OFF || cfg.arithmetic_optimization() != RewriterConfig::OFF || cfg.loop_optimization() == 
RewriterConfig::ON || diff --git a/tensorflow/core/protobuf/rewriter_config.proto b/tensorflow/core/protobuf/rewriter_config.proto index 0ccf2149f2..b1fceaacf4 100644 --- a/tensorflow/core/protobuf/rewriter_config.proto +++ b/tensorflow/core/protobuf/rewriter_config.proto @@ -44,7 +44,7 @@ message RewriterConfig { Toggle dependency_optimization = 8; // Loop optimizations (default is OFF). Toggle loop_optimization = 9; - // Function optimizations (default is OFF). + // Function optimizations (default is ON). Toggle function_optimization = 10; // If true, don't remove unnecessary ops from the graph bool disable_model_pruning = 2; -- GitLab From e1066ba1a4166ba5ff7ca02ae70e5c44fc385789 Mon Sep 17 00:00:00 2001 From: Asim Shankar Date: Mon, 12 Mar 2018 09:05:33 -0700 Subject: [PATCH 727/884] ResourceScatterUpdate: Gracefully handle inconsistent indices and updates in the kernel. With graph execution, consistency between the shapes of the arguments to ResourceScatterUpdate is validated by the shape inference functions at graph construction time. With eager execution, the shape inference logic isn't executed, so inconsistent arguments could be provided to the kernel, which would result in a segmentation fault prior to this change, as demonstrated by the added tests. 
PiperOrigin-RevId: 188729154 --- tensorflow/core/kernels/resource_variable_ops.cc | 12 +++++++++--- .../kernel_tests/resource_variable_ops_test.py | 9 +++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index 2041fb9094..f254036ba7 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -374,7 +374,7 @@ class AssignVariableOp : public OpKernel { OP_REQUIRES_OK(context, VariantDeviceCopy( VariantDeviceCopyDirection::DEVICE_TO_DEVICE, elements_in(i), &elements_out(i), copy_fn)); - }; + } } private: @@ -608,7 +608,7 @@ class ResourceScatterUpdateOp : public OpKernel { DataTypeString(DataTypeToEnum::v()), " indexing: ", N_big, " > ", std::numeric_limits::max())); - const Index N = static_cast(indices.NumElements()); + const Index N = static_cast(N_big); OP_REQUIRES( c, params->dim_size(0) <= std::numeric_limits::max(), errors::InvalidArgument("params.shape[0] too large for ", @@ -619,7 +619,13 @@ class ResourceScatterUpdateOp : public OpKernel { if (N > 0) { auto indices_flat = indices.flat(); auto params_flat = params->flat_outer_dims(); - auto updates_flat = updates.shaped({N, updates.NumElements() / N}); + int64 num_updates = updates.NumElements(); + OP_REQUIRES(c, num_updates % N == 0, + errors::InvalidArgument( + "shape of indices (", indices.shape().DebugString(), + ") is not compatible with the shape of updates (", + updates.shape().DebugString(), ")")); + auto updates_flat = updates.shaped({N, num_updates / N}); functor::ScatterFunctor functor; const Index bad_i = functor(c, c->template eigen_device(), diff --git a/tensorflow/python/kernel_tests/resource_variable_ops_test.py b/tensorflow/python/kernel_tests/resource_variable_ops_test.py index d34b751062..2dc993f811 100644 --- a/tensorflow/python/kernel_tests/resource_variable_ops_test.py +++ 
b/tensorflow/python/kernel_tests/resource_variable_ops_test.py @@ -586,6 +586,15 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase): state_ops.scatter_update(v, [1], [3]) self.assertAllEqual([1.0, 3.0], v.numpy()) + @test_util.run_in_graph_and_eager_modes() + def testScatterUpdateInvalidArgs(self): + v = resource_variable_ops.ResourceVariable([0, 1, 2, 3], name="update") + # The exact error and message differ between graph construction (where the + # error is realized during shape inference at graph construction time) and + # eager execution (where the error is realized during kernel execution). + with self.assertRaisesRegexp(Exception, r"shape.*2.*3"): + state_ops.scatter_update(v, [0, 1], [0, 1, 2]) + if __name__ == "__main__": test.main() -- GitLab From 6d3bb6cac26684a2553a7a9fa04dd5b12f5434f3 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 09:12:41 -0700 Subject: [PATCH 728/884] Don't remove identity nodes if they follow a device crossing and have consumers on a device different than themselves. They may be used to cache or route data between devices in a deliberate manner. Simplify code in DependencyOptimizer a bit. 
PiperOrigin-RevId: 188730185 --- .../optimizers/dependency_optimizer.cc | 58 +++++++++---------- .../optimizers/dependency_optimizer_test.cc | 51 ++++++++++++++++ 2 files changed, 80 insertions(+), 29 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc index a5b2572c9c..63bc19630d 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer.cc @@ -274,12 +274,17 @@ void DependencyOptimizer::OptimizeNode(int node_idx, // +----------+ y --^> b if (is_noop || is_identity) { + if (is_identity && !SafeToRemoveIdentity(*node)) { + return; + } + const auto& output_node_set = node_map_->GetOutputs(node_name); const std::vector output_nodes(output_node_set.begin(), output_node_set.end()); const int num_outputs = output_nodes.size(); const int num_inputs = node->input_size(); + // Don't increase the number of edges in the graph. if (num_inputs * num_outputs > num_inputs + num_outputs) { return; } @@ -293,39 +298,34 @@ void DependencyOptimizer::OptimizeNode(int node_idx, input_nodes.push_back(input_node); } - // Make sure that we don't increase the number of edges that cross - // device boundaries. - if ((num_inputs == 1 && num_outputs > 1 && - input_nodes[0]->device() != node->device()) || - (num_inputs > 1 && num_outputs == 1 && - output_nodes[0]->device() != node->device())) { + // TODO(rmlarsen): Not all device crossings are equally expensive. + // Assign a cost to each based on device affinity and compute a + // cost before and after. 
+ const string& node_dev = node->device(); + int num_cross_in = 0; + for (NodeDef* input_node : input_nodes) { + num_cross_in += static_cast(input_node->device() != node_dev); + } + int num_cross_out = 0; + for (NodeDef* output_node : output_nodes) { + num_cross_out += static_cast(output_node->device() != node_dev); + } + if (is_identity && num_cross_in > 0 && num_cross_out > 0) { + // This identity node follows a device crossing, so it might be + // following a _Recv node after partitioning. Do not remove such nodes, + // unless they only have consumers on the same device as themselves. return; } - if (num_inputs == 2 && num_outputs == 2) { - const string& noop_dev = node->device(); - const string& in0_dev = input_nodes[0]->device(); - const string& in1_dev = input_nodes[1]->device(); - const string& out0_dev = output_nodes[0]->device(); - const string& out1_dev = output_nodes[1]->device(); - const int num_cross_before = static_cast(in0_dev != noop_dev) + - static_cast(in1_dev != noop_dev) + - static_cast(out0_dev != noop_dev) + - static_cast(out1_dev != noop_dev); - const int num_cross_after = static_cast(in0_dev != out0_dev) + - static_cast(in0_dev != out1_dev) + - static_cast(in1_dev != out0_dev) + - static_cast(in1_dev != out1_dev); - if (num_cross_after > num_cross_before) { - return; - } - // To avoid potentially removing Identity nodes following _Recv nodes, - // we require that no device crossings occur in that case. - // TODO(rmlarsen): See if we can relax this condition. 
- if (is_identity && (num_cross_after > 0 || num_cross_before > 0)) { - return; + const int num_cross_before = num_cross_in + num_cross_out; + int num_cross_after = 0; + for (NodeDef* input_node : input_nodes) { + for (NodeDef* output_node : output_nodes) { + num_cross_after += + static_cast(input_node->device() != output_node->device()); } } - if (is_identity && !SafeToRemoveIdentity(*node)) { + if (num_cross_after > num_cross_before) { + // Avoid increasing the number of device crossings. return; } diff --git a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc index b66cc17a72..cc1e142041 100644 --- a/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/dependency_optimizer_test.cc @@ -595,6 +595,57 @@ TEST_F(DependencyOptimizerTest, IdentityN) { EXPECT_EQ("id_b:1", output.node(8).input(0)); } +TEST_F(DependencyOptimizerTest, + Identity_DeviceCrossing_ConsumerOnDifferentDevice) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x_on_1 = + ops::Const(s.WithOpName("x_on_1").WithDevice("/gpu:1"), {1.0f}, {}); + Output one_on_3 = + ops::Const(s.WithOpName("one_on_3").WithDevice("/gpu:3"), {1.0f}, {}); + Output x_on_2 = + ops::Identity(s.WithOpName("x_on_2").WithDevice("/gpu:2"), x_on_1); + Output result = + ops::Add(s.WithOpName("result").WithDevice("/gpu:3"), x_on_2, one_on_3); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {"result"}; + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + + VerifyGraphsEqual(item.graph, output, __FUNCTION__); +} + +TEST_F(DependencyOptimizerTest, Identity_DeviceCrossing_ConsumerOnSameDevice) { + tensorflow::Scope s = tensorflow::Scope::NewRootScope(); + Output x_on_1 = + ops::Const(s.WithOpName("x_on_1").WithDevice("/gpu:1"), {1.0f}, {}); + Output one_on_2 = + 
ops::Const(s.WithOpName("one_on_2").WithDevice("/gpu:2"), {1.0f}, {}); + Output x_on_2 = + ops::Identity(s.WithOpName("x_on_2").WithDevice("/gpu:2"), x_on_1); + Output result = + ops::Add(s.WithOpName("result").WithDevice("/gpu:2"), x_on_2, one_on_2); + + GrapplerItem item; + TF_CHECK_OK(s.ToGraphDef(&item.graph)); + item.fetch = {"result"}; + DependencyOptimizer optimizer; + GraphDef output; + Status status = optimizer.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + LOG(INFO) << output.DebugString(); + EXPECT_EQ(3, output.node_size()); + for (const auto& node : output.node()) { + EXPECT_NE("x_on_2", node.name()); + if (node.name() == "result") { + EXPECT_EQ("x_on_1", node.input(0)); + } + } +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 1b73c6eb7d45c8276ccacdef3ff6e44b76ebc5e5 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 12 Mar 2018 09:16:08 -0700 Subject: [PATCH 729/884] Don't let the grappler item builder fail if the graph contains unknown custom ops. 
PiperOrigin-RevId: 188730560 --- tensorflow/core/framework/graph_def_util.cc | 14 +++++++++++-- tensorflow/core/framework/graph_def_util.h | 6 ++++++ .../core/grappler/grappler_item_builder.cc | 4 ++-- .../grappler/grappler_item_builder_test.cc | 21 +++++++++++++++++++ 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/tensorflow/core/framework/graph_def_util.cc b/tensorflow/core/framework/graph_def_util.cc index 1f670535d5..896cb3cd7f 100644 --- a/tensorflow/core/framework/graph_def_util.cc +++ b/tensorflow/core/framework/graph_def_util.cc @@ -53,6 +53,12 @@ Status ValidateExternalGraphDefSyntax(const GraphDef& graph_def) { Status AddDefaultAttrsToGraphDef(GraphDef* graph_def, const OpRegistryInterface& op_registry, int node_offset) { + return AddDefaultAttrsToGraphDef(graph_def, op_registry, node_offset, false); +} + +Status AddDefaultAttrsToGraphDef(GraphDef* graph_def, + const OpRegistryInterface& op_registry, + int node_offset, bool skip_unknown_ops) { if (node_offset > graph_def->node_size()) { return errors::InvalidArgument( "Tried to add default attrs to GraphDef " @@ -63,8 +69,12 @@ Status AddDefaultAttrsToGraphDef(GraphDef* graph_def, for (int i = node_offset; i < graph_def->node_size(); ++i) { NodeDef* node_def = graph_def->mutable_node(i); const OpDef* op_def; - TF_RETURN_IF_ERROR(op_registry.LookUpOpDef(node_def->op(), &op_def)); - AddDefaultsToNodeDef(*op_def, node_def); + Status s = op_registry.LookUpOpDef(node_def->op(), &op_def); + if (s.ok()) { + AddDefaultsToNodeDef(*op_def, node_def); + } else if (!skip_unknown_ops) { + return s; + } } return Status::OK(); diff --git a/tensorflow/core/framework/graph_def_util.h b/tensorflow/core/framework/graph_def_util.h index 0c6542a9f2..525e84a989 100644 --- a/tensorflow/core/framework/graph_def_util.h +++ b/tensorflow/core/framework/graph_def_util.h @@ -56,6 +56,12 @@ Status AddDefaultAttrsToGraphDef(GraphDef* graph_def, const OpRegistryInterface& op_registry, int node_offset); +// Same as above, 
except for the fact that it skips nodes that aren't found in +// op_registry if skip_unknown_ops is true. +Status AddDefaultAttrsToGraphDef(GraphDef* graph_def, + const OpRegistryInterface& op_registry, + int node_offset, bool skip_unknown_ops); + // Remove attrs from 'graph_def' that have the default value according // to 'producer_op_registry', but don't exist according to // 'consumer_op_registry'. This can allow 'graph_def' to run on the diff --git a/tensorflow/core/grappler/grappler_item_builder.cc b/tensorflow/core/grappler/grappler_item_builder.cc index 33ad426bbf..04c7dae30b 100644 --- a/tensorflow/core/grappler/grappler_item_builder.cc +++ b/tensorflow/core/grappler/grappler_item_builder.cc @@ -138,7 +138,7 @@ Status OptimizeGraph(const GraphDef& graph_def_arg, GraphDef* output_graph_def, // The default values of attributes might have been stripped by the optimizer. // Add them back. return AddDefaultAttrsToGraphDef(output_graph_def, *graphptr->op_registry(), - 0); + 0, true); } // Applies the same graph pruning logic to the graph as Session.Run in TF. 
@@ -514,7 +514,7 @@ std::unique_ptr GrapplerItemFromMetaGraphDef( &new_item->graph, FunctionLibraryDefinition(OpRegistry::Global(), new_item->graph.library()), - 0); + 0, true); if (!attr_status.ok()) { LOG(ERROR) << "Failed to instantiate default attribute values: " << attr_status.error_message(); diff --git a/tensorflow/core/grappler/grappler_item_builder_test.cc b/tensorflow/core/grappler/grappler_item_builder_test.cc index 78cbff6c90..ada90925a4 100644 --- a/tensorflow/core/grappler/grappler_item_builder_test.cc +++ b/tensorflow/core/grappler/grappler_item_builder_test.cc @@ -280,6 +280,27 @@ TEST_F(GrapplerItemBuilderTest, GraphWithFunctions) { ASSERT_TRUE(item != nullptr); } +TEST_F(GrapplerItemBuilderTest, GraphWithCustomOps) { + MetaGraphDef meta_graph; + // y = CustomOp(x) + constexpr char device[] = "/cpu:0"; + *meta_graph.mutable_graph_def() = test::function::GDef( + {test::function::NDef("x", "Const", {}, {{"dtype", DT_FLOAT}}, device), + test::function::NDef("y", "CustomOp", {"x"}, {{"T", DT_FLOAT}}, device)}, + {}); + + CollectionDef train_op; + train_op.mutable_node_list()->add_value("y"); + (*meta_graph.mutable_collection_def())["train_op"] = train_op; + + ItemConfig cfg; + cfg.inline_functions = false; + + std::unique_ptr item = + GrapplerItemFromMetaGraphDef("0", meta_graph, cfg); + ASSERT_TRUE(item != nullptr); +} + TEST_F(GrapplerItemBuilderTest, FromGraphWithSignatureDef) { tensorflow::Scope s = tensorflow::Scope::NewRootScope(); auto x = ops::Const(s.WithOpName("x"), 0); -- GitLab From e2020e64360a4f9beeb48f388fb74ab1c4b1f847 Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 12 Mar 2018 09:28:18 -0700 Subject: [PATCH 730/884] Plug a few more PyObject leaks, test for them. 
PiperOrigin-RevId: 188731961 --- tensorflow/python/eager/pywrap_tfe_src.cc | 57 +++++++++++------------ tensorflow/python/layers/core_test.py | 23 +++++++++ 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index 7b674807f5..fcb0452a14 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -184,11 +184,11 @@ bool SetOpAttrList( const int num_values = PySequence_Size(py_list); if (attr_list_sizes != nullptr) (*attr_list_sizes)[key] = num_values; -#define PARSE_LIST(c_type, parse_fn) \ - std::unique_ptr values(new c_type[num_values]); \ - for (int i = 0; i < num_values; ++i) { \ - auto py_value = PySequence_ITEM(py_list, i); \ - if (!parse_fn(key, py_value, status, &values[i])) return false; \ +#define PARSE_LIST(c_type, parse_fn) \ + std::unique_ptr values(new c_type[num_values]); \ + for (int i = 0; i < num_values; ++i) { \ + tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); \ + if (!parse_fn(key, py_value.get(), status, &values[i])) return false; \ } if (type == TF_ATTR_STRING) { @@ -213,9 +213,9 @@ bool SetOpAttrList( // dims across all the input lists. 
int total_dims = 0; for (int i = 0; i < num_values; ++i) { - auto py_value = PySequence_ITEM(py_list, i); - if (py_value != Py_None) { - if (!PySequence_Check(py_value)) { + tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); + if (py_value.get() != Py_None) { + if (!PySequence_Check(py_value.get())) { TF_SetStatus( status, TF_INVALID_ARGUMENT, tensorflow::strings::StrCat( @@ -224,7 +224,7 @@ bool SetOpAttrList( .c_str()); return false; } - const auto size = TensorShapeNumDims(py_value); + const auto size = TensorShapeNumDims(py_value.get()); if (size >= 0) { total_dims += size; } @@ -238,12 +238,12 @@ bool SetOpAttrList( std::unique_ptr num_dims(new int[num_values]); int64_t* offset = buffer.get(); for (int i = 0; i < num_values; ++i) { - auto py_value = PySequence_ITEM(py_list, i); - if (py_value == Py_None) { + tensorflow::Safe_PyObjectPtr py_value(PySequence_ITEM(py_list, i)); + if (py_value.get() == Py_None) { dims[i] = nullptr; num_dims[i] = -1; } else { - const auto size = TensorShapeNumDims(py_value); + const auto size = TensorShapeNumDims(py_value.get()); if (size == -1) { dims[i] = nullptr; num_dims[i] = -1; @@ -252,10 +252,11 @@ bool SetOpAttrList( dims[i] = offset; num_dims[i] = size; for (int j = 0; j < size; ++j) { - auto inner_py_value = PySequence_ITEM(py_value, j); - if (inner_py_value == Py_None) { + tensorflow::Safe_PyObjectPtr inner_py_value( + PySequence_ITEM(py_value.get(), j)); + if (inner_py_value.get() == Py_None) { *offset = -1; - } else if (!ParseDimensionValue(key, inner_py_value, status, + } else if (!ParseDimensionValue(key, inner_py_value.get(), status, offset)) { return false; } @@ -428,14 +429,14 @@ bool SetOpAttrScalar( } std::unique_ptr dims(new int64_t[num_dims]); for (int i = 0; i < num_dims; ++i) { - auto inner_py_value = PySequence_ITEM(py_value, i); - if (inner_py_value == Py_None) { + tensorflow::Safe_PyObjectPtr inner_py_value( + PySequence_ITEM(py_value, i)); + if (inner_py_value.get() == Py_None) { dims[i] 
= -1; - } else if (!ParseDimensionValue(key, inner_py_value, status, + } else if (!ParseDimensionValue(key, inner_py_value.get(), status, &dims[i])) { return false; } - Py_DECREF(inner_py_value); } TFE_OpSetAttrShape(op, key, dims.get(), num_dims, status); } @@ -2033,13 +2034,13 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { return nullptr; } - PyObject* flat_result = PyList_New(num_retvals); + tensorflow::Safe_PyObjectPtr flat_result(PyList_New(num_retvals)); for (int i = 0; i < num_retvals; ++i) { - PyList_SET_ITEM(flat_result, i, EagerTensorFromHandle(retvals[i])); + PyList_SET_ITEM(flat_result.get(), i, EagerTensorFromHandle(retvals[i])); } if (!RunCallbacks(op_exec_info, args, *flattened_inputs, *flattened_attrs, - flat_result)) { + flat_result.get())) { return nullptr; } @@ -2051,11 +2052,10 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { if (op_def->output_arg_size() == 1) { if (!op_def->output_arg(0).number_attr().empty() || !op_def->output_arg(0).type_list_attr().empty()) { - return flat_result; + return flat_result.release(); } else { - auto* result = PyList_GET_ITEM(flat_result, 0); + auto* result = PyList_GET_ITEM(flat_result.get(), 0); Py_INCREF(result); - Py_DECREF(flat_result); return result; } } @@ -2068,7 +2068,7 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { int list_length = attr_list_sizes[op_def->output_arg(i).number_attr()]; PyObject* inner_list = PyList_New(list_length); for (int j = 0; j < list_length; j++) { - PyObject* obj = PyList_GET_ITEM(flat_result, flat_result_index++); + PyObject* obj = PyList_GET_ITEM(flat_result.get(), flat_result_index++); Py_INCREF(obj); PyList_SET_ITEM(inner_list, j, obj); } @@ -2077,18 +2077,17 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) { int list_length = attr_list_sizes[op_def->output_arg(i).type_list_attr()]; PyObject* inner_list = PyList_New(list_length); for (int j = 0; j < list_length; j++) { - PyObject* obj = 
PyList_GET_ITEM(flat_result, flat_result_index++); + PyObject* obj = PyList_GET_ITEM(flat_result.get(), flat_result_index++); Py_INCREF(obj); PyList_SET_ITEM(inner_list, j, obj); } PyList_SET_ITEM(result, i, inner_list); } else { - PyObject* obj = PyList_GET_ITEM(flat_result, flat_result_index++); + PyObject* obj = PyList_GET_ITEM(flat_result.get(), flat_result_index++); Py_INCREF(obj); PyList_SET_ITEM(result, i, obj); } } - Py_DECREF(flat_result); return result; } diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py index 09287e4906..7d74046caf 100644 --- a/tensorflow/python/layers/core_test.py +++ b/tensorflow/python/layers/core_test.py @@ -19,6 +19,7 @@ from __future__ import division from __future__ import print_function import collections +import gc import numpy as np @@ -83,6 +84,28 @@ class DenseTest(test.TestCase): self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') self.assertEqual(dense.bias.name, 'my_dense/bias:0') + def testNoEagerLeak(self): + # Tests that repeatedly constructing and building a Layer does not leak + # Python objects. + def _test_fn(): + inputs = random_ops.random_uniform((5, 4), seed=1) + core_layers.Dense(5)(inputs) + core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')(inputs) + + with context.eager_mode(): + _test_fn() # warmup + gc.disable() + gc.collect() + object_count = len(gc.get_objects()) + for _ in range(100): + _test_fn() + gc.collect() + self.assertLessEqual( + len(gc.get_objects()), + # DEBUG_SAVEALL messes with this slightly. + object_count + 1) + gc.enable() + @test_util.run_in_graph_and_eager_modes() def testCallTensorDot(self): dense = core_layers.Dense(2, activation=nn_ops.relu, name='my_dense') -- GitLab From 739d5ce952b5e907489eacfd08f3631962ef7b2d Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 10:12:22 -0700 Subject: [PATCH 731/884] Supporting quantization of Gather ops and removal of trivial Relu1s when quantized. 
PiperOrigin-RevId: 188738133 --- .../graph_transformations/hardcode_min_max.cc | 2 + .../toco/graph_transformations/quantize.cc | 12 ++-- ...emove_trivial_quantized_activation_func.cc | 64 ++++++++++++------- 3 files changed, 51 insertions(+), 27 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc index 48a67cabec..5cc82da5d5 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/hardcode_min_max.cc @@ -330,6 +330,8 @@ bool HardcodeMinMax::Run(Model* model, std::size_t op_index) { case OperatorType::kSqueeze: case OperatorType::kTensorFlowReshape: case OperatorType::kPad: + case OperatorType::kGather: + case OperatorType::kTranspose: changed = HardcodeMinMaxFromFirstInput(model, op); break; diff --git a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc index 05686ce9a0..ad3f05274b 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/quantize.cc @@ -50,7 +50,9 @@ bool SupportsQuantization(const Operator& op) { type == OperatorType::kTanh || type == OperatorType::kMul || type == OperatorType::kSpaceToDepth || type == OperatorType::kStridedSlice || - type == OperatorType::kDepthToSpace || type == OperatorType::kLstmCell; + type == OperatorType::kDepthToSpace || + type == OperatorType::kLstmCell || type == OperatorType::kGather || + type == OperatorType::kTranspose; } template @@ -511,9 +513,11 @@ bool Quantize::Run(Model* model, std::size_t op_index) { // // Let us just guard this assumption by the following assertion: for (const auto& input : op.inputs) { - if (IsInputArray(*model, input)) { - const auto& input_array = model->GetArray(input); - CHECK(input_array.quantization_params); + const auto& input_array = 
model->GetArray(input); + if (IsInputArray(*model, input) && + input_array.data_type == ArrayDataType::kFloat) { + CHECK(input_array.quantization_params) + << "Input array " << input << " is missing quantization_params"; } } if (!SupportsQuantization(op)) { diff --git a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc index 28f76c9d36..9b65feaa64 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/remove_trivial_quantized_activation_func.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ +#include #include #include #include @@ -30,6 +31,7 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, const auto it = model->operators.begin() + op_index; auto* op = it->get(); if (op->fused_activation_function != FusedActivationFunctionType::kRelu && + op->fused_activation_function != FusedActivationFunctionType::kRelu1 && op->fused_activation_function != FusedActivationFunctionType::kRelu6) { return false; } @@ -42,33 +44,49 @@ bool RemoveTrivialQuantizedActivationFunc::Run(Model* model, } const auto& quantization_params = output_array.GetQuantizationParams(); + double clamp_min; + double clamp_max; + switch (op->fused_activation_function) { + case FusedActivationFunctionType::kRelu: + clamp_min = 0.0; + clamp_max = std::numeric_limits::infinity(); + break; + case FusedActivationFunctionType::kRelu1: + clamp_min = -1.0; + clamp_max = 1.0; + break; + case FusedActivationFunctionType::kRelu6: + clamp_min = 0.0; + clamp_max = 6.0; + break; + default: + LOG(FATAL) << "Unsupported fused activation 
type: " + << static_cast(op->fused_activation_function); + return false; + } + bool has_nontrivial_min_bound = false; bool has_nontrivial_max_bound = false; - if (op->fused_activation_function == FusedActivationFunctionType::kRelu || - op->fused_activation_function == FusedActivationFunctionType::kRelu6) { - double lowest_representable_output = - (0. - quantization_params.zero_point) * quantization_params.scale; - if (lowest_representable_output < 0.) { - has_nontrivial_min_bound = true; - AddMessageF( - "Quantized activation function is not trivial: " - "the lowest representable output value %g" - " less than the clamp min bound.", - lowest_representable_output); - } + double lowest_representable_output = + (0. - quantization_params.zero_point) * quantization_params.scale; + if (lowest_representable_output < clamp_min) { + has_nontrivial_min_bound = true; + AddMessageF( + "Quantized activation function is not trivial: " + "the lowest representable output value %g" + " less than the clamp min bound %g.", + lowest_representable_output, clamp_min); } - if (op->fused_activation_function == FusedActivationFunctionType::kRelu6) { - double highest_representable_output = - (255. - quantization_params.zero_point) * quantization_params.scale; - if (highest_representable_output > 6.) { - has_nontrivial_max_bound = true; - AddMessageF( - "Quantized activation function is not trivial: " - "the highest representable output value %g" - " is greater than the clamp max bound.", - highest_representable_output); - } + double highest_representable_output = + (255. 
- quantization_params.zero_point) * quantization_params.scale; + if (highest_representable_output > clamp_max) { + has_nontrivial_max_bound = true; + AddMessageF( + "Quantized activation function is not trivial: " + "the highest representable output value %g" + " is greater than the clamp max bound %g.", + highest_representable_output, clamp_max); } if (has_nontrivial_min_bound || has_nontrivial_max_bound) { -- GitLab From 31af33430d3edbfdecbcf121681e5a586f37ba03 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 10:24:56 -0700 Subject: [PATCH 732/884] Convert Squeeze into Reshape: Support empty output shapes. PiperOrigin-RevId: 188740288 --- .../toco/graph_transformations/convert_squeeze_to_reshape.cc | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc index e601284495..81cedb5dad 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_squeeze_to_reshape.cc @@ -57,6 +57,11 @@ bool ConvertSqueezeToReshape::Run(Model* model, std::size_t op_index) { // We use the output shape that has been calculated by shape propagation. const auto& output_shape = model->GetArray(squeeze_op->outputs[0]).shape(); + // Empty shapes will not work as empty data arrays. + if (output_shape.dimensions_count() == 0) { + return false; + } + auto* reshape_op = new TensorFlowReshapeOperator; reshape_op->inputs = { squeeze_op->inputs[0], -- GitLab From aab543c3013e3018d409ed2b8cd957f3465d1ab2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 10:34:34 -0700 Subject: [PATCH 733/884] Make default number of threads trigger the default behavior for both eigen and gemmlowp. In gemmlowp the default is '1', while in eigen it is 'number of processors'. 
PiperOrigin-RevId: 188742087 --- tensorflow/contrib/lite/interpreter.cc | 2 +- tensorflow/contrib/lite/kernels/eigen_support.cc | 5 +++-- tensorflow/contrib/lite/kernels/gemm_support.cc | 4 +++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index 831cfafeae..bbcd318efd 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -94,7 +94,7 @@ Interpreter::Interpreter(ErrorReporter* error_reporter) context_.tensors_size = 0; context_.eigen_context = nullptr; context_.gemm_context = nullptr; - context_.recommended_num_threads = 0; + context_.recommended_num_threads = -1; // Invalid to call these these except from TfLiteDelegate SetForbiddenContextFunction(&context_.GetNodeAndRegistration); diff --git a/tensorflow/contrib/lite/kernels/eigen_support.cc b/tensorflow/contrib/lite/kernels/eigen_support.cc index 1435a45672..213e465552 100644 --- a/tensorflow/contrib/lite/kernels/eigen_support.cc +++ b/tensorflow/contrib/lite/kernels/eigen_support.cc @@ -27,8 +27,9 @@ struct RefCountedEigenContext { void IncrementUsageCounter(TfLiteContext* context) { auto* ptr = reinterpret_cast(context->eigen_context); if (ptr == nullptr) { - Eigen::setNbThreads(context->recommended_num_threads); - + if (context->recommended_num_threads != -1) { + Eigen::setNbThreads(context->recommended_num_threads); + } ptr = new RefCountedEigenContext; ptr->num_references = 0; context->eigen_context = ptr; diff --git a/tensorflow/contrib/lite/kernels/gemm_support.cc b/tensorflow/contrib/lite/kernels/gemm_support.cc index df8a9c8cee..76a5165d14 100644 --- a/tensorflow/contrib/lite/kernels/gemm_support.cc +++ b/tensorflow/contrib/lite/kernels/gemm_support.cc @@ -29,7 +29,9 @@ void IncrementUsageCounter(TfLiteContext* context) { if (ptr == nullptr) { ptr = new RefCountedGemmContext; ptr->gemm_context_ = new gemmlowp::GemmContext(); - 
ptr->gemm_context_->set_max_num_threads(context->recommended_num_threads); + if (context->recommended_num_threads != -1) { + ptr->gemm_context_->set_max_num_threads(context->recommended_num_threads); + } ptr->num_references_ = 0; context->gemm_context = ptr; } -- GitLab From db636edf2d1c53239fc81a5c285b230f2f52c713 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 12 Mar 2018 10:41:15 -0700 Subject: [PATCH 734/884] Clean up BUILD file --- tensorflow/contrib/learn/BUILD | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index c4c34ba749..c7e5a7446c 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -429,7 +429,10 @@ py_test( size = "medium", srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", - tags = ["nomac", "noasan"], # b/73741358 + tags = [ + "nomac", + "noasan" + ], # b/73741358 deps = [ ":learn", "//tensorflow/python:array_ops", -- GitLab From 89177f289e9467e04b205a1a3e705ad67d9854d2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 10:37:20 -0700 Subject: [PATCH 735/884] Turn trivial Pack ops with a single input into ExpandDims ops to avoid copying the tensor. 
PiperOrigin-RevId: 188742516 --- tensorflow/core/grappler/op_types.cc | 2 + tensorflow/core/grappler/op_types.h | 1 + .../grappler/optimizers/constant_folding.cc | 70 ++++++++++++++----- .../grappler/optimizers/constant_folding.h | 2 +- .../optimizers/constant_folding_test.cc | 42 +++++++++++ 5 files changed, 97 insertions(+), 20 deletions(-) diff --git a/tensorflow/core/grappler/op_types.cc b/tensorflow/core/grappler/op_types.cc index ca56833ef6..53c177befc 100644 --- a/tensorflow/core/grappler/op_types.cc +++ b/tensorflow/core/grappler/op_types.cc @@ -217,6 +217,8 @@ bool IsNextIteration(const NodeDef& node) { return op == "NextIteration" || op == "RefNextIteration"; } +bool IsPack(const NodeDef& node) { return node.op() == "Pack"; } + bool IsPad(const NodeDef& node) { const auto& op = node.op(); return op == "Pad" || op == "PadV2"; diff --git a/tensorflow/core/grappler/op_types.h b/tensorflow/core/grappler/op_types.h index a0946ee1ad..cd5b464099 100644 --- a/tensorflow/core/grappler/op_types.h +++ b/tensorflow/core/grappler/op_types.h @@ -86,6 +86,7 @@ bool IsMod(const NodeDef& node); bool IsMul(const NodeDef& node); bool IsMatMul(const NodeDef& node); bool IsNextIteration(const NodeDef& node); +bool IsPack(const NodeDef& node); bool IsPad(const NodeDef& node); bool IsNoOp(const NodeDef& node); bool IsNotEqual(const NodeDef& node); diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 39cc4a9629..6cb0447355 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1510,7 +1510,7 @@ Status ConstantFolding::ReplaceOperationWithConstant( } Status ConstantFolding::SimplifyGraph(GraphDef* output, - const GraphProperties& properties, + GraphProperties* properties, bool use_shape_info) { const bool is_aggressive = opt_level_ == RewriterConfig::AGGRESSIVE; for (int i = 0; i < output->node_size(); ++i) { @@ -1520,7 
+1520,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, if (use_shape_info && (IsShuffle(*node) || IsReverse(*node) || IsTranspose(*node))) { const auto& shape = - properties.GetInputProperties(node->name())[0].shape(); + properties->GetInputProperties(node->name())[0].shape(); // The node is replaceable iff // unknown_rank == false && (dim_size == 0 || all dims have size 1) bool replaceable = !shape.unknown_rank(); @@ -1533,10 +1533,10 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (use_shape_info && IsSlice(*node) && - properties.GetInputProperties(node->name()).size() == 3) { - const auto& input = properties.GetInputProperties(node->name())[0]; - const auto& b = properties.GetInputProperties(node->name())[1]; - const auto& s = properties.GetInputProperties(node->name())[2]; + properties->GetInputProperties(node->name()).size() == 3) { + const auto& input = properties->GetInputProperties(node->name())[0]; + const auto& b = properties->GetInputProperties(node->name())[1]; + const auto& s = properties->GetInputProperties(node->name())[2]; if (TensorShape::IsValid(b.shape()) && b.has_value() && TensorShape::IsValid(s.shape()) && s.has_value()) { Tensor begin(b.dtype(), b.shape()); @@ -1574,8 +1574,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (IsTile(*node) && - properties.GetInputProperties(node->name()).size() == 2) { - const auto& m = properties.GetInputProperties(node->name())[1]; + properties->GetInputProperties(node->name()).size() == 2) { + const auto& m = properties->GetInputProperties(node->name())[1]; if (TensorShape::IsValid(m.shape()) && m.has_value()) { Tensor multiplies(m.dtype(), m.shape()); if (!multiplies.FromProto(m.value())) { @@ -1602,8 +1602,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (IsPad(*node) && - properties.GetInputProperties(node->name()).size() >= 2) { - const auto& p = properties.GetInputProperties(node->name())[1]; + 
properties->GetInputProperties(node->name()).size() >= 2) { + const auto& p = properties->GetInputProperties(node->name())[1]; if (TensorShape::IsValid(p.shape()) && p.has_value()) { Tensor paddings(p.dtype(), p.shape()); if (!paddings.FromProto(p.value())) { @@ -1625,12 +1625,12 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (use_shape_info && IsSqueeze(*node) && - !properties.GetInputProperties(node->name()).empty()) { + !properties->GetInputProperties(node->name()).empty()) { // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's // error to squeeze a dimension that is not 1, so we only need to check // whether the input has > 1 size for each dimension. const auto& shape = - properties.GetInputProperties(node->name())[0].shape(); + properties->GetInputProperties(node->name())[0].shape(); // The node is replaceable iff // unknown_rank == false && (dim_size == 0 || all dims have size > 1) bool replaceable = !shape.unknown_rank(); @@ -1642,6 +1642,38 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } } + if (IsPack(*node) && NumNonControlInputs(*node) == 1 && + !OptimizedNodeExists(*node, "_const_axis")) { + // Create constant axis node. + Tensor axis_t(DT_INT32, TensorShape({})); + NodeDef* axis_node = output->add_node(); + axis_node->set_name(OptimizedNodeName(*node, "_const_axis")); + const int axis = node->attr().at("axis").i(); + if (!SetTensorValue(DT_INT32, axis, &axis_t).ok() || + !CreateNodeDef(axis_node->name(), TensorValue(&axis_t), axis_node) + .ok()) { + continue; + } + VLOG(1) << "*** Rewriting trivial Pack node: " << node->DebugString(); + // Add a control dependency to make sure axis_node is in the right frame. 
+ const string ctrl_dep = ConstantFolding::AddControlDependency( + node->input(0), graph_, node_map_.get()); + axis_node->add_input(ctrl_dep); + axis_node->set_device(node->device()); + node->set_op("ExpandDims"); + if (node->attr().count("axis") != 0) { + node->mutable_attr()->erase("axis"); + } + if (node->attr().count("N") != 0) { + node->mutable_attr()->erase("N"); + } + (*node->mutable_attr())["Tdim"].set_type(DT_INT32); + node->add_input(axis_node->name()); + if (node->input_size() > 2) { + node->mutable_input()->SwapElements(1, node->input_size() - 1); + } + } + // Switch(x, x) will always feed false to its false branch and true to // its true branch. By rewriting the graph a bit, we can propagate these // constants down the two output branches, and just use control dependencies @@ -1759,7 +1791,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, graph_modified_ = true; continue; } - if (use_shape_info && IsSimplifiableReshape(*node, properties)) { + if (use_shape_info && IsSimplifiableReshape(*node, *properties)) { DataType output_type = node->attr().at("T").type(); node->set_op("Identity"); node->clear_attr(); @@ -1777,8 +1809,8 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, // Simplify arithmetic operations with ones or zeros. 
if (use_shape_info && (is_mul || is_matmul || is_add || is_sub || is_any_div) && - properties.HasInputProperties(node->name()) && - properties.HasOutputProperties(node->name())) { + properties->HasInputProperties(node->name()) && + properties->HasOutputProperties(node->name())) { const NodeDef* x = node_map_->GetNode(node->input(0)); const NodeDef* y = node_map_->GetNode(node->input(1)); if (x == nullptr || y == nullptr) { @@ -1786,12 +1818,12 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, node->DebugString()); } const TensorShapeProto& output_shape = - properties.GetOutputProperties(node->name())[0].shape(); + properties->GetOutputProperties(node->name())[0].shape(); // Simplify element-wise multiplication by ones or addition/subtraction // of zeros. const TensorShapeProto& y_shape = - properties.GetInputProperties(node->name())[1].shape(); + properties->GetInputProperties(node->name())[1].shape(); const bool x_is_zero = IsZeros(*x); const bool x_is_one = IsOnes(*x); const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape); @@ -1818,7 +1850,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } const TensorShapeProto& x_shape = - properties.GetInputProperties(node->name())[0].shape(); + properties->GetInputProperties(node->name())[0].shape(); const bool y_is_zero = IsZeros(*y); const bool y_is_one = IsOnes(*y); const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape); @@ -2139,7 +2171,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster, } TF_RETURN_IF_ERROR(FoldGraph(output)); node_map_.reset(new NodeMap(output)); - TF_RETURN_IF_ERROR(SimplifyGraph(output, properties, can_use_shape_info)); + TF_RETURN_IF_ERROR(SimplifyGraph(output, &properties, can_use_shape_info)); return Status::OK(); } diff --git a/tensorflow/core/grappler/optimizers/constant_folding.h b/tensorflow/core/grappler/optimizers/constant_folding.h index 2fd59c7f9c..13ecfcd281 100644 --- 
a/tensorflow/core/grappler/optimizers/constant_folding.h +++ b/tensorflow/core/grappler/optimizers/constant_folding.h @@ -92,7 +92,7 @@ class ConstantFolding : public GraphOptimizer { bool IsSimplifiableReduction(const NodeDef& node) const; bool IsSimplifiableReshape(const NodeDef& node, const GraphProperties& properties) const; - Status SimplifyGraph(GraphDef* output, const GraphProperties& properties, + Status SimplifyGraph(GraphDef* output, GraphProperties* properties, bool use_shape_info); Status RunOptimizationPass(Cluster* cluster, const GrapplerItem& item, diff --git a/tensorflow/core/grappler/optimizers/constant_folding_test.cc b/tensorflow/core/grappler/optimizers/constant_folding_test.cc index f421a59989..724fb84f3e 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding_test.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding_test.cc @@ -1930,6 +1930,48 @@ TEST_F(ConstantFoldingTest, IdenticalN) { EXPECT_EQ("^id_n", output.node(7).input(2)); } +TEST_F(ConstantFoldingTest, TrivialPack) { + tensorflow::Scope scope = tensorflow::Scope::NewRootScope(); + Output x = + ops::RandomNormal(scope.WithOpName("x"), {2, 2}, DataType::DT_FLOAT); + Output y = ops::Const(scope.WithOpName("y"), {2.0f}, {}); + auto stack = + ops::Stack(scope.WithOpName("stack").WithControlDependencies({y}), {x}, + ops::Stack::Axis(1)); + + GrapplerItem item; + TF_CHECK_OK(scope.ToGraphDef(&item.graph)); + item.fetch.push_back("stack"); + + ConstantFolding fold(nullptr /* cpu_device */); + GraphDef output; + Status status = fold.Optimize(nullptr, item, &output); + TF_EXPECT_OK(status); + LOG(INFO) << output.DebugString(); + EXPECT_EQ(5, output.node_size()); + for (const auto& node : output.node()) { + if (node.name() == "stack") { + EXPECT_EQ("stack", node.name()); + EXPECT_EQ("ExpandDims", node.op()); + EXPECT_EQ(3, node.input_size()); + EXPECT_EQ("x", node.input(0)); + EXPECT_EQ("ConstantFolding/stack_const_axis", node.input(1)); + EXPECT_EQ("^y", node.input(2)); + } 
else if (node.name() == "ConstantFolding/stack_const_axis") { + EXPECT_EQ("Const", node.op()); + EXPECT_EQ(1, node.input_size()); + EXPECT_EQ("^x", node.input(0)); + } + } + + std::vector fetch = {"stack"}; + auto tensors_expected = EvaluateNodes(item.graph, fetch); + auto tensors = EvaluateNodes(output, fetch); + EXPECT_EQ(1, tensors_expected.size()); + EXPECT_EQ(1, tensors.size()); + EXPECT_EQ(tensors_expected[0].shape(), tensors[0].shape()); +} + } // namespace } // namespace grappler } // namespace tensorflow -- GitLab From 8a9a725bd18e326ed6c02130fa7675acc499137a Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 12 Mar 2018 10:52:35 -0700 Subject: [PATCH 736/884] More clean-up --- tensorflow/contrib/learn/BUILD | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index c7e5a7446c..04f3f9d2cd 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -431,8 +431,8 @@ py_test( srcs_version = "PY2AND3", tags = [ "nomac", - "noasan" - ], # b/73741358 + "noasan" # b/73741358 + ], deps = [ ":learn", "//tensorflow/python:array_ops", -- GitLab From 103a3101dfb3d0747fd74a416cc901ce951cbfd9 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 12 Mar 2018 11:05:45 -0700 Subject: [PATCH 737/884] Fix do_check_load_py_test --- tensorflow/contrib/learn/BUILD | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 04f3f9d2cd..44da18b181 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -5,6 +5,8 @@ licenses(["notice"]) # Apache 2.0 exports_files(["LICENSE"]) +load("//tensorflow:tensorflow.bzl", "py_test") + package(default_visibility = [ "//engedu/ml/tf_from_scratch:__pkg__", "//tensorflow:internal", @@ -431,7 +433,7 @@ py_test( srcs_version = "PY2AND3", tags = [ "nomac", - "noasan" # b/73741358 + "noasan", # b/73741358 ], deps = [ ":learn", -- GitLab 
From 1d6a57edc0be0dcc0c92eb2610b88420a7b7be51 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 12 Mar 2018 11:02:29 -0700 Subject: [PATCH 738/884] Fix race in C API. RecordMutation could race with ExtendSessionGraphHelper, which would release the graph lock and only keep the session lock when extending the session. Also makes sure thread annotations are on declarations, not definitions (otherwise they have no effect). PiperOrigin-RevId: 188747158 --- tensorflow/c/c_api.cc | 38 +++++++++++++++-------------------- tensorflow/c/c_api_internal.h | 12 ++++++----- tensorflow/c/python_api.cc | 3 +-- 3 files changed, 24 insertions(+), 29 deletions(-) diff --git a/tensorflow/c/c_api.cc b/tensorflow/c/c_api.cc index 8b9b3da21c..778cb667e2 100644 --- a/tensorflow/c/c_api.cc +++ b/tensorflow/c/c_api.cc @@ -63,6 +63,7 @@ limitations under the License. // brain namespace because we are defining 'extern "C"' functions. using tensorflow::AllocationDescription; using tensorflow::DataType; +using tensorflow::ExtendSessionGraphHelper; using tensorflow::Graph; using tensorflow::GraphDef; using tensorflow::mutex_lock; @@ -640,11 +641,11 @@ Status MessageToBuffer(const tensorflow::protobuf::Message& in, } void RecordMutation(TF_Graph* graph, const TF_Operation& op, - const char* mutation_type) - EXCLUSIVE_LOCKS_REQUIRED(graph->mu) { + const char* mutation_type) { // If any session has already run this node_id, mark this session as // unrunnable. for (auto it : graph->sessions) { + mutex_lock session_lock(it.first->mu); if (it.first->last_num_graph_nodes > op.node.id()) { it.second = FailedPrecondition( "Operation '", op.node.DebugString(), "' was changed by ", @@ -713,10 +714,12 @@ Status LoadLibrary(const char* library_filename, void** result, // TODO(josh11b,mrry): Change Session to be able to use a Graph* // directly, instead of requiring us to serialize to a GraphDef and // call Session::Extend(). 
-bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) - EXCLUSIVE_LOCKS_REQUIRED(session->mu) { +bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) { if (session->graph != nullptr) { + // Take the graph lock before the session lock to avoid deadlock. This is + // safe since session->graph does not change. session->graph->mu.lock(); + mutex_lock session_lock(session->mu); const Graph& graph = session->graph->graph; status->status = session->graph->sessions[session]; @@ -2571,12 +2574,9 @@ void TF_SessionRun(TF_Session* session, const TF_Buffer* run_options, // TODO(josh11b,mrry): Change Session to be able to use a Graph* // directly, instead of requiring us to serialize to a GraphDef and // call Session::Extend(). - { - mutex_lock l(session->mu); - if (session->extend_before_run && - !tensorflow::ExtendSessionGraphHelper(session, status)) { - return; - } + if (session->extend_before_run && + !ExtendSessionGraphHelper(session, status)) { + return; } TF_Run_Setup(noutputs, output_values, status); @@ -2612,12 +2612,9 @@ void TF_SessionPRunSetup(TF_Session* session, const TF_Output* inputs, const char** handle, TF_Status* status) { *handle = nullptr; - { - mutex_lock l(session->mu); - if (session->extend_before_run && - !tensorflow::ExtendSessionGraphHelper(session, status)) { - return; - } + if (session->extend_before_run && + !ExtendSessionGraphHelper(session, status)) { + return; } std::vector input_names(ninputs); @@ -2659,12 +2656,9 @@ void TF_SessionPRun(TF_Session* session, const char* handle, // TODO(josh11b,mrry): Change Session to be able to use a Graph* // directly, instead of requiring us to serialize to a GraphDef and // call Session::Extend(). 
- { - mutex_lock l(session->mu); - if (session->extend_before_run && - !tensorflow::ExtendSessionGraphHelper(session, status)) { - return; - } + if (session->extend_before_run && + !ExtendSessionGraphHelper(session, status)) { + return; } TF_Run_Setup(noutputs, output_values, status); diff --git a/tensorflow/c/c_api_internal.h b/tensorflow/c/c_api_internal.h index 25233931de..e885a69927 100644 --- a/tensorflow/c/c_api_internal.h +++ b/tensorflow/c/c_api_internal.h @@ -124,16 +124,16 @@ struct TF_Session { TF_Session(tensorflow::Session* s, TF_Graph* g); tensorflow::Session* session; - TF_Graph* graph; + TF_Graph* const graph; - tensorflow::mutex mu; + tensorflow::mutex mu ACQUIRED_AFTER(TF_Graph::mu); int last_num_graph_nodes; // If true, TF_SessionRun and similar methods will call // ExtendSessionGraphHelper before running the graph (this is the default // public behavior). Can be set to false if the caller needs to call // ExtendSessionGraphHelper manually. - bool extend_before_run GUARDED_BY(mu); + std::atomic extend_before_run; }; struct TF_ImportGraphDefOptions { @@ -211,9 +211,11 @@ void TF_GraphSetOutputHandleShapesAndTypes(TF_Graph* graph, TF_Output output, TF_Status* status); void RecordMutation(TF_Graph* graph, const TF_Operation& op, - const char* mutation_type); + const char* mutation_type) + EXCLUSIVE_LOCKS_REQUIRED(graph->mu); -bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status); +bool ExtendSessionGraphHelper(TF_Session* session, TF_Status* status) + LOCKS_EXCLUDED(session->graph->mu, session->mu); } // end namespace tensorflow diff --git a/tensorflow/c/python_api.cc b/tensorflow/c/python_api.cc index 26683f50ec..cd604538f1 100644 --- a/tensorflow/c/python_api.cc +++ b/tensorflow/c/python_api.cc @@ -105,9 +105,8 @@ void SetRequireShapeInferenceFns(TF_Graph* graph, bool require) { } void ExtendSession(TF_Session* session, TF_Status* status) { - mutex_lock l(session->mu); - session->extend_before_run = false; 
ExtendSessionGraphHelper(session, status); + session->extend_before_run = false; } } // namespace tensorflow -- GitLab From 62fa49ff5dbab9df83362112e17c04f857c72f44 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 12 Mar 2018 11:04:59 -0700 Subject: [PATCH 739/884] Avoid capturing unused variables in lambda functions PiperOrigin-RevId: 188747641 --- tensorflow/cc/framework/while_gradients.cc | 6 +++--- tensorflow/contrib/image/kernels/segmentation_ops.cc | 4 ++-- tensorflow/core/common_runtime/memory_types.cc | 4 ++-- tensorflow/core/distributed_runtime/graph_mgr.cc | 2 +- tensorflow/core/distributed_runtime/worker.cc | 4 ++-- tensorflow/core/kernels/data/iterator_ops.cc | 2 +- tensorflow/core/kernels/mutex_ops.cc | 12 ++++++------ tensorflow/core/kernels/resource_variable_ops.cc | 2 +- tensorflow/core/kernels/sparse_cross_op.cc | 2 +- tensorflow/core/kernels/split_v_op.cc | 8 ++++---- 10 files changed, 23 insertions(+), 23 deletions(-) diff --git a/tensorflow/cc/framework/while_gradients.cc b/tensorflow/cc/framework/while_gradients.cc index 0734075fc6..81870a0efa 100644 --- a/tensorflow/cc/framework/while_gradients.cc +++ b/tensorflow/cc/framework/while_gradients.cc @@ -72,9 +72,9 @@ Status AddForwardLoopCounter(WhileContext* while_ctx, const Scope& scope, }; // Body function that adds one to input. 
- BodyGraphBuilderFn body_fn = [while_ctx](const Scope& scope, - const std::vector& inputs, - std::vector* outputs) { + BodyGraphBuilderFn body_fn = [](const Scope& scope, + const std::vector& inputs, + std::vector* outputs) { DCHECK_EQ(inputs.size(), 1); outputs->emplace_back(ops::Add(scope, inputs[0], 1)); return scope.status(); diff --git a/tensorflow/contrib/image/kernels/segmentation_ops.cc b/tensorflow/contrib/image/kernels/segmentation_ops.cc index fe8bf6e21c..9372289623 100644 --- a/tensorflow/contrib/image/kernels/segmentation_ops.cc +++ b/tensorflow/contrib/image/kernels/segmentation_ops.cc @@ -101,8 +101,8 @@ struct ImageConnectedComponentsFunctor { int cost = (union_find.block_height() + union_find.block_width()) * 20; Shard(worker_threads->num_threads, worker_threads->workers, num_images * num_blocks_vertically * num_blocks_horizontally, cost, - [&union_find, num_images, num_blocks_vertically, - num_blocks_horizontally](int64 start_block, int64 limit_block) { + [&union_find, num_blocks_vertically, num_blocks_horizontally]( + int64 start_block, int64 limit_block) { for (int64 i = start_block; i < limit_block; i++) { int64 block_x = i % num_blocks_horizontally; int64 block_y = diff --git a/tensorflow/core/common_runtime/memory_types.cc b/tensorflow/core/common_runtime/memory_types.cc index 090a16ebeb..116750fbfd 100644 --- a/tensorflow/core/common_runtime/memory_types.cc +++ b/tensorflow/core/common_runtime/memory_types.cc @@ -92,7 +92,7 @@ static Status ProcessMemoryTypes( Status ValidateMemoryTypes(const DeviceType& device_type, const Graph* g) { return ProcessMemoryTypes( - device_type, g, [g](const Edge* e, MemoryType sm, MemoryType dm) { + device_type, g, [](const Edge* e, MemoryType sm, MemoryType dm) { if (sm == dm) { return Status::OK(); } @@ -155,7 +155,7 @@ Status EnsureMemoryTypes(const DeviceType& device_type, }; std::vector edges; TF_RETURN_IF_ERROR(ProcessMemoryTypes( - device_type, g, [g, &edges](const Edge* e, MemoryType sm, MemoryType 
dm) { + device_type, g, [&edges](const Edge* e, MemoryType sm, MemoryType dm) { if (sm == dm) { return Status::OK(); } diff --git a/tensorflow/core/distributed_runtime/graph_mgr.cc b/tensorflow/core/distributed_runtime/graph_mgr.cc index 9768a244f2..8447c55bf4 100644 --- a/tensorflow/core/distributed_runtime/graph_mgr.cc +++ b/tensorflow/core/distributed_runtime/graph_mgr.cc @@ -438,7 +438,7 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id, StartParallelExecutors(handle, step_id, item, rendezvous, collector, cost_graph, cancellation_manager, - [this, item, rendezvous, done](const Status& s) { + [item, rendezvous, done](const Status& s) { done(s); rendezvous->Unref(); item->Unref(); diff --git a/tensorflow/core/distributed_runtime/worker.cc b/tensorflow/core/distributed_runtime/worker.cc index 6345549367..598652fb98 100644 --- a/tensorflow/core/distributed_runtime/worker.cc +++ b/tensorflow/core/distributed_runtime/worker.cc @@ -215,7 +215,7 @@ void Worker::DoPartialRunGraph(CallOptions* opts, GraphMgr::NamedTensors in; GraphMgr::NamedTensors* out = new GraphMgr::NamedTensors; Status s = PrepareRunGraph(request, &in, out); - auto finish = [this, done, out, opts](const Status& s) { + auto finish = [done, out, opts](const Status& s) { opts->ClearCancelCallback(); delete out; done(s); @@ -247,7 +247,7 @@ void Worker::DoPartialRunGraph(CallOptions* opts, session->graph_mgr->ExecuteAsync( graph_handle, step_id, session.get(), request->exec_opts(), nullptr /* collector */, nullptr /* response */, cm, in, - [this, token, step_id, session, cm](Status s) { + [this, token, step_id, session](Status s) { { mutex_lock l(mu_); cancellation_manager_->DeregisterCallback(token); diff --git a/tensorflow/core/kernels/data/iterator_ops.cc b/tensorflow/core/kernels/data/iterator_ops.cc index 6fe3746a73..780f927a4f 100644 --- a/tensorflow/core/kernels/data/iterator_ops.cc +++ b/tensorflow/core/kernels/data/iterator_ops.cc @@ -867,7 +867,7 @@ class 
IteratorGetNextOp : public AsyncOpKernel { // inter-op thread pool thread, so we issue the call from the // owned thread pool. thread_pool_->Schedule(std::bind( - [this, ctx, iterator](DoneCallback done) { + [ctx, iterator](DoneCallback done) { std::vector components; bool end_of_sequence = false; diff --git a/tensorflow/core/kernels/mutex_ops.cc b/tensorflow/core/kernels/mutex_ops.cc index b02a584d73..ddb7a606c1 100644 --- a/tensorflow/core/kernels/mutex_ops.cc +++ b/tensorflow/core/kernels/mutex_ops.cc @@ -127,7 +127,7 @@ class Mutex : public ResourceBase { } } thread_pool_->Schedule(std::bind( - [this, c, cm, cancelled, + [this, cm, cancelled, token](std::function fn_) { bool local_locked; @@ -173,7 +173,7 @@ class MutexLockOp : public AsyncOpKernel { OP_REQUIRES_OK_ASYNC( c, LookupOrCreateResource(c, HandleFromInput(c, 0), &mutex, - [this, c](Mutex** ptr) { + [c](Mutex** ptr) { *ptr = new Mutex( c, HandleFromInput(c, 0).name()); return Status::OK(); @@ -186,10 +186,10 @@ class MutexLockOp : public AsyncOpKernel { mutex->AcquireAsync( c, std::bind( - [this, c, variant, mutex](DoneCallback done_, - // End of bound arguments. - const Status& s, - Mutex::SharedLockReleaser&& lock) { + [c, variant, mutex](DoneCallback done_, + // End of bound arguments. + const Status& s, + Mutex::SharedLockReleaser&& lock) { VLOG(2) << "Finished locking mutex " << mutex << " with lock: " << lock.shared_lock.get() << " status: " << s.ToString(); diff --git a/tensorflow/core/kernels/resource_variable_ops.cc b/tensorflow/core/kernels/resource_variable_ops.cc index f254036ba7..aecad0185f 100644 --- a/tensorflow/core/kernels/resource_variable_ops.cc +++ b/tensorflow/core/kernels/resource_variable_ops.cc @@ -351,7 +351,7 @@ class AssignVariableOp : public OpKernel { Var* variable = nullptr; OP_REQUIRES_OK(context, LookupOrCreateResource( context, HandleFromInput(context, 0), &variable, - [this, context](Var** ptr) { + [](Var** ptr) { // Created on host. 
*ptr = new Var(DT_VARIANT); return Status::OK(); diff --git a/tensorflow/core/kernels/sparse_cross_op.cc b/tensorflow/core/kernels/sparse_cross_op.cc index 7cd4532ad6..4b5df7aff0 100644 --- a/tensorflow/core/kernels/sparse_cross_op.cc +++ b/tensorflow/core/kernels/sparse_cross_op.cc @@ -327,7 +327,7 @@ class SparseCrossOp : public OpKernel { typename CrossTraits::Updater updater( output_start_indices, indices_out, values_out); - auto do_work = [this, &columns, crosser, updater](int64 begin, int64 end) { + auto do_work = [&columns, crosser, updater](int64 begin, int64 end) { for (int b = begin; b < end; b++) { ProductIterator product_iterator(columns, b); int64 cross_count = 0; diff --git a/tensorflow/core/kernels/split_v_op.cc b/tensorflow/core/kernels/split_v_op.cc index 0ce0b552e6..5c19a45fb1 100644 --- a/tensorflow/core/kernels/split_v_op.cc +++ b/tensorflow/core/kernels/split_v_op.cc @@ -208,10 +208,10 @@ class SplitVOpCPUImpl { input_element_count >= std::max(num_threads, num_split) * 4096 && input_element_count < num_split * 180 * 1024); - auto range_output_func = [&indices, context, &input_shape, prefix_dim_size, - split_dim, &split_sizes_vec, &split_start_points, - suffix_dim_size, use_parallelism_between_outputs, - &input_reshaped, &make_sizes, + auto range_output_func = [&indices, context, &input_shape, split_dim, + &split_sizes_vec, &split_start_points, + use_parallelism_between_outputs, &input_reshaped, + &make_sizes, &reshape_result](int64 start, int64 limit) { for (int64 i = start; i < limit; ++i) { TensorShape output_shape(input_shape); -- GitLab From 21b91300e9e18dbfa2d1a503721ed3d0a08f37e2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 11:19:08 -0700 Subject: [PATCH 740/884] boosted_trees: infer the output shapes of Quantiles Op from the input shapes. 
PiperOrigin-RevId: 188750079 --- .../contrib/boosted_trees/ops/quantile_ops.cc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc index ae99d53a2c..6aa5246398 100644 --- a/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc +++ b/tensorflow/contrib/boosted_trees/ops/quantile_ops.cc @@ -272,6 +272,20 @@ REGISTER_OP("Quantiles") .Input("sparse_indices: num_sparse_features * int64") .Output("dense_quantiles: num_dense_features * int32") .Output("sparse_quantiles: num_sparse_features * int32") + .SetShapeFn([](InferenceContext* c) { + int num_dense_features; + TF_RETURN_IF_ERROR(c->GetAttr("num_dense_features", &num_dense_features)); + int num_sparse_features; + TF_RETURN_IF_ERROR( + c->GetAttr("num_sparse_features", &num_sparse_features)); + // Set output shapes (dense_quantiles and sparse_quantiles) by the + // relevant inputs (dense_values and sparse_values). Note that the output + // has an additional dimension for dimension_ids. + for (int i = 0; i < num_dense_features + num_sparse_features; ++i) { + c->set_output(i, c->MakeShape({c->Dim(c->input(i), 0), 2})); + } + return Status::OK(); + }) .Doc(R"doc( Computes quantile for each a given list of dense and sparse feature values using the given buckets. -- GitLab From 402fb8c97db05b51587c6fc999c690d548fd4496 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 11:24:22 -0700 Subject: [PATCH 741/884] Transposes can be merged into reshapes when the ordering of non-one dimensions remains unchanged. 
PiperOrigin-RevId: 188751074 --- .../convert_trivial_transpose_to_reshape.cc | 54 ++++++++++++++----- 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc index c2b166033c..5a36a90b38 100644 --- a/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc +++ b/tensorflow/contrib/lite/toco/graph_transformations/convert_trivial_transpose_to_reshape.cc @@ -21,6 +21,33 @@ limitations under the License. namespace toco { +namespace { + +bool TransposeAffectsMemoryOrder(std::vector perm, + std::vector in_shape) { + CHECK_EQ(perm.size(), in_shape.size()); + // See what the ordering of the non-unary columns are before and after + // transpose permutation. If the major indices stay in the same order (not + // just the shape) then the flat buffer representation shouldn't change. 
+ std::vector old_major_index_ordering; + std::vector new_major_index_ordering; + for (int i = 0; i < in_shape.size(); i++) { + if (in_shape[i] != 1) { + old_major_index_ordering.push_back(i); + } + + if (in_shape[perm[i]] != 1) { + new_major_index_ordering.push_back(perm[i]); + } + } + + CHECK_EQ(new_major_index_ordering.size(), old_major_index_ordering.size()); + + return old_major_index_ordering != new_major_index_ordering; +} + +} // namespace + bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { auto transpose_it = model->operators.begin() + op_index; if (transpose_it->get()->type != OperatorType::kTranspose) { @@ -29,23 +56,26 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { TransposeOperator* transpose_op = static_cast(transpose_it->get()); + const auto& input_array = model->GetArray(transpose_op->inputs[0]); const auto& output_array = model->GetArray(transpose_op->outputs[0]); - if (!output_array.has_shape()) { + if (!input_array.has_shape() || !output_array.has_shape()) { // Yield until PropagateFixedSizes has been run on this op. return false; } // Note: We can assume we have error checked inputs in PropagateFixedSizes. - // This transpose is trivial if we only have one non-unitary dimension. - std::vector const& dims = output_array.shape().dims(); - unsigned non_unitary_axis_count = 0; - for (int i = 0; i < dims.size(); i++) { - if (dims[i] != 1) { - non_unitary_axis_count++; - } + // Check that the permutation has propagated. + std::vector const& perm = transpose_op->perm; + if (perm.empty()) { + return false; } - if (non_unitary_axis_count > 1) { - // Transpose is not trivial + + // This transpose is trivial if non-unitary dimensions remain in the same + // order. 
+ std::vector const& input_dims = input_array.shape().dims(); + std::vector const& output_dims = output_array.shape().dims(); + + if (TransposeAffectsMemoryOrder(perm, input_dims)) { return false; } @@ -61,11 +91,11 @@ bool ConvertTrivialTransposeToReshape::Run(Model* model, std::size_t op_index) { string shape_array_name = toco::AvailableArrayName(*model, perm_array_name); Array& shape_array = model->GetOrCreateArray(shape_array_name); *(shape_array.mutable_shape()->mutable_dims()) = { - 1, static_cast(dims.size())}; + 1, static_cast(output_dims.size())}; reshape_op->inputs.push_back(shape_array_name); shape_array.data_type = ArrayDataType::kInt32; auto& shape_buffer = shape_array.GetMutableBuffer(); - shape_buffer.data = dims; + shape_buffer.data = output_dims; // Delete perm array if unused if (IsDiscardableArray(*model, perm_array_name) && -- GitLab From 617d1f01d60b677536f988be35dc4f02885e6f1e Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Mon, 12 Mar 2018 11:29:24 -0700 Subject: [PATCH 742/884] Improve usability of `tf.contrib.bayesflow.custom_gradient` by removing need for `axis` arg and support taking lists. 
PiperOrigin-RevId: 188751894 --- .../python/kernel_tests/custom_grad_test.py | 2 +- .../bayesflow/python/ops/custom_grad_impl.py | 122 +++++++++++------- 2 files changed, 76 insertions(+), 48 deletions(-) diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py index a95df31ac1..1250765d09 100644 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py +++ b/tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py @@ -83,7 +83,7 @@ class CustomGradientTest(test.TestCase): g = lambda z: z[0]**2 * z[1]**2 / 2 z = array_ops.stack([x, y]) - fz = cg.custom_gradient(f(z), g(z), z, axis=0) + fz = cg.custom_gradient(f(z), g(z), z) gz = gradients_impl.gradients(fz, variables.trainable_variables()) [z_, fz_, gx_, gy_] = sess.run([z, fz, gz[0], gz[1]]) diff --git a/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py b/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py index d44fe6529a..927cc28f67 100644 --- a/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py +++ b/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py @@ -24,32 +24,38 @@ from __future__ import print_function from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops from tensorflow.python.ops import math_ops __all__ = [ - "custom_gradient", + 'custom_gradient', ] -def custom_gradient(fx, gx, x, axis=(), fx_gx_manually_stopped=False, - name=None): - """Enables specifying a custom gradient. +def is_list_like(x): + return isinstance(x, (tuple, list)) + + +def identity(x, dtype=None, name=None): + return array_ops.identity(ops.convert_to_tensor( + x, dtype=dtype, name=name), name=name) + + +def custom_gradient(fx, gx, x, fx_gx_manually_stopped=False, name=None): + """Embeds a custom gradient into a `Tensor`. This function works by clever application of `stop_gradient`. 
I.e., observe that: ```none - h(x) = x * stop_gradient(g(x)) + stop_gradient(f(x) - x * g(x)) + h(x) = stop_gradient(f(x)) + stop_gradient(g(x)) * (x - stop_gradient(x)) ``` - is such that `h(x) = stop_gradient(f(x))` and `grad[h(x), x] = - stop_gradient(g(x)).` + is such that `h(x) == stop_gradient(f(x))` and + `grad[h(x), x] == stop_gradient(g(x)).` In addition to scalar-domain/scalar-range functions, this function also - supports tensor-domain/scalar-range functions. However, in the latter case it - is necessary to reduce `x` to a scalar. This can be done by indicating the - `axis` over which `f` operates or by appropriately `reduce_sum`-ing `x`, prior - to calling this function. + supports tensor-domain/scalar-range functions. Partial Custom Gradient: @@ -61,12 +67,8 @@ def custom_gradient(fx, gx, x, axis=(), fx_gx_manually_stopped=False, Args: fx: `Tensor`. Output of function evaluated at `x`. - gx: `Tensor`. Gradient of function evaluated at `x`. - x: `Tensor`. Point of evaluation for `f, g`. - axis: 1D `int` `Tensor` representing dimensions of `x` which are the domain - of `f`. If `()` (the default), `f` is assumed scalar-domain/scalar-range. - If `None` `f` is assumed to render one scalar given all of `x`. Otherwise - `f` is assumed to output one scalar for each of `axis` dimensions of `x`. + gx: `Tensor` or list of `Tensor`s. Gradient of function at (each) `x`. + x: `Tensor` or list of `Tensor`s. Args of evaluation for `f`. fx_gx_manually_stopped: Python `bool` indicating that `fx`, `gx` manually have `stop_gradient` applied. name: Python `str` name prefixed to Ops created by this function. @@ -75,36 +77,62 @@ def custom_gradient(fx, gx, x, axis=(), fx_gx_manually_stopped=False, fx: Floating-type `Tensor` equal to `f(x)` but which has gradient `stop_gradient(g(x))`. 
""" - with ops.name_scope(name, "custom_gradient", [fx, gx, x]): - fx = ops.convert_to_tensor(fx, name="fx") + def maybe_stop(x): + if fx_gx_manually_stopped: + return x + return array_ops.stop_gradient(x) + with ops.name_scope(name, 'custom_gradient', [fx, gx, x]): + fx = ops.convert_to_tensor(fx, name='fx') # We don't want to bother eagerly computing `gx` since we may not even need # it. with ops.control_dependencies([fx]): - gx = ops.convert_to_tensor(gx, dtype=fx.dtype, name="gx") - gx = array_ops.identity(gx, name="gx") - # Proof of correctness: - # - # f(x) = x * stop[gx] + stop[fx - x * gx] - # = stop[fx] - # - # g(x) = grad[fx] - # = stop[gx] + grad[stop[fx - x * gx]] - # = stop[gx] + 0 - # - # Notice that when x is zero it still works: - # grad[x * stop(gx) + stop(fx - x * gx)] = 1 * stop[gx] + 0 = stop[gx] - # - # The proof is similar for the tensor-domain case, except that `x` is - # replaced by `reduce_sum(x)`. - sum_x = math_ops.reduce_sum(x, axis=axis, name="sum_x") - if not fx_gx_manually_stopped: - fx = array_ops.stop_gradient(fx) - gx = array_ops.stop_gradient(gx) - # IEEE754 ensures `(x-x)==0.` and that `0.*x==0.` so we make sure to write - # the code this way, rather than, e.g., - # `sum_x * stop(gx) + stop(fx - sum_x * gx)`. - # For more discussion regarding the relevant portions of the IEEE754 - # standard, see the StackOverflow question, - # "Is there a floating point value of x, for which x-x == 0 is false?" - # http://stackoverflow.com/q/2686644 - return (sum_x - array_ops.stop_gradient(sum_x)) * gx + fx + if is_list_like(x): + x = [identity(x_, name='x') for x_ in x] + else: + x = [identity(x, name='x')] + + if is_list_like(gx): + gx = [identity(gx_, dtype=fx.dtype, name='gx') + for gx_ in gx] + else: + gx = [identity(gx, dtype=fx.dtype, name='gx')] + + override_grad = [] + for x_, gx_ in zip(x, gx): + # Observe: tf.gradients(f(x), x)[i].shape == x[i].shape + # thus we check that the user is supplying correct shapes. 
+ equal_shape = check_ops.assert_equal( + array_ops.shape(x_), + array_ops.shape(gx_), + message='Each `x` must have the same shape as each `gx`.') + with ops.control_dependencies([equal_shape]): + # IEEE754 ensures `(x-x)==0.` and that `0.*x==0.` so we make sure to + # write the code this way, rather than, e.g., + # `sum_x * stop(gx) + stop(fx - sum_x * gx)`. + # For more discussion regarding the relevant portions of the IEEE754 + # standard, see the StackOverflow question, + # "Is there a floating point value of x, for which x-x == 0 is false?" + # http://stackoverflow.com/q/2686644 + zeros_like_x_ = x_ - array_ops.stop_gradient(x_) + override_grad.append(math_ops.reduce_sum( + maybe_stop(gx_) * zeros_like_x_)) + override_grad = sum(override_grad) + override_grad /= math_ops.cast(array_ops.size(fx), + dtype=fx.dtype.base_dtype) + + # Proof of correctness: + # + # f(x) = x * stop[gx] + stop[fx - x * gx] + # = stop[fx] + # + # g(x) = grad[fx] + # = stop[gx] + grad[stop[fx - x * gx]] + # = stop[gx] + 0 + # + # Notice that when x is zero it still works: + # grad[x * stop(gx) + stop(fx - x * gx)] = 1 * stop[gx] + 0 = stop[gx] + # + # The proof is similar for the tensor-domain case, except that we + # `reduce_sum` the `stop[gx] * (x - stop[x])` then rescale by + # `tf.size(fx)` since this reduced version is broadcast to `fx`. + return maybe_stop(fx) + override_grad -- GitLab From bf93a9f13e5a8f51db6afe2b61c3dbee9763b7d3 Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 12 Mar 2018 11:39:08 -0700 Subject: [PATCH 743/884] Standardize "op" capitalization, see "adding_an_op". 
PiperOrigin-RevId: 188753529 --- .../programmers_guide/version_compat.md | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/tensorflow/docs_src/programmers_guide/version_compat.md b/tensorflow/docs_src/programmers_guide/version_compat.md index 5412fba5d0..72e427c5f8 100644 --- a/tensorflow/docs_src/programmers_guide/version_compat.md +++ b/tensorflow/docs_src/programmers_guide/version_compat.md @@ -183,7 +183,7 @@ Our versioning scheme has three requirements: * **Forward compatibility** to support scenarios where the producer of a graph or checkpoint is upgraded to a newer version of TensorFlow before the consumer. -* Enable evolving TensorFlow in incompatible ways. For example, removing Ops, +* Enable evolving TensorFlow in incompatible ways. For example, removing ops, adding attributes, and removing attributes. Note that while the `GraphDef` version mechanism is separate from the TensorFlow @@ -245,10 +245,10 @@ contains a main data version which is treated as either `producer` or `TF_CHECKPOINT_VERSION_MIN_CONSUMER`, and `TF_CHECKPOINT_VERSION_MIN_PRODUCER`. -### Add a new attribute with default to an existing Op +### Add a new attribute with default to an existing op Following the guidance below gives you forward compatibility only if the set of -Ops has not changed. +ops has not changed: 1. If forward compatibility is desired, set `strip_default_attrs` to `True` while exporting the model using either the @@ -257,39 +257,39 @@ Ops has not changed. methods of the `SavedModelBuilder` class, or @{tf.estimator.Estimator.export_savedmodel$`Estimator.export_savedmodel`} 2. This strips off the default valued attributes at the time of - producing/exporting the models; thereby making sure that the exported - @{tf.MetaGraphDef} does not contain the new Op-attribute when the default + producing/exporting the models. 
This makes sure that the exported + @{tf.MetaGraphDef} does not contain the new op-attribute when the default value is used. -3. Having this control lets potentially old consumers aka serving binaries - (lagging behind training binaries) continue loading the models - thereby preventing interruptions in model serving. +3. Having this control could allow out-of-date consumers (for example, serving + binaries that lag behind training binaries) to continue loading the models + and prevent interruptions in model serving. ### Evolving GraphDef versions This section explains how to use this versioning mechanism to make different types of changes to the `GraphDef` format. -#### Add an Op +#### Add an op -Add the new Op to both consumers and producers at the same time, and do not +Add the new op to both consumers and producers at the same time, and do not change any `GraphDef` versions. This type of change is automatically backward compatible, and does not impact forward compatibility plan since existing producer scripts will not suddenly use the new functionality. -#### Add an Op and switch existing Python wrappers to use it +#### Add an op and switch existing Python wrappers to use it 1. Implement new consumer functionality and increment the `GraphDef` version. 2. If it is possible to make the wrappers use the new functionality only in cases that did not work before, the wrappers can be updated now. 3. Change Python wrappers to use the new functionality. Do not increment - `min_consumer`, since models that do not use this Op should not break. + `min_consumer`, since models that do not use this op should not break. -#### Remove or restrict an Op's functionality +#### Remove or restrict an op's functionality -1. Fix all producer scripts (not TensorFlow itself) to not use the banned Op or +1. Fix all producer scripts (not TensorFlow itself) to not use the banned op or functionality. 2. 
Increment the `GraphDef` version and implement new consumer functionality - that bans the removed Op or functionality for GraphDefs at the new version + that bans the removed op or functionality for GraphDefs at the new version and above. If possible, make TensorFlow stop producing `GraphDefs` with the banned functionality. To do so, add the [`REGISTER_OP(...).Deprecated(deprecated_at_version, @@ -298,15 +298,15 @@ existing producer scripts will not suddenly use the new functionality. 4. Increase `min_producer` to the GraphDef version from (2) and remove the functionality entirely. -#### Change an Op's functionality +#### Change an op's functionality -1. Add a new similar Op named `SomethingV2` or similar and go through the +1. Add a new similar op named `SomethingV2` or similar and go through the process of adding it and switching existing Python wrappers to use it, which may take three weeks if forward compatibility is desired. -2. Remove the old Op (Can only take place with a major version change due to +2. Remove the old op (Can only take place with a major version change due to backward compatibility). -3. Increase `min_consumer` to rule out consumers with the old Op, add back the - old Op as an alias for `SomethingV2`, and go through the process to switch +3. Increase `min_consumer` to rule out consumers with the old op, add back the + old op as an alias for `SomethingV2`, and go through the process to switch existing Python wrappers to use it. 4. Go through the process to remove `SomethingV2`. @@ -314,6 +314,6 @@ existing producer scripts will not suddenly use the new functionality. 1. Bump the `GraphDef` version and add the bad version to `bad_consumers` for all new GraphDefs. If possible, add to `bad_consumers` only for GraphDefs - which contain a certain Op or similar. + which contain a certain op or similar. 2. If existing consumers have the bad version, push them out as soon as possible. 
-- GitLab From 077c500bf98aea58fe365818951b6447049550f3 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 12 Mar 2018 11:43:57 -0700 Subject: [PATCH 744/884] Switch op_hint.py to use _set_attr. This is in preparation for enabling the C API. Modifying an op's NodeDef directly has no effect with the C API enabled. PiperOrigin-RevId: 188754464 --- tensorflow/contrib/lite/python/op_hint.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tensorflow/contrib/lite/python/op_hint.py b/tensorflow/contrib/lite/python/op_hint.py index 9a3971228a..7908689ce4 100644 --- a/tensorflow/contrib/lite/python/op_hint.py +++ b/tensorflow/contrib/lite/python/op_hint.py @@ -119,8 +119,10 @@ class OpHint(object): def _setattr(self, dest_op, name, value): tensor_value = _ops.convert_to_tensor(value) - dest_op.op.node_def.attr[name].tensor.CopyFrom( - tensor_value.op.node_def.attr["value"].tensor) + # pylint: disable=protected-access + dest_op.op._set_attr(name, _attr_value_pb2.AttrValue( + tensor=tensor_value.op.node_def.attr["value"].tensor)) + # pylint: enable=protected-access def add_inputs(self, *args): """Add a sequence of inputs to the function invocation. -- GitLab From 694a8101316107088efdbc33f7a5a60c7c8e7c8d Mon Sep 17 00:00:00 2001 From: Yunxing Dai Date: Mon, 12 Mar 2018 11:49:36 -0700 Subject: [PATCH 745/884] [XLA] [Copy insertion] Deterministically iterate through instructions to copy - Use HloInstructionMap to get deterministic iteration order. 
PiperOrigin-RevId: 188755375 --- tensorflow/compiler/xla/service/copy_insertion.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/compiler/xla/service/copy_insertion.cc b/tensorflow/compiler/xla/service/copy_insertion.cc index df73c28597..e9c974a046 100644 --- a/tensorflow/compiler/xla/service/copy_insertion.cc +++ b/tensorflow/compiler/xla/service/copy_insertion.cc @@ -960,7 +960,7 @@ Status AddSpecialCaseCopies(const CallGraph& call_graph, HloModule* module) { // Identify which shape indices of which instructions need to be copied. Store // these results in 'instructions_to_copy'. - std::unordered_map> instructions_to_copy; + HloInstructionMap> instructions_to_copy; auto add_index_to_copy = [&instructions_to_copy](HloInstruction* instruction, const ShapeIndex& index) { auto it = instructions_to_copy.find(instruction); -- GitLab From 315369aacd002d8c668b86a52f3cd88956a9b9a2 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 12:44:29 -0700 Subject: [PATCH 746/884] Extend TF Eager C API to allow asynchronous execution. 
PiperOrigin-RevId: 188763442 --- tensorflow/c/eager/BUILD | 1 + tensorflow/c/eager/c_api.cc | 824 +++++++++++++++++----- tensorflow/c/eager/c_api.h | 58 +- tensorflow/c/eager/c_api_internal.h | 206 +++++- tensorflow/c/eager/c_api_test.cc | 380 +++++++--- tensorflow/c/eager/runtime.h | 3 +- tensorflow/python/eager/core_test.py | 24 +- tensorflow/python/eager/pywrap_tensor.cc | 6 +- tensorflow/python/eager/pywrap_tfe_src.cc | 9 +- tensorflow/python/lib/core/py_func.cc | 16 +- 10 files changed, 1222 insertions(+), 305 deletions(-) diff --git a/tensorflow/c/eager/BUILD b/tensorflow/c/eager/BUILD index e55cb672e9..3046d9064a 100644 --- a/tensorflow/c/eager/BUILD +++ b/tensorflow/c/eager/BUILD @@ -58,6 +58,7 @@ tf_cuda_library( "//tensorflow/core:framework", "//tensorflow/core:framework_internal", "//tensorflow/core:framework_lite", + "//tensorflow/core:lib", "//tensorflow/core:lib_internal", ], ) diff --git a/tensorflow/c/eager/c_api.cc b/tensorflow/c/eager/c_api.cc index b9a47ea244..56cec2d668 100644 --- a/tensorflow/c/eager/c_api.cc +++ b/tensorflow/c/eager/c_api.cc @@ -42,6 +42,7 @@ limitations under the License. 
#include "tensorflow/core/lib/gtl/flatmap.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" +#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/public/version.h" @@ -67,6 +68,7 @@ string DeviceName(const tensorflow::Device* d) { #ifdef TENSORFLOW_EAGER_USE_XLA std::atomic_int_fast64_t func_id_generator(0); #endif // TENSORFLOW_EAGER_USE_XLA + } // namespace TFE_ContextDevicePlacementPolicy PlacementPolicy( @@ -90,11 +92,33 @@ void TFE_ContextOptionsSetConfig(TFE_ContextOptions* options, const void* proto, TF_SetConfig(&options->session_options, proto, proto_len, status); } +void TFE_ContextOptionsSetAsync(TFE_ContextOptions* options, + unsigned char async) { + options->async = async; +} void TFE_ContextOptionsSetDevicePlacementPolicy( TFE_ContextOptions* options, TFE_ContextDevicePlacementPolicy policy) { options->policy = policy; } +TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context* ctx, + unsigned char async, + TF_Status* status) { + { + tensorflow::mutex_lock l(ctx->async_map_mu); + ctx->thread_local_async[std::this_thread::get_id()] = async; + } + if (async) { + ctx->executor.EnableAsync(); + } else { + // TODO(agarwal): Currently we add a wait here to handle cases where a sync + // op has a control dependency on an async op, and the latter has not + // executed yet. This wait can be removed by storing all the control inputs + // and waiting for them when executing ops. 
+ status->status = ctx->executor.WaitForAllPendingNodes(); + } +} + void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; } TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { @@ -113,7 +137,7 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) { } void TFE_DeleteContext(TFE_Context* ctx, TF_Status* status) { - status->status = tensorflow::Status::OK(); + status->status = ctx->executor.WaitForAllPendingNodes(); { tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::STLDeleteValues(&ctx->kernel_cache); @@ -139,6 +163,9 @@ void TFE_ContextSetThreadLocalDevicePlacementPolicy( ctx->thread_local_policies[std::this_thread::get_id()] = policy; } +// Note: this function looks up a thread local policy. So it should be called in +// the appropriate client thread. In particular, in async mode, it may not be +// safe to call this function from the async TFE_Executor threads. extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( TFE_Context* ctx) { tensorflow::mutex_lock ml(ctx->policy_map_mu); @@ -150,6 +177,18 @@ extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy( return ctx->policy; } +void TFE_ContextAsyncWait(TFE_Context* ctx, TF_Status* status) { + status->status = ctx->executor.WaitForAllPendingNodes(); +} + +void TFE_ContextGetStatus(TFE_Context* ctx, TF_Status* status) { + status->status = ctx->executor.status(); +} + +void TFE_ContextAsyncClearError(TFE_Context* ctx) { + ctx->executor.ClearError(); +} + TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { tensorflow::Tensor tensor; status->status = tensorflow::TF_TensorToTensor(t, &tensor); @@ -157,56 +196,70 @@ TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status) { return new TFE_TensorHandle(tensor, nullptr, nullptr); } -void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { delete h; } +void TFE_DeleteTensorHandle(TFE_TensorHandle* h) { + 
DCHECK(h); + h->Unref(); +} TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h) { - return static_cast(h->t.dtype()); + return static_cast(h->dtype); } int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status) { - status->status = tensorflow::Status::OK(); - return h->t.dims(); + const tensorflow::Tensor* t = nullptr; + status->status = h->Tensor(&t); + return t == nullptr ? 0 : t->dims(); } int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, TF_Status* status) { - status->status = tensorflow::Status::OK(); - return h->t.dim_size(dim_index); + const tensorflow::Tensor* t = nullptr; + status->status = h->Tensor(&t); + return t == nullptr ? 0 : t->dim_size(dim_index); } const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) { - status->status = tensorflow::Status::OK(); - return (h->op_device == nullptr) - ? "/job:localhost/replica:0/task:0/device:CPU:0" - : h->op_device->name().c_str(); + tensorflow::Device* d = nullptr; + status->status = h->OpDevice(&d); + return (d == nullptr) ? "/job:localhost/replica:0/task:0/device:CPU:0" + : d->name().c_str(); } TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status) { - if (!IsCPU(h->d)) { + // TODO(agarwal): move this implementation inside TFE_TensorHandle. + tensorflow::Device* d = nullptr; + tensorflow::Device* op_device = nullptr; + const tensorflow::Tensor* t = nullptr; + status->status = h->TensorAndDevice(&t, &d, &op_device); + if (!status->status.ok()) return nullptr; + if (!IsCPU(d)) { TF_SetStatus(status, TF_UNIMPLEMENTED, tensorflow::strings::StrCat( "TFE_TensorHandle can be resolved iff it is on CPU (this " "handle is on ", - h->d->name(), + d->name(), "). 
Consider using TFE_TensorHandleCopyToDevice to get a " "copy of the tensor on CPU") .c_str()); return nullptr; } - return tensorflow::TF_TensorFromTensor(h->t, status); + return tensorflow::TF_TensorFromTensor(*t, status); } +} // extern "C" -TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, - TFE_Context* ctx, - const char* device_name, - TF_Status* status) { - tensorflow::Device* dstd = ctx->devices[0]; - if (device_name != nullptr && strlen(device_name) > 0) { - status->status = ctx->device_manager->LookupDevice(device_name, &dstd); - if (!status->status.ok()) return nullptr; - } +namespace { - tensorflow::Device* srcd = h->d == nullptr ? ctx->devices[0] : h->d; +tensorflow::Status TensorHandleCopyToDevice(TFE_TensorHandle* h, + TFE_Context* ctx, + tensorflow::Device* dstd, + TFE_TensorHandle** output) { + const tensorflow::Tensor* src = nullptr; + tensorflow::Device* srcd = nullptr; + // TODO(agarwal): src_opd is unused. Perhaps allow TensorAndDevice to accept + // nullptr. + tensorflow::Device* src_opd = nullptr; + TF_RETURN_IF_ERROR(h->TensorAndDevice(&src, &srcd, &src_opd)); + if (srcd == nullptr) srcd = ctx->devices[0]; bool is_same_device = (srcd == dstd) || (DeviceName(srcd) == DeviceName(dstd)); const bool dst_cpu = IsCPU(dstd); @@ -216,18 +269,15 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, const bool both_on_cpu = src_cpu && dst_cpu; if (is_same_device || both_on_cpu) { dstd = dst_cpu ? 
nullptr : dstd; - return new TFE_TensorHandle(h->t, dstd, dstd); + *output = new TFE_TensorHandle(*src, dstd, dstd); + return tensorflow::Status::OK(); } - tensorflow::Tensor* src = &(h->t); if (!dst_cpu && (src->dtype() != tensorflow::DT_VARIANT && !tensorflow::DataTypeCanUseMemcpy(src->dtype()))) { - TF_SetStatus( - status, TF_INVALID_ARGUMENT, - tensorflow::strings::StrCat("Can't copy Tensor with type ", - tensorflow::DataTypeString(src->dtype()), - " to device ", DeviceName(dstd), ".") - .c_str()); - return nullptr; + return tensorflow::errors::InvalidArgument( + "Can't copy Tensor with type ", + tensorflow::DataTypeString(src->dtype()), " to device ", + DeviceName(dstd), "."); } tensorflow::AllocatorAttributes attr; if (src->dtype() == tensorflow::DT_VARIANT) { @@ -236,7 +286,8 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, tensorflow::Tensor dst(dstd->GetAllocator(attr), src->dtype(), src->shape()); if (src->shape().num_elements() == 0) { dstd = dst_cpu ? nullptr : dstd; - return new TFE_TensorHandle(dst, dstd, dstd); + *output = new TFE_TensorHandle(dst, dstd, dstd); + return tensorflow::Status::OK(); } tensorflow::DeviceContext* src_device_context = nullptr; if (!src_cpu) { @@ -253,21 +304,26 @@ TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, // With that setup, Sync()ing across all 3 streams should be sufficient // but more than necessary (since it waits for operations that might have // nothing to do with this tensor to complete). 
- status->status = srcd->Sync(); + TF_RETURN_IF_ERROR(srcd->Sync()); tensorflow::Notification n; + tensorflow::Status status; tensorflow::CopyTensor::ViaDMA("copy", src_device_context, dst_device_context, srcd, dstd, tensorflow::AllocatorAttributes(), tensorflow::AllocatorAttributes(), src, &dst, - [status, &n](const tensorflow::Status& s) { - status->status = s; + [&status, &n](const tensorflow::Status& s) { + status = s; n.Notify(); }); n.WaitForNotification(); - return (TF_GetCode(status) == TF_OK) - ? new TFE_TensorHandle(dst, dst_cpu ? nullptr : dstd, - dst_cpu ? nullptr : dstd) - : nullptr; + if (status.ok()) { + dstd = dst_cpu ? nullptr : dstd; + *output = new TFE_TensorHandle(dst, dstd, dstd); + } + return status; } +} // namespace + +extern "C" { TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, TF_Status* status) { @@ -311,16 +367,19 @@ void TFE_OpSetXLACompilation(TFE_Op* op, unsigned char enable) { } void TFE_OpAddInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) { - // Questionable heuristic ... - // - If a device was explicitly set on the op, always use that. - // - If not, place on the first non-host device seen. - if (op->device == nullptr && !IsCPU(h->d)) { - op->device = h->d; + if (op->device == nullptr) { + // Questionable heuristic ... + // - If a device was explicitly set on the op, always use that. + // - If not, place on the first non-host device seen. + tensorflow::Device* d = nullptr; + // TODO(agarwal): This call may block if h is not ready. Avoid this if + // possible. 
+ status->status = h->Device(&d); + if (!status->status.ok()) return; + if (!IsCPU(d)) op->device = d; } - if (!status->status.ok()) return; - op->inputs.push_back(h->t); - op->input_devices.push_back(h->d); - op->input_op_devices.push_back(h->op_device); + h->Ref(); + op->inputs.push_back(h); op->attrs.NumInputs(op->inputs.size()); } @@ -482,14 +541,14 @@ void TFE_OpSetAttrFunctionList(TFE_Op* op, const char* attr_name, tensorflow::gtl::ArraySlice( funcs.get(), num_values)); } +} // extern "C" namespace { tensorflow::Status ValidateInputTypeAndPlacement( TFE_Context* ctx, tensorflow::Device* host_device, tensorflow::Device* op_device, TFE_Op* op, - const tensorflow::OpKernel* kernel, - std::vector* copied_tensors) { + const tensorflow::OpKernel* kernel) { const tensorflow::MemoryTypeVector& memtypes = kernel->input_memory_types(); if (memtypes.size() != op->inputs.size()) { return tensorflow::errors::InvalidArgument( @@ -498,14 +557,17 @@ tensorflow::Status ValidateInputTypeAndPlacement( for (int i = 0; i < op->inputs.size(); ++i) { const tensorflow::Device* expected_device = memtypes[i] == tensorflow::HOST_MEMORY ? host_device : op_device; + TFE_TensorHandle* handle = op->inputs[i]; + tensorflow::Device* handle_device = nullptr; + TF_RETURN_IF_ERROR(handle->Device(&handle_device)); const tensorflow::Device* actual_device = - op->input_devices[i] == nullptr ? host_device : op->input_devices[i]; + handle_device == nullptr ? host_device : handle_device; if (expected_device != actual_device) { switch (TFE_ContextGetDevicePlacementPolicy(ctx)) { case TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32: // TODO(xpan): See if we could bubble python related error up // to python level. - if (op->inputs[i].dtype() == tensorflow::DT_INT32) { + if (handle->dtype == tensorflow::DT_INT32) { // Note: enabling silent copies of int32 tensors to match behavior // of graph mode. 
break; @@ -536,36 +598,245 @@ tensorflow::Status ValidateInputTypeAndPlacement( } // We are only here if the policy is warn or silent copies, so we should // trigger a copy. - TFE_TensorHandle original{op->inputs[i], op->input_devices[i], - op->device}; TF_Status* s = TF_NewStatus(); TFE_TensorHandle* copied_tensor = TFE_TensorHandleCopyToDevice( - &original, ctx, expected_device->name().c_str(), s); - if (!s->status.ok()) { - tensorflow::Status status = s->status; - delete s; + handle, ctx, expected_device->name().c_str(), s); + tensorflow::Status status = s->status; + TF_DeleteStatus(s); + if (!status.ok()) { + if (copied_tensor != nullptr) copied_tensor->Unref(); return tensorflow::errors::Internal( "Failed copying input tensor from ", actual_device->name(), " to ", expected_device->name(), " in order to run ", op->name, ": ", status.error_message()); } - op->inputs[i] = copied_tensor->t; - copied_tensors->push_back(copied_tensor); - op->input_devices[i] = copied_tensor->d; - delete s; + handle->Unref(); + handle = copied_tensor; + op->inputs[i] = copied_tensor; } - if (op->inputs[i].dtype() != kernel->input_type(i)) { + if (handle->dtype != kernel->input_type(i)) { return tensorflow::errors::InvalidArgument( "cannot compute ", op->name, " as input #", i, " was expected to be a ", tensorflow::DataTypeString(kernel->input_type(i)), - " tensor but is a ", - tensorflow::DataTypeString(op->inputs[i].dtype()), " tensor"); + " tensor but is a ", tensorflow::DataTypeString(handle->dtype), + " tensor"); } } return tensorflow::Status::OK(); } +tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, + TFE_Context* ctx, TF_Status* status) { + tensorflow::DeviceSet ds; + for (tensorflow::Device* d : ctx->devices) { + ds.AddDevice(d); + } + tensorflow::DeviceTypeVector final_devices; + status->status = tensorflow::SupportedDeviceTypesForNode( + ds.PrioritizedDeviceTypeList(), ndef, &final_devices); + if (!status->status.ok()) { + return nullptr; + } + if 
(final_devices.empty()) { + status->status = tensorflow::errors::Internal( + "Could not find valid device for node ", ndef.DebugString()); + return nullptr; + } + for (tensorflow::Device* d : ctx->devices) { + if (d->device_type() == final_devices[0].type_string()) { + return d; + } + } + status->status = tensorflow::errors::Unknown( + "Could not find a device for node ", ndef.DebugString()); + return nullptr; +} + +tensorflow::Status Execute( + TFE_Context* ctx, tensorflow::Device* device, + const tensorflow::gtl::InlinedVector& op_inputs, + tensorflow::KernelAndDevice* kernel, tensorflow::NodeExecStats* maybe_stats, + TFE_TensorHandle** retvals, int num_retvals) { + if (!ctx->soft_placement && device == nullptr) { + // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU + device = ctx->devices[0]; + } + + if (device == nullptr) { + // TODO(apassos) debug how the assignment below might return a different + // device from the one requested above. + device = kernel->device(); + } + + std::vector outputs(1); + const tensorflow::MemoryTypeVector* output_memory_types = nullptr; + output_memory_types = &kernel->kernel()->output_memory_types(); + std::vector inputs(op_inputs.size()); + for (int i = 0; i < op_inputs.size(); ++i) { + const tensorflow::Tensor* input_tensor = nullptr; + TF_RETURN_IF_ERROR(op_inputs[i]->Tensor(&input_tensor)); + inputs[i] = *input_tensor; + } + // WARNING: kernel->Run utilizes the FunctionLibraryRuntime + // (ctx->func_lib(device)), which in turn holds a pointer to func_lib_def, + // which is GUARDED_BY(ctx->functions_mu). But knowledge of the implementation + // of FunctionLibraryRuntime tells us that func_lib_def is not accessed by + // FunctionLibraryRuntime::Run(), so there is no thread-safety concern here. + // This is quite subtle. Re-work things to make this better? (Would it make + // sense for FunctionLibraryRuntime to ensure thread-safe access to + // FunctionLibraryDefinition?). 
TODO(apassos) figure out how to record stats + // for ops which are a part of functions. + // TODO(agarwal): change Run to take vector of handles ? + TF_RETURN_IF_ERROR(kernel->Run(&inputs, &outputs, maybe_stats)); + if (maybe_stats != nullptr) { + maybe_stats->set_op_end_rel_micros(tensorflow::Env::Default()->NowMicros() - + maybe_stats->all_start_micros()); + tensorflow::mutex_lock ml(ctx->metadata_mu); + if (ctx->should_store_metadata.load()) { + auto* step_stats = ctx->run_metadata.mutable_step_stats(); + // Lazily initialize the RunMetadata with information about all devices if + // this is the first call. + while (step_stats->dev_stats_size() < ctx->devices.size()) { + step_stats->add_dev_stats(); + } + // Find the current device's index. + int device_idx = 0; + for (int i = 0; i < ctx->devices.size(); ++i) { + if (ctx->devices[i] == device) { + device_idx = i; + break; + } + } + // Populate the device stats for this device. + auto* dev_stats = step_stats->mutable_dev_stats(device_idx); + dev_stats->set_device(device->name()); + *dev_stats->add_node_stats() = *maybe_stats; + } + } + if (num_retvals != outputs.size()) { + return tensorflow::errors::InvalidArgument( + "Expecting ", num_retvals, " outputs but got ", outputs.size()); + } + tensorflow::Device* op_device = IsCPU(device) ? nullptr : device; + for (int i = 0; i < num_retvals; ++i) { + tensorflow::Device* d = op_device; + if (d != nullptr && output_memory_types != nullptr && + (*output_memory_types)[i] == tensorflow::HOST_MEMORY) { + d = nullptr; + } + if (retvals[i] == nullptr) { + retvals[i] = new TFE_TensorHandle(outputs[i], d, op_device); + } else { + retvals[i]->SetTensorAndDevice(outputs[i], d, op_device); + } + } + return tensorflow::Status::OK(); +} + +// TODO(agarwal): move TFE_Executor and TFE_Node related code to a separate +// file. 
+class ExecuteNode : public TFE_Node { + public: + ExecuteNode(TFE_Op* op, tensorflow::KernelAndDevice* kernel, + tensorflow::NodeExecStats* maybe_stats, + const tensorflow::DataTypeVector& output_dtypes, + TFE_TensorHandle** retvals, int num_retvals) + : TFE_Node(op->ctx->executor.NextId()), + ctx_(op->ctx), + op_device_(op->device), + inputs_(op->inputs), + kernel_(kernel), + maybe_stats_(maybe_stats), + retvals_(num_retvals) { + for (auto handle : inputs_) { + handle->Ref(); + } + TFE_Context* ctx = op->ctx; + for (int i = 0; i < num_retvals; ++i) { + TFE_TensorHandle* h = new TFE_TensorHandle(id, output_dtypes[i], ctx); + h->Ref(); + retvals[i] = h; + retvals_[i] = h; + } + } + + ~ExecuteNode() override { + for (auto handle : inputs_) { + handle->Unref(); + } + for (auto handle : retvals_) { + handle->Unref(); + } + } + + tensorflow::Status Run() override { + const tensorflow::Status status = + Execute(ctx_, op_device_, inputs_, kernel_, maybe_stats_.get(), + retvals_.begin(), retvals_.size()); + if (status.ok()) { + return status; + } else { + return tensorflow::Status( + status.code(), + tensorflow::strings::StrCat("Got error, \"", status.error_message(), + "\" while executing kernel ", + kernel_->kernel()->def().DebugString())); + } + } + + private: + TFE_Context* ctx_; + tensorflow::Device* op_device_; + tensorflow::gtl::InlinedVector inputs_; + tensorflow::KernelAndDevice* kernel_; + std::unique_ptr maybe_stats_; + tensorflow::gtl::InlinedVector retvals_; +}; + +class CopyToDeviceNode : public TFE_Node { + public: + CopyToDeviceNode(TFE_TensorHandle* src, tensorflow::Device* dstd, + TFE_Context* ctx) + : TFE_Node(ctx->executor.NextId()), + src_(src), + dstd_(dstd), + ctx_(ctx), + dst_(new TFE_TensorHandle(id, src_->dtype, ctx)) { + src_->Ref(); + dst_->Ref(); + } + + ~CopyToDeviceNode() override { + src_->Unref(); + dst_->Unref(); + } + + tensorflow::Status Run() override { + TFE_TensorHandle* temp = nullptr; + 
TF_RETURN_IF_ERROR(TensorHandleCopyToDevice(src_, ctx_, dstd_, &temp)); + const tensorflow::Tensor* tensor = nullptr; + tensorflow::Device* device = nullptr; + tensorflow::Device* op_device = nullptr; + tensorflow::Status status = + temp->TensorAndDevice(&tensor, &device, &op_device); + // `temp` is a ready handle. So the following call should return OK. + TF_DCHECK_OK(status) << status.error_message(); + DCHECK(tensor); + dst_->SetTensorAndDevice(*tensor, device, op_device); + temp->Unref(); + return tensorflow::Status::OK(); + } + + TFE_TensorHandle* dst() { return dst_; } + + private: + TFE_TensorHandle* src_; + tensorflow::Device* dstd_; + TFE_Context* ctx_; + TFE_TensorHandle* dst_; +}; + #ifdef TENSORFLOW_EAGER_USE_XLA // Synthesizes and returns a wrapper function over `op`, which must be a // primitive op (e.g. matmul). @@ -631,7 +902,7 @@ const tensorflow::FunctionDef* OpToFunction( (*op_input_to_func_input)[i] = const_index; func_input_arg = signature->mutable_input_arg(const_index++); const_input_types->push_back( - static_cast(op->inputs[i].dtype())); + static_cast(op->inputs[i]->dtype)); } else if (op_input_arg.type() == tensorflow::DT_RESOURCE) { VLOG(1) << "For resource input, mapping op input " << i << " to func input " << resource_index; @@ -643,11 +914,11 @@ const tensorflow::FunctionDef* OpToFunction( (*op_input_to_func_input)[i] = arg_index; func_input_arg = signature->mutable_input_arg(arg_index++); arg_input_types->push_back( - static_cast(op->inputs[i].dtype())); + static_cast(op->inputs[i]->dtype)); } func_input_arg->set_name(op_input_arg.name()); - func_input_arg->set_type(op->inputs[i].dtype()); + func_input_arg->set_type(op->inputs[i]->dtype); } VLOG(1) << "Added OpDef Inputs: " << fdef.DebugString(); @@ -740,22 +1011,16 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { // Since input param reordering may have occurred between `op` and `launch_op` // via `op_input_to_func_input`, adjust the actual inputs accordingly. 
launch_op->inputs = op->inputs; - launch_op->input_devices = op->input_devices; - launch_op->input_op_devices = op->input_op_devices; + for (TFE_TensorHandle* h : launch_op->inputs) { + h->Ref(); + } if (!op_input_to_func_input.empty()) { DCHECK_EQ(op->inputs.size(), op_input_to_func_input.size()); - if (!op->input_devices.empty()) { - DCHECK_EQ(op->input_devices.size(), op_input_to_func_input.size()); - } for (int i = 0; i < op_input_to_func_input.size(); ++i) { VLOG(1) << "mapping op input " << i << " to func input " << op_input_to_func_input[i]; launch_op->inputs[op_input_to_func_input[i]] = op->inputs[i]; - if (!op->input_devices.empty()) { - launch_op->input_devices[op_input_to_func_input[i]] = - op->input_devices[i]; - } } } launch_op->attrs.NumInputs(op->inputs.size()); @@ -789,37 +1054,17 @@ std::unique_ptr BuildXlaLaunch(TFE_Op* op, TF_Status* status) { } #endif // TENSORFLOW_EAGER_USE_XLA -tensorflow::Device* SelectDevice(const tensorflow::NodeDef& ndef, - TFE_Context* ctx, TF_Status* status) { - tensorflow::DeviceSet ds; - for (tensorflow::Device* d : ctx->devices) { - ds.AddDevice(d); - } - tensorflow::DeviceTypeVector final_devices; - status->status = tensorflow::SupportedDeviceTypesForNode( - ds.PrioritizedDeviceTypeList(), ndef, &final_devices); - if (!status->status.ok()) { - return nullptr; - } - if (final_devices.empty()) { - status->status = tensorflow::errors::Internal( - "Could not find valid device for node ", ndef.DebugString()); - return nullptr; - } - for (tensorflow::Device* d : ctx->devices) { - if (d->device_type() == final_devices[0].type_string()) { - return d; - } - } - status->status = tensorflow::errors::Unknown( - "Could not find a device for node ", ndef.DebugString()); - return nullptr; -} - } // namespace +extern "C" { + void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status) { + TFE_Context* ctx = op->ctx; + status->status = ctx->executor.status(); + if (!status->status.ok()) { + return; + 
} #ifdef TENSORFLOW_EAGER_USE_XLA std::unique_ptr xla_launch_op; if (op->use_xla && op->name != "_XlaLaunch") { @@ -830,31 +1075,29 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, op = xla_launch_op.get(); } #endif // TENSORFLOW_EAGER_USE_XLA - TFE_Context* ctx = op->ctx; - tensorflow::Device* device = op->device; // Ensure all resource-touching ops run in the device the resource is, // regardless of anything else that has been specified. This is identical to // the graph mode behavior. for (int i = 0; i < op->inputs.size(); ++i) { - if (op->inputs[i].dtype() == tensorflow::DT_RESOURCE && - op->input_op_devices[i] != device) { - tensorflow::Device* d = op->input_op_devices[i] == nullptr - ? ctx->devices[0] - : op->input_op_devices[i]; + tensorflow::Device* input_op_device = nullptr; + status->status = op->inputs[i]->OpDevice(&input_op_device); + if (!status->status.ok()) return; + if (op->inputs[i]->dtype == tensorflow::DT_RESOURCE && + input_op_device != op->device) { + tensorflow::Device* d = + input_op_device == nullptr ? ctx->devices[0] : input_op_device; VLOG(1) << "Changing device of operation " << op->name << " to " << d->name() << " because input #" << i << " is a resource in this device."; - device = d; op->device = d; } } + tensorflow::Device* device = op->device; if (!ctx->soft_placement && device == nullptr) { // TODO(ashankar): ASSUMPTION: ctx->devices[0] is always CPU device = ctx->devices[0]; } - std::vector outputs(1); - const tensorflow::MemoryTypeVector* output_memory_types = nullptr; tensorflow::Fprint128 cache_key = op->attrs.CacheKey(device == nullptr ? "unspecified" : device->name()); tensorflow::KernelAndDevice* kernel; @@ -879,8 +1122,8 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, // Knowledge of the implementation of Init (and in-turn // FunctionLibraryRuntime::CreateKernel) tells us that ctx->func_lib_def // will be accessed, so grab on to the lock. 
- // See WARNING comment below - would be nice to rework to avoid this - // subtlety. + // See WARNING comment in Execute (before kernel->Run) - would be nice to + // rework to avoid this subtlety. tensorflow::tf_shared_lock l(ctx->functions_mu); status->status = tensorflow::KernelAndDevice::Init(ndef, ctx->func_lib(device), kernel); @@ -903,29 +1146,30 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, } tensorflow::DataTypeVector input_dtypes; status->status = InOutTypesForNode(ndef, *op_def, &input_dtypes, - kernel->output_dtypes()); + kernel->mutable_output_dtypes()); if (!status->status.ok()) { return; } tensorflow::mutex_lock ml(ctx->cache_mu); tensorflow::gtl::InsertOrUpdate(&(ctx->kernel_cache), cache_key, kernel); } + const tensorflow::DataTypeVector& output_dtypes = kernel->output_dtypes(); + if (output_dtypes.size() != *num_retvals) { + TF_SetStatus(status, TF_INVALID_ARGUMENT, + tensorflow::strings::StrCat("Expecting ", output_dtypes.size(), + " outputs, but *num_retvals is ", + *num_retvals) + .c_str()); + return; + } if (device == nullptr) { // TODO(apassos) debug how the assignment below might return a different // device from the one requested above. 
device = kernel->device(); } - - std::vector copied_tensors; - status->status = ValidateInputTypeAndPlacement( - ctx, ctx->devices[0], device, op, kernel->kernel(), &copied_tensors); - output_memory_types = &kernel->kernel()->output_memory_types(); - if (!status->status.ok()) { - for (auto* t : copied_tensors) { - TFE_DeleteTensorHandle(t); - } - return; - } + status->status = ValidateInputTypeAndPlacement(ctx, ctx->devices[0], device, + op, kernel->kernel()); + if (!status->status.ok()) return; std::unique_ptr maybe_stats; if (ctx->should_store_metadata.load()) { maybe_stats.reset(new tensorflow::NodeExecStats); @@ -935,53 +1179,47 @@ void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, maybe_stats->set_scheduled_micros(tensorflow::Env::Default()->NowMicros()); // TODO(apassos) track referenced tensors } - // WARNING: kernel->Run utilizes the FunctionLibraryRuntime - // (ctx->func_lib(device)), which in turn holds a pointer to func_lib_def, - // which is GUARDED_BY(ctx->functions_mu). But knowledge of the implementation - // of FunctionLibraryRuntime tells us that func_lib_def is not accessed by - // FunctionLibraryRuntime::Run(), so there is no thread-safety concern here. - // This is quite subtle. Re-work things to make this better? (Would it make - // sense for FunctionLibraryRuntime to ensure thread-safe access to - // FunctionLibraryDefinition?). TODO(apassos) figure out how to record stats - // for ops which are a part of functions. 
- status->status = kernel->Run(&op->inputs, &outputs, maybe_stats.get()); - for (auto* t : copied_tensors) { - TFE_DeleteTensorHandle(t); - } - if (!status->status.ok()) return; - if (maybe_stats != nullptr) { - maybe_stats->set_op_end_rel_micros(tensorflow::Env::Default()->NowMicros() - - maybe_stats->all_start_micros()); - tensorflow::mutex_lock ml(ctx->metadata_mu); - if (ctx->should_store_metadata.load()) { - auto* step_stats = ctx->run_metadata.mutable_step_stats(); - // Lazily initialize the RunMetadata with information about all devices if - // this is the first call. - while (step_stats->dev_stats_size() < ctx->devices.size()) { - step_stats->add_dev_stats(); - } - // Find the current device's index. - int device_idx = 0; - for (int i = 0; i < ctx->devices.size(); ++i) { - if (ctx->devices[i] == device) { - device_idx = i; - break; - } - } - // Populate the device stats for this device. - auto* dev_stats = step_stats->mutable_dev_stats(device_idx); - dev_stats->set_device(device->name()); - *dev_stats->add_node_stats() = *maybe_stats; + if (ctx->Async()) { + // Note that for async mode, execution order will make sure that all + // input handles are ready before executing them. + // TODO(agarwal): Consider executing "cheap" kernels inline for performance. + TFE_Node* node = new ExecuteNode(op, kernel, maybe_stats.release(), + output_dtypes, retvals, *num_retvals); + ctx->executor.Add(node); + } else { + // Execute checks if retvals[i] is nullptr or not to figure if it needs to + // allocate it. + for (int i = 0; i < *num_retvals; ++i) { + retvals[i] = nullptr; } + status->status = Execute(op->ctx, op->device, op->inputs, kernel, + maybe_stats.get(), retvals, *num_retvals); } - *num_retvals = std::min(*num_retvals, outputs.size()); - for (int i = 0; i < *num_retvals; ++i) { - tensorflow::Device* d = IsCPU(device) ? 
nullptr : device; - if (d != nullptr && output_memory_types != nullptr && - (*output_memory_types)[i] == tensorflow::HOST_MEMORY) { - d = nullptr; - } - retvals[i] = new TFE_TensorHandle(outputs[i], d, device); +} + +TFE_TensorHandle* TFE_TensorHandleCopyToDevice(TFE_TensorHandle* h, + TFE_Context* ctx, + const char* device_name, + TF_Status* status) { + status->status = ctx->executor.status(); + if (!status->status.ok()) { + return nullptr; + } + tensorflow::Device* dstd = ctx->devices[0]; + if (device_name != nullptr && strlen(device_name) > 0) { + status->status = ctx->device_manager->LookupDevice(device_name, &dstd); + if (!status->status.ok()) return nullptr; + } + if (ctx->Async()) { + // Note that `h` may not be currently ready. However execution order will + // make sure that `h` is ready before the copy is actually done. + CopyToDeviceNode* node = new CopyToDeviceNode(h, dstd, ctx); + ctx->executor.Add(node); + return node->dst(); + } else { + TFE_TensorHandle* output = nullptr; + status->status = TensorHandleCopyToDevice(h, ctx, dstd, &output); + return output; } } @@ -1004,6 +1242,16 @@ void TFE_ContextAddFunction(TFE_Context* ctx, TF_Function* function, status->status = ctx->func_lib_def.AddFunctionDef(function->fdef); } +void TFE_ContextEnableRunMetadata(TFE_Context* ctx) { + ctx->should_store_metadata.store(true); +} + +void TFE_ContextDisableRunMetadata(TFE_Context* ctx) { + tensorflow::mutex_lock ml(ctx->metadata_mu); + ctx->should_store_metadata.store(false); + ctx->run_metadata.Clear(); +} + } // extern "C" TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) { @@ -1012,27 +1260,24 @@ TFE_TensorHandle* TFE_NewTensorHandle(const tensorflow::Tensor& t) { const tensorflow::Tensor* TFE_TensorHandleUnderlyingTensorInHostMemory( TFE_TensorHandle* h, TF_Status* status) { - if (h->d != nullptr) { + tensorflow::Device* d = nullptr; + tensorflow::Device* op_device = nullptr; + const tensorflow::Tensor* t = nullptr; + status->status = 
h->TensorAndDevice(&t, &d, &op_device); + if (!status->status.ok()) return nullptr; + if (d != nullptr) { status->status = tensorflow::errors::FailedPrecondition( "TFE_TensorHandle is placed in device (not host) memory. Cannot return " "a tensorflow::Tensor"); return nullptr; } - return &h->t; -} - -void TFE_ContextEnableRunMetadata(TFE_Context* ctx) { - ctx->should_store_metadata.store(true); -} - -void TFE_ContextDisableRunMetadata(TFE_Context* ctx) { - tensorflow::mutex_lock ml(ctx->metadata_mu); - ctx->should_store_metadata.store(false); - ctx->run_metadata.Clear(); + return t; } void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status) { + TFE_ContextAsyncWait(ctx, status); + if (!status->status.ok()) return; tensorflow::mutex_lock ml(ctx->metadata_mu); status->status = MessageToBuffer(ctx->run_metadata, buf); ctx->run_metadata.Clear(); @@ -1108,3 +1353,208 @@ void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op, } } } // namespace tensorflow + +TFE_Node::TFE_Node(tensorflow::uint64 id) : id(id) {} + +TFE_Executor::~TFE_Executor() { + tensorflow::mutex_lock l(node_queue_mutex_); + thread_done_ = true; + nodes_pending_.notify_all(); +} + +tensorflow::uint64 TFE_Executor::NextId() { + tensorflow::mutex_lock l(next_id_mutex_); + return next_id_++; +} + +void TFE_Executor::EnableAsync() { + tensorflow::mutex_lock l(node_queue_mutex_); + if (thread_ == nullptr) { + thread_.reset(tensorflow::Env::Default()->StartThread( + tensorflow::ThreadOptions(), "eager_async_executor", + std::bind(&TFE_Executor::Run, this))); + } +} + +void TFE_Executor::Add(TFE_Node* node) { + tensorflow::mutex_lock l(node_queue_mutex_); + DCHECK(thread_) << "EnableAsync should have been called before Add"; + if (!status_.ok()) { + delete node; + return; + } + int qlen = node_queue_.size(); + if (qlen > 0) { + if (node_queue_.back()->id >= node->id) { + status_ = tensorflow::errors::InvalidArgument( + "Inserting TFE_Node with non-increasing ids:", 
node_queue_.back()->id, + " vs ", node->id); + delete node; + return; + } + node_queue_.push(node); + } else { + node_queue_.push(node); + nodes_pending_.notify_all(); + } +} + +tensorflow::Status TFE_Executor::WaitFor(tensorflow::uint64 node_id) { + return WaitImpl(false, node_id); +} + +tensorflow::Status TFE_Executor::WaitForAllPendingNodes() { + return WaitImpl(true, 0); +} + +tensorflow::Status TFE_Executor::WaitImpl(bool wait_all, + tensorflow::uint64 node_id) { + tensorflow::condition_variable cond; + tensorflow::mutex_lock l(node_queue_mutex_); + // Don't wait if an error is already set. + if (!status_.ok()) return status_; + if (node_queue_.empty()) return tensorflow::Status::OK(); + if (wait_all) { + node_id = node_queue_.back()->id; + } else if (node_id < node_queue_.front()->id) { + // Note that we are relying on the ops being dispatched sequentially from + // the queue. + return tensorflow::Status::OK(); + } + node_done_notifications_.insert(std::make_pair(node_id, &cond)); + cond.wait(l); + // Note that we could be woken up if an error occurs, even though the node has + // not actually executed. + return status_; +} + +void TFE_Executor::ClearError() { + tensorflow::mutex_lock l(node_queue_mutex_); + if (status_.ok()) return; + // If an error was set, node_done_notifications_ and node_queue_ should have + // been cleared, and no new entries should have been added since. 
+ DCHECK(node_done_notifications_.empty()); + DCHECK(node_queue_.empty()); + status_ = tensorflow::Status::OK(); + nodes_pending_.notify_all(); +} + +tensorflow::Status TFE_Executor::status() { + tensorflow::mutex_lock l(node_queue_mutex_); + return status_; +} + +void TFE_Executor::Run() { + while (true) { + std::unique_ptr curr_node; + { + tensorflow::mutex_lock l(node_queue_mutex_); + while (node_queue_.empty() || !status_.ok()) { + if (thread_done_) return; + nodes_pending_.wait(l); + } + curr_node.reset(node_queue_.front()); + } + tensorflow::Status status = curr_node->Run(); + const bool ok = status.ok(); + tensorflow::mutex_lock l(node_queue_mutex_); + node_queue_.pop(); + if (!ok) { + status_ = status; + // TODO(agarwal): mark all affected handles as corrupted before clearing + // this queue. + // We remove any pending ops so that we don't try to execute them if + // ClearError is called. + for (int i = 0; i < node_queue_.size(); ++i) { + delete node_queue_.front(); + node_queue_.pop(); + } + } + if (!node_done_notifications_.empty()) { + tensorflow::uint64 node_id = curr_node->id; + // Note that we notify all waiting threads in case an error has occurred. + // These calling threads are responsible for checking status_ before + // proceeding. + const auto range = ok ? 
node_done_notifications_.equal_range(node_id) + : make_pair(node_done_notifications_.begin(), + node_done_notifications_.end()); + for (auto it = range.first; it != range.second; ++it) { + it->second->notify_all(); + } + node_done_notifications_.erase(range.first, range.second); + } + } +} + +bool TFE_Context::Async() const { + tensorflow::mutex_lock l(async_map_mu); + return tensorflow::gtl::FindWithDefault( + thread_local_async, std::this_thread::get_id(), async_default); +} + +bool TFE_TensorHandle::IsReady() { + if (node_id == 0) return true; + tensorflow::mutex_lock l(ctx_mutex_); + return ctx_ == nullptr; +} + +tensorflow::Status TFE_TensorHandle::WaitReady() { + if (node_id == 0) return tensorflow::Status::OK(); + TFE_Executor* executor = nullptr; + { + tensorflow::mutex_lock l(ctx_mutex_); + if (ctx_ == nullptr) return tensorflow::Status::OK(); + executor = &ctx_->executor; + } + return executor->WaitFor(node_id); +} + +tensorflow::Status TFE_TensorHandle::Tensor(const tensorflow::Tensor** t) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *t = &tensor_; + return tensorflow::Status::OK(); +} + +tensorflow::Status TFE_TensorHandle::Device(tensorflow::Device** d) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *d = device_; + return tensorflow::Status::OK(); +} + +tensorflow::Status TFE_TensorHandle::OpDevice(tensorflow::Device** d) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *d = op_device_; + return tensorflow::Status::OK(); +} + +tensorflow::Status TFE_TensorHandle::TensorAndDevice( + const tensorflow::Tensor** tensor, tensorflow::Device** device, + tensorflow::Device** op_device) { + TF_RETURN_IF_ERROR(WaitReady()); + DCHECK(IsReady()); + *tensor = &tensor_; + *device = device_; + *op_device = op_device_; + return tensorflow::Status::OK(); +} + +void TFE_TensorHandle::SetTensorAndDevice(const tensorflow::Tensor& tensor, + tensorflow::Device* device, + tensorflow::Device* op_device) { + tensorflow::mutex_lock 
l(ctx_mutex_); + DCHECK(node_id > 0 && ctx_) << "SetTensorAndDevice should be only called " + << "on non-ready handles."; + ctx_ = nullptr; + tensor_ = tensor; + device_ = device; + op_device_ = op_device; +} + +TFE_Op::~TFE_Op() { + for (TFE_TensorHandle* h : inputs) { + h->Unref(); + } +} diff --git a/tensorflow/c/eager/c_api.h b/tensorflow/c/eager/c_api.h index 9610ca1b3b..316006bafb 100644 --- a/tensorflow/c/eager/c_api.h +++ b/tensorflow/c/eager/c_api.h @@ -75,6 +75,11 @@ typedef enum TFE_ContextDevicePlacementPolicy { TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32 = 3, } TFE_ContextDevicePlacementPolicy; +// Sets the default execution mode (sync/async). Note that this can be +// overridden per thread using TFE_ContextSetAsyncForThread. +TF_CAPI_EXPORT extern void TFE_ContextOptionsSetAsync(TFE_ContextOptions*, + unsigned char async); + TF_CAPI_EXPORT extern void TFE_ContextOptionsSetDevicePlacementPolicy( TFE_ContextOptions*, TFE_ContextDevicePlacementPolicy); @@ -110,6 +115,30 @@ TF_CAPI_EXPORT extern void TFE_ContextSetThreadLocalDevicePlacementPolicy( TF_CAPI_EXPORT extern TFE_ContextDevicePlacementPolicy TFE_ContextGetDevicePlacementPolicy(TFE_Context*); +// Overrides the execution mode (sync/async) for the current thread. +TF_CAPI_EXPORT extern void TFE_ContextSetAsyncForThread(TFE_Context*, + unsigned char async, + TF_Status* status); + +// Causes the calling thread to block till all ops dispatched in async mode +// have been executed. Note that "execution" here refers to kernel execution / +// scheduling of copies, etc. Similar to sync execution, it doesn't guarantee +// that lower level device queues (like GPU streams) have been flushed. +// +// This call may not block for execution of ops enqueued concurrently with this +// call. +TF_CAPI_EXPORT extern void TFE_ContextAsyncWait(TFE_Context*, + TF_Status* status); + +// When an error happens, any pending operations are discarded and newly issued +// ops return an error. 
This call clears the error state and re-enables +// execution of newly issued ops. +// +// Note that outputs of discarded ops remain in a corrupt state and should not +// be used for future calls. +// TODO(agarwal): mark the affected handles and raise errors if they are used. +TF_CAPI_EXPORT extern void TFE_ContextAsyncClearError(TFE_Context*); + // A handle to a tensor on a device. // // Like a TF_Tensor, a TFE_TensorHandle refers to a tensor with a value, shape, @@ -119,15 +148,21 @@ typedef struct TFE_TensorHandle TFE_TensorHandle; TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_NewTensorHandle(TF_Tensor* t, TF_Status* status); +// Indicates that the caller will not be using `h` any more. TF_CAPI_EXPORT extern void TFE_DeleteTensorHandle(TFE_TensorHandle* h); TF_CAPI_EXPORT extern TF_DataType TFE_TensorHandleDataType(TFE_TensorHandle* h); +// This function will block till the operation that produces `h` has completed. TF_CAPI_EXPORT extern int TFE_TensorHandleNumDims(TFE_TensorHandle* h, TF_Status* status); +// This function will block till the operation that produces `h` has completed. TF_CAPI_EXPORT extern int64_t TFE_TensorHandleDim(TFE_TensorHandle* h, int dim_index, TF_Status* status); +// This function will block till the operation that produces `h` has completed. TF_CAPI_EXPORT extern const char* TFE_TensorHandleDeviceName( TFE_TensorHandle* h, TF_Status* status); + +// This function will block till the operation that produces `h` has completed. TF_CAPI_EXPORT extern TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, TF_Status* status); @@ -137,6 +172,9 @@ TF_CAPI_EXPORT extern TF_Tensor* TFE_TensorHandleResolve(TFE_TensorHandle* h, // that shares the underlying buffer. Otherwise, it currently requires at least // one of the source or destination devices to be CPU (i.e., for the source or // destination tensor to be placed in host memory). +// If async execution is enabled, the copy may be enqueued and the call will +// return "non-ready" handle. 
Else, this function returns after the copy has +// been done. TF_CAPI_EXPORT extern TFE_TensorHandle* TFE_TensorHandleCopyToDevice( TFE_TensorHandle* h, TFE_Context* ctx, const char* device_name, TF_Status* status); @@ -157,6 +195,7 @@ typedef struct TFE_Op TFE_Op; TF_CAPI_EXPORT extern TFE_Op* TFE_NewOp(TFE_Context* ctx, const char* op_or_function_name, TF_Status* status); + TF_CAPI_EXPORT extern void TFE_DeleteOp(TFE_Op* op); TF_CAPI_EXPORT extern void TFE_OpSetDevice(TFE_Op* op, const char* device_name, @@ -242,13 +281,20 @@ TF_CAPI_EXPORT extern void TFE_OpSetAttrFunctionList(TFE_Op* op, int num_values); // Execute the operation defined by 'op' and return handles to computed -// tensors in 'retvals'. +// tensors in `retvals`. +// +// 'retvals' must point to a pre-allocated array of TFE_TensorHandle* and +// '*num_retvals' should be set to the size of this array. It is an error if +// the number of outputs is different from *num_retvals. // -// 'retvals' must point to a pre-allocated array of TFE_TensorHandle* -// and '*num_retvals' should be set to the size of this array. +// If async execution is enabled, the call may simply enqueue the execution +// and return "non-ready" handles in `retvals`. Note that any handles contained +// in 'op' should not be mutated till the kernel execution actually finishes. // -// On return, 'num_retvals' will be set to the actual number of outputs -// returned by the operation. +// For sync execution, if any of the inputs to `op` are not ready, this call +// will block till they become ready and then return when the kernel execution +// is done. +// TODO(agarwal): change num_retvals to int from int*. 
TF_CAPI_EXPORT extern void TFE_Execute(TFE_Op* op, TFE_TensorHandle** retvals, int* num_retvals, TF_Status* status); @@ -274,6 +320,8 @@ TF_CAPI_EXPORT extern void TFE_ContextDisableRunMetadata(TFE_Context* ctx); // Populates the passed-in buffer with a serialized RunMetadata protocol buffer // containing any run metadata information accumulated so far and clears this // information. +// If async mode is enabled, this call blocks till all currently pending ops are +// done. TF_CAPI_EXPORT extern void TFE_ContextExportRunMetadata(TFE_Context* ctx, TF_Buffer* buf, TF_Status* status); diff --git a/tensorflow/c/eager/c_api_internal.h b/tensorflow/c/eager/c_api_internal.h index 49b9434457..8dba12f47b 100644 --- a/tensorflow/c/eager/c_api_internal.h +++ b/tensorflow/c/eager/c_api_internal.h @@ -19,7 +19,9 @@ limitations under the License. #include #include +#include #include +#include #include #include #include @@ -31,14 +33,113 @@ limitations under the License. #include "tensorflow/core/common_runtime/function.h" #include "tensorflow/core/common_runtime/rendezvous_mgr.h" #include "tensorflow/core/framework/rendezvous.h" +#include "tensorflow/core/lib/gtl/inlined_vector.h" #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/thread_annotations.h" #include "tensorflow/core/public/version.h" +// A unit of execution for the TFE_Executor class below. Example subclasses +// encapsulate execution of a TFE_Op, or copying a TFE_TensorHandle from one +// device to another. +class TFE_Node { + public: + explicit TFE_Node(tensorflow::uint64 id); + + virtual ~TFE_Node() {} + + // Runs the computation corresponding to this node and blocks till the + // execution is done. + virtual tensorflow::Status Run() = 0; + + // An id unique to the TFE_Context under which this node is created. Allocated + // monotonically. 
+ const tensorflow::uint64 id; +}; + +// A class for handling async execution (see TFE_ContextSetAsync). +// Note that this class is thread-safe. +// TODO(agarwal): TFE_OpAddInput may currently block if it tries to access the +// device of the input handle. Fix that. +// TODO(agarwal): On error, mark all affected handles as corrupted. +// TODO(agarwal): Implement support for control dependencies. +// TODO(agarwal): Support out-of-order execution and dispatching multiple +// TFE_Node in parallel. +// TODO(agarwal): Implement optimizations over TFE_Node traces. +class TFE_Executor { + public: + ~TFE_Executor(); + + // This is called whenever async mode is enabled. Note that it may be called + // multiple times as different calling threads may switch async mode on or off + // independently. + void EnableAsync(); + + // Helper function to create monotonically increasing ids unique to this + // object. + tensorflow::uint64 NextId(); + + // Schedules `node` for execution. + // Note that Add must be called in monotonically increasing order of node->id. + void Add(TFE_Node* node); + + // Causes the caller to block till node with id `node_id` has finished + // execution. + tensorflow::Status WaitFor(tensorflow::uint64 node_id); + + // Blocks till all currently pending ops are done. + tensorflow::Status WaitForAllPendingNodes(); + + // Clears all currently set errors which re-enables async execution. + void ClearError(); + + // Returns Status based on any errors that occurred during async execution. + tensorflow::Status status(); + + private: + // Starts execution of pending TFE_Nodes. This function loops till + // thread_done_ is set to true. If any errors are encountered, these are set + // inside `status_`. The loop blocks anytime there are no pending nodes, or if + // `status_` is not ok. 
+ void Run(); + + tensorflow::Status WaitImpl(bool wait_all, tensorflow::uint64 node_id); + + tensorflow::mutex node_queue_mutex_; + + // Used to signal that some TFE_Nodes are pending execution. + tensorflow::condition_variable nodes_pending_ GUARDED_BY(node_queue_mutex_); + + // Queue of pending TFE_Nodes. + std::queue node_queue_ GUARDED_BY(node_queue_mutex_); + + // `status_` is set based on any errors raised during execution of a TFE_Node. + // It remains set until ClearError is called. + tensorflow::Status status_ GUARDED_BY(node_queue_mutex_); + + // Map from id of a TFE_Node to condition_variables (not owned by the map). + // These condition_variables are notified and removed when that TFE_Node is + // done executing, or if an error is found in execution of any TFE_Node. + std::multimap + node_done_notifications_ GUARDED_BY(node_queue_mutex_); + + // Thread object that calls the `Run` method. Currently we use only one thread + // for executing the TFE_Nodes one-by-one. + std::unique_ptr thread_ GUARDED_BY(node_queue_mutex_); + + // Indicates that `thread_` should stop as soon as it is done executing the + // current TFE_Node. + bool thread_done_ GUARDED_BY(node_queue_mutex_) = false; + + tensorflow::mutex next_id_mutex_; + tensorflow::uint64 next_id_ GUARDED_BY(next_id_mutex_) = 1; +}; + struct TFE_ContextOptions { TF_SessionOptions session_options; + // true if async execution is enabled. 
+ bool async = false; TFE_ContextDevicePlacementPolicy policy{ TFE_DEVICE_PLACEMENT_SILENT_FOR_INT32}; }; @@ -60,7 +161,10 @@ struct TFE_Context { device_manager.get(), opts.session_options.options.env, TF_GRAPH_DEF_VERSION, &func_lib_def, {})), log_device_placement( - opts.session_options.options.config.log_device_placement()) {} + opts.session_options.options.config.log_device_placement()), + async_default(opts.async) { + if (async_default) executor.EnableAsync(); + } const bool soft_placement; const TFE_ContextDevicePlacementPolicy policy; @@ -98,29 +202,99 @@ struct TFE_Context { std::atomic should_store_metadata{false}; tensorflow::mutex metadata_mu; tensorflow::RunMetadata run_metadata GUARDED_BY(metadata_mu); - const bool log_device_placement; + // TFE_Executor for async execution. + TFE_Executor executor; + + // True if running in asynchronous mode. + bool Async() const; + + // True if the default value for execution mode is async. Note that this value + // can be overridden per thread based on `thread_local_async` overrides. 
+ const bool async_default; + mutable tensorflow::mutex async_map_mu; + std::unordered_map thread_local_async + GUARDED_BY(async_map_mu); }; -struct TFE_TensorHandle { +struct TFE_TensorHandle : public tensorflow::core::RefCounted { + public: TFE_TensorHandle(const tensorflow::Tensor& t, tensorflow::Device* d, tensorflow::Device* op_device) - : t(t), d(d), op_device(op_device) {} + : dtype(t.dtype()), + node_id(0), + tensor_(t), + device_(d), + op_device_(op_device), + ctx_(nullptr) {} + + TFE_TensorHandle(tensorflow::uint64 node_id, tensorflow::DataType dtype, + TFE_Context* ctx) + : dtype(dtype), + node_id(node_id), + tensor_(dtype), + device_(nullptr), + op_device_(nullptr), + ctx_(ctx) { + DCHECK_GT(node_id, 0); + } + + ~TFE_TensorHandle() override {} + + tensorflow::Status Tensor(const tensorflow::Tensor** t); + + tensorflow::Status Device(tensorflow::Device** d); - tensorflow::Tensor t; - // TODO(ashankar): d == nullptr iff local CPU - // This was expedient, but perhaps worth revisiting ('d' should always be a - // valid pointer?) + tensorflow::Status OpDevice(tensorflow::Device** d); + + tensorflow::Status TensorAndDevice(const tensorflow::Tensor** tensor, + tensorflow::Device** device, + tensorflow::Device** op_device); + + // Note that this can be called at most once, and only on non-ready handles, + // and makes them ready. + void SetTensorAndDevice(const tensorflow::Tensor& tensor, + tensorflow::Device* device, + tensorflow::Device* op_device); + + // dtype for the handle. It must be the same as t.dtype() once the handle is + // ready. + const tensorflow::DataType dtype; + + private: + // If the contents of the Tensor pointed to by this handle is yet to be + // computed by a TFE_Node, this function will block till that computation is + // done and the handle is "ready". + tensorflow::Status WaitReady(); + + bool IsReady(); + + // Id for the TFE_Node that will compute the value pointed to by this handle. 
+ // If the value is 0, the handle is already ready, but not vice-versa. + const tensorflow::uint64 node_id; + + tensorflow::Tensor tensor_; + + // TODO(ashankar): device_ == nullptr iff local CPU + // This was expedient, but perhaps worth revisiting ('device_' should always + // be a valid pointer?) // This can be done if TFE_NewOp() and the TFE_TensorHandle constructors are // provided with the appropriate TFE_Context. // - // TODO(ashankar): Reference count TFE_Context to ensure that 'd' of a + // TODO(ashankar): Reference count TFE_Context to ensure that 'device_' of a // TFE_TensorHandle does not outlive the TFE_Context from which it came? - tensorflow::Device* d; + tensorflow::Device* device_; + + // Device in which the op producing this tensor was executed. Equals to + // device_ for constant tensors. + tensorflow::Device* op_device_; - // Device in which the op producing this tensor was executed. Equals to d for - // constant tensors. - tensorflow::Device* op_device; + tensorflow::mutex ctx_mutex_; + + // `ctx` is only guaranteed to be set if the handle is not "ready". This is + // typically true when the handle was produced during async execution. + // `ctx` object is not owned and should outlive this handle. + TFE_Context* ctx_ GUARDED_BY(ctx_mutex_); }; struct TFE_Op { @@ -129,15 +303,15 @@ struct TFE_Op { TFE_Op(TFE_Context* ctx, const char* op, const tensorflow::AttrTypeMap* t) : ctx(ctx), name(op), attrs(op), attr_types(t), device(nullptr) {} + ~TFE_Op(); + bool const is_function() const { return attr_types == nullptr; } TFE_Context* ctx; // Must outlive the TFE_Op. 
const tensorflow::string name; tensorflow::AttrBuilder attrs; const tensorflow::AttrTypeMap* attr_types; - std::vector inputs; - std::vector input_devices; - std::vector input_op_devices; + tensorflow::gtl::InlinedVector inputs; tensorflow::Device* device; bool use_xla = false; }; diff --git a/tensorflow/c/eager/c_api_test.cc b/tensorflow/c/eager/c_api_test.cc index 00fb7e68d0..927d119389 100644 --- a/tensorflow/c/eager/c_api_test.cc +++ b/tensorflow/c/eager/c_api_test.cc @@ -29,6 +29,20 @@ using tensorflow::string; namespace { +TFE_TensorHandle* DoubleTestMatrixTensorHandle() { + int64_t dims[] = {2, 2}; + double data[] = {1.0, 2.0, 3.0, 4.0}; + TF_Tensor* t = TF_AllocateTensor( + TF_DOUBLE, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + TFE_TensorHandle* TestMatrixTensorHandle() { int64_t dims[] = {2, 2}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f}; @@ -43,6 +57,20 @@ TFE_TensorHandle* TestMatrixTensorHandle() { return th; } +TFE_TensorHandle* TestMatrixTensorHandle3X2() { + int64_t dims[] = {3, 2}; + double data[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + TF_Tensor* t = TF_AllocateTensor( + TF_FLOAT, &dims[0], sizeof(dims) / sizeof(int64_t), sizeof(data)); + memcpy(TF_TensorData(t), &data[0], TF_TensorByteSize(t)); + TF_Status* status = TF_NewStatus(); + TFE_TensorHandle* th = TFE_NewTensorHandle(t, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TF_DeleteTensor(t); + TF_DeleteStatus(status); + return th; +} + TFE_Op* MatMulOp(TFE_Context* ctx, TFE_TensorHandle* a, TFE_TensorHandle* b) { TF_Status* status = TF_NewStatus(); @@ -139,10 +167,12 @@ void BM_InitOp(int iters) { } BENCHMARK(BM_InitOp); -void BM_Execute(int iters) { +void BM_Execute(int iters, int 
async) { tensorflow::testing::StopTiming(); + tensorflow::testing::SetLabel(async ? "ExecuteAsync" : "Execute"); TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); @@ -156,6 +186,9 @@ void BM_Execute(int iters) { TFE_Execute(matmul, &retvals[0], &num_retvals, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); } + if (async) { + TFE_ContextAsyncWait(ctx, status); + } tensorflow::testing::StopTiming(); TFE_DeleteOp(matmul); TFE_DeleteTensorHandle(m); @@ -163,7 +196,7 @@ void BM_Execute(int iters) { CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TF_DeleteStatus(status); } -BENCHMARK(BM_Execute); +BENCHMARK(BM_Execute)->Arg(0)->Arg(1); TEST(CAPI, Context) { TF_Status* status = TF_NewStatus(); @@ -205,10 +238,11 @@ TEST(CAPI, TensorHandle) { TFE_DeleteTensorHandle(h); } -TEST(CAPI, TensorHandleCopyBetweenDevices) { +void TensorHandleCopyBetweenDevices(bool async) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status.get()); TFE_DeleteContextOptions(opts); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); @@ -274,10 +308,56 @@ TEST(CAPI, TensorHandleCopyBetweenDevices) { EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); } -TEST(CAPI, TensorHandleCopyBetweenTwoGPUDevices) { +TEST(CAPI, TensorHandleCopyBetweenDevices) { + TensorHandleCopyBetweenDevices(false); +} + +TEST(CAPI, TensorHandleCopyBetweenDevicesAsync) { + TensorHandleCopyBetweenDevices(true); +} + +void TensorHandleCopyBetweenDevicesError(bool async) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = 
TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); + TFE_Context* ctx = TFE_NewContext(opts, status.get()); + TFE_DeleteContextOptions(opts); + ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TFE_TensorHandle* hcpu = TestMatrixTensorHandle(); + const char* kErrorDevice = "NoSuchDevice:0"; + TFE_TensorHandle* hdevice = + TFE_TensorHandleCopyToDevice(hcpu, ctx, kErrorDevice, status.get()); + EXPECT_NE(TF_OK, TF_GetCode(status.get())); + const char* msg = "NoSuchDevice:0 unknown device"; + EXPECT_TRUE(strstr(TF_Message(status.get()), msg) != nullptr) + << TF_Message(status.get()); + TF_SetStatus(status.get(), TF_OK, ""); + const char* kCPUDevice = "CPU:0"; + TFE_TensorHandle* hcopy = + TFE_TensorHandleCopyToDevice(hcpu, ctx, kCPUDevice, status.get()); + EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); + TFE_ContextAsyncWait(ctx, status.get()); + EXPECT_EQ(TF_OK, TF_GetCode(status.get())); + TFE_DeleteTensorHandle(hcopy); + TFE_DeleteTensorHandle(hcpu); + if (hdevice != nullptr) TFE_DeleteTensorHandle(hdevice); + TFE_DeleteContext(ctx, status.get()); +} + +TEST(CAPI, TensorHandleCopyBetweenDevicesError) { + TensorHandleCopyBetweenDevicesError(false); +} + +TEST(CAPI, TensorHandleCopyBetweenDevicesErrorAsync) { + TensorHandleCopyBetweenDevicesError(true); +} + +void TensorHandleCopyBetweenTwoGPUDevices(bool async) { + std::unique_ptr status( + TF_NewStatus(), TF_DeleteStatus); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status.get()); TFE_DeleteContextOptions(opts); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); @@ -332,11 +412,20 @@ TEST(CAPI, TensorHandleCopyBetweenTwoGPUDevices) { EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); } -TEST(CAPI, TensorHandleSilentCopy) { +TEST(CAPI, TensorHandleCopyBetweenTwoGPUDevices) { + 
TensorHandleCopyBetweenTwoGPUDevices(false); +} + +TEST(CAPI, TensorHandleCopyBetweenTwoGPUDevicesAsync) { + TensorHandleCopyBetweenTwoGPUDevices(true); +} + +void TensorHandleSilentCopy(bool async) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = TFE_NewContextOptions(); TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_SILENT); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status.get()); TFE_DeleteContextOptions(opts); ASSERT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); @@ -366,14 +455,20 @@ TEST(CAPI, TensorHandleSilentCopy) { TF_DeleteTensor(t); TFE_DeleteTensorHandle(hcpu); + TFE_ContextAsyncWait(ctx, status.get()); + EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TFE_DeleteContext(ctx, status.get()); EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); } -TEST(CAPI, TensorHandleSilentCopyLocal) { +TEST(CAPI, TensorHandleSilentCopy) { TensorHandleSilentCopy(false); } +TEST(CAPI, TensorHandleSilentCopyAsync) { TensorHandleSilentCopy(true); } + +void TensorHandleSilentCopyLocal(bool async) { std::unique_ptr status( TF_NewStatus(), TF_DeleteStatus); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_ContextOptionsSetDevicePlacementPolicy(opts, TFE_DEVICE_PLACEMENT_EXPLICIT); TFE_Context* ctx = TFE_NewContext(opts, status.get()); @@ -407,11 +502,17 @@ TEST(CAPI, TensorHandleSilentCopyLocal) { TF_DeleteTensor(t); TFE_DeleteTensorHandle(hcpu); + TFE_ContextAsyncWait(ctx, status.get()); + EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); TFE_DeleteContext(ctx, status.get()); EXPECT_EQ(TF_OK, TF_GetCode(status.get())) << TF_Message(status.get()); } +TEST(CAPI, TensorHandleSilentCopyLocal) { TensorHandleSilentCopyLocal(false); } +TEST(CAPI, TensorHandleSilentCopyLocalAsync) { + TensorHandleSilentCopyLocal(true); +} 
-TEST(CAPI, SetAndGetOpDevices) { +void SetAndGetOpDevices(bool async) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); TFE_Context* ctx = TFE_NewContext(opts, status); @@ -442,27 +543,27 @@ TEST(CAPI, SetAndGetOpDevices) { TF_DeleteStatus(status); } -TEST(CAPI, Execute_MatMul_CPU) { +void Execute_MatMul_CPU(bool async) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); TFE_TensorHandle* m = TestMatrixTensorHandle(); TFE_Op* matmul = MatMulOp(ctx, m, m); - TFE_TensorHandle* retvals[2] = {nullptr}; - int num_retvals = 2; // Should be reduced to 1 by the TFE_Execute call. + TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; TFE_Execute(matmul, &retvals[0], &num_retvals, status); EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteOp(matmul); TFE_DeleteTensorHandle(m); - TFE_DeleteContext(ctx, status); - ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); - ASSERT_EQ(1, num_retvals); TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); + ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteTensorHandle(retvals[0]); + TFE_DeleteContext(ctx, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); float product[4] = {0}; EXPECT_EQ(sizeof(product), TF_TensorByteSize(t)); @@ -474,7 +575,101 @@ TEST(CAPI, Execute_MatMul_CPU) { EXPECT_EQ(22, product[3]); TF_DeleteStatus(status); } +TEST(CAPI, Execute_MatMul_CPU) { Execute_MatMul_CPU(false); } +TEST(CAPI, Execute_MatMul_CPUAsync) { Execute_MatMul_CPU(true); } + +void Execute_MatMul_CPU_Runtime_Error(bool async) { + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); + TFE_Context* ctx = 
TFE_NewContext(opts, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_TensorHandle* m1 = TestMatrixTensorHandle(); + TFE_TensorHandle* m2 = TestMatrixTensorHandle3X2(); + TFE_Op* matmul = MatMulOp(ctx, m1, m2); + TFE_Op* matmul2 = MatMulOp(ctx, m1, m1); + TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; + TFE_Execute(matmul, &retvals[0], &num_retvals, status); + TFE_DeleteOp(matmul); + if (!async) { + EXPECT_NE(TF_OK, TF_GetCode(status)); + } else { + TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); + EXPECT_NE(TF_OK, TF_GetCode(status)); + EXPECT_EQ(nullptr, t); + const char* msg = "Matrix size-incompatible: In[0]: [2,2], In[1]: [3,2]"; + EXPECT_TRUE(strstr(TF_Message(status), msg) != nullptr) + << TF_Message(status); + // Since error is not cleared, the following copy with correct device will + // still fail. + TF_SetStatus(status, TF_OK, ""); + TFE_DeleteTensorHandle(retvals[0]); + retvals[0] = nullptr; + TFE_Execute(matmul2, &retvals[0], &num_retvals, status); + EXPECT_NE(TF_OK, TF_GetCode(status)); + TFE_ContextAsyncClearError(ctx); + TFE_ContextAsyncWait(ctx, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)); + } + // Following works in async mode since TFE_ContextAsyncClearError was called. 
+ TF_SetStatus(status, TF_OK, ""); + if (retvals[0] != nullptr) { + TFE_DeleteTensorHandle(retvals[0]); + } + retvals[0] = nullptr; + TFE_Execute(matmul2, &retvals[0], &num_retvals, status); + EXPECT_EQ(TF_OK, TF_GetCode(status)); + TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); + EXPECT_EQ(TF_OK, TF_GetCode(status)); + TF_DeleteTensor(t); + TFE_DeleteOp(matmul2); + TFE_DeleteTensorHandle(m1); + TFE_DeleteTensorHandle(m2); + TFE_DeleteTensorHandle(retvals[0]); + TFE_DeleteContext(ctx, status); + TF_DeleteStatus(status); +} +TEST(CAPI, Execute_MatMul_CPU_Runtime_Error) { + Execute_MatMul_CPU_Runtime_Error(false); +} +TEST(CAPI, Execute_MatMul_CPU_Runtime_ErrorAsync) { + Execute_MatMul_CPU_Runtime_Error(true); +} + +void Execute_MatMul_CPU_Type_Error(bool async) { + TF_Status* status = TF_NewStatus(); + TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); + TFE_Context* ctx = TFE_NewContext(opts, status); + CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); + TFE_DeleteContextOptions(opts); + + TFE_TensorHandle* m1 = TestMatrixTensorHandle(); + TFE_TensorHandle* m2 = DoubleTestMatrixTensorHandle(); + TFE_Op* matmul = MatMulOp(ctx, m1, m2); + TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; + TFE_Execute(matmul, &retvals[0], &num_retvals, status); + EXPECT_NE(TF_OK, TF_GetCode(status)); + TFE_DeleteOp(matmul); + TFE_DeleteTensorHandle(m1); + TFE_DeleteTensorHandle(m2); + if (retvals[0] != nullptr) { + TFE_DeleteTensorHandle(retvals[0]); + } + TFE_DeleteContext(ctx, status); + TF_DeleteStatus(status); +} +TEST(CAPI, Execute_MatMul_CPU_Type_Error) { + Execute_MatMul_CPU_Type_Error(false); +} +TEST(CAPI, Execute_MatMul_CPU_Type_ErrorAsync) { + Execute_MatMul_CPU_Type_Error(true); +} TEST(CAPI, Execute_Min_CPU) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); @@ -485,8 +680,8 @@ TEST(CAPI, Execute_Min_CPU) { TFE_TensorHandle* input = 
TestMatrixTensorHandle(); TFE_TensorHandle* axis = TestAxisTensorHandle(); TFE_Op* minOp = MinOp(ctx, input, axis); - TFE_TensorHandle* retvals[2] = {nullptr}; - int num_retvals = 2; // Should be reduced to 1 by the TFE_Execute call. + TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; TFE_Execute(minOp, &retvals[0], &num_retvals, status); EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteOp(minOp); @@ -509,9 +704,10 @@ TEST(CAPI, Execute_Min_CPU) { } #ifdef TENSORFLOW_EAGER_USE_XLA -TEST(CAPI, Execute_MatMul_XLA_CPU) { +void Execute_MatMul_XLA_CPU(bool async) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); @@ -521,15 +717,14 @@ TEST(CAPI, Execute_MatMul_XLA_CPU) { TFE_OpSetXLACompilation(matmul, true); - TFE_TensorHandle* retvals[2] = {nullptr}; - int num_retvals = 2; // Should be reduced to 1 by the TFE_Execute call. + TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; TFE_Execute(matmul, &retvals[0], &num_retvals, status); // Running a primitive TF operator via XLA is not yet supported. 
ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteOp(matmul); TFE_DeleteTensorHandle(m); - TFE_DeleteContext(ctx, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); EXPECT_EQ(1, num_retvals); @@ -545,13 +740,16 @@ TEST(CAPI, Execute_MatMul_XLA_CPU) { EXPECT_EQ(10, product[1]); EXPECT_EQ(15, product[2]); EXPECT_EQ(22, product[3]); - + TFE_DeleteContext(ctx, status); TF_DeleteStatus(status); } +TEST(CAPI, Execute_MatMul_XLA_CPU) { Execute_MatMul_XLA_CPU(false); } +TEST(CAPI, Execute_MatMul_XLA_CPUAsync) { Execute_MatMul_XLA_CPU(true); } -TEST(CAPI, Execute_Min_XLA_CPU) { +void Execute_Min_XLA_CPU(bool async) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); @@ -562,14 +760,13 @@ TEST(CAPI, Execute_Min_XLA_CPU) { TFE_OpSetXLACompilation(minOp, true); - TFE_TensorHandle* retvals[2] = {nullptr}; - int num_retvals = 2; // Should be reduced to 1 by the TFE_Execute call. 
+ TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; TFE_Execute(minOp, &retvals[0], &num_retvals, status); EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteOp(minOp); TFE_DeleteTensorHandle(input); TFE_DeleteTensorHandle(axis); - TFE_DeleteContext(ctx, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); ASSERT_EQ(1, num_retvals); @@ -582,13 +779,17 @@ TEST(CAPI, Execute_Min_XLA_CPU) { TF_DeleteTensor(t); EXPECT_EQ(1, output[0]); EXPECT_EQ(3, output[1]); + TFE_DeleteContext(ctx, status); TF_DeleteStatus(status); } +TEST(CAPI, Execute_Min_XLA_CPU) { Execute_Min_XLA_CPU(false); } +TEST(CAPI, Execute_Min_XLA_CPUAsync) { Execute_Min_XLA_CPU(true); } #endif // TENSORFLOW_EAGER_USE_XLA -TEST(CAPI, ExecuteWithTracing) { +void ExecuteWithTracing(bool async) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); TFE_ContextEnableRunMetadata(ctx); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); @@ -596,8 +797,8 @@ TEST(CAPI, ExecuteWithTracing) { TFE_TensorHandle* m = TestMatrixTensorHandle(); TFE_Op* matmul = MatMulOp(ctx, m, m); - TFE_TensorHandle* retvals[2] = {nullptr}; - int num_retvals = 2; // Should be reduced to 1 by the TFE_Execute call. 
+ TFE_TensorHandle* retvals[1] = {nullptr}; + int num_retvals = 1; TFE_Execute(matmul, &retvals[0], &num_retvals, status); EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteOp(matmul); @@ -609,12 +810,12 @@ TEST(CAPI, ExecuteWithTracing) { EXPECT_TRUE( rm.ParseFromString({reinterpret_cast(b->data), b->length})); TF_DeleteBuffer(b); - TFE_DeleteContext(ctx, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); ASSERT_EQ(1, num_retvals); TF_Tensor* t = TFE_TensorHandleResolve(retvals[0], status); TFE_DeleteTensorHandle(retvals[0]); + TFE_DeleteContext(ctx, status); ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); float product[4] = {0}; EXPECT_EQ(sizeof(product), TF_TensorByteSize(t)); @@ -626,6 +827,8 @@ TEST(CAPI, ExecuteWithTracing) { EXPECT_EQ(22, product[3]); TF_DeleteStatus(status); } +TEST(CAPI, ExecuteWithTracing) { ExecuteWithTracing(false); } +TEST(CAPI, ExecuteWithTracingAsync) { ExecuteWithTracing(true); } TEST(CAPI, Function_ident_CPU) { // First create a simple identity function. 
@@ -657,32 +860,37 @@ TEST(CAPI, Function_ident_CPU) { ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); TF_DeleteFunction(fn); - TF_Tensor* t = - TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32)); - *reinterpret_cast(TF_TensorData(t)) = 42; - TFE_TensorHandle* h = TFE_NewTensorHandle(t, status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - TF_DeleteTensor(t); + for (bool async : {false, true, false}) { + TFE_ContextSetAsyncForThread(ctx, static_cast(async), + status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK); + TF_Tensor* t = + TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32)); + *reinterpret_cast(TF_TensorData(t)) = 42; + TFE_TensorHandle* h = TFE_NewTensorHandle(t, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_DeleteTensor(t); - TFE_Op* op = TFE_NewOp(ctx, "ident", status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - TFE_OpAddInput(op, h, status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TFE_Op* op = TFE_NewOp(ctx, "ident", status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TFE_OpAddInput(op, h, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - std::vector result; - result.push_back(nullptr); - int num_retvals = 1; - TFE_Execute(op, result.data(), &num_retvals, status); - TFE_DeleteOp(op); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - ASSERT_EQ(num_retvals, 1); + std::vector result; + result.push_back(nullptr); + int num_retvals = 1; + TFE_Execute(op, result.data(), &num_retvals, status); + TFE_DeleteOp(op); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + ASSERT_EQ(num_retvals, 1); - TF_Tensor* r = TFE_TensorHandleResolve(result[0], status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - EXPECT_EQ(*reinterpret_cast(TF_TensorData(r)), 42); - TFE_DeleteTensorHandle(h); - 
TF_DeleteTensor(r); - TFE_DeleteTensorHandle(result[0]); + TF_Tensor* r = TFE_TensorHandleResolve(result[0], status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + EXPECT_EQ(*reinterpret_cast(TF_TensorData(r)), 42); + TFE_DeleteTensorHandle(h); + TF_DeleteTensor(r); + TFE_DeleteTensorHandle(result[0]); + } TFE_DeleteContext(ctx, status); ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); TF_DeleteStatus(status); @@ -719,35 +927,40 @@ TEST(CAPI, Function_ident_XLA_CPU) { ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); TF_DeleteFunction(fn); - TF_Tensor* t = - TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32)); - *reinterpret_cast(TF_TensorData(t)) = 42; - TFE_TensorHandle* h = TFE_NewTensorHandle(t, status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - TF_DeleteTensor(t); + for (bool async : {false, true, false}) { + TFE_ContextSetAsyncForThread(ctx, static_cast(async), + status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK); + TF_Tensor* t = + TF_AllocateTensor(TF_INT32, nullptr, 0, 1 * sizeof(tensorflow::int32)); + *reinterpret_cast(TF_TensorData(t)) = 42; + TFE_TensorHandle* h = TFE_NewTensorHandle(t, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TF_DeleteTensor(t); - TFE_Op* op = TFE_NewOp(ctx, "ident", status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - TFE_OpAddInput(op, h, status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TFE_Op* op = TFE_NewOp(ctx, "ident", status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + TFE_OpAddInput(op, h, status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - // Now run it via XLA. - TFE_OpSetXLACompilation(op, true); + // Now run it via XLA. 
+ TFE_OpSetXLACompilation(op, true); - std::vector result; - result.push_back(nullptr); - int num_retvals = 1; - TFE_Execute(op, result.data(), &num_retvals, status); - TFE_DeleteOp(op); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - ASSERT_EQ(num_retvals, 1); + std::vector result; + result.push_back(nullptr); + int num_retvals = 1; + TFE_Execute(op, result.data(), &num_retvals, status); + TFE_DeleteOp(op); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + ASSERT_EQ(num_retvals, 1); - TF_Tensor* r = TFE_TensorHandleResolve(result[0], status); - ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); - EXPECT_EQ(*reinterpret_cast(TF_TensorData(r)), 42); - TFE_DeleteTensorHandle(h); - TF_DeleteTensor(r); - TFE_DeleteTensorHandle(result[0]); + TF_Tensor* r = TFE_TensorHandleResolve(result[0], status); + ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); + EXPECT_EQ(*reinterpret_cast(TF_TensorData(r)), 42); + TFE_DeleteTensorHandle(h); + TF_DeleteTensor(r); + TFE_DeleteTensorHandle(result[0]); + } TFE_DeleteContext(ctx, status); ASSERT_TRUE(TF_GetCode(status) == TF_OK) << TF_Message(status); TF_DeleteStatus(status); @@ -788,9 +1001,10 @@ string MatMulFunction() { return def.SerializeAsString(); } -TEST(CAPI, FunctionDefAndExecute) { +void FunctionDefAndExecute(bool async) { TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); @@ -827,11 +1041,16 @@ TEST(CAPI, FunctionDefAndExecute) { EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TF_DeleteStatus(status); } +TEST(CAPI, FunctionDefAndExecute) { FunctionDefAndExecute(false); } +TEST(CAPI, FunctionDefAndExecuteAsync) { FunctionDefAndExecute(true); } -void BM_ExecuteFunction(int iters) { +void BM_ExecuteFunction(int iters, 
int async) { tensorflow::testing::StopTiming(); + tensorflow::testing::SetLabel(async ? "ExecuteFunctionAsync" + : "ExecuteFunction"); TF_Status* status = TF_NewStatus(); TFE_ContextOptions* opts = TFE_NewContextOptions(); + TFE_ContextOptionsSetAsync(opts, static_cast(async)); TFE_Context* ctx = TFE_NewContext(opts, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TFE_DeleteContextOptions(opts); @@ -853,6 +1072,9 @@ void BM_ExecuteFunction(int iters) { TFE_Execute(matmul, &retval[0], &num_retvals, status); CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); } + if (async) { + TFE_ContextAsyncWait(ctx, status); + } tensorflow::testing::StopTiming(); TFE_DeleteTensorHandle(m); TFE_DeleteTensorHandle(retval[0]); @@ -860,7 +1082,7 @@ void BM_ExecuteFunction(int iters) { EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status); TF_DeleteStatus(status); } -BENCHMARK(BM_ExecuteFunction); +BENCHMARK(BM_ExecuteFunction)->Arg(0)->Arg(1); TFE_TensorHandle* CreateVariable(TFE_Context* ctx, float value, TF_Status* status) { diff --git a/tensorflow/c/eager/runtime.h b/tensorflow/c/eager/runtime.h index 985ed96735..ad16f65495 100644 --- a/tensorflow/c/eager/runtime.h +++ b/tensorflow/c/eager/runtime.h @@ -185,7 +185,8 @@ class KernelAndDevice { Device* device() const { return device_; } - DataTypeVector* output_dtypes() { return &output_dtypes_; } + DataTypeVector* mutable_output_dtypes() { return &output_dtypes_; } + const DataTypeVector& output_dtypes() { return output_dtypes_; } private: std::unique_ptr kernel_; diff --git a/tensorflow/python/eager/core_test.py b/tensorflow/python/eager/core_test.py index d504ca0b05..012c68f68e 100644 --- a/tensorflow/python/eager/core_test.py +++ b/tensorflow/python/eager/core_test.py @@ -250,13 +250,23 @@ class TFETest(test_util.TensorFlowTestCase): def testExecuteTooManyNumOutputs(self): # num_outputs provided is 50, but only one output is produced. - # That should be okay. 
- product = execute( - b'Mul', - num_outputs=50, - inputs=[constant_op.constant(3), constant_op.constant(5)], - attrs=('T', dtypes.int32.as_datatype_enum))[0] - self.assertAllEqual(15, product) + with self.assertRaises(errors.InvalidArgumentError): + _ = execute( + b'Mul', + num_outputs=50, + inputs=[constant_op.constant(3), + constant_op.constant(5)], + attrs=('T', dtypes.int32.as_datatype_enum))[0] + + def testExecuteTooFewNumOutputs(self): + # num_outputs provided is 0, but one output is produced. + with self.assertRaises(errors.InvalidArgumentError): + _ = execute( + b'Mul', + num_outputs=0, + inputs=[constant_op.constant(3), + constant_op.constant(5)], + attrs=('T', dtypes.int32.as_datatype_enum))[0] def testMatMulGPU(self): if not context.context().num_gpus(): diff --git a/tensorflow/python/eager/pywrap_tensor.cc b/tensorflow/python/eager/pywrap_tensor.cc index 8338bc4343..105c09e81f 100644 --- a/tensorflow/python/eager/pywrap_tensor.cc +++ b/tensorflow/python/eager/pywrap_tensor.cc @@ -340,8 +340,10 @@ void EagerTensor_dealloc(EagerTensor* self) { Py_DECREF(self->handle_data); Py_DECREF(self->keras_mask); Py_DECREF(self->tensor_shape); - TFE_DeleteTensorHandle(self->handle); - self->handle = nullptr; + if (self->handle != nullptr) { + TFE_DeleteTensorHandle(self->handle); + self->handle = nullptr; + } // We have the global interpreter lock, so use this chance to perform delayed // refcount decrements.
tensorflow::ClearDecrefCache(); diff --git a/tensorflow/python/eager/pywrap_tfe_src.cc b/tensorflow/python/eager/pywrap_tfe_src.cc index fcb0452a14..fe9785dc66 100644 --- a/tensorflow/python/eager/pywrap_tfe_src.cc +++ b/tensorflow/python/eager/pywrap_tfe_src.cc @@ -1012,7 +1012,14 @@ static tensorflow::eager::TapeTensor TapeTensorFromTensor(PyObject* tensor) { if (EagerTensor_CheckExact(tensor)) { TFE_TensorHandle* t = EagerTensor_Handle(tensor); tensorflow::int64 id = EagerTensor_id(tensor); - return tensorflow::eager::TapeTensor{id, t->t.dtype(), t->t.shape()}; + const tensorflow::Tensor* tensor = nullptr; + const tensorflow::Status status = t->Tensor(&tensor); + if (MaybeRaiseExceptionFromStatus(status, nullptr)) { + return tensorflow::eager::TapeTensor{id, t->dtype, + tensorflow::TensorShape({})}; + } else { + return tensorflow::eager::TapeTensor{id, t->dtype, tensor->shape()}; + } } tensorflow::int64 id = FastTensorId(tensor); if (PyErr_Occurred()) { diff --git a/tensorflow/python/lib/core/py_func.cc b/tensorflow/python/lib/core/py_func.cc index 343415b264..02eafd42b3 100644 --- a/tensorflow/python/lib/core/py_func.cc +++ b/tensorflow/python/lib/core/py_func.cc @@ -164,9 +164,9 @@ bool IsSingleNone(PyObject* obj) { } // Retrieves a Tensor from `eager_tensor` and stores it in `output_tensor`. -void ExtractTensorFromEagerTensor(const PyObject* eager_tensor, - Tensor* output_tensor) { - *output_tensor = EagerTensor_Handle(eager_tensor)->t; +tensorflow::Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor, + const Tensor** output_tensor) { + return EagerTensor_Handle(eager_tensor)->Tensor(output_tensor); } // Calls the registered py function through the trampoline. 
@@ -220,7 +220,9 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { if (call->eager) { const PyObject* item = PyList_GetItem(result, i); if (EagerTensor_CheckExact(item)) { - ExtractTensorFromEagerTensor(item, &t); + const Tensor* tensor = nullptr; + s = ExtractTensorFromEagerTensor(item, &tensor); + if (s.ok()) t = *tensor; } else { s = errors::FailedPrecondition( "Expected EagerTensor, found PyObject of type: ", @@ -238,10 +240,10 @@ Status DoCallPyFunc(PyCall* call, bool* out_log_on_error) { } else if (EagerTensor_CheckExact(result) || result == Py_None) { // result is an `EagerTensor` or `None`. DCHECK(call->eager); - Tensor t; if (result != Py_None) { - ExtractTensorFromEagerTensor(result, &t); - call->out.push_back(t); + const Tensor* t = nullptr; + s = ExtractTensorFromEagerTensor(result, &t); + if (s.ok()) call->out.push_back(*t); } } else if (PyArray_Check(result)) { // `result` is a NumPy array. -- GitLab From bae670486f2cf87983476067103a019bbdf86333 Mon Sep 17 00:00:00 2001 From: "Joshua V. Dillon" Date: Mon, 12 Mar 2018 12:58:49 -0700 Subject: [PATCH 747/884] Add custom_gradient function. 
PiperOrigin-RevId: 188765271 --- tensorflow/contrib/bayesflow/BUILD | 20 --- tensorflow/contrib/bayesflow/__init__.py | 2 - .../python/kernel_tests/custom_grad_test.py | 157 ------------------ .../bayesflow/python/ops/custom_grad.py | 34 ---- .../bayesflow/python/ops/custom_grad_impl.py | 138 --------------- 5 files changed, 351 deletions(-) delete mode 100644 tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/custom_grad.py delete mode 100644 tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py diff --git a/tensorflow/contrib/bayesflow/BUILD b/tensorflow/contrib/bayesflow/BUILD index 88956f0512..c6feec68e0 100644 --- a/tensorflow/contrib/bayesflow/BUILD +++ b/tensorflow/contrib/bayesflow/BUILD @@ -56,26 +56,6 @@ cuda_py_test( ], ) -cuda_py_test( - name = "custom_grad_test", - size = "small", - srcs = ["python/kernel_tests/custom_grad_test.py"], - additional_deps = [ - ":bayesflow_py", - "//third_party/py/numpy", - "//tensorflow/contrib/layers:layers_py", - "//tensorflow/python:array_ops", - "//tensorflow/python:client_testlib", - "//tensorflow/python:framework_for_generated_wrappers", - "//tensorflow/python:framework_test_lib", - "//tensorflow/python:gradients", - "//tensorflow/python:init_ops", - "//tensorflow/python:platform_test", - "//tensorflow/python:variable_scope", - "//tensorflow/python:variables", - ], -) - cuda_py_test( name = "monte_carlo_test", size = "small", diff --git a/tensorflow/contrib/bayesflow/__init__.py b/tensorflow/contrib/bayesflow/__init__.py index 89dfa583a4..f868203826 100644 --- a/tensorflow/contrib/bayesflow/__init__.py +++ b/tensorflow/contrib/bayesflow/__init__.py @@ -21,7 +21,6 @@ from __future__ import division from __future__ import print_function # pylint: disable=unused-import,line-too-long -from tensorflow.contrib.bayesflow.python.ops import custom_grad from tensorflow.contrib.bayesflow.python.ops import hmc from 
tensorflow.contrib.bayesflow.python.ops import metropolis_hastings from tensorflow.contrib.bayesflow.python.ops import monte_carlo @@ -31,7 +30,6 @@ from tensorflow.python.util.all_util import remove_undocumented _allowed_symbols = [ - 'custom_grad', 'entropy', 'hmc', 'metropolis_hastings', diff --git a/tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py b/tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py deleted file mode 100644 index 1250765d09..0000000000 --- a/tensorflow/contrib/bayesflow/python/kernel_tests/custom_grad_test.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Tests for Custom Gradient Ops.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from tensorflow.contrib.bayesflow.python.ops import custom_grad_impl -from tensorflow.python.framework import constant_op -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import gradients_impl -from tensorflow.python.ops import init_ops -from tensorflow.python.ops import math_ops -from tensorflow.python.ops import variable_scope -from tensorflow.python.ops import variables -from tensorflow.python.platform import test - - -cg = custom_grad_impl - - -class CustomGradientTest(test.TestCase): - - def test_works_correctly(self): - with self.test_session() as sess: - f = lambda x: x**2 / 2 - g = lambda x: (x - 1)**3 / 3 - x_ = np.linspace(-100, 100, int(1e4)) + [0.] - - x = constant_op.constant(x_) - fx = cg.custom_gradient(f(x), g(x), x) - gx = gradients_impl.gradients(fx, x)[0] - [fx_, gx_] = sess.run([fx, gx]) - - self.assertAllClose(f(x_), fx_) - self.assertAllClose(g(x_), gx_) - - def test_works_correctly_both_f_g_zero(self): - with self.test_session() as sess: - f = lambda x: x**2 / 2 - g = lambda x: x**3 / 3 - x_ = np.linspace(-100, 100, int(1e4)) + [0.] 
- - x = constant_op.constant(x_) - fx = cg.custom_gradient(f(x), g(x), x) - gx = gradients_impl.gradients(fx, x)[0] - [fx_, gx_] = sess.run([fx, gx]) - - self.assertAllClose(f(x_), fx_) - self.assertAllClose(g(x_), gx_) - - def test_works_correctly_vector_of_vars(self): - with self.test_session() as sess: - x = variable_scope.get_variable( - name="x", - shape=[], - dtype=dtypes.float32, - initializer=init_ops.constant_initializer(2)) - y = variable_scope.get_variable( - name="y", - shape=[], - dtype=dtypes.float32, - initializer=init_ops.constant_initializer(3)) - sess.run([variables.global_variables_initializer()]) - - f = lambda z: z[0] * z[1] - g = lambda z: z[0]**2 * z[1]**2 / 2 - - z = array_ops.stack([x, y]) - fz = cg.custom_gradient(f(z), g(z), z) - gz = gradients_impl.gradients(fz, variables.trainable_variables()) - [z_, fz_, gx_, gy_] = sess.run([z, fz, gz[0], gz[1]]) - - self.assertEqual(f(z_), fz_) - self.assertEqual(g(z_), gx_) - self.assertEqual(g(z_), gy_) - - def test_works_correctly_side_vars(self): - with self.test_session() as sess: - x_ = np.float32(2.1) # Adding extra tenth to force imprecision. - y_ = np.float32(3.1) - x = variable_scope.get_variable( - name="x", - shape=[], - dtype=dtypes.float32, - initializer=init_ops.constant_initializer(x_)) - y = variable_scope.get_variable( - name="y", - shape=[], - dtype=dtypes.float32, - initializer=init_ops.constant_initializer(y_)) - sess.run([variables.global_variables_initializer()]) - - f = lambda x: x * y - g = lambda z: math_ops.square(x) * y - - fx = cg.custom_gradient(f(x), g(x), x) - gx = gradients_impl.gradients(fx, variables.trainable_variables()) - [x_, fx_, gx_] = sess.run([x, fx, gx[0]]) - gy_ = gx[1] - - self.assertEqual(x_ * y_, fx_) - self.assertEqual(np.square(x_) * y_, gx_) - self.assertEqual(None, gy_) - - def test_works_correctly_fx_gx_manually_stopped(self): - with self.test_session() as sess: - x_ = np.float32(2.1) # Adding extra tenth to force imprecision. 
- y_ = np.float32(3.1) - x = variable_scope.get_variable( - name="x", - shape=[], - dtype=dtypes.float32, - initializer=init_ops.constant_initializer(x_)) - y = variable_scope.get_variable( - name="y", - shape=[], - dtype=dtypes.float32, - initializer=init_ops.constant_initializer(y_)) - sess.run([variables.global_variables_initializer()]) - - stop = array_ops.stop_gradient # For readability. - - # Basically we need to stop the `x` portion of `f`. And when we supply the - # arg to `custom_gradient` we need to stop the complement, i.e., the `y` - # part. - f = lambda x: stop(x) * y - g = lambda x: stop(math_ops.square(x)) * y - fx = cg.custom_gradient(f(x), g(x), x + stop(y), - fx_gx_manually_stopped=True) - - gx = gradients_impl.gradients(fx, variables.trainable_variables()) - [x_, fx_, gx_, gy_] = sess.run([x, fx, gx[0], gx[1]]) - - self.assertEqual(x_ * y_, fx_) - self.assertEqual(np.square(x_) * y_, gx_) - self.assertEqual(x_, gy_) - - -if __name__ == "__main__": - test.main() diff --git a/tensorflow/contrib/bayesflow/python/ops/custom_grad.py b/tensorflow/contrib/bayesflow/python/ops/custom_grad.py deleted file mode 100644 index c8218c57cc..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/custom_grad.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Functions for specifying custom gradients. - -See @{tf.contrib.bayesflow.custom_grad.custom_gradient}. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -# go/tf-wildcard-import -# pylint: disable=wildcard-import -from tensorflow.contrib.bayesflow.python.ops.custom_grad_impl import * -# pylint: enable=wildcard-import -from tensorflow.python.util.all_util import remove_undocumented - -_allowed_symbols = [ - 'custom_gradient', -] - -remove_undocumented(__name__, _allowed_symbols) diff --git a/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py b/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py deleted file mode 100644 index 927cc28f67..0000000000 --- a/tensorflow/contrib/bayesflow/python/ops/custom_grad_impl.py +++ /dev/null @@ -1,138 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Functions for specifying custom gradients. 
- -@@custom_gradient - -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from tensorflow.python.framework import ops -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import check_ops -from tensorflow.python.ops import math_ops - -__all__ = [ - 'custom_gradient', -] - - -def is_list_like(x): - return isinstance(x, (tuple, list)) - - -def identity(x, dtype=None, name=None): - return array_ops.identity(ops.convert_to_tensor( - x, dtype=dtype, name=name), name=name) - - -def custom_gradient(fx, gx, x, fx_gx_manually_stopped=False, name=None): - """Embeds a custom gradient into a `Tensor`. - - This function works by clever application of `stop_gradient`. I.e., observe - that: - - ```none - h(x) = stop_gradient(f(x)) + stop_gradient(g(x)) * (x - stop_gradient(x)) - ``` - - is such that `h(x) == stop_gradient(f(x))` and - `grad[h(x), x] == stop_gradient(g(x)).` - - In addition to scalar-domain/scalar-range functions, this function also - supports tensor-domain/scalar-range functions. - - Partial Custom Gradient: - - Suppose `h(x) = htilde(x, y)`. Note that `dh/dx = stop(g(x))` but `dh/dy = - None`. This is because a `Tensor` cannot have only a portion of its gradient - stopped. To circumvent this issue, one must manually `stop_gradient` the - relevant portions of `f`, `g`. For example see the unit-test, - `test_works_correctly_fx_gx_manually_stopped`. - - Args: - fx: `Tensor`. Output of function evaluated at `x`. - gx: `Tensor` or list of `Tensor`s. Gradient of function at (each) `x`. - x: `Tensor` or list of `Tensor`s. Args of evaluation for `f`. - fx_gx_manually_stopped: Python `bool` indicating that `fx`, `gx` manually - have `stop_gradient` applied. - name: Python `str` name prefixed to Ops created by this function. - - Returns: - fx: Floating-type `Tensor` equal to `f(x)` but which has gradient - `stop_gradient(g(x))`. 
- """ - def maybe_stop(x): - if fx_gx_manually_stopped: - return x - return array_ops.stop_gradient(x) - with ops.name_scope(name, 'custom_gradient', [fx, gx, x]): - fx = ops.convert_to_tensor(fx, name='fx') - # We don't want to bother eagerly computing `gx` since we may not even need - # it. - with ops.control_dependencies([fx]): - if is_list_like(x): - x = [identity(x_, name='x') for x_ in x] - else: - x = [identity(x, name='x')] - - if is_list_like(gx): - gx = [identity(gx_, dtype=fx.dtype, name='gx') - for gx_ in gx] - else: - gx = [identity(gx, dtype=fx.dtype, name='gx')] - - override_grad = [] - for x_, gx_ in zip(x, gx): - # Observe: tf.gradients(f(x), x)[i].shape == x[i].shape - # thus we check that the user is supplying correct shapes. - equal_shape = check_ops.assert_equal( - array_ops.shape(x_), - array_ops.shape(gx_), - message='Each `x` must have the same shape as each `gx`.') - with ops.control_dependencies([equal_shape]): - # IEEE754 ensures `(x-x)==0.` and that `0.*x==0.` so we make sure to - # write the code this way, rather than, e.g., - # `sum_x * stop(gx) + stop(fx - sum_x * gx)`. - # For more discussion regarding the relevant portions of the IEEE754 - # standard, see the StackOverflow question, - # "Is there a floating point value of x, for which x-x == 0 is false?" 
- # http://stackoverflow.com/q/2686644 - zeros_like_x_ = x_ - array_ops.stop_gradient(x_) - override_grad.append(math_ops.reduce_sum( - maybe_stop(gx_) * zeros_like_x_)) - override_grad = sum(override_grad) - override_grad /= math_ops.cast(array_ops.size(fx), - dtype=fx.dtype.base_dtype) - - # Proof of correctness: - # - # f(x) = x * stop[gx] + stop[fx - x * gx] - # = stop[fx] - # - # g(x) = grad[fx] - # = stop[gx] + grad[stop[fx - x * gx]] - # = stop[gx] + 0 - # - # Notice that when x is zero it still works: - # grad[x * stop(gx) + stop(fx - x * gx)] = 1 * stop[gx] + 0 = stop[gx] - # - # The proof is similar for the tensor-domain case, except that we - # `reduce_sum` the `stop[gx] * (x - stop[x])` then rescale by - # `tf.size(fx)` since this reduced version is broadcast to `fx`. - return maybe_stop(fx) + override_grad -- GitLab From dc15b875893d55793c419840446dc809bcb7383f Mon Sep 17 00:00:00 2001 From: Allen Lavoie Date: Mon, 12 Mar 2018 13:00:24 -0700 Subject: [PATCH 748/884] Fix another eager PyObject leak Shockingly this one was also due to PySequence_GetItem. PiperOrigin-RevId: 188765548 --- tensorflow/python/framework/test_util.py | 26 +++++++++++++++++++ tensorflow/python/framework/test_util_test.py | 20 ++++++++++++++ .../python/kernel_tests/constant_op_test.py | 6 +++++ tensorflow/python/layers/core_test.py | 23 +++------------- tensorflow/python/lib/core/py_seq_tensor.cc | 9 ++++--- 5 files changed, 62 insertions(+), 22 deletions(-) diff --git a/tensorflow/python/framework/test_util.py b/tensorflow/python/framework/test_util.py index fde9c85891..c4952cffdd 100644 --- a/tensorflow/python/framework/test_util.py +++ b/tensorflow/python/framework/test_util.py @@ -434,6 +434,32 @@ def with_c_api(cls): return cls +def assert_no_new_pyobjects_executing_eagerly(f): + """Decorator for asserting that no new Python objects persist after a test. + + Runs the test multiple times executing eagerly, first as a warmup and then + several times to let objects accumulate. 
The warmup helps ignore caches which + do not grow as the test is run repeatedly. + + Useful for checking that there are no missing Py_DECREFs in the C exercised by + a bit of Python. + """ + def decorator(self, **kwargs): + """Warms up, gets an object count, runs the test, checks for new objects.""" + with context.eager_mode(): + gc.disable() + f(self, **kwargs) + gc.collect() + previous_count = len(gc.get_objects()) + for _ in range(3): + f(self, **kwargs) + gc.collect() + # There should be no new Python objects hanging around. + new_count = len(gc.get_objects()) + self.assertEqual(previous_count, new_count) + gc.enable() + return decorator + def assert_no_new_tensors(f): """Decorator for asserting that no new Tensors persist after a test. diff --git a/tensorflow/python/framework/test_util_test.py b/tensorflow/python/framework/test_util_test.py index 20d816050f..02ffa93bae 100644 --- a/tensorflow/python/framework/test_util_test.py +++ b/tensorflow/python/framework/test_util_test.py @@ -448,6 +448,26 @@ class GarbageCollectionTest(test_util.TensorFlowTestCase): LeakedTensorTest().test_has_no_leak() + def test_no_new_objects_decorator(self): + + class LeakedObjectTest(object): + + def __init__(inner_self): # pylint: disable=no-self-argument + inner_self.assertEqual = self.assertEqual # pylint: disable=invalid-name + inner_self.accumulation = [] + + @test_util.assert_no_new_pyobjects_executing_eagerly + def test_has_leak(self): + self.accumulation.append([1.]) + + @test_util.assert_no_new_pyobjects_executing_eagerly + def test_has_no_leak(self): + self.not_accumulating = [1.] 
+ + with self.assertRaises(AssertionError): + LeakedObjectTest().test_has_leak() + + LeakedObjectTest().test_has_no_leak() if __name__ == "__main__": googletest.main() diff --git a/tensorflow/python/kernel_tests/constant_op_test.py b/tensorflow/python/kernel_tests/constant_op_test.py index 16e56349c4..ffbdb0e61a 100644 --- a/tensorflow/python/kernel_tests/constant_op_test.py +++ b/tensorflow/python/kernel_tests/constant_op_test.py @@ -30,6 +30,7 @@ from tensorflow.python.framework import errors_impl from tensorflow.python.framework import importer from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradient_checker from tensorflow.python.ops import logging_ops @@ -180,6 +181,11 @@ class ConstantTest(test.TestCase): shape=[2, 3, 5]) self.assertEqual(c.get_shape(), [2, 3, 5]) + @test_util.assert_no_new_pyobjects_executing_eagerly + def testEagerMemory(self): + """Tests PyObject refs are managed correctly when executing eagerly.""" + constant_op.constant([[1.]]) + def testImplicitShapeNumPy(self): with ops.Graph().as_default(): c = constant_op.constant( diff --git a/tensorflow/python/layers/core_test.py b/tensorflow/python/layers/core_test.py index 7d74046caf..cf45b07637 100644 --- a/tensorflow/python/layers/core_test.py +++ b/tensorflow/python/layers/core_test.py @@ -19,7 +19,6 @@ from __future__ import division from __future__ import print_function import collections -import gc import numpy as np @@ -84,27 +83,13 @@ class DenseTest(test.TestCase): self.assertEqual(dense.kernel.name, 'my_dense/kernel:0') self.assertEqual(dense.bias.name, 'my_dense/bias:0') + @test_util.assert_no_new_pyobjects_executing_eagerly def testNoEagerLeak(self): # Tests that repeatedly constructing and building a Layer does not leak # Python objects. 
- def _test_fn(): - inputs = random_ops.random_uniform((5, 4), seed=1) - core_layers.Dense(5)(inputs) - core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')(inputs) - - with context.eager_mode(): - _test_fn() # warmup - gc.disable() - gc.collect() - object_count = len(gc.get_objects()) - for _ in range(100): - _test_fn() - gc.collect() - self.assertLessEqual( - len(gc.get_objects()), - # DEBUG_SAVEALL messes with this slightly. - object_count + 1) - gc.enable() + inputs = random_ops.random_uniform((5, 4), seed=1) + core_layers.Dense(5)(inputs) + core_layers.Dense(2, activation=nn_ops.relu, name='my_dense')(inputs) @test_util.run_in_graph_and_eager_modes() def testCallTensorDot(self): diff --git a/tensorflow/python/lib/core/py_seq_tensor.cc b/tensorflow/python/lib/core/py_seq_tensor.cc index 317bdc2e14..8247d354db 100644 --- a/tensorflow/python/lib/core/py_seq_tensor.cc +++ b/tensorflow/python/lib/core/py_seq_tensor.cc @@ -84,6 +84,7 @@ bool IsPyDimension(PyObject* obj) { } Status InferShapeAndType(PyObject* obj, TensorShape* shape, DataType* dtype) { + std::vector refs_to_clean; while (true) { // We test strings first, in case a string is considered a sequence. if (IsPyString(obj)) { @@ -93,6 +94,7 @@ Status InferShapeAndType(PyObject* obj, TensorShape* shape, DataType* dtype) { if (length > 0) { shape->AddDim(length); obj = PySequence_GetItem(obj, 0); + refs_to_clean.push_back(make_safe(obj)); continue; } else if (length == 0) { shape->AddDim(length); @@ -167,14 +169,15 @@ const char ErrorFoundFloat[] = if (shape.dims() > 1) { \ /* Iterate over outer dim, and recursively convert each element. 
*/ \ const int64 s = shape.dim_size(0); \ - if (TF_PREDICT_FALSE(s != PySequence_Length(obj))) { \ + Safe_PyObjectPtr seq = make_safe(PySequence_Fast(obj, "")); \ + if (TF_PREDICT_FALSE(s != PySequence_Fast_GET_SIZE(seq.get()))) { \ return ErrorRectangular; \ } \ TensorShape rest = shape; \ rest.RemoveDim(0); \ for (int64 i = 0; i < s; ++i) { \ - const char* error = \ - FUNCTION##Helper(PySequence_GetItem(obj, i), rest, buf); \ + const char* error = FUNCTION##Helper( \ + PySequence_Fast_GET_ITEM(seq.get(), i), rest, buf); \ if (TF_PREDICT_FALSE(error != nullptr)) return error; \ } \ } else { \ -- GitLab From 7feb32b92448f722aa089f599f75c59c82b901ba Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 13:05:26 -0700 Subject: [PATCH 749/884] Add is_discrete, is_continuous, is_bounded methods to TensorSpecs. PiperOrigin-RevId: 188766232 --- tensorflow/python/framework/tensor_spec.py | 20 ++++++++++++++++++ .../python/framework/tensor_spec_test.py | 21 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/tensorflow/python/framework/tensor_spec.py b/tensorflow/python/framework/tensor_spec.py index 27a9ab8c60..546c48adba 100644 --- a/tensorflow/python/framework/tensor_spec.py +++ b/tensorflow/python/framework/tensor_spec.py @@ -65,6 +65,11 @@ class TensorSpec(object): else: raise ValueError("`tensor` should be a tf.Tensor") + @classmethod + def is_bounded(cls): + del cls + return False + @property def shape(self): """Returns the `TensorShape` that represents the shape of the tensor.""" @@ -80,6 +85,16 @@ class TensorSpec(object): """Returns the name of the described tensor.""" return self._name + @property + def is_discrete(self): + """Whether spec is discrete.""" + return self.dtype.is_integer + + @property + def is_continuous(self): + """Whether spec is continuous.""" + return self.dtype.is_floating + def is_compatible_with(self, spec_or_tensor): """True if the shape and dtype of `spec_or_tensor` are compatible.""" return 
(self._dtype.is_compatible_with(spec_or_tensor.dtype) and @@ -163,6 +178,11 @@ class BoundedTensorSpec(TensorSpec): self._maximum = np.array(maximum, dtype=self.dtype.as_numpy_dtype()) self._maximum.setflags(write=False) + @classmethod + def is_bounded(cls): + del cls + return True + @classmethod def from_spec(cls, spec): dtype = dtypes.as_dtype(spec.dtype) diff --git a/tensorflow/python/framework/tensor_spec_test.py b/tensorflow/python/framework/tensor_spec_test.py index 54ca4d9a19..b33d769d86 100644 --- a/tensorflow/python/framework/tensor_spec_test.py +++ b/tensorflow/python/framework/tensor_spec_test.py @@ -127,6 +127,22 @@ class TensorSpecTest(test_util.TensorFlowTestCase): self.assertEqual(bounded_spec.dtype, spec.dtype) self.assertEqual(bounded_spec.name, spec.name) + def testIsDiscrete(self): + discrete_spec = tensor_spec.TensorSpec((1, 2), dtypes.int32) + continuous_spec = tensor_spec.TensorSpec((1, 2), dtypes.float32) + self.assertTrue(discrete_spec.is_discrete) + self.assertFalse(continuous_spec.is_discrete) + + def testIsContinuous(self): + discrete_spec = tensor_spec.TensorSpec((1, 2), dtypes.int32) + continuous_spec = tensor_spec.TensorSpec((1, 2), dtypes.float32) + self.assertFalse(discrete_spec.is_continuous) + self.assertTrue(continuous_spec.is_continuous) + + def testIsBounded(self): + unbounded_spec = tensor_spec.TensorSpec((1, 2), dtypes.int32) + self.assertFalse(unbounded_spec.is_bounded()) + class BoundedTensorSpecTest(test_util.TensorFlowTestCase): @@ -138,6 +154,11 @@ class BoundedTensorSpecTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp(ValueError, "not compatible"): tensor_spec.BoundedTensorSpec((3, 5), dtypes.uint8, 0, (1, 1, 1)) + def testIsBounded(self): + bounded_spec = tensor_spec.BoundedTensorSpec( + (1, 2), dtypes.int32, minimum=0, maximum=1) + self.assertTrue(bounded_spec.is_bounded()) + def testMinimumMaximumAttributes(self): spec = tensor_spec.BoundedTensorSpec( (1, 2, 3), dtypes.float32, 0, (5, 5, 5)) -- GitLab 
From 7a6af158e972bfef4b23bf6812b5895abcdc5aef Mon Sep 17 00:00:00 2001 From: Igor Saprykin Date: Mon, 12 Mar 2018 13:07:12 -0700 Subject: [PATCH 750/884] Move `loss_reduction` argument from `replicate_model_fn` to `TowerOptimizer. PiperOrigin-RevId: 188766477 --- .../python/estimator/replicate_model_fn.py | 55 +++--- .../estimator/replicate_model_fn_test.py | 164 +++++++++++------- 2 files changed, 125 insertions(+), 94 deletions(-) diff --git a/tensorflow/python/estimator/replicate_model_fn.py b/tensorflow/python/estimator/replicate_model_fn.py index 7418852096..144d89abf3 100644 --- a/tensorflow/python/estimator/replicate_model_fn.py +++ b/tensorflow/python/estimator/replicate_model_fn.py @@ -50,7 +50,6 @@ from tensorflow.python.training import optimizer as optimizer_lib def _replicate_model_fn(model_fn, - loss_reduction=losses.Reduction.SUM_BY_NONZERO_WEIGHTS, devices=None): """Replicate `Estimator.model_fn` over GPUs. @@ -109,8 +108,9 @@ def _replicate_model_fn(model_fn, On reduction algorithms: Certain algorithms were chosen for aggregating results of computations on multiple towers: - - Losses from all towers are reduced according to `loss_reduction`. - - Gradients from all towers are reduced according to `loss_reduction` + - Losses from all towers are reduced according to `loss_reduction` argument + to TowerOptimizer.. + - Gradients from all towers are reduced according to the `loss_reduction` for each trainable variable. - `eval_metrics_ops` are reduced per metric using `reduce_mean`. - `EstimatorSpec.predictions` and `EstimatorSpec.export_outputs` are @@ -134,16 +134,11 @@ def _replicate_model_fn(model_fn, Args: model_fn: `model_fn` as defined in `Estimator`. See the section above about the train_op argument of `EstimatorSpec`. - loss_reduction: controls whether losses are summed or averaged. devices: Optional list of devices to replicate the model across. This argument can be used to replice only on the subset of available GPUs. 
If `None`, then all available GPUs are going to be used for replication. If no GPUs are available, then the model is going to be placed on the CPU. - Raises: - ValueError: if there is no `loss_reduction` or if _TowerOptimizer is - mis-used. - Returns: A replicated version of the supplied `model_fn`. Returned function that conforms to the requirements of `Estimator`'s `model_fn` and can be used @@ -151,7 +146,6 @@ def _replicate_model_fn(model_fn, """ return _replicate_model_fn_with_mode( model_fn, - loss_reduction, devices, # TODO(isaprykin): Query the system configuration to choose modes other # than `SHARED_LOCAL_PARAMETER_SERVER`, even though it is often @@ -186,13 +180,9 @@ class _VariableDistributionMode(object): def _replicate_model_fn_with_mode( model_fn, - loss_reduction, devices=None, mode=_VariableDistributionMode.SHARED_LOCAL_PARAMETER_SERVER): """A version of `replicate_model_fn` that allows to specify a `mode`.""" - if loss_reduction == losses.Reduction.NONE: - raise ValueError('Tower losses need to be reduced in some way, yet {} ' - 'reduction is specified.'.format(loss_reduction)) if not devices: devices = _get_local_devices('GPU') or _get_local_devices('CPU') @@ -215,7 +205,6 @@ def _replicate_model_fn_with_mode( features=[features], labels=[labels], params=params, - loss_reduction=loss_reduction, config=config, devices=devices, local_ps_devices=ps_devices)[0] # One device, so one spec is out. @@ -230,7 +219,6 @@ def _replicate_model_fn_with_mode( features=feature_shards, labels=label_shards, params=params, - loss_reduction=loss_reduction, config=config, devices=devices, local_ps_devices=ps_devices) @@ -255,7 +243,8 @@ class _TowerOptimizer(optimizer_lib.Optimizer): COLLECTION_FOR_GRAPH_STATES = 'replicate_model_fn_graph_states' - def __init__(self, optimizer_or_optimizer_fn): + def __init__(self, optimizer_or_optimizer_fn, + loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE): """Wrap an existing optimizer for gathering gradients across towers. 
Each invocation of model_fn has to call the same optimizers in the same @@ -275,8 +264,10 @@ class _TowerOptimizer(optimizer_lib.Optimizer): optimizer_or_optimizer_fn: an instance of optimizer to wrap. That instance is going to be used for optimizer-specific logic. This can also be a no-argument function that returns such an optimizer instance. + loss_reduction: controls whether losses are summed or averaged. """ self._optimizer_or_optimizer_fn = optimizer_or_optimizer_fn + self._loss_reduction = loss_reduction @staticmethod def has_been_used(): @@ -296,8 +287,9 @@ class _TowerOptimizer(optimizer_lib.Optimizer): def compute_gradients(self, loss, *args, **kwargs): """Compute gradients, but first, if needed, scale the loss.""" + _TowerOptimizer._graph_state().set_loss_reduction(self._loss_reduction) loss = _scale_loss(loss, - self._graph_state().loss_reduction, + self._loss_reduction, self._graph_state().number_of_towers) return self._get_optimizer().compute_gradients(loss, *args, **kwargs) @@ -402,10 +394,12 @@ class _TowerOptimizer(optimizer_lib.Optimizer): self._collected_grads_and_vars[tower_id][index_of_last_gradients]) return grads_and_vars - def set_reduction_across_towers(self, loss_reduction, number_of_towers): - self._loss_reduction = loss_reduction + def set_number_of_towers(self, number_of_towers): self._number_of_towers = number_of_towers + def set_loss_reduction(self, loss_reduction): + self._loss_reduction = loss_reduction + @contextmanager def tower(self, tower_id, var_scope, name_scope): if tower_id == 0: @@ -509,7 +503,6 @@ def _get_loss_towers(model_fn, config, devices, local_ps_devices, - loss_reduction, name_scope_pattern=_DEFAULT_NAME_SCOPE_PATTERN): """Replicate the loss computation across devices.""" tower_specs = [] @@ -524,8 +517,7 @@ def _get_loss_towers(model_fn, # pylint: disable=protected-access round_robin_strategy = device_setter_lib._RoundRobinStrategy( num_tasks=len(local_ps_devices)) - 
_TowerOptimizer._graph_state().set_reduction_across_towers( - loss_reduction, len(devices)) + _TowerOptimizer._graph_state().set_number_of_towers(len(devices)) for i, device in enumerate(devices): is_the_first_tower = (i == 0) @@ -567,7 +559,9 @@ def _get_loss_towers(model_fn, # Scaling the loss here doesn't actually affect gradients. Another # instance of scaling happens inside the _TowerOptimizer. tower_spec = _scale_tower_loss( - tower_spec, loss_reduction, number_of_towers=len(devices)) + tower_spec, + _TowerOptimizer._graph_state().loss_reduction, + number_of_towers=len(devices)) tower_specs.append(tower_spec) if not _TowerOptimizer._did_towers_have_same_optimizer_calls(): @@ -607,20 +601,27 @@ def _scale_tower_loss(tower_spec, loss_reduction, number_of_towers): return tower_spec estimator_spec = _asdict(tower_spec) - estimator_spec['loss'] = _scale_loss(tower_spec.loss, loss_reduction, - number_of_towers) + estimator_spec['loss'] = _scale_loss( + tower_spec.loss, + loss_reduction, + number_of_towers, + reduced_loss_name='averaged_loss') return model_fn_lib.EstimatorSpec(**estimator_spec) -def _scale_loss(loss, loss_reduction, number_of_towers): +def _scale_loss(loss, loss_reduction, number_of_towers, reduced_loss_name=None): """If needed, scale down the loss for averaging loss by summing.""" if loss is None: return None if number_of_towers == 1: return loss + if loss_reduction == losses.Reduction.NONE: + raise ValueError('Tower losses need to be reduced in some way, yet {} ' + 'reduction is specified.'.format(loss_reduction)) + if loss_reduction != losses.Reduction.SUM: - return math_ops.div(loss, 1.0 * number_of_towers, name='averaged_loss') + return math_ops.div(loss, 1.0 * number_of_towers, name=reduced_loss_name) else: return loss diff --git a/tensorflow/python/estimator/replicate_model_fn_test.py b/tensorflow/python/estimator/replicate_model_fn_test.py index b6dd4e981f..ad1f9c02b9 100644 --- a/tensorflow/python/estimator/replicate_model_fn_test.py +++ 
b/tensorflow/python/estimator/replicate_model_fn_test.py @@ -121,8 +121,9 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): estimator = dnn.DNNClassifier( hidden_units=(2, 2), # Adagrad is configured with `get_optimizer_instance`, so the function - # form of `_TowerOptimizer.__init__` is used. - optimizer=replicate_model_fn._TowerOptimizer(optimizer_fn), + # form of `TowerOptimizer.__init__` is used. + optimizer=replicate_model_fn._TowerOptimizer( + optimizer_fn, loss_reduction=losses.Reduction.SUM), feature_columns=feature_columns, n_classes=n_classes, model_dir=self._model_dir) @@ -134,7 +135,6 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): model_fn = replicate_model_fn._replicate_model_fn_with_mode( estimator.model_fn, devices=['/gpu:0', '/gpu:1', '/gpu:2'], - loss_reduction=losses.Reduction.SUM, mode=mode) estimator = estimator_lib.Estimator( @@ -178,32 +178,39 @@ class DNNClassifierIntegrationTest(test_util.TensorFlowTestCase): class ReplicateModelTest(test_util.TensorFlowTestCase): - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(10, dtype=dtypes.float64), - dtype=dtypes.float64) + def create_model_fn_with_loss_reduction(self, loss_reduction): - predictions = math_ops.multiply(features, c) + def model_fn(mode, features, labels, params): + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(10, dtype=dtypes.float64), + dtype=dtypes.float64) - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) - loss = math_ops.reduce_sum(loss) + predictions = math_ops.multiply(features, c) - metrics = { - 'accuracy': metrics_lib.accuracy(labels, predictions), - 'auc': metrics_lib.auc(labels, predictions) - } + loss = losses.absolute_difference( + labels=labels, + predictions=predictions, + reduction=losses.Reduction.SUM) + loss = math_ops.reduce_sum(loss) - optimizer = 
replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(params['learning_rate'])) + metrics = { + 'accuracy': metrics_lib.accuracy(labels, predictions), + 'auc': metrics_lib.auc(labels, predictions) + } - return model_fn_lib.EstimatorSpec( - mode=mode, - loss=loss, - eval_metric_ops=metrics, - predictions={'probabilities': predictions}, - train_op=optimizer.minimize(loss)) + optimizer = replicate_model_fn._TowerOptimizer( + gradient_descent.GradientDescentOptimizer(params['learning_rate']), + loss_reduction=loss_reduction) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=loss, + eval_metric_ops=metrics, + predictions={'probabilities': predictions}, + train_op=optimizer.minimize(loss)) + + return model_fn @property def params(self): @@ -217,8 +224,7 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, - loss_reduction=losses.Reduction.SUM, + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) @@ -248,7 +254,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): dtype=dtypes.float64) replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1']) + self.create_model_fn_with_loss_reduction(losses.Reduction.MEAN), + devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) session.run(variables.global_variables_initializer()) @@ -284,8 +291,7 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session, variable_scope.variable_scope( '', reuse=variable_scope.AUTO_REUSE): replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, - loss_reduction=losses.Reduction.SUM, + 
self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) @@ -307,8 +313,7 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, - loss_reduction=losses.Reduction.SUM, + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.EVAL, self.params) @@ -338,7 +343,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, losses.Reduction.MEAN, devices=['/gpu:0', '/gpu:1']) + self.create_model_fn_with_loss_reduction(losses.Reduction.MEAN), + devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.EVAL, self.params) session.run(variables.local_variables_initializer()) @@ -367,7 +373,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0', '/gpu:1']) + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), + devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) session.run(variables.global_variables_initializer()) @@ -382,7 +389,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0']) + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), + devices=['/gpu:0']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) 
session.run(variables.global_variables_initializer()) @@ -404,7 +412,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0']) + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), + devices=['/gpu:0']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.EVAL, self.params) session.run(variables.local_variables_initializer()) @@ -432,7 +441,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0']) + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), + devices=['/gpu:0']) estimator_spec = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.PREDICT, self.params) session.run(variables.global_variables_initializer()) @@ -448,15 +458,22 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.assertRaisesRegexp( ValueError, '.*Batch.+size.+needs.+to.+be.+divisible.+by.+GPUs.+'): replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0', '/gpu:1']) + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), + devices=['/gpu:0', '/gpu:1']) _ = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) def test_unsupported_loss_reduction(self): + features = np.array([[1.0], [2.0], [3.0]]) + labels = np.array([[1.0], [2.0], [3.0]]) + with self.assertRaisesRegexp(ValueError, '.+none.+reduction.+is.+specified.+'): - _ = replicate_model_fn._replicate_model_fn(self.model_fn, - losses.Reduction.NONE) + replicated_model_fn = replicate_model_fn._replicate_model_fn( + self.create_model_fn_with_loss_reduction(losses.Reduction.NONE), + devices=['/gpu:0', '/gpu:1', '/gpu:2']) + _ = replicated_model_fn( + features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) def 
test_places_on_gpu_with_upper_case_spelling(self): features = np.array([[0.01], [0.002]]) @@ -464,7 +481,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session(): replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/GPU:0']) + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), + devices=['/GPU:0']) _ = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) @@ -478,7 +496,8 @@ class ReplicateModelTest(test_util.TensorFlowTestCase): with self.test_session(): replicated_model_fn = replicate_model_fn._replicate_model_fn( - self.model_fn, devices=['/gpu:0']) + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), + devices=['/gpu:0']) _ = replicated_model_fn( features, labels, model_fn_lib.ModeKeys.TRAIN, self.params) @@ -624,7 +643,8 @@ class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase): optimizer = training.SyncReplicasOptimizer( optimizer, replicas_to_aggregate=1) sync_hook = optimizer.make_session_run_hook(True) - optimizer = replicate_model_fn._TowerOptimizer(optimizer) + optimizer = replicate_model_fn._TowerOptimizer( + optimizer, loss_reduction=losses.Reduction.SUM) return model_fn_lib.EstimatorSpec( mode=mode, @@ -650,7 +670,6 @@ class MakeSureSyncReplicasOptimizerWorks(test_util.TensorFlowTestCase): model_fn = replicate_model_fn._replicate_model_fn( self.model_fn, - loss_reduction=losses.Reduction.SUM, devices=['/gpu:0', '/gpu:1']) estimator = estimator_lib.Estimator( @@ -687,9 +706,10 @@ class ReplicateWithTwoOptimizersTest(test_util.TensorFlowTestCase): } first_optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(1.0)) + gradient_descent.GradientDescentOptimizer(1.0), + loss_reduction=losses.Reduction.SUM) second_optimizer = replicate_model_fn._TowerOptimizer( - adam.AdamOptimizer(1.0)) + adam.AdamOptimizer(1.0), loss_reduction=losses.Reduction.SUM) with 
ops_lib.control_dependencies([side_effects.assign_add(1.0)]): first_grads_and_vars = first_optimizer.compute_gradients(loss) @@ -712,7 +732,6 @@ class ReplicateWithTwoOptimizersTest(test_util.TensorFlowTestCase): with self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( self.model_fn, - loss_reduction=losses.Reduction.SUM, devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, {}) @@ -787,11 +806,13 @@ class ReplicateWithTwoLossesAndOneOptimizer(test_util.TensorFlowTestCase): train_ops = [] optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(1.0)) + gradient_descent.GradientDescentOptimizer(1.0), + loss_reduction=losses.Reduction.SUM) train_ops.append(optimizer.minimize(loss, var_list=[c])) if not self.should_skip_optimizer(): another_optimizer = replicate_model_fn._TowerOptimizer( - gradient_descent.GradientDescentOptimizer(1.0)) + gradient_descent.GradientDescentOptimizer(1.0), + loss_reduction=losses.Reduction.SUM) train_ops.append(another_optimizer.minimize(another_loss, var_list=[d])) train_op = control_flow_ops.group(train_ops) @@ -806,10 +827,9 @@ class ReplicateWithTwoLossesAndOneOptimizer(test_util.TensorFlowTestCase): features = np.array([[1.0], [2.0]]) labels = np.array([[1.0], [2.0]]) - with self.test_session() as session: + with ops_lib.Graph().as_default(), self.test_session() as session: replicated_model_fn = replicate_model_fn._replicate_model_fn( self.model_fn, - loss_reduction=losses.Reduction.SUM, devices=['/gpu:0', '/gpu:1']) estimator_spec = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, {}) @@ -881,7 +901,7 @@ class FailToWrapOptimizerInTheModelFn(test_util.TensorFlowTestCase): with self.test_session(): with self.assertRaisesRegexp(ValueError, - 'Please.+wrap.+with.+_TowerOptimizer'): + 'Please.+wrap.+with.+TowerOptimizer'): replicated_model_fn = replicate_model_fn._replicate_model_fn( 
self.model_fn, devices=['/gpu:0', '/gpu:1']) _ = replicated_model_fn(features, labels, model_fn_lib.ModeKeys.TRAIN, @@ -890,30 +910,43 @@ class FailToWrapOptimizerInTheModelFn(test_util.TensorFlowTestCase): class GetLossTowersTest(test_util.TensorFlowTestCase): - def model_fn(self, mode, features, labels, params): - c = variable_scope.get_variable( - 'c', - initializer=constant_op.constant(0.25, dtype=dtypes.float64), - dtype=dtypes.float64) + def create_model_fn_with_loss_reduction(self, loss_reduction): - predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c) - labels = np.array([0.1, 0.2, 0.3, labels[0]]) + def model_fn(mode, features, labels, params): + del params + c = variable_scope.get_variable( + 'c', + initializer=constant_op.constant(0.25, dtype=dtypes.float64), + dtype=dtypes.float64) - loss = losses.absolute_difference( - labels=labels, predictions=predictions, reduction=losses.Reduction.SUM) + predictions = math_ops.add(np.array([0.1, 0.2, 0.3, features[0]]), c) + labels = np.array([0.1, 0.2, 0.3, labels[0]]) - return model_fn_lib.EstimatorSpec(mode=mode, loss=math_ops.reduce_sum(loss)) + loss = losses.absolute_difference( + labels=labels, + predictions=predictions, + reduction=losses.Reduction.SUM) + + optimizer = replicate_model_fn._TowerOptimizer( + gradient_descent.GradientDescentOptimizer(1.0), + loss_reduction) + + return model_fn_lib.EstimatorSpec( + mode=mode, + loss=math_ops.reduce_sum(loss), + train_op=optimizer.minimize(loss)) + + return model_fn def test_gradients_are_computed(self): with self.test_session() as session: tower_specs = replicate_model_fn._get_loss_towers( - self.model_fn, + self.create_model_fn_with_loss_reduction(losses.Reduction.SUM), mode=None, features=[[0.6], [1.6]], labels=[[0.6], [0.6]], params=None, config=None, - loss_reduction=losses.Reduction.SUM, devices=['/gpu:0', '/gpu:1'], local_ps_devices=['/gpu:0'], name_scope_pattern='test_tower_{}') @@ -941,12 +974,11 @@ class 
GetLossTowersTest(test_util.TensorFlowTestCase): def test_gradients_are_computed_with_mean_reduction(self): with self.test_session() as session: tower_specs = replicate_model_fn._get_loss_towers( - self.model_fn, + self.create_model_fn_with_loss_reduction(losses.Reduction.MEAN), mode=model_fn_lib.ModeKeys.EVAL, features=[[0.6], [1.6]], labels=[[0.6], [0.6]], params=None, - loss_reduction=losses.Reduction.MEAN, config=None, devices=['/gpu:0', '/gpu:1'], local_ps_devices=['/gpu:0'], @@ -999,7 +1031,6 @@ class GetLossTowersTest(test_util.TensorFlowTestCase): features=[[0.6], [1.6], [2.6]], labels=[[0.6], [0.6], [2.6]], params=None, - loss_reduction=losses.Reduction.SUM, config=None, devices=['/gpu:0', '/gpu:1', '/gpu:3'], local_ps_devices=['/gpu:0', '/gpu:1', '/gpu:3'], @@ -1296,7 +1327,6 @@ class PredictSpecTest(test_util.TensorFlowTestCase): self.model_fn, mode=None, features=[[0.1], [0.2]], - loss_reduction=losses.Reduction.SUM, labels=[[], []], params=None, config=None, -- GitLab From 2057bf784770c55ab56bdbe5b96c233afbed50ce Mon Sep 17 00:00:00 2001 From: Eugene Brevdo Date: Mon, 12 Mar 2018 13:35:03 -0700 Subject: [PATCH 751/884] [TFLite] Don't require a std::vector for Interpreter::SetTensorParameters*. 
PiperOrigin-RevId: 188770522 --- tensorflow/contrib/lite/interpreter.cc | 27 +++++++++++++------------- tensorflow/contrib/lite/interpreter.h | 22 ++++++++++++++++++--- tensorflow/contrib/lite/util.cc | 16 +++++++++------ tensorflow/contrib/lite/util.h | 8 +++++--- 4 files changed, 48 insertions(+), 25 deletions(-) diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index bbcd318efd..f03c1c9fe9 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -575,9 +575,9 @@ TfLiteStatus Interpreter::GetNodeAndRegistration( } TfLiteStatus Interpreter::SetTensorParametersReadOnly( - int tensor_index, TfLiteType type, const char* name, - const std::vector& dims, TfLiteQuantizationParams quantization, - const char* buffer, size_t bytes, const Allocation* allocation) { + int tensor_index, TfLiteType type, const char* name, const int rank, + const int* dims, TfLiteQuantizationParams quantization, const char* buffer, + size_t bytes, const Allocation* allocation) { TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); // For most tensors we know exactly how much memory is necessary so we can @@ -585,23 +585,24 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( // because their sizes change with the contents of the individual strings. if (type != kTfLiteString) { size_t required_bytes; - TF_LITE_ENSURE_OK(&context_, BytesRequired(type, dims.data(), dims.size(), - &required_bytes)); + TF_LITE_ENSURE_OK(&context_, + BytesRequired(type, dims, rank, &required_bytes)); TF_LITE_ENSURE_EQ(&context_, required_bytes, bytes); } TfLiteTensor& tensor = context_.tensors[tensor_index]; - if (type == tensor.type && EqualVectorAndTfLiteIntArray(tensor.dims, dims)) { + if (type == tensor.type && + EqualArrayAndTfLiteIntArray(tensor.dims, rank, dims)) { // Fast path which does not invalidate the invokable property. 
TfLiteTensorDataFree(&tensor); tensor.data.raw = const_cast(buffer); - if (!tensor.dims) tensor.dims = ConvertVectorToTfLiteIntArray(dims); + if (!tensor.dims) tensor.dims = ConvertArrayToTfLiteIntArray(rank, dims); tensor.params = quantization; tensor.allocation_type = kTfLiteMmapRo; tensor.allocation = allocation; } else { invokable_ = false; - TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims), quantization, const_cast(buffer), bytes, kTfLiteMmapRo, allocation, &tensor); } @@ -613,8 +614,8 @@ TfLiteStatus Interpreter::SetTensorParametersReadOnly( // bytes. The lifetime of buffer must be ensured to be greater or equal // to Interpreter. TfLiteStatus Interpreter::SetTensorParametersReadWrite( - int tensor_index, TfLiteType type, const char* name, - const std::vector& dims, TfLiteQuantizationParams quantization) { + int tensor_index, TfLiteType type, const char* name, const int rank, + const int* dims, TfLiteQuantizationParams quantization) { invokable_ = false; TF_LITE_ENSURE(&context_, tensor_index < context_.tensors_size && tensor_index >= 0); @@ -624,10 +625,10 @@ TfLiteStatus Interpreter::SetTensorParametersReadWrite( // many bytes we will need based on the dimensions. String tensors are // allocated dynamically and we can't know ahead of time how much space // they will require. - TF_LITE_ENSURE_OK(&context_, BytesRequired(type, dims.data(), dims.size(), - &required_bytes)); + TF_LITE_ENSURE_OK(&context_, + BytesRequired(type, dims, rank, &required_bytes)); } - TfLiteTensorReset(type, name, ConvertVectorToTfLiteIntArray(dims), + TfLiteTensorReset(type, name, ConvertArrayToTfLiteIntArray(rank, dims), quantization, /*buffer=*/nullptr, required_bytes, type == kTfLiteString ? 
kTfLiteDynamic : kTfLiteArenaRw, diff --git a/tensorflow/contrib/lite/interpreter.h b/tensorflow/contrib/lite/interpreter.h index f2d4a05164..7c5a195815 100644 --- a/tensorflow/contrib/lite/interpreter.h +++ b/tensorflow/contrib/lite/interpreter.h @@ -134,18 +134,34 @@ class Interpreter { // This variant assumes an external buffer has been allocated of size // bytes. The lifetime of buffer must be ensured to be greater or equal // to Interpreter. - TfLiteStatus SetTensorParametersReadOnly( + inline TfLiteStatus SetTensorParametersReadOnly( int tensor_index, TfLiteType type, const char* name, const std::vector& dims, TfLiteQuantizationParams quantization, + const char* buffer, size_t bytes, + const Allocation* allocation = nullptr) { + return SetTensorParametersReadOnly(tensor_index, type, name, dims.size(), + dims.data(), quantization, buffer, bytes, + allocation); + }; + + TfLiteStatus SetTensorParametersReadOnly( + int tensor_index, TfLiteType type, const char* name, const int rank, + const int* dims, TfLiteQuantizationParams quantization, const char* buffer, size_t bytes, const Allocation* allocation = nullptr); // Set description of inputs/outputs/data/fptrs for node `node_index`. // This variant assumes an external buffer has been allocated of size // bytes. The lifetime of buffer must be ensured to be greater or equal // to Interpreter. 
- TfLiteStatus SetTensorParametersReadWrite( + inline TfLiteStatus SetTensorParametersReadWrite( int tensor_index, TfLiteType type, const char* name, - const std::vector& dims, TfLiteQuantizationParams quantization); + const std::vector& dims, TfLiteQuantizationParams quantization) { + return SetTensorParametersReadWrite(tensor_index, type, name, dims.size(), + dims.data(), quantization); + } + TfLiteStatus SetTensorParametersReadWrite( + int tensor_index, TfLiteType type, const char* name, const int rank, + const int* dims, TfLiteQuantizationParams quantization); // Functions to access tensor data diff --git a/tensorflow/contrib/lite/util.cc b/tensorflow/contrib/lite/util.cc index b7f31e2731..fb4af07d06 100644 --- a/tensorflow/contrib/lite/util.cc +++ b/tensorflow/contrib/lite/util.cc @@ -17,17 +17,21 @@ limitations under the License. namespace tflite { TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input) { - TfLiteIntArray* output = TfLiteIntArrayCreate(input.size()); - for (size_t i = 0; i < input.size(); i++) { - output->data[i] = input[i]; + return ConvertArrayToTfLiteIntArray(input.size(), input.data()); +} + +TfLiteIntArray* ConvertArrayToTfLiteIntArray(const int rank, const int* dims) { + TfLiteIntArray* output = TfLiteIntArrayCreate(rank); + for (size_t i = 0; i < rank; i++) { + output->data[i] = dims[i]; } return output; } -bool EqualVectorAndTfLiteIntArray(const TfLiteIntArray* a, - const std::vector& b) { +bool EqualArrayAndTfLiteIntArray(const TfLiteIntArray* a, const int b_size, + const int* b) { if (!a) return false; - if (a->size != b.size()) return false; + if (a->size != b_size) return false; for (int i = 0; i < a->size; ++i) { if (a->data[i] != b[i]) return false; } diff --git a/tensorflow/contrib/lite/util.h b/tensorflow/contrib/lite/util.h index f505d82a11..a34db35823 100644 --- a/tensorflow/contrib/lite/util.h +++ b/tensorflow/contrib/lite/util.h @@ -29,9 +29,11 @@ namespace tflite { // Converts a `std::vector` to a 
`TfLiteIntArray`. TfLiteIntArray* ConvertVectorToTfLiteIntArray(const std::vector& input); -// Checks whether a `TfLiteIntArray` and `std::vector` have matching elements. -bool EqualVectorAndTfLiteIntArray(const TfLiteIntArray* a, - const std::vector& b); +TfLiteIntArray* ConvertArrayToTfLiteIntArray(const int rank, const int* dims); + +// Checks whether a `TfLiteIntArray` and an int array have matching elements. +bool EqualArrayAndTfLiteIntArray(const TfLiteIntArray* a, const int b_size, + const int* b); } // namespace tflite -- GitLab From bc57adb9576a4f8a04a04dc517d7069a2ac8f330 Mon Sep 17 00:00:00 2001 From: Sanjoy Das Date: Mon, 12 Mar 2018 13:44:58 -0700 Subject: [PATCH 752/884] [TF:XLA] Bump open source llvm revision to r327201 PiperOrigin-RevId: 188771994 --- tensorflow/workspace.bzl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index d7c3e3702f..e231ba8016 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -475,11 +475,11 @@ def tf_workspace(path_prefix="", tf_repo_name=""): tf_http_archive( name = "llvm", urls = [ - "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/636e2230de961637b059b9cd15799daef32544f8.tar.gz", - "https://github.com/llvm-mirror/llvm/archive/636e2230de961637b059b9cd15799daef32544f8.tar.gz", + "https://mirror.bazel.build/github.com/llvm-mirror/llvm/archive/197b6c81959a17be37035d4fe71b382023bff2f0.tar.gz", + "https://github.com/llvm-mirror/llvm/archive/197b6c81959a17be37035d4fe71b382023bff2f0.tar.gz", ], - sha256 = "44f08a32ac48eca545fd6eac4d5ef3a9cea4382f805b87dce38340255e7d2138", - strip_prefix = "llvm-636e2230de961637b059b9cd15799daef32544f8", + sha256 = "e77a8715fbd5d3c049bc7707da236152faab50ee2b7cec5234a0737b72ddb52a", + strip_prefix = "llvm-197b6c81959a17be37035d4fe71b382023bff2f0", build_file = str(Label("//third_party/llvm:llvm.BUILD")), ) -- GitLab From 27533f61ddfa674ceccb59777d24e2fe0157f70c Mon Sep 17 00:00:00 
2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 13:50:35 -0700 Subject: [PATCH 753/884] Move "hoist common factor out of aggregation" optimization to a separate stage. 1) Use a new naming scheme for optimized ops, share it with AddOpsRewrite 2) Make sure that tests actually test that optimized nodes exist in a graph PiperOrigin-RevId: 188772892 --- .../optimizers/arithmetic_optimizer.cc | 461 ++++++++++++------ .../optimizers/arithmetic_optimizer.h | 1 + .../optimizers/arithmetic_optimizer_test.cc | 212 +++++--- 3 files changed, 462 insertions(+), 212 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc index 177b0735e9..c0fcfaf428 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.cc @@ -290,25 +290,30 @@ NodeDef* GetTailOfValuePreservingChain( struct ArithmeticOptimizerContext { ArithmeticOptimizerContext( const std::unordered_set* nodes_to_preserve, - GraphDef* optimized_graph, NodeMap* node_map, + GraphDef* optimized_graph, NodeMap* node_map, FrameMap* frame_map, SetVector* nodes_to_simplify) : nodes_to_preserve(nodes_to_preserve), optimized_graph(optimized_graph), node_map(node_map), + frame_map(frame_map), nodes_to_simplify(nodes_to_simplify) {} const std::unordered_set* nodes_to_preserve; GraphDef* optimized_graph; NodeMap* node_map; + FrameMap* frame_map; SetVector* nodes_to_simplify; }; // Base class for single arithmetic optimization: e.g. Bitcast optimization, // AddOps optimization, etc... +// TODO(ezhulenev): extract this class to be reused by other multi-stage +// graph optimizers (const_folding, dependency_optimizer, etc...) 
class ArithmeticOptimizerStage { public: - explicit ArithmeticOptimizerStage(ArithmeticOptimizerContext ctx) - : ctx_(ctx) {} + explicit ArithmeticOptimizerStage(const string& name, + const ArithmeticOptimizerContext& ctx) + : name_(name), ctx_(ctx) {} virtual ~ArithmeticOptimizerStage() = default; // Check if we should try to simplify node. Returning true doesn't @@ -336,6 +341,46 @@ class ArithmeticOptimizerStage { string* simplified_node_name) = 0; protected: + struct ScopedNodeName { + string scope; + string name; + }; + + const ScopedNodeName ParseScopedNodeName(const string& name) const { + auto pos = name.find_last_of("/"); + if (pos == string::npos) { + return {"", name}; + } else { + return {name.substr(0, pos), name.substr(pos + 1)}; + } + } + + // Prefix optimized node name with stage name and rewrite_rule + const string OptimizedNodeName(const string& rewrite_rule, + const ScopedNodeName& scoped_node_name) const { + return MakeOptimizedNodeName(strings::StrCat(name_, "_", rewrite_rule), + scoped_node_name); + } + + // Prefix optimized node name with stage name and rewrite_rule + const string OptimizedNodeName(const string& rewrite_rule, + const ScopedNodeName& scoped_node_name, + const std::vector& node_names) const { + return MakeOptimizedNodeName(strings::StrCat(name_, "_", rewrite_rule), + scoped_node_name, node_names); + } + + // Prefix optimized node name with stage name + const string OptimizedNodeName(const ScopedNodeName& scoped_node_name) const { + return MakeOptimizedNodeName(name_, scoped_node_name); + } + + // Prefix optimized node name with stage name + const string OptimizedNodeName(const ScopedNodeName& scoped_node_name, + const std::vector& node_names) const { + return MakeOptimizedNodeName(name_, scoped_node_name, node_names); + } + // Simplification graph rewrite can create additional nodes that are inputs // to final simplified node, they can be also added to the arithmetic // optimizer queue for further optimization. 
@@ -374,7 +419,91 @@ class ArithmeticOptimizerStage { } } - ArithmeticOptimizerContext ctx_; + NodeDef* AddCopyNode(const string& name, const NodeDef* node_to_copy) { + CHECK(node_to_copy != nullptr); + CHECK(!ctx_.node_map->NodeExists(name)) + << "Node " << name << " already exists in a graph"; + NodeDef* new_node = ctx_.optimized_graph->add_node(); + *new_node = *node_to_copy; + new_node->set_name(name); + ctx_.node_map->AddNode(name, new_node); + return new_node; + } + + NodeDef* AddEmptyNode(const string& name) { + CHECK(!ctx_.node_map->NodeExists(name)) + << "Node " << name << " already exists in a graph"; + NodeDef* new_node = ctx_.optimized_graph->add_node(); + new_node->set_name(name); + ctx_.node_map->AddNode(name, new_node); + return new_node; + } + + // TODO(ezhulenev): remove this method from ArithmeticOptimizer when all + // optimizations will be migrated to stages + void AddFrameControlDeps(const NodeDef* old_node, + const std::vector& new_nodes, + const string& source_for_ctrl_dep, + const std::vector& sinks_for_control_dep) { + const auto frame_it = ctx_.frame_map->find(old_node); + if (frame_it != ctx_.frame_map->end()) { + for (auto node : new_nodes) { + ctx_.frame_map->emplace(node, frame_it->second); + } + if (!source_for_ctrl_dep.empty() && !sinks_for_control_dep.empty()) { + const string ctrl_dep = ConstantFolding::AddControlDependency( + source_for_ctrl_dep, ctx_.optimized_graph, ctx_.node_map); + for (auto node : sinks_for_control_dep) { + MaybeAddControlInput(ctrl_dep, node, ctx_.optimized_graph, + ctx_.node_map); + } + } + } + } + + const string name_; + const ArithmeticOptimizerContext ctx_; + + private: + // Get a name for a new node obtained by optimizing a single node of the + // original graph. The optimized node is placed under the original node scope. + // + // Node name uniqueness is guaranteed by unique name of an original node in + // a same scope. 
+ // + // Example: MakeOptimizedNodeName("AwesomeRewrite", "a/b/c/Add_1") + // Optimized name: "a/b/c/ArithmeticOptimizer/AwesomeRewrite_Add_1" + const string MakeOptimizedNodeName( + const string& prefix, const ScopedNodeName& scoped_node_name) const { + string node_name; + strings::StrAppend(&node_name, scoped_node_name.scope); + if (!node_name.empty()) strings::StrAppend(&node_name, "/"); + strings::StrAppend(&node_name, kArithmeticOptimizer, "/", prefix, "_", + scoped_node_name.name); + return node_name; + } + + // Get a name for a new node obtained by optimizing multiple nodes of the + // original graph, starting from "root". The optimized node is placed under + // the original scope of a "root" node. + // + // Node name uniqueness is guaranteed by unique name of a "root" node in + // a same scope. + // + // Example: + // MakeOptimizedNodeName("AwesomeRewrite", "a/b/Add_AB", ["x/y/Add_XY"]) + // Optimized name: + // "a/b/ArithmeticOptimizer/AwesomeRewrite_Add_AB_Add_XY" + const string MakeOptimizedNodeName( + const string& prefix, const ScopedNodeName& scoped_node_name, + const std::vector& node_names) const { + string node_name = MakeOptimizedNodeName(prefix, scoped_node_name); + for (const string& optimized : node_names) { + auto scoped_node = ParseScopedNodeName(optimized); + strings::StrAppend(&node_name, "_", scoped_node.name); + } + return node_name; + } }; // Rewrite a tree of Add/AddN with a single AddN operation, consuming all the @@ -393,8 +522,8 @@ class ArithmeticOptimizerStage { // q e class AddOpsRewriteStage : public ArithmeticOptimizerStage { public: - explicit AddOpsRewriteStage(ArithmeticOptimizerContext ctx) - : ArithmeticOptimizerStage(ctx), rewritten_nodes_() {} + explicit AddOpsRewriteStage(const ArithmeticOptimizerContext& ctx) + : ArithmeticOptimizerStage("AddOpsRewrite", ctx), rewritten_nodes_() {} ~AddOpsRewriteStage() override = default; @@ -422,7 +551,7 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { AddOpsGroup 
group; TF_RETURN_IF_ERROR(CreateAddOpsGroup(node, &group)); - if (!group.absorbed_nodes.empty()) { + if (!group.absorbed_nodes.empty() && !IsRewritten(group)) { *simplified_node_name = RewriteAddOpsGroup(group); } @@ -530,6 +659,12 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { DrivesControlDependency(*node)); } + // Check whether the optimized group node already exists. It might happen if + // the graph is optimized multiple times without pruning between invocations. + bool IsRewritten(const AddOpsGroup& group) const { + return ctx_.node_map->NodeExists(AddOpsGroupName(group)); + } + // Create an AddOpsGroup with a root in a given node Status CreateAddOpsGroup(const NodeDef* root_node, AddOpsGroup* group) { group->root_node = root_node; @@ -559,39 +694,23 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { return Status::OK(); } - const std::pair ParseNodeScopeAndName(const string& name) { - auto pos = name.find_last_of("/"); - if (pos == string::npos) { - return {"", name}; - } else { - return {name.substr(0, pos), name.substr(pos + 1)}; - } - } - // New node for AddOpsGroup is added to the same scope as a root_node. All // absorbed nodes are stripped of their scope, and only names are used in a // new node name. 
// // Example: AddOpsGroup(root="a/b/c/Add_2", absorbed=["d/Add_1", "e/Add"]) // node_name="a/b/c/AddOpsGroup_Add_2_Add_1_Add - string AddOpsGroupName(const AddOpsGroup& group) { + string AddOpsGroupName(const AddOpsGroup& group) const { CHECK_NOTNULL(group.root_node); - string node_name; - auto root_node = ParseNodeScopeAndName(group.root_node->name()); - auto root_scope = root_node.first; - auto root_name = root_node.second; - if (!root_scope.empty()) { - strings::StrAppend(&node_name, root_scope, "/"); - } + auto root = ParseScopedNodeName(group.root_node->name()); - strings::StrAppend(&node_name, kArithmeticOptimizer, "/", "AddOpsGroup_", - root_name); - for (const NodeDef* absorbed : group.absorbed_nodes) { - auto absorbed_node = ParseNodeScopeAndName(absorbed->name()); - strings::StrAppend(&node_name, "_", absorbed_node.second); - } - return node_name; + std::vector absorbed_node_names(group.absorbed_nodes.size()); + std::transform(group.absorbed_nodes.begin(), group.absorbed_nodes.end(), + absorbed_node_names.begin(), + [](const NodeDef* node) { return node->name(); }); + + return OptimizedNodeName(root, absorbed_node_names); } // Create a new node for a AddOpsGroup and return it's name. 
@@ -605,18 +724,17 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { // copy attributes from a root node DataType dtype = group.root_node->attr().at("T").type(); - // add new node - NodeDef* added_node = ctx_.optimized_graph->add_node(); - added_node->set_name(node_name); + // add new AddN node + NodeDef* added_node = AddEmptyNode(node_name); added_node->set_op("AddN"); added_node->set_device(group.root_node->device()); (*added_node->mutable_attr())["T"].set_type(dtype); (*added_node->mutable_attr())["N"].set_i(group.inputs.size()); - ctx_.node_map->AddNode(node_name, added_node); - for (string input : group.inputs) { + // all inputs of absorbed nodes are added to the new node + for (const string& input : group.inputs) { ctx_.node_map->AddOutput(input, node_name); - added_node->add_input(std::move(input)); + added_node->add_input(input); } VLOG(1) << "Absorbed " << group.absorbed_nodes.size() @@ -635,11 +753,167 @@ class AddOpsRewriteStage : public ArithmeticOptimizerStage { std::unordered_set rewritten_nodes_; }; +// Use the commutativity and (left- and right-) distributive property of +// multiplication over addition to hoist common factors out of aggregate nodes +// where all the inputs are Mul nodes. This pattern occurs frequently in +// regularization terms for the gradients during training. +// +// For example, we can rewrite an expression of the form: +// AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn)) +// to the following: +// Mul(x, AddN(y1, y2, y3, ... 
yn)) +class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage { + public: + explicit HoistCommonFactorOutOfAggregation( + const ArithmeticOptimizerContext& ctx) + : ArithmeticOptimizerStage("HoistCommonFactor", ctx) {} + ~HoistCommonFactorOutOfAggregation() override = default; + + bool IsSupported(const NodeDef* node) const override { + return IsAggregate(*node) && NumNonControlInputs(*node) > 1 && + !IsRewritten(node); + } + + Status TrySimplify(const NodeDef* node, + string* simplified_node_name) override { + CHECK(IsSupported(node)); + + std::set common_factors; + TF_RETURN_IF_ERROR(GetCommonFactors(node, &common_factors)); + + if (common_factors.size() == 1) { + const string& common_factor = *common_factors.begin(); + + // Gather up the non-shared factors + bool shapes_match = true; + std::vector unique_factors; + TF_RETURN_IF_ERROR(GetUniqueFactors(node, common_factor, &shapes_match, + &unique_factors)); + + if (shapes_match) { + NodeDef* input_0; + TF_RETURN_IF_ERROR(GetInputNode(node->input(0), &input_0)); + + // Use a copy of the first Mul node for the outer multiplication. + NodeDef* new_mul_node = AddCopyNode(OuterMulNodeName(node), input_0); + // And a copy of aggregation node as one of the inner operands + NodeDef* new_add_node = AddCopyNode(InnerAddNodeName(node), node); + + new_mul_node->set_device(node->device()); + new_mul_node->set_input(0, common_factor); + new_mul_node->set_input(1, new_add_node->name()); + + ctx_.node_map->AddOutput(common_factor, new_mul_node->name()); + ctx_.node_map->AddOutput(new_add_node->name(), new_mul_node->name()); + + // Hoist non-shared factors up into the new AddN node. + for (int i = 0; i < unique_factors.size(); ++i) { + new_add_node->set_input(i, unique_factors[i]); + } + + // Add frame dependencies that the original node might have had. 
+ AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor, + {new_add_node}); + + // optimize new inner aggregation node + AddToOptimizationQueue(new_add_node); + // do not optimize the same node twice + rewritten_nodes_.insert(node->name()); + *simplified_node_name = new_mul_node->name(); + } + } + return Status::OK(); + } + + private: + // Get a name for new outer Mul node + string OuterMulNodeName(const NodeDef* node) const { + auto scoped_node = ParseScopedNodeName(node->name()); + return OptimizedNodeName("Mul", scoped_node); + } + + // Get a name for new inner Add node + string InnerAddNodeName(const NodeDef* node) const { + auto scoped_node = ParseScopedNodeName(node->name()); + return OptimizedNodeName("Add", scoped_node); + } + + // Determine the set of common factors if the input nodes are all Mul nodes. + Status GetCommonFactors(const NodeDef* node, + std::set* common_factors) const { + CHECK(common_factors->empty()); + + for (int i = 0; i < node->input_size(); ++i) { + if (i > 0 && common_factors->empty()) break; + if (IsControlInput(node->input(i))) break; + + NodeDef* input; + TF_RETURN_IF_ERROR(GetInputNode(node->input(i), &input)); + + if (!IsMul(*input)) { + common_factors->clear(); + break; + } + + std::set factors_i{input->input(0), input->input(1)}; + if (i == 0) { + std::swap(*common_factors, factors_i); + } else { + std::set intersection; + std::set_intersection( + factors_i.begin(), factors_i.end(), common_factors->begin(), + common_factors->end(), + std::inserter(intersection, intersection.begin())); + std::swap(*common_factors, intersection); + } + } + return Status::OK(); + } + + // Gather up the non-shared factors (the y's in the example). + // Unless the aggregation is Add, we have to make sure that all the y's + // have the same shape since the other aggregation ops do not support + // broadcasting. 
+ Status GetUniqueFactors(const NodeDef* node, const string& common_factor, + bool* shapes_match, + std::vector* unique_factors) const { + *shapes_match = true; + unique_factors->reserve(node->input_size()); + + for (int i = 0; i < node->input_size() && shapes_match; ++i) { + const string& input = node->input(i); + if (IsControlInput(input)) { + break; + } + NodeDef* mul_node; + TF_RETURN_IF_ERROR(GetInputNode(input, &mul_node)); + const int unique_factor_index = + mul_node->input(0) == common_factor ? 1 : 0; + unique_factors->push_back(mul_node->input(unique_factor_index)); + if (i > 0 && !IsAdd(*node)) { + *shapes_match = ShapesEqual(unique_factors->front(), + unique_factors->back(), *ctx_.node_map); + } + } + return Status::OK(); + } + + bool IsRewritten(const NodeDef* node) const { + // if graph rewrite happens in multiple passes without graph pruning between + // them, it's possible that rewritten node already exists in a graph + return rewritten_nodes_.find(node->name()) != rewritten_nodes_.end() || + ctx_.node_map->NodeExists(OuterMulNodeName(node)); + } + + // keep names of the nodes that were optimized by this stage + std::unordered_set rewritten_nodes_; +}; + // Removes inverse transpose nodes class RemoveInverseTranspose : public ArithmeticOptimizerStage { public: - explicit RemoveInverseTranspose(ArithmeticOptimizerContext ctx) - : ArithmeticOptimizerStage(ctx) {} + explicit RemoveInverseTranspose(const ArithmeticOptimizerContext& ctx) + : ArithmeticOptimizerStage("RemoveInverseTranspose", ctx) {} ~RemoveInverseTranspose() override = default; bool IsSupported(const NodeDef* node) const override { @@ -702,8 +976,8 @@ class RemoveInverseTranspose : public ArithmeticOptimizerStage { // 2) Rewrite Bitcast(Bitcast(x, type1), type2) => Bitcast(x, type2) class RemoveRedundantBitcastStage : public ArithmeticOptimizerStage { public: - explicit RemoveRedundantBitcastStage(ArithmeticOptimizerContext ctx) - : ArithmeticOptimizerStage(ctx) {} + explicit 
RemoveRedundantBitcastStage(const ArithmeticOptimizerContext& ctx) + : ArithmeticOptimizerStage("RemoveRedundantBitcast", ctx) {} ~RemoveRedundantBitcastStage() override = default; bool IsSupported(const NodeDef* node) const override { @@ -742,8 +1016,8 @@ class RemoveRedundantBitcastStage : public ArithmeticOptimizerStage { // Remove Casts whose source type and destination type are equal. class RemoveRedundantCastStage : public ArithmeticOptimizerStage { public: - explicit RemoveRedundantCastStage(ArithmeticOptimizerContext ctx) - : ArithmeticOptimizerStage(ctx) {} + explicit RemoveRedundantCastStage(const ArithmeticOptimizerContext& ctx) + : ArithmeticOptimizerStage("RemoveRedundantCast", ctx) {} ~RemoveRedundantCastStage() override = default; bool IsSupported(const NodeDef* node) const override { return IsCast(*node); } @@ -1276,98 +1550,6 @@ string ArithmeticOptimizer::TrySimplifyAndReplaceUses( } } - // Use the commutativity and (left- and right-) distributive property of - // multiplication over addition to hoist common factors out of aggregate nodes - // where all the inputs are Mul nodes. This pattern occurs frequently in - // regularization terms for the gradients during training. - // For example, we can rewrite an expression of the form: - // AddN(Mul(x, y1), Mul(y2, x), Mul(x, y3), ... Mul(x, yn)) - // to the following: - // Mul(x, AddN(y1, y2, y3, ... yn)) - if (IsAggregate(*node) && NumNonControlInputs(*node) > 1 && - !OptimizedNodeExists(*node, "hoist_add") && - !OptimizedNodeExists(*node, "hoist_mul")) { - // Determine the set of common factors if the input nodes are all Mul nodes. 
- std::set common_factors; - for (int i = 0; i < node->input_size(); ++i) { - if (i > 0 && common_factors.empty()) { - break; - } - if (IsControlInput(node->input(i))) { - break; - } - const NodeDef* input = node_map_->GetNode(node->input(i)); - if (input->op() == "Mul") { - std::set factors_i{input->input(0), input->input(1)}; - if (i == 0) { - std::swap(common_factors, factors_i); - } else { - std::set intersection; - std::set_intersection( - factors_i.begin(), factors_i.end(), common_factors.begin(), - common_factors.end(), - std::inserter(intersection, intersection.begin())); - std::swap(common_factors, intersection); - } - } else { - common_factors.clear(); - } - } - if (common_factors.size() == 1) { - const string& common_factor = *common_factors.begin(); - - // Gather up the non-shared factors (the y's in the example). - // Unless the aggregation is Add, we have to make sure that all the y's - // have the same shape since the other aggregation ops do not support - // broadcasting. - std::vector unique_factors; - unique_factors.reserve(node->input_size()); - bool shapes_match = true; - for (int i = 0; i < node->input_size() && shapes_match; ++i) { - const string& input = node->input(i); - if (IsControlInput(input)) { - break; - } - const NodeDef* mul_node = node_map_->GetNode(input); - const int unique_factor_index = - mul_node->input(0) == common_factor ? 1 : 0; - unique_factors.push_back(mul_node->input(unique_factor_index)); - if (i > 0 && !IsAdd(*node)) { - shapes_match = ShapesEqual(unique_factors.front(), - unique_factors.back(), *node_map_); - } - } - - if (shapes_match) { - // 1. Use a copy of the first Mul node for the outer multiplication. 
- NodeDef* new_mul_node = AddNode(OptimizedNodeName(*node, "hoist_mul"), - node_map_->GetNode(node->input(0))); - NodeDef* new_add_node = AddNode(*node, "hoist_add", /*copy_node=*/true); - new_mul_node->set_device(node->device()); - new_mul_node->set_input(0, common_factor); - node_map_->AddOutput(common_factor, new_mul_node->name()); - new_mul_node->set_input(1, new_add_node->name()); - node_map_->AddOutput(new_add_node->name(), new_mul_node->name()); - - // 2. Hoist non-shared factors up into the new AddN node. - nodes_to_simplify->PushBack(new_add_node); - for (int i = 0; i < node->input_size(); ++i) { - const string& input = node->input(i); - if (IsControlInput(input)) { - break; - } - new_add_node->set_input(i, unique_factors[i]); - } - - // 3. Add frame dependencies that the original node might have had. - AddFrameControlDeps(node, {new_add_node, new_mul_node}, common_factor, - {new_add_node}); - - return new_mul_node->name(); - } - } - } - // Fold Transpose into matrix multiplication. 
if ((node->op() == "MatMul" || node->op() == "SparseMatMul" || node->op() == "BatchMatMul") && @@ -1444,8 +1626,9 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { nodes_to_simplify.PushBack(optimized_graph_->mutable_node(i)); } - ArithmeticOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_, - node_map_.get(), &nodes_to_simplify); + const ArithmeticOptimizerContext ctx(&nodes_to_preserve_, optimized_graph_, + node_map_.get(), &frame_map_, + &nodes_to_simplify); std::vector> stages; @@ -1453,6 +1636,10 @@ Status ArithmeticOptimizer::SimplifyArithmeticOps() { stages.push_back( std::unique_ptr(new AddOpsRewriteStage(ctx))); } + if (options_.hoist_common_factor_out_of_aggregation) { + stages.push_back(std::unique_ptr( + new HoistCommonFactorOutOfAggregation(ctx))); + } if (options_.remove_inverse_transpose) { stages.push_back(std::unique_ptr( new RemoveInverseTranspose(ctx))); diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h index 787084454d..d5a7af5ba6 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer.h @@ -56,6 +56,7 @@ class ArithmeticOptimizer : public GraphOptimizer { // Granular control for arithmetic optimizer stages struct ArithmeticOptimizerOptions { bool combine_add_to_addn = true; + bool hoist_common_factor_out_of_aggregation = true; bool remove_inverse_transpose = true; bool remove_redundant_bitcast = true; bool remove_redundant_cast = true; diff --git a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc index 98842b29f1..e1f47625c1 100644 --- a/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc +++ b/tensorflow/core/grappler/optimizers/arithmetic_optimizer_test.cc @@ -30,6 +30,22 @@ namespace grappler { namespace { +constexpr char kHoistFactorOptimizerMul[] = + 
"ArithmeticOptimizer/HoistCommonFactor_Mul_"; + +constexpr char kHoistFactorOptimizerAdd[] = + "ArithmeticOptimizer/HoistCommonFactor_Add_"; + +// Optimized name of outer Mul node by HoistCommonFactorOutOfAggregation +string HoistMulName(const string& name) { + return AddPrefixToNodeName(name, kHoistFactorOptimizerMul, ""); +} + +// Optimized name of inner Add node by HoistCommonFactorOutOfAggregation +string HoistAddName(const string& name) { + return AddPrefixToNodeName(name, kHoistFactorOptimizerAdd, ""); +} + string OptimizedName(const string& name) { return AddPrefixToNodeName(name, kArithmeticOptimizer); } @@ -61,22 +77,40 @@ class ArithmeticOptimizerTest : public GrapplerTest { TF_EXPECT_OK(ModelPruner().Optimize(nullptr, *item, output)); } + // Run ArithmeticOptimizer twice to make sure the rewrite is idempotent. + void OptimizeTwice(ArithmeticOptimizer* optimizer, GrapplerItem* item, + GraphDef* output) { + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + item->graph.Swap(output); + TF_EXPECT_OK(optimizer->Optimize(nullptr, *item, output)); + } + // TODO(ezhulenev): Make private. 
After migration to stages each test // should explicitly enable required optimization for tests isolation void DisableAllStages(ArithmeticOptimizer* optimizer) { ArithmeticOptimizer::ArithmeticOptimizerOptions options; options.combine_add_to_addn = false; + options.hoist_common_factor_out_of_aggregation = false; options.remove_inverse_transpose = false; options.remove_redundant_bitcast = false; options.remove_redundant_cast = false; optimizer->options_ = options; } + void DisableAddToAddNCombining(ArithmeticOptimizer* optimizer) { + optimizer->options_.combine_add_to_addn = false; + } + void EnableOnlyAddToAddNCombining(ArithmeticOptimizer* optimizer) { DisableAllStages(optimizer); optimizer->options_.combine_add_to_addn = true; } + void EnableOnlyHoistCommonFactor(ArithmeticOptimizer* optimizer) { + DisableAllStages(optimizer); + optimizer->options_.hoist_common_factor_out_of_aggregation = true; + } + void EnableOnlyRemoveInverseTranspose(ArithmeticOptimizer* optimizer) { DisableAllStages(optimizer); optimizer->options_.remove_inverse_transpose = true; @@ -396,59 +430,66 @@ TEST_F(ArithmeticOptimizerTest, TrivialSumsRepeatedAdd) { } ArithmeticOptimizer optimizer; - DisableAllStages(&optimizer); + DisableAddToAddNCombining(&optimizer); GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. 
- item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); + OptimizeTwice(&optimizer, &item, &output); - EXPECT_EQ(17, output.node_size()); - // The graph gets optimized to + // We expect the following rewrite(s) to occur: + // // Mul(p, - // Add(Add(Const(2), Const(2)), - // Add(Const(2), Const(2)))) + // Add_6(Add_4(Const(2), Const(2)), + // Add_5(Const(2), Const(2)))) + NodeMap node_map(&output); + EXPECT_EQ(17, output.node_size()); - for (const auto& node : output.node()) { - if ("id" == node.name()) { - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ(OptimizedName("Add_6_hoist_mul"), node.input(0)); - } else if (OptimizedName("Add_6_hoist_mul") == node.name()) { - EXPECT_EQ("Mul", node.op()); - EXPECT_EQ(2, node.input_size()); - EXPECT_EQ("Placeholder", node.input(0)); - EXPECT_EQ(OptimizedName("Add_6_hoist_add"), node.input(1)); - } else if (OptimizedName("Add_6_hoist_add") == node.name()) { - EXPECT_EQ("Add", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ(OptimizedName("Add_4_hoist_add"), node.input(0)); - EXPECT_EQ(OptimizedName("Add_5_hoist_add"), node.input(1)); - EXPECT_EQ("^Placeholder", node.input(2)); - } else if (OptimizedName("Add_4_hoist_add") == node.name()) { - EXPECT_EQ("Add", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ(OptimizedName("Add_const"), node.input(0)); - EXPECT_EQ(OptimizedName("Add_1_const"), node.input(1)); - EXPECT_EQ("^Placeholder", node.input(2)); - } else if (OptimizedName("Add_5_hoist_add") == node.name()) { - EXPECT_EQ("Add", node.op()); - EXPECT_EQ(3, node.input_size()); - EXPECT_EQ(OptimizedName("Add_const"), node.input(0)); - EXPECT_EQ(OptimizedName("Add_1_const"), node.input(1)); - EXPECT_EQ("^Placeholder", node.input(2)); - } else if (OptimizedName("Add_const") == node.name()) { - EXPECT_EQ("Const", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("^Placeholder", node.input(0)); - } else if (OptimizedName("Add_1_const") == node.name()) { - 
EXPECT_EQ("Const", node.op()); - EXPECT_EQ(1, node.input_size()); - EXPECT_EQ("^Placeholder", node.input(0)); - } - } + + const NodeDef* id_node = node_map.GetNode("id"); + ASSERT_TRUE(id_node != nullptr); + EXPECT_EQ(1, id_node->input_size()); + EXPECT_EQ(HoistMulName("Add_6"), id_node->input(0)); + + const NodeDef* mul_node = node_map.GetNode(HoistMulName("Add_6")); + ASSERT_TRUE(mul_node != nullptr); + EXPECT_EQ(2, mul_node->input_size()); + EXPECT_EQ("Placeholder", mul_node->input(0)); + EXPECT_EQ(HoistAddName("Add_6"), mul_node->input(1)); + + const NodeDef* add_6_node = node_map.GetNode(HoistAddName("Add_6")); + ASSERT_TRUE(add_6_node != nullptr); + EXPECT_EQ(3, add_6_node->input_size()); + EXPECT_EQ(HoistAddName("Add_4"), add_6_node->input(0)); + EXPECT_EQ(HoistAddName("Add_5"), add_6_node->input(1)); + EXPECT_EQ("^Placeholder", add_6_node->input(2)); + + const NodeDef* add_4_node = node_map.GetNode(HoistAddName("Add_4")); + ASSERT_TRUE(add_4_node != nullptr); + EXPECT_EQ("Add", add_4_node->op()); + EXPECT_EQ(3, add_4_node->input_size()); + EXPECT_EQ(OptimizedName("Add_const"), add_4_node->input(0)); + EXPECT_EQ(OptimizedName("Add_1_const"), add_4_node->input(1)); + EXPECT_EQ("^Placeholder", add_4_node->input(2)); + + const NodeDef* add_5_node = node_map.GetNode(HoistAddName("Add_5")); + ASSERT_TRUE(add_5_node != nullptr); + EXPECT_EQ("Add", add_5_node->op()); + EXPECT_EQ(3, add_5_node->input_size()); + EXPECT_EQ(OptimizedName("Add_const"), add_5_node->input(0)); + EXPECT_EQ(OptimizedName("Add_1_const"), add_5_node->input(1)); + EXPECT_EQ("^Placeholder", add_5_node->input(2)); + + const NodeDef* add_const_node = node_map.GetNode(OptimizedName("Add_const")); + ASSERT_TRUE(add_const_node != nullptr); + EXPECT_EQ("Const", add_const_node->op()); + EXPECT_EQ(1, add_const_node->input_size()); + EXPECT_EQ("^Placeholder", add_const_node->input(0)); + + const NodeDef* add_1_const_node = + node_map.GetNode(OptimizedName("Add_1_const")); + ASSERT_TRUE(add_1_const_node 
!= nullptr); + EXPECT_EQ("Const", add_1_const_node->op()); + EXPECT_EQ(1, add_1_const_node->input_size()); + EXPECT_EQ("^Placeholder", add_1_const_node->input(0)); } TEST_F(ArithmeticOptimizerTest, HoistFactor) { @@ -469,31 +510,46 @@ TEST_F(ArithmeticOptimizerTest, HoistFactor) { ops::Add(s.WithOpName("add"), mul1, mul2)); GrapplerItem item; + item.fetch = {"id"}; TF_CHECK_OK(s.ToGraphDef(&item.graph)); + ArithmeticOptimizer optimizer; + EnableOnlyHoistCommonFactor(&optimizer); + GraphDef output; - Status status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); - // Run the optimizer twice to make sure the rewrite is idempotent. - item.graph.Swap(&output); - status = optimizer.Optimize(nullptr, item, &output); - TF_EXPECT_OK(status); + OptimizeTwice(&optimizer, &item, &output); + + // We expect the following rewrite(s) to occur: + // + // Add Mul + // / \ / \ + // Mul Mul -> x Add + // / \ / \ / \ + // x y1 y2 x y1 y2 + // + // If "root" op is AddN and shapes does not match, this rewrite is not + // possible and graph should stay intact. 
+ NodeMap node_map(&output); if (use_addn && !matching_shapes) { VerifyGraphsMatch(item.graph, output, __LINE__); } else { EXPECT_EQ(9, output.node_size()); - const NodeDef& new_add = output.node(8); - EXPECT_EQ(OptimizedName("add_hoist_add"), new_add.name()); - EXPECT_EQ("y1", new_add.input(0)); - EXPECT_EQ("y2", new_add.input(1)); - const NodeDef& new_mul = output.node(7); - EXPECT_EQ(OptimizedName("add_hoist_mul"), new_mul.name()); - EXPECT_EQ("x", new_mul.input(0)); - EXPECT_EQ(OptimizedName("add_hoist_add"), new_mul.input(1)); - const NodeDef& new_id = output.node(6); - EXPECT_EQ("id", new_id.name()); - EXPECT_EQ(OptimizedName("add_hoist_mul"), new_id.input(0)); + + const NodeDef* new_add_node = node_map.GetNode(HoistAddName("add")); + ASSERT_TRUE(new_add_node != nullptr) << "Hoisted Add node not found"; + EXPECT_EQ("y1", new_add_node->input(0)); + EXPECT_EQ("y2", new_add_node->input(1)); + + const NodeDef* new_mul_node = node_map.GetNode(HoistMulName("add")); + ASSERT_TRUE(new_mul_node != nullptr) << "Hoisted Mul node not found"; + EXPECT_EQ("x", new_mul_node->input(0)); + EXPECT_EQ(new_add_node->name(), new_mul_node->input(1)); + + const NodeDef* id_node = node_map.GetNode("id"); + ASSERT_TRUE(id_node != nullptr) << "Id node not found"; + EXPECT_EQ("id", id_node->name()); + EXPECT_EQ(HoistMulName("add"), id_node->input(0)); } } } @@ -1249,8 +1305,9 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteCollapseAddsOfIdenticalShape) { NodeMap node_map(&output); // check add tree was replaced with AddN - const NodeDef* collapsed_add = CHECK_NOTNULL( - node_map.GetNode("y/ArithmeticOptimizer/AddOpsGroup_Add_abc_Add_ab")); + const NodeDef* collapsed_add = + node_map.GetNode("y/ArithmeticOptimizer/AddOpsRewrite_Add_abc_Add_ab"); + ASSERT_TRUE(collapsed_add != nullptr); EXPECT_EQ("AddN", collapsed_add->op()); EXPECT_EQ(3, collapsed_add->input_size()); @@ -1259,7 +1316,8 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteCollapseAddsOfIdenticalShape) { EXPECT_EQ("c", 
collapsed_add->input(2)); // check output was re-wired to new node - const NodeDef* updated_outputs = CHECK_NOTNULL(node_map.GetNode("outputs")); + const NodeDef* updated_outputs = node_map.GetNode("outputs"); + ASSERT_TRUE(updated_outputs != nullptr); EXPECT_EQ(collapsed_add->name(), updated_outputs->input(0)); } @@ -1306,8 +1364,9 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteMultiplePasses) { NodeMap node_map(&output); // check left Add subtree replaced with AddN - const NodeDef* collapsed_left = CHECK_NOTNULL( - node_map.GetNode("ArithmeticOptimizer/AddOpsGroup_Add_abc_Add_ab")); + const NodeDef* collapsed_left = + node_map.GetNode("ArithmeticOptimizer/AddOpsRewrite_Add_abc_Add_ab"); + ASSERT_TRUE(collapsed_left != nullptr); EXPECT_EQ("AddN", collapsed_left->op()); EXPECT_EQ(3, collapsed_left->input_size()); @@ -1316,8 +1375,9 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteMultiplePasses) { EXPECT_EQ("c", collapsed_left->input(2)); // check right Add subtree replaced with AddN - const NodeDef* collapsed_right = CHECK_NOTNULL( - node_map.GetNode("ArithmeticOptimizer/AddOpsGroup_Add_xyz_Add_xy")); + const NodeDef* collapsed_right = + node_map.GetNode("ArithmeticOptimizer/AddOpsRewrite_Add_xyz_Add_xy"); + ASSERT_TRUE(collapsed_right != nullptr); EXPECT_EQ("AddN", collapsed_right->op()); EXPECT_EQ(3, collapsed_right->input_size()); @@ -1326,7 +1386,8 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteMultiplePasses) { EXPECT_EQ("z", collapsed_right->input(2)); // check that Mul inputs re-wired to new Nodes - const NodeDef* updated_mul = CHECK_NOTNULL(node_map.GetNode("Mul")); + const NodeDef* updated_mul = node_map.GetNode("Mul"); + ASSERT_TRUE(updated_mul != nullptr); EXPECT_EQ("Mul", updated_mul->op()); EXPECT_EQ(2, updated_mul->input_size()); @@ -1367,8 +1428,9 @@ TEST_F(ArithmeticOptimizerTest, AddOpsRewriteAddInputThroughMultiplePaths) { NodeMap node_map(&output); // check Add tree replaced with AddN - const NodeDef* collapsed_add = 
CHECK_NOTNULL(node_map.GetNode( - "ArithmeticOptimizer/AddOpsGroup_Add_all_Add_ab_Add_bc")); + const NodeDef* collapsed_add = node_map.GetNode( + "ArithmeticOptimizer/AddOpsRewrite_Add_all_Add_ab_Add_bc"); + ASSERT_TRUE(collapsed_add != nullptr); EXPECT_EQ("AddN", collapsed_add->op()); EXPECT_EQ(4, collapsed_add->input_size()); -- GitLab From c111ed1be0091ee5c26bea66a86b8f511a61a152 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 14:23:49 -0700 Subject: [PATCH 754/884] K-FAC: FisherBlocks for tf.nn.{depthwise_conv2d, separable_conv2d, convolution}. PiperOrigin-RevId: 188778072 --- .../python/kernel_tests/fisher_blocks_test.py | 71 +++- .../kernel_tests/fisher_factors_test.py | 320 ++++++++++++++-- .../kernel_tests/layer_collection_test.py | 57 ++- .../kfac/python/kernel_tests/utils_test.py | 80 ++++ .../contrib/kfac/python/ops/fisher_blocks.py | 349 ++++++++++++++++-- .../contrib/kfac/python/ops/fisher_factors.py | 139 +++++-- .../kfac/python/ops/layer_collection.py | 233 +++++++++++- .../kfac/python/ops/layer_collection_lib.py | 2 + tensorflow/contrib/kfac/python/ops/utils.py | 122 ++++++ .../contrib/kfac/python/ops/utils_lib.py | 3 + 10 files changed, 1271 insertions(+), 105 deletions(-) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py index c9c0f8e0ae..b70c700f09 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_blocks_test.py @@ -764,6 +764,54 @@ class ConvDiagonalFBTest(test.TestCase): return multiply_result, multiply_inverse_result +class DepthwiseConvKFCBasicFBTest(test.TestCase): + + def testInstantiateFactors(self): + with ops.Graph().as_default(): + random_seed.set_random_seed(200) + params = random_ops.random_normal((3, 3, 8, 2)) + inputs = random_ops.random_normal((32, 5, 5, 8)) + outputs = random_ops.random_normal((32, 5, 5, 16)) + 
layer_collection = lc.LayerCollection() + block = fb.DepthwiseConvKFCBasicFB( + layer_collection, params=params, strides=[1, 1, 1, 1], padding='SAME') + block.register_additional_minibatch(inputs, outputs) + grads = outputs**2 + block.instantiate_factors(([grads],), 0.5) + + def testMultiplyInverse(self): + with ops.Graph().as_default(), self.test_session() as sess: + random_seed.set_random_seed(200) + params = random_ops.random_normal((3, 3, 8, 2)) + inputs = random_ops.random_normal((32, 5, 5, 8)) + outputs = random_ops.random_normal((32, 5, 5, 16)) + layer_collection = lc.LayerCollection() + block = fb.DepthwiseConvKFCBasicFB( + layer_collection, params=params, strides=[1, 1, 1, 1], padding='SAME') + block.register_additional_minibatch(inputs, outputs) + grads = outputs**2 + block.instantiate_factors(([grads],), 0.5) + block._input_factor.instantiate_cov_variables() + block._output_factor.instantiate_cov_variables() + block.register_inverse() + block._input_factor.instantiate_inv_variables() + block._output_factor.instantiate_inv_variables() + + # Ensure inverse update op doesn't crash. + sess.run(tf_variables.global_variables_initializer()) + sess.run([ + factor.make_inverse_update_ops() + for factor in layer_collection.get_factors() + ]) + + # Ensure inverse-vector multiply doesn't crash. + output = block.multiply_inverse(params) + sess.run(output) + + # Ensure same shape. 
+ self.assertAllEqual(output.shape, params.shape) + + class ConvKFCBasicFBTest(test.TestCase): def _testConvKFCBasicFBInitParams(self, params): @@ -775,16 +823,17 @@ class ConvKFCBasicFBTest(test.TestCase): params = array_ops.constant(params) inputs = random_ops.random_normal((2, 2, 2)) outputs = random_ops.random_normal((2, 2, 2)) - block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, [1, 1, 1], 'SAME') + block = fb.ConvKFCBasicFB( + lc.LayerCollection(), params=params, padding='SAME') block.register_additional_minibatch(inputs, outputs) self.assertAllEqual([outputs], block.tensors_to_compute_grads()) def testConvKFCBasicFBInitParamsParamsTuple(self): - self._testConvKFCBasicFBInitParams([np.array([1., 2.]), np.array(3.)]) + self._testConvKFCBasicFBInitParams([np.ones([1, 2, 2]), np.ones([2])]) def testConvKFCBasicFBInitParamsParamsSingle(self): - self._testConvKFCBasicFBInitParams([np.array([1., 2.])]) + self._testConvKFCBasicFBInitParams([np.ones([1, 2, 2])]) def testMultiplyInverseTuple(self): with ops.Graph().as_default(), self.test_session() as sess: @@ -792,8 +841,8 @@ class ConvKFCBasicFBTest(test.TestCase): params = random_ops.random_normal((2, 2, 2, 2)) inputs = random_ops.random_normal((2, 2, 2, 2)) outputs = random_ops.random_normal((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1), - 'SAME') + block = fb.ConvKFCBasicFB( + lc.LayerCollection(), params=params, padding='SAME') block.register_additional_minibatch(inputs, outputs) grads = outputs**2 block.instantiate_factors(((grads,),), 0.5) @@ -823,8 +872,8 @@ class ConvKFCBasicFBTest(test.TestCase): params = random_ops.random_normal((2, 2, 2, 2)) inputs = random_ops.random_normal((2, 2, 2, 2)) outputs = random_ops.random_normal((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1), - 'SAME') + block = fb.ConvKFCBasicFB( + lc.LayerCollection(), params=params, padding='SAME') block.register_additional_minibatch(inputs, outputs) 
self.assertFalse(block._has_bias) grads = outputs**2 @@ -851,8 +900,8 @@ class ConvKFCBasicFBTest(test.TestCase): params = [random_ops.random_normal((2, 2, 2, 2))] inputs = random_ops.random_normal((2, 2, 2, 2)) outputs = random_ops.random_normal((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1), - 'SAME') + block = fb.ConvKFCBasicFB( + lc.LayerCollection(), params=params, padding='SAME') block.register_additional_minibatch(inputs, outputs) self.assertTrue(block._has_bias) grads = outputs**2 @@ -879,8 +928,8 @@ class ConvKFCBasicFBTest(test.TestCase): params = array_ops.zeros((2, 2, 2, 2)) inputs = array_ops.zeros((2, 2, 2, 2)) outputs = array_ops.zeros((2, 2, 2, 2)) - block = fb.ConvKFCBasicFB(lc.LayerCollection(), params, (1, 1, 1, 1), - 'SAME') + block = fb.ConvKFCBasicFB( + lc.LayerCollection(), params=params, padding='SAME') block.register_additional_minibatch(inputs, outputs) grads = outputs**2 damping = 0. # This test is only valid without damping. 
diff --git a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py index beb427bdcc..16f02f1199 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/fisher_factors_test.py @@ -23,12 +23,14 @@ import numpy.random as npr from tensorflow.contrib.kfac.python.ops import fisher_blocks as fb from tensorflow.contrib.kfac.python.ops import fisher_factors as ff +from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops as tf_ops from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import gradients_impl from tensorflow.python.ops import math_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import variables as tf_variables from tensorflow.python.platform import test @@ -447,6 +449,117 @@ class EmbeddingInputKroneckerFactorTest(test.TestCase): self.assertAllClose(np.array([1., 1., 0., 0., 1.]) / 3., new_cov) +class ConvDiagonalFactorTest(test.TestCase): + + def setUp(self): + self.batch_size = 10 + self.height = self.width = 32 + self.in_channels = 3 + self.out_channels = 1 + self.kernel_height = self.kernel_width = 3 + self.strides = [1, 2, 2, 1] + self.data_format = 'NHWC' + self.padding = 'SAME' + self.kernel_shape = [ + self.kernel_height, self.kernel_width, self.in_channels, + self.out_channels + ] + + def testInit(self): + with tf_ops.Graph().as_default(): + inputs = random_ops.random_uniform( + [self.batch_size, self.height, self.width, self.in_channels]) + outputs_grads = [ + random_ops.random_uniform([ + self.batch_size, self.height // self.strides[1], + self.width // self.strides[2], self.out_channels + ]) for _ in range(3) + ] + + factor = ff.ConvDiagonalFactor( + inputs, + outputs_grads, + self.kernel_shape, + self.strides, + 
self.padding, + data_format=self.data_format) + factor.instantiate_cov_variables() + + # Ensure covariance matrix's shape makes sense. + self.assertEqual([ + self.kernel_height * self.kernel_width * self.in_channels, + self.out_channels + ], + factor.get_cov_var().shape.as_list()) + + def testMakeCovarianceUpdateOp(self): + with tf_ops.Graph().as_default(): + # Construct all arguments such that convolution kernel is applied in + # exactly one spatial location. + inputs = np.random.randn( + 1, # batch_size + self.kernel_height, + self.kernel_width, + self.in_channels) # in_channels + outputs_grad = np.random.randn( + 1, # batch_size + 1, # output_height + 1, # output_width + self.out_channels) + + factor = ff.ConvDiagonalFactor( + constant_op.constant(inputs), [constant_op.constant(outputs_grad)], + self.kernel_shape, + strides=[1, 1, 1, 1], + padding='VALID') + factor.instantiate_cov_variables() + + # Completely forget initial value on first update. + cov_update_op = factor.make_covariance_update_op(0.0) + + # Ensure new covariance value is same as outer-product of inputs/outputs + # vectorized, squared. + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + cov = sess.run(cov_update_op) + expected_cov = np.outer(inputs.flatten(), outputs_grad.flatten())**2 + self.assertAllClose(expected_cov, cov) + + def testHasBias(self): + with tf_ops.Graph().as_default(): + inputs = random_ops.random_uniform( + [self.batch_size, self.height, self.width, self.in_channels]) + outputs_grads = [ + random_ops.random_uniform([ + self.batch_size, self.height // self.strides[1], + self.width // self.strides[2], self.out_channels + ]) for _ in range(3) + ] + + factor = ff.ConvDiagonalFactor( + inputs, + outputs_grads, + self.kernel_shape, + self.strides, + self.padding, + data_format=self.data_format, + has_bias=True) + factor.instantiate_cov_variables() + + # Ensure shape accounts for bias. 
+ self.assertEqual([ + self.kernel_height * self.kernel_width * self.in_channels + 1, + self.out_channels + ], + factor.get_cov_var().shape.as_list()) + + # Ensure update op doesn't crash. + cov_update_op = factor.make_covariance_update_op(0.0) + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + sess.run(cov_update_op) + + class FullyConnectedKroneckerFactorTest(test.TestCase): def _testFullyConnectedKroneckerFactorInit(self, @@ -493,24 +606,152 @@ class FullyConnectedKroneckerFactorTest(test.TestCase): self.assertAllClose([[3, 3.5], [3.5, 5.5]], new_cov) -class ConvInputKroneckerFactorTest(test.TestCase): +class ConvFactorTestCase(test.TestCase): + + def assertMatrixRank(self, rank, matrix, atol=1e-5): + assert rank <= matrix.shape[0], 'Rank cannot be larger than matrix size.' + eigvals = np.linalg.eigvals(matrix) + nnz_eigvals = np.sum(eigvals > atol) + self.assertEqual( + rank, + nnz_eigvals, + msg=('Found %d of %d expected non-zero eigenvalues: %s.' % + (nnz_eigvals, rank, eigvals))) + + +class ConvInputKroneckerFactorTest(ConvFactorTestCase): + + def test3DConvolution(self): + with tf_ops.Graph().as_default(): + batch_size = 1 + width = 3 + in_channels = 3**3 + out_channels = 4 + + factor = ff.ConvInputKroneckerFactor( + inputs=random_ops.random_uniform( + (batch_size, width, width, width, in_channels), seed=0), + filter_shape=(width, width, width, in_channels, out_channels), + padding='SAME', + strides=(2, 2, 2), + extract_patches_fn='extract_convolution_patches', + has_bias=False) + factor.instantiate_cov_variables() + + # Ensure shape of covariance matches input size of filter. + input_size = in_channels * (width**3) + self.assertEqual([input_size, input_size], + factor.get_cov_var().shape.as_list()) + + # Ensure cov_update_op doesn't crash. 
+ with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + sess.run(factor.make_covariance_update_op(0.0)) + cov = sess.run(factor.get_cov_var()) + + # Cov should be rank-8, as the filter will be applied at each corner of + # the 4-D cube. + self.assertMatrixRank(8, cov) + + def testPointwiseConv2d(self): + with tf_ops.Graph().as_default(): + batch_size = 1 + width = 3 + in_channels = 3**2 + out_channels = 4 + + factor = ff.ConvInputKroneckerFactor( + inputs=random_ops.random_uniform( + (batch_size, width, width, in_channels), seed=0), + filter_shape=(1, 1, in_channels, out_channels), + padding='SAME', + strides=(1, 1, 1, 1), + extract_patches_fn='extract_pointwise_conv2d_patches', + has_bias=False) + factor.instantiate_cov_variables() + + # Ensure shape of covariance matches input size of filter. + self.assertEqual([in_channels, in_channels], + factor.get_cov_var().shape.as_list()) + + # Ensure cov_update_op doesn't crash. + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + sess.run(factor.make_covariance_update_op(0.0)) + cov = sess.run(factor.get_cov_var()) + + # Cov should be rank-9, as the filter will be applied at each location. + self.assertMatrixRank(9, cov) + + def testStrides(self): + with tf_ops.Graph().as_default(): + batch_size = 1 + width = 3 + in_channels = 3**2 + out_channels = 4 + + factor = ff.ConvInputKroneckerFactor( + inputs=random_ops.random_uniform( + (batch_size, width, width, in_channels), seed=0), + filter_shape=(1, 1, in_channels, out_channels), + padding='SAME', + strides=(1, 2, 1, 1), + extract_patches_fn='extract_image_patches', + has_bias=False) + factor.instantiate_cov_variables() + + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + sess.run(factor.make_covariance_update_op(0.0)) + cov = sess.run(factor.get_cov_var()) + + # Cov should be the sum of 3 * 2 = 6 outer products. 
+ self.assertMatrixRank(6, cov) + + def testDilationRate(self): + with tf_ops.Graph().as_default(): + batch_size = 1 + width = 3 + in_channels = 2 + out_channels = 4 + + factor = ff.ConvInputKroneckerFactor( + inputs=random_ops.random_uniform( + (batch_size, width, width, in_channels), seed=0), + filter_shape=(3, 3, in_channels, out_channels), + padding='SAME', + extract_patches_fn='extract_image_patches', + strides=(1, 1, 1, 1), + dilation_rate=(1, width, width, 1), + has_bias=False) + factor.instantiate_cov_variables() + + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + sess.run(factor.make_covariance_update_op(0.0)) + cov = sess.run(factor.get_cov_var()) + + # Cov should be rank = in_channels, as only the center of the filter + # receives non-zero input for each input channel. + self.assertMatrixRank(in_channels, cov) def testConvInputKroneckerFactorInitNoBias(self): with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), name='a/b/c') + tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c') factor = ff.ConvInputKroneckerFactor( - tensor, (1, 2, 3, 4), 3, 2, has_bias=False) + inputs=tensor, + filter_shape=(1, 2, 3, 4), + padding='SAME', + has_bias=False) factor.instantiate_cov_variables() self.assertEqual([1 * 2 * 3, 1 * 2 * 3], factor.get_cov().get_shape().as_list()) def testConvInputKroneckerFactorInit(self): with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), name='a/b/c') + tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c') factor = ff.ConvInputKroneckerFactor( - tensor, (1, 2, 3, 4), 3, 2, has_bias=True) + tensor, filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) factor.instantiate_cov_variables() self.assertEqual([1 * 2 * 3 + 1, 1 * 2 * 3 + 1], factor.get_cov().get_shape().as_list()) @@ -518,10 +759,9 @@ class ConvInputKroneckerFactorTest(test.TestCase): def testConvInputKroneckerFactorInitFloat64(self): with 
tf_ops.Graph().as_default(): dtype = dtypes.float64_ref - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), dtype=dtype, name='a/b/c') + tensor = array_ops.ones((64, 1, 2, 3), name='a/b/c', dtype=dtypes.float64) factor = ff.ConvInputKroneckerFactor( - tensor, (1, 2, 3, 4), 3, 2, has_bias=True) + tensor, filter_shape=(1, 2, 3, 4), padding='SAME', has_bias=True) factor.instantiate_cov_variables() cov = factor.get_cov() self.assertEqual(cov.dtype, dtype) @@ -530,33 +770,60 @@ class ConvInputKroneckerFactorTest(test.TestCase): def testMakeCovarianceUpdateOpWithBias(self): with tf_ops.Graph().as_default(), self.test_session() as sess: - random_seed.set_random_seed(200) + input_shape = (2, 1, 1, 1) tensor = array_ops.constant( - np.arange(1., 17.).reshape(2, 2, 2, 2), dtype=dtypes.float32) + np.arange(1, 1 + np.prod(input_shape)).reshape(input_shape).astype( + np.float32)) factor = ff.ConvInputKroneckerFactor( - tensor, (1, 2, 1, 1), [1, 1, 1, 1], 'SAME', has_bias=True) + tensor, filter_shape=(1, 1, 1, 1), padding='SAME', has_bias=True) factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[34.375, 37, 3.125], [37, 41, 3.5], [3.125, 3.5, 1]], - new_cov) + new_cov = sess.run(factor.make_covariance_update_op(0.)) + self.assertAllClose( + [ + [(1. + 4.) / 2., (1. + 2.) / 2.], # + [(1. + 2.) / 2., (1. + 1.) / 2.] 
+ ], # + new_cov) def testMakeCovarianceUpdateOpNoBias(self): with tf_ops.Graph().as_default(), self.test_session() as sess: - random_seed.set_random_seed(200) + input_shape = (2, 1, 1, 1) tensor = array_ops.constant( - np.arange(1., 17.).reshape(2, 2, 2, 2), dtype=dtypes.float32) - factor = ff.ConvInputKroneckerFactor(tensor, (1, 2, 1, 1), - [1, 1, 1, 1], 'SAME') + np.arange(1, 1 + np.prod(input_shape)).reshape(input_shape).astype( + np.float32)) + factor = ff.ConvInputKroneckerFactor( + tensor, filter_shape=(1, 1, 1, 1), padding='SAME') factor.instantiate_cov_variables() sess.run(tf_variables.global_variables_initializer()) - new_cov = sess.run(factor.make_covariance_update_op(.5)) - self.assertAllClose([[34.375, 37], [37, 41]], new_cov) + new_cov = sess.run(factor.make_covariance_update_op(0.)) + self.assertAllClose([[(1. + 4.) / 2.]], new_cov) -class ConvOutputKroneckerFactorTest(test.TestCase): +class ConvOutputKroneckerFactorTest(ConvFactorTestCase): + + def test3DConvolution(self): + with tf_ops.Graph().as_default(): + batch_size = 1 + width = 3 + out_channels = width**3 + + factor = ff.ConvOutputKroneckerFactor(outputs_grads=[ + random_ops.random_uniform( + (batch_size, width, width, width, out_channels), seed=0) + ]) + factor.instantiate_cov_variables() + + with self.test_session() as sess: + sess.run(tf_variables.global_variables_initializer()) + sess.run(factor.make_covariance_update_op(0.0)) + cov = sess.run(factor.get_cov()) + + # Cov should be rank 3^3, as each spatial position donates a rank-1 + # update. 
+ self.assertMatrixRank(width**3, cov) def testConvOutputKroneckerFactorInit(self): with tf_ops.Graph().as_default(): @@ -577,13 +844,6 @@ class ConvOutputKroneckerFactorTest(test.TestCase): self.assertEqual(cov.dtype, dtype) self.assertEqual([5, 5], cov.get_shape().as_list()) - def testConvOutputKroneckerFactorInitNotEnoughDims(self): - with tf_ops.Graph().as_default(): - random_seed.set_random_seed(200) - tensor = array_ops.ones((2, 3), name='a/b/c') - with self.assertRaises(IndexError): - ff.ConvOutputKroneckerFactor((tensor,)) - def testMakeCovarianceUpdateOp(self): with tf_ops.Graph().as_default(), self.test_session() as sess: random_seed.set_random_seed(200) diff --git a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py index 889f336811..bae6bd7a3b 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/layer_collection_test.py @@ -104,14 +104,31 @@ class LayerCollectionTest(test.TestCase): array_ops.constant(3), approx=layer_collection.APPROX_DIAGONAL_NAME) lc.register_conv2d( - array_ops.constant(4), [1, 1, 1, 1], 'SAME', - array_ops.ones((1, 1, 1, 1)), array_ops.constant(3)) + params=array_ops.ones((2, 3, 4, 5)), + strides=[1, 1, 1, 1], + padding='SAME', + inputs=array_ops.ones((1, 2, 3, 4)), + outputs=array_ops.ones((1, 1, 1, 5))) lc.register_conv2d( - array_ops.constant(4), [1, 1, 1, 1], - 'SAME', - array_ops.ones((1, 1, 1, 1)), - array_ops.constant(3), + params=array_ops.ones((2, 3, 4, 5)), + strides=[1, 1, 1, 1], + padding='SAME', + inputs=array_ops.ones((1, 2, 3, 4)), + outputs=array_ops.ones((1, 1, 1, 5)), approx=layer_collection.APPROX_DIAGONAL_NAME) + lc.register_separable_conv2d( + depthwise_params=array_ops.ones((3, 3, 1, 2)), + pointwise_params=array_ops.ones((1, 1, 2, 4)), + inputs=array_ops.ones((32, 5, 5, 1)), + depthwise_outputs=array_ops.ones((32, 5, 5, 2)), + 
pointwise_outputs=array_ops.ones((32, 5, 5, 4)), + strides=[1, 1, 1, 1], + padding='SAME') + lc.register_convolution( + params=array_ops.ones((3, 3, 1, 8)), + inputs=array_ops.ones((32, 5, 5, 1)), + outputs=array_ops.ones((32, 5, 5, 8)), + padding='SAME') lc.register_generic( array_ops.constant(5), 16, approx=layer_collection.APPROX_FULL_NAME) lc.register_generic( @@ -119,7 +136,7 @@ class LayerCollectionTest(test.TestCase): 16, approx=layer_collection.APPROX_DIAGONAL_NAME) - self.assertEqual(6, len(lc.get_blocks())) + self.assertEqual(9, len(lc.get_blocks())) def testRegisterBlocksMultipleRegistrations(self): with ops.Graph().as_default(): @@ -535,6 +552,32 @@ class LayerCollectionTest(test.TestCase): self.assertIsInstance(lc.fisher_blocks[b_0], fisher_blocks.FullFB) self.assertIsInstance(lc.fisher_blocks[b_1], fisher_blocks.NaiveDiagonalFB) + def testDefaultLayerCollection(self): + with ops.Graph().as_default(): + # Can't get default if there isn't one set. + with self.assertRaises(ValueError): + layer_collection.get_default_layer_collection() + + # Can't set default twice. + lc = layer_collection.LayerCollection() + layer_collection.set_default_layer_collection(lc) + with self.assertRaises(ValueError): + layer_collection.set_default_layer_collection(lc) + + # Same as one set. + self.assertTrue(lc is layer_collection.get_default_layer_collection()) + + # Can set to None. + layer_collection.set_default_layer_collection(None) + with self.assertRaises(ValueError): + layer_collection.get_default_layer_collection() + + # as_default() is the same as setting/clearing. 
+ with lc.as_default(): + self.assertTrue(lc is layer_collection.get_default_layer_collection()) + with self.assertRaises(ValueError): + layer_collection.get_default_layer_collection() + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py b/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py index 97a97adbf5..2cee01212a 100644 --- a/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py +++ b/tensorflow/contrib/kfac/python/kernel_tests/utils_test.py @@ -29,6 +29,8 @@ from tensorflow.python.framework import random_seed from tensorflow.python.ops import array_ops from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops +from tensorflow.python.ops import random_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import test @@ -325,6 +327,84 @@ class UtilsTest(test.TestCase): ], values) + def testExtractConvolutionPatches(self): + with ops.Graph().as_default(), self.test_session() as sess: + batch_size = 10 + image_spatial_shape = [9, 10, 11] + in_channels = out_channels = 32 + kernel_spatial_shape = [5, 3, 3] + spatial_strides = [1, 2, 1] + spatial_dilation = [1, 1, 1] + padding = 'SAME' + + images = random_ops.random_uniform( + [batch_size] + image_spatial_shape + [in_channels], seed=0) + kernel_shape = kernel_spatial_shape + [in_channels, out_channels] + kernel = random_ops.random_uniform(kernel_shape, seed=1) + + # Ensure shape matches expectation. 
+ patches = utils.extract_convolution_patches( + images, + kernel_shape, + padding, + strides=spatial_strides, + dilation_rate=spatial_dilation) + result_spatial_shape = ( + patches.shape.as_list()[1:1 + len(image_spatial_shape)]) + self.assertEqual(patches.shape.as_list(), + [batch_size] + result_spatial_shape + + kernel_spatial_shape + [in_channels]) + + # Ensure extract...patches() + matmul() and convolution() implementation + # give the same answer. + outputs = nn_ops.convolution( + images, + kernel, + padding, + strides=spatial_strides, + dilation_rate=spatial_dilation) + + patches_flat = array_ops.reshape( + patches, [-1, np.prod(kernel_spatial_shape) * in_channels]) + kernel_flat = array_ops.reshape(kernel, [-1, out_channels]) + outputs_flat = math_ops.matmul(patches_flat, kernel_flat) + + outputs_, outputs_flat_ = sess.run([outputs, outputs_flat]) + self.assertAllClose(outputs_.flatten(), outputs_flat_.flatten()) + + def testExtractPointwiseConv2dPatches(self): + with ops.Graph().as_default(), self.test_session() as sess: + batch_size = 10 + image_height = image_width = 8 + in_channels = out_channels = 3 + kernel_height = kernel_width = 1 + strides = [1, 1, 1, 1] + padding = 'VALID' + + images = random_ops.random_uniform( + [batch_size, image_height, image_width, in_channels], seed=0) + kernel_shape = [kernel_height, kernel_width, in_channels, out_channels] + kernel = random_ops.random_uniform(kernel_shape, seed=1) + + # Ensure shape matches expectation. + patches = utils.extract_pointwise_conv2d_patches(images, kernel_shape) + self.assertEqual(patches.shape.as_list(), [ + batch_size, image_height, image_width, kernel_height, kernel_width, + in_channels + ]) + + # Ensure extract...patches() + matmul() and conv2d() implementation + # give the same answer. 
+ outputs = nn_ops.conv2d(images, kernel, strides, padding) + + patches_flat = array_ops.reshape( + patches, [-1, kernel_height * kernel_width * in_channels]) + kernel_flat = array_ops.reshape(kernel, [-1, out_channels]) + outputs_flat = math_ops.matmul(patches_flat, kernel_flat) + + outputs_, outputs_flat_ = sess.run([outputs, outputs_flat]) + self.assertAllClose(outputs_.flatten(), outputs_flat_.flatten()) + if __name__ == '__main__': test.main() diff --git a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py index 521a98866b..31f4689fbf 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_blocks.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_blocks.py @@ -40,10 +40,12 @@ from __future__ import print_function import abc import enum # pylint: disable=g-bad-import-order +import numpy as np import six from tensorflow.contrib.kfac.python.ops import fisher_factors from tensorflow.contrib.kfac.python.ops import utils +from tensorflow.python.framework import ops from tensorflow.python.ops import array_ops from tensorflow.python.ops import math_ops @@ -517,7 +519,7 @@ class FullyConnectedDiagonalFB(InputOutputMultiMinibatch, FisherBlock): class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): - """FisherBlock for convolutional layers using a diagonal approx. + """FisherBlock for 2-D convolutional layers using a diagonal approx. Estimates the Fisher Information matrix's diagonal entries for a convolutional layer. Unlike NaiveDiagonalFB this uses the low-variance "sum of squares" @@ -541,7 +543,13 @@ class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): to the layer's parameters 'w'. """ - def __init__(self, layer_collection, params, strides, padding): + def __init__(self, + layer_collection, + params, + strides, + padding, + data_format=None, + dilations=None): """Creates a ConvDiagonalFB block. 
Args: @@ -553,29 +561,53 @@ class ConvDiagonalFB(InputOutputMultiMinibatch, FisherBlock): containing the previous and a Tensor of shape [out_channels]. strides: The stride size in this layer (1-D Tensor of length 4). padding: The padding in this layer (e.g. "SAME"). + data_format: str or None. Format of input data. + dilations: List of 4 ints or None. Rate for dilation along all dimensions. + + Raises: + ValueError: if strides is not length-4. + ValueError: if dilations is not length-4. + ValueError: if channel is not last dimension. """ - self._strides = tuple(strides) if isinstance(strides, list) else strides + if len(strides) != 4: + raise ValueError("strides must contain 4 numbers.") + + if dilations is None: + dilations = [1, 1, 1, 1] + + if len(dilations) != 4: + raise ValueError("dilations must contain 4 numbers.") + + if not utils.is_data_format_channel_last(data_format): + raise ValueError("data_format must be channels-last.") + + self._strides = maybe_tuple(strides) self._padding = padding + self._data_format = data_format + self._dilations = maybe_tuple(dilations) self._has_bias = isinstance(params, (tuple, list)) fltr = params[0] if self._has_bias else params self._filter_shape = tuple(fltr.shape.as_list()) + if len(self._filter_shape) != 4: + raise ValueError( + "Convolution filter must be of shape" + " [filter_height, filter_width, in_channels, out_channels].") + super(ConvDiagonalFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): - # Infer number of locations upon which convolution is applied. - inputs_shape = tuple(self._inputs[0].shape.as_list()) - self._num_locations = ( - inputs_shape[1] * inputs_shape[2] // - (self._strides[1] * self._strides[2])) - inputs, grads_list = self._package_minibatches(grads_list) + # Infer number of locations upon which convolution is applied. 
+ self._num_locations = num_conv_locations(inputs.shape.as_list(), + self._strides) + self._factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvDiagonalFactor, - (inputs, grads_list, self._filter_shape, self._strides, - self._padding, self._has_bias)) + (inputs, grads_list, self._filter_shape, self._strides, self._padding, + self._data_format, self._dilations, self._has_bias)) def damping_func(): return self._num_locations * normalize_damping(damping, @@ -658,8 +690,8 @@ class KroneckerProductFB(FisherBlock): reshaped_out = self._input_factor.left_multiply_matpower( reshaped_out, exp, self._input_damping_func) if self._renorm_coeff != 1.0: - reshaped_out *= math_ops.cast( - self._renorm_coeff**exp, dtype=reshaped_out.dtype) + renorm_coeff = math_ops.cast(self._renorm_coeff, dtype=reshaped_out.dtype) + reshaped_out *= math_ops.cast(renorm_coeff**exp, dtype=reshaped_out.dtype) return utils.mat2d_to_layer_params(vector, reshaped_out) def full_fisher_block(self): @@ -761,7 +793,7 @@ class FullyConnectedKFACBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): - """FisherBlock for 2D convolutional layers using the basic KFC approx. + """FisherBlock for convolutional layers using the basic KFC approx. Estimates the Fisher Information matrix's blog for a convolutional layer. @@ -784,21 +816,40 @@ class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): See equation 23 in https://arxiv.org/abs/1602.01407 for details. """ - def __init__(self, layer_collection, params, strides, padding): + def __init__(self, + layer_collection, + params, + padding, + strides=None, + dilation_rate=None, + data_format=None, + extract_patches_fn=None): """Creates a ConvKFCBasicFB block. Args: layer_collection: The collection of all layers in the K-FAC approximate Fisher information matrix to which this FisherBlock belongs. params: The parameters (Tensor or tuple of Tensors) of this layer. 
If - kernel alone, a Tensor of shape [kernel_height, kernel_width, + kernel alone, a Tensor of shape [..spatial_filter_shape.., in_channels, out_channels]. If kernel and bias, a tuple of 2 elements containing the previous and a Tensor of shape [out_channels]. - strides: The stride size in this layer (1-D Tensor of length 4). - padding: The padding in this layer (1-D of Tensor length 4). + padding: str. Padding method. + strides: List of ints or None. Contains [..spatial_filter_strides..] if + 'extract_patches_fn' is compatible with tf.nn.convolution(), else + [1, ..spatial_filter_strides, 1]. + dilation_rate: List of ints or None. Rate for dilation along each spatial + dimension if 'extract_patches_fn' is compatible with + tf.nn.convolution(), else [1, ..spatial_dilation_rates.., 1]. + data_format: str or None. Format of input data. + extract_patches_fn: str or None. Name of function that extracts image + patches. One of "extract_convolution_patches", "extract_image_patches", + "extract_pointwise_conv2d_patches". """ - self._strides = tuple(strides) if isinstance(strides, list) else strides self._padding = padding + self._strides = maybe_tuple(strides) + self._dilation_rate = maybe_tuple(dilation_rate) + self._data_format = data_format + self._extract_patches_fn = extract_patches_fn self._has_bias = isinstance(params, (tuple, list)) fltr = params[0] if self._has_bias else params @@ -807,15 +858,16 @@ class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): super(ConvKFCBasicFB, self).__init__(layer_collection) def instantiate_factors(self, grads_list, damping): + inputs, grads_list = self._package_minibatches(grads_list) + # Infer number of locations upon which convolution is applied. 
self._num_locations = num_conv_locations(self._inputs[0].shape.as_list(), self._strides) - inputs, grads_list = self._package_minibatches(grads_list) - self._input_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvInputKroneckerFactor, - (inputs, self._filter_shape, self._strides, self._padding, + (inputs, self._filter_shape, self._padding, self._strides, + self._dilation_rate, self._data_format, self._extract_patches_fn, self._has_bias)) self._output_factor = self._layer_collection.make_or_get_factor( fisher_factors.ConvOutputKroneckerFactor, (grads_list,)) @@ -827,17 +879,262 @@ class ConvKFCBasicFB(InputOutputMultiMinibatch, KroneckerProductFB): return self._num_locations +class DepthwiseConvDiagonalFB(ConvDiagonalFB): + """FisherBlock for depthwise_conv2d(). + + Equivalent to ConvDiagonalFB applied to each input channel in isolation. + """ + + def __init__(self, + layer_collection, + params, + strides, + padding, + rate=None, + data_format=None): + """Creates a DepthwiseConvDiagonalFB block. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + params: Tensor of shape [filter_height, filter_width, in_channels, + channel_multiplier]. + strides: List of 4 ints. Strides along all dimensions. + padding: str. Padding method. + rate: List of 2 ints or None. Rate for dilation along spatial dimensions. + data_format: str or None. Format of input data. + + Raises: + NotImplementedError: If parameters contains bias. + ValueError: If filter is not 4-D. + ValueError: If strides is not length-4. + ValueError: If rate is not length-2. + ValueError: If channels are not last dimension.
+ """ + if isinstance(params, (tuple, list)): + raise NotImplementedError("Bias not yet supported.") + + if params.shape.ndims != 4: + raise ValueError("Filter must be 4-D.") + + if len(strides) != 4: + raise ValueError("strides must account for 4 dimensions.") + + if rate is not None: + if len(rate) != 2: + raise ValueError("rate must only account for spatial dimensions.") + rate = [1, rate[0], rate[1], 1] # conv2d expects 4-element rate. + + if not utils.is_data_format_channel_last(data_format): + raise ValueError("data_format must be channels-last.") + + super(DepthwiseConvDiagonalFB, self).__init__( + layer_collection=layer_collection, + params=params, + strides=strides, + padding=padding, + dilations=rate, + data_format=data_format) + + # This is a hack to overwrite the same setting in ConvDiagonalFB.__init__(). + filter_height, filter_width, in_channels, channel_multiplier = ( + params.shape.as_list()) + self._filter_shape = (filter_height, filter_width, in_channels, + in_channels * channel_multiplier) + + def multiply_matpower(self, vector, exp): + conv2d_vector = depthwise_conv2d_filter_to_conv2d_filter(vector) + conv2d_result = super(DepthwiseConvDiagonalFB, self).multiply_matpower( + conv2d_vector, exp) + return conv2d_filter_to_depthwise_conv2d_filter(conv2d_result) + + +class DepthwiseConvKFCBasicFB(ConvKFCBasicFB): + """FisherBlock for depthwise_conv2d(). + + Equivalent to ConvKFCBasicFB applied to each input channel in isolation. + """ + + def __init__(self, + layer_collection, + params, + strides, + padding, + rate=None, + data_format=None): + """Creates a DepthwiseConvKFCBasicFB block. + + Args: + layer_collection: The collection of all layers in the K-FAC approximate + Fisher information matrix to which this FisherBlock belongs. + params: Tensor of shape [filter_height, filter_width, in_channels, + channel_multiplier]. + strides: List of 4 ints. Strides along all dimensions. + padding: str. Padding method. + rate: List of 2 ints or None.
Rate for dilation along all dimensions. + data_format: str or None. Format of input data. + + Raises: + NotImplementedError: If parameters contains bias. + ValueError: If filter is not 4-D. + ValueError: If strides is not length-4. + ValueError: If rates is not length-2. + ValueError: If channels are not last dimension. + """ + if isinstance(params, (tuple, list)): + raise NotImplementedError("Bias not yet supported.") + + if params.shape.ndims != 4: + raise ValueError("Filter must be 4-D.") + + if len(strides) != 4: + raise ValueError("strides must account for 4 dimensions.") + + if rate is not None: + if len(rate) != 2: + raise ValueError("rate must only account for spatial dimensions.") + rate = [1, rate[0], rate[1], 1] # conv2d expects 4-element rate. + + if not utils.is_data_format_channel_last(data_format): + raise ValueError("data_format must be channels-last.") + + super(DepthwiseConvKFCBasicFB, self).__init__( + layer_collection=layer_collection, + params=params, + padding=padding, + strides=strides, + dilation_rate=rate, + data_format=data_format, + extract_patches_fn="extract_image_patches") + + # This is a hack to overwrite the same setting in ConvKFCBasicFB.__init__(). + filter_height, filter_width, in_channels, channel_multiplier = ( + params.shape.as_list()) + self._filter_shape = (filter_height, filter_width, in_channels, + in_channels * channel_multiplier) + + def multiply_matpower(self, vector, exp): + conv2d_vector = depthwise_conv2d_filter_to_conv2d_filter(vector) + conv2d_result = super(DepthwiseConvKFCBasicFB, self).multiply_matpower( + conv2d_vector, exp) + return conv2d_filter_to_depthwise_conv2d_filter(conv2d_result) + + +def depthwise_conv2d_filter_to_conv2d_filter(filter, name=None): # pylint: disable=redefined-builtin + """Converts a convolution filter for use with conv2d. + + Transforms a filter for use with tf.nn.depthwise_conv2d() to one that's + compatible with tf.nn.conv2d(). 
+ + Args: + filter: Tensor of shape [height, width, in_channels, channel_multiplier]. + name: None or str. Name of Op. + + Returns: + Tensor of shape [height, width, in_channels, out_channels]. + + """ + with ops.name_scope(name, "depthwise_conv2d_filter_to_conv2d_filter", + [filter]): + filter = ops.convert_to_tensor(filter) + filter_height, filter_width, in_channels, channel_multiplier = ( + filter.shape.as_list()) + + results = [] + for i in range(in_channels): + # Slice out one in_channel's filter. Insert zeros around it to force it + # to affect that channel and that channel alone. + elements = [] + if i > 0: + elements.append( + array_ops.zeros( + [filter_height, filter_width, i, channel_multiplier])) + elements.append(filter[:, :, i:(i + 1), :]) + if i + 1 < in_channels: + elements.append( + array_ops.zeros([ + filter_height, filter_width, in_channels - (i + 1), + channel_multiplier + ])) + + # Concat along in_channel. + results.append( + array_ops.concat(elements, axis=-2, name="in_channel_%d" % i)) + + # Concat along out_channel. + return array_ops.concat(results, axis=-1, name="out_channel") + + +def conv2d_filter_to_depthwise_conv2d_filter(filter, name=None): # pylint: disable=redefined-builtin + """Converts a convolution filter for use with depthwise_conv2d. + + Transforms a filter for use with tf.nn.conv2d() to one that's + compatible with tf.nn.depthwise_conv2d(). Ignores all filters but those along + the diagonal. + + Args: + filter: Tensor of shape [height, width, in_channels, out_channels]. + name: None or str. Name of Op. + + Returns: + Tensor of shape, + [height, width, in_channels, channel_multiplier] + + Raises: + ValueError: if out_channels is not evenly divisible by in_channels. 
+ """ + with ops.name_scope(name, "conv2d_filter_to_depthwise_conv2d_filter", + [filter]): + filter = ops.convert_to_tensor(filter) + filter_height, filter_width, in_channels, out_channels = ( + filter.shape.as_list()) + + if out_channels % in_channels != 0: + raise ValueError("out_channels must be evenly divisible by in_channels.") + channel_multiplier = out_channels // in_channels + + results = [] + filter = array_ops.reshape(filter, [ + filter_height, filter_width, in_channels, in_channels, + channel_multiplier + ]) + for i in range(in_channels): + # Slice out output corresponding to the correct filter. + filter_slice = array_ops.reshape( + filter[:, :, i, i, :], + [filter_height, filter_width, 1, channel_multiplier]) + results.append(filter_slice) + + # Concat along out_channel. + return array_ops.concat(results, axis=-2, name="in_channels") + + +def maybe_tuple(obj): + if not isinstance(obj, list): + return obj + return tuple(obj) + + def num_conv_locations(input_shape, strides): """Returns the number of spatial locations a 2D Conv kernel is applied to. Args: - input_shape: list representing shape of inputs to the Conv layer. - strides: list representing strides for the Conv kernel. + input_shape: List of ints representing shape of inputs to + tf.nn.convolution(). + strides: List of ints representing strides along spatial dimensions as + passed in to tf.nn.convolution(). Returns: A scalar |T| denoting the number of spatial locations for the Conv layer. 
""" - return input_shape[1] * input_shape[2] // (strides[1] * strides[2]) + spatial_input_locations = np.prod(input_shape[1:-1]) + + if strides is None: + spatial_strides_divisor = 1 + else: + spatial_strides_divisor = np.prod(strides) + + return spatial_input_locations // spatial_strides_divisor class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): @@ -858,7 +1155,7 @@ class FullyConnectedMultiIndepFB(InputOutputMultiMinibatch, KroneckerProductFB): def instantiate_factors(self, grads_list, damping): - self._num_uses = len(self._inputs[0]) + self._num_uses = float(len(self._inputs[0])) inputs, grads_list = self._package_minibatches_multi(grads_list) self._input_factor = self._layer_collection.make_or_get_factor( diff --git a/tensorflow/contrib/kfac/python/ops/fisher_factors.py b/tensorflow/contrib/kfac/python/ops/fisher_factors.py index 8ac63bc764..6fc163e232 100644 --- a/tensorflow/contrib/kfac/python/ops/fisher_factors.py +++ b/tensorflow/contrib/kfac/python/ops/fisher_factors.py @@ -159,7 +159,9 @@ def scope_string_from_params(params): name_parts = [] for param in params: - if isinstance(param, (tuple, list)): + if param is None: + name_parts.append("None") + elif isinstance(param, (tuple, list)): if all([isinstance(p, int) for p in param]): name_parts.append("-".join([str(p) for p in param])) else: @@ -867,6 +869,8 @@ class ConvDiagonalFactor(DiagonalFactor): filter_shape, strides, padding, + data_format=None, + dilations=None, has_bias=False): """Creates a ConvDiagonalFactor object. @@ -880,15 +884,42 @@ class ConvDiagonalFactor(DiagonalFactor): out_channels). Represents shape of kernel used in this layer. strides: The stride size in this layer (1-D Tensor of length 4). padding: The padding in this layer (1-D of Tensor length 4). + data_format: None or str. Format of conv2d inputs. + dilations: None or tuple of 4 ints. has_bias: Python bool. If True, the layer is assumed to have a bias parameter in addition to its filter parameter. 
+ + Raises: + ValueError: If inputs, output_grads, and filter_shape do not agree on + in_channels or out_channels. + ValueError: If strides, dilations are not length-4 lists of ints. + ValueError: If data_format does not put channel last. """ + if not utils.is_data_format_channel_last(data_format): + raise ValueError("Channel must be last.") + if inputs.shape.ndims != 4: + raise ValueError("inputs must be 4-D Tensor.") + if inputs.shape.as_list()[-1] != filter_shape[-2]: + raise ValueError("inputs and filter_shape must agree on in_channels.") + for i, outputs_grad in enumerate(outputs_grads): + if outputs_grad.shape.ndims != 4: + raise ValueError("outputs[%d] must be 4-D Tensor." % i) + if outputs_grad.shape.as_list()[-1] != filter_shape[-1]: + raise ValueError( + "outputs[%d] and filter_shape must agree on out_channels." % i) + if len(strides) != 4: + raise ValueError("strides must be length-4 list of ints.") + if dilations is not None and len(dilations) != 4: + raise ValueError("dilations must be length-4 list of ints.") + self._inputs = inputs + self._outputs_grads = outputs_grads self._filter_shape = filter_shape self._strides = strides self._padding = padding + self._data_format = data_format + self._dilations = dilations self._has_bias = has_bias - self._outputs_grads = outputs_grads self._patches = None super(ConvDiagonalFactor, self).__init__() @@ -919,11 +950,15 @@ class ConvDiagonalFactor(DiagonalFactor): # TODO(b/64144716): there is potential here for a big savings in terms # of memory use. 
+ if self._dilations is None: + rates = (1, 1, 1, 1) + else: + rates = tuple(self._dilations) patches = array_ops.extract_image_patches( self._inputs, ksizes=[1, filter_height, filter_width, 1], strides=self._strides, - rates=[1, 1, 1, 1], + rates=rates, padding=self._padding) if self._has_bias: @@ -1010,39 +1045,55 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): def __init__(self, inputs, filter_shape, - strides, padding, + strides=None, + dilation_rate=None, + data_format=None, + extract_patches_fn=None, has_bias=False): """Initializes ConvInputKroneckerFactor. Args: - inputs: A Tensor of shape [batch_size, height, width, in_channels] - which is the inputs to the layer (before being processed into patches). - filter_shape: 1-D Tensor of length 4. Contains [kernel_height, - kernel_width, in_channels, out_channels]. - strides: 1-D Tensor of length 4. Contains [batch_stride, height_stride, - width_stride, in_channel_stride]. + inputs: Tensor of shape [batch_size, ..spatial_input_size.., in_channels]. + Inputs to layer. + filter_shape: List of ints. Contains [..spatial_filter_size.., + in_channels, out_channels]. Shape of convolution kernel. padding: str. Padding method for layer. "SAME" or "VALID". + strides: List of ints or None. Contains [..spatial_filter_strides..] if + 'extract_patches_fn' is compatible with tf.nn.convolution(), else + [1, ..spatial_filter_strides, 1]. + dilation_rate: List of ints or None. Rate for dilation along each spatial + dimension if 'extract_patches_fn' is compatible with + tf.nn.convolution(), else [1, ..spatial_dilation_rates.., 1]. + data_format: str or None. Format of input data. + extract_patches_fn: str or None. Name of function that extracts image + patches. One of "extract_convolution_patches", "extract_image_patches", + "extract_pointwise_conv2d_patches". has_bias: bool. If True, append 1 to in_channel. 
""" + self._inputs = inputs self._filter_shape = filter_shape self._strides = strides self._padding = padding + self._dilation_rate = dilation_rate + self._data_format = data_format + self._extract_patches_fn = extract_patches_fn self._has_bias = has_bias - self._inputs = inputs + super(ConvInputKroneckerFactor, self).__init__() @property def _var_scope(self): return "ff_convinkron_" + scope_string_from_params([ self._inputs, self._filter_shape, self._strides, self._padding, - self._has_bias + self._dilation_rate, self._data_format, self._has_bias ]) @property def _cov_shape(self): - filter_height, filter_width, in_channels, _ = self._filter_shape - size = filter_height * filter_width * in_channels + self._has_bias + spatial_filter_shape = self._filter_shape[0:-2] + in_channels = self._filter_shape[-2] + size = np.prod(spatial_filter_shape) * in_channels + self._has_bias return [size, size] @property @@ -1057,18 +1108,44 @@ class ConvInputKroneckerFactor(InverseProvidingFactor): if idx != 0: raise ValueError("ConvInputKroneckerFactor only supports idx = 0") - filter_height, filter_width, in_channels, _ = self._filter_shape - # TODO(b/64144716): there is potential here for a big savings in terms of # memory use. 
- patches = array_ops.extract_image_patches( - self._inputs, - ksizes=[1, filter_height, filter_width, 1], - strides=self._strides, - rates=[1, 1, 1, 1], - padding=self._padding) + if self._extract_patches_fn in [None, "extract_convolution_patches"]: + patches = utils.extract_convolution_patches( + self._inputs, + self._filter_shape, + padding=self._padding, + strides=self._strides, + dilation_rate=self._dilation_rate, + data_format=self._data_format) + + elif self._extract_patches_fn == "extract_image_patches": + assert self._inputs.shape.ndims == 4 + assert len(self._filter_shape) == 4 + assert len(self._strides) == 4, self._strides + if self._dilation_rate is None: + rates = [1, 1, 1, 1] + else: + rates = self._dilation_rate + assert len(rates) == 4 + assert rates[0] == rates[-1] == 1 + patches = array_ops.extract_image_patches( + self._inputs, + ksizes=[1] + list(self._filter_shape[0:-2]) + [1], + strides=self._strides, + rates=rates, + padding=self._padding) + + elif self._extract_patches_fn == "extract_pointwise_conv2d_patches": + assert self._strides in [None, [1, 1, 1, 1], (1, 1, 1, 1)] + assert self._filter_shape[0] == self._filter_shape[1] == 1 + patches = utils.extract_pointwise_conv2d_patches( + self._inputs, self._filter_shape, data_format=None) - flatten_size = (filter_height * filter_width * in_channels) + else: + raise NotImplementedError(self._extract_patches_fn) + + flatten_size = np.prod(self._filter_shape[0:-1]) # patches_flat below is the matrix [[A_l]] from the KFC paper (tilde # omitted over A for clarity). It has shape M|T| x J|Delta| (eq. 14), # where M = minibatch size, |T| = number of spatial locations, @@ -1100,14 +1177,21 @@ class ConvOutputKroneckerFactor(InverseProvidingFactor): Section 3.1 Estimating the factors. """ - def __init__(self, outputs_grads): + def __init__(self, outputs_grads, data_format=None): """Initializes ConvOutputKroneckerFactor. 
Args: - outputs_grads: List of Tensors, each of shape [batch_size, - height, width, out_channels]. One Tensor for each "source". + outputs_grads: list of Tensors. Each Tensor is of shape + [batch_size, ..spatial_input_size.., out_channels]. One Tensor per + source. + data_format: None or str. Format of outputs_grads. + + Raises: + ValueError: If channels are not final dimension. """ - self._out_channels = outputs_grads[0].shape.as_list()[3] + if not utils.is_data_format_channel_last(data_format): + raise ValueError("Channel must be last.") + self._out_channels = outputs_grads[0].shape.as_list()[-1] self._outputs_grads = outputs_grads super(ConvOutputKroneckerFactor, self).__init__() @@ -1433,4 +1517,3 @@ class FullyConnectedMultiKF(InverseProvidingFactor): return [control_flow_ops.group(*ops)] # pylint: enable=invalid-name - diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection.py b/tensorflow/contrib/kfac/python/ops/layer_collection.py index 60894ed951..4eb5e4c092 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection.py @@ -26,6 +26,7 @@ from __future__ import print_function from collections import defaultdict from collections import OrderedDict +from contextlib import contextmanager from functools import partial import math @@ -75,6 +76,27 @@ _FULLY_CONNECTED_MULTI_APPROX_TO_BLOCK_TYPES = { # tf.get_variable_scope().reuse. VARIABLE_SCOPE = "VARIABLE_SCOPE" +_DEFAULT_LAYER_COLLECTION = None + + +def get_default_layer_collection(): + """Get default LayerCollection.""" + if _DEFAULT_LAYER_COLLECTION is None: + raise ValueError( + "Attempted to retrieve default LayerCollection when none is set. 
Use " + "LayerCollection.as_default().") + + return _DEFAULT_LAYER_COLLECTION + + +def set_default_layer_collection(layer_collection): + global _DEFAULT_LAYER_COLLECTION + + if _DEFAULT_LAYER_COLLECTION is not None and layer_collection is not None: + raise ValueError("Default LayerCollection is already set.") + + _DEFAULT_LAYER_COLLECTION = layer_collection + class LayerParametersDict(OrderedDict): """An OrderedDict where keys are Tensors or tuples of Tensors. @@ -594,21 +616,25 @@ class LayerCollection(object): padding, inputs, outputs, + data_format=None, + dilations=None, approx=None, reuse=VARIABLE_SCOPE): - """Registers a convolutional layer. + """Registers a call to tf.nn.conv2d(). Args: params: Tensor or 2-tuple of Tensors corresponding to weight and bias of this layer. Weight matrix should have shape [kernel_height, kernel_width, in_channels, out_channels]. Bias should have shape [out_channels]. - strides: 1-D Tensor of length 4. Strides for convolution kernel. + strides: List of 4 ints. Strides for convolution kernel. padding: string. see tf.nn.conv2d for valid values. inputs: Tensor of shape [batch_size, height, width, in_channels]. Inputs to layer. outputs: Tensor of shape [batch_size, height, width, out_channels]. Output produced by layer. + data_format: str or None. Format of data. + dilations: List of 4 ints. Dilations along each dimension. approx: str. One of "kron" or "diagonal". reuse: bool or str. If True, reuse an existing FisherBlock. If False, create a new FisherBlock. 
If "VARIABLE_SCOPE", use @@ -629,12 +655,206 @@ class LayerCollection(object): raise ValueError("Bad value {} for approx.".format(approx)) block_type = _CONV2D_APPROX_TO_BLOCK_TYPES[approx] + if approx == APPROX_KRONECKER_NAME: + block = self.register_block( + params, + block_type( + layer_collection=self, + params=params, + padding=padding, + strides=strides, + data_format=data_format, + dilation_rate=dilations, + extract_patches_fn="extract_image_patches"), + reuse=reuse) + elif approx == APPROX_DIAGONAL_NAME: + assert strides[0] == strides[-1] == 1 + block = self.register_block( + params, + block_type( + layer_collection=self, + params=params, + padding=padding, + strides=strides, + dilations=dilations, + data_format=data_format), + reuse=reuse) + else: + raise NotImplementedError + + block.register_additional_minibatch(inputs, outputs) + + self._add_uses(params, 1) + + def register_convolution(self, + params, + inputs, + outputs, + padding, + strides=None, + dilation_rate=None, + data_format=None, + approx=None, + reuse=VARIABLE_SCOPE): + """Register a call to tf.nn.convolution(). + + Args: + params: Tensor or 2-tuple of Tensors corresponding to weight and bias of + this layer. Weight matrix should have shape [..filter_spatial_size.., + in_channels, out_channels]. Bias should have shape [out_channels]. + inputs: Tensor of shape [batch_size, ..input_spatial_size.., in_channels]. + Inputs to layer. + outputs: Tensor of shape [batch_size, ..output_spatial_size.., + out_channels]. Output produced by layer. + padding: string. see tf.nn.conv2d for valid values. + strides: List of ints of length len(..input_spatial_size..). Strides for + convolution kernel in spatial dimensions. + dilation_rate: List of ints of length len(..input_spatial_size..). + Dilations along spatial dimension. + data_format: str or None. Format of data. + approx: str. One of "kron" or "diagonal". + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. 
If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. + + Raises: + ValueError: For improper value to 'approx'. + KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: If reuse == True and FisherBlock found but of the wrong type. + """ + assert approx is None or approx == APPROX_KRONECKER_NAME + block = self.register_block( - params, block_type(self, params, strides, padding), reuse=reuse) + params, + fb.ConvKFCBasicFB( + layer_collection=self, + params=params, + padding=padding, + strides=strides, + dilation_rate=dilation_rate, + data_format=data_format), + reuse=reuse) block.register_additional_minibatch(inputs, outputs) self._add_uses(params, 1) + def register_depthwise_conv2d(self, + params, + inputs, + outputs, + strides, + padding, + rate=None, + data_format=None, + approx=None, + reuse=VARIABLE_SCOPE): + """Register a call to tf.nn.depthwise_conv2d(). + + Args: + params: 4-D Tensor of shape [filter_height, filter_width, + in_channels, channel_multiplier]. Convolutional filter. + inputs: Tensor of shape [batch_size, input_height, input_width, + in_channels]. Inputs to layer. + outputs: Tensor of shape [batch_size, output_height, output_width, + in_channels * channel_multiplier]. Output produced by depthwise conv2d. + strides: List of ints of length 4. Strides along all dimensions. + padding: string. see tf.nn.conv2d for valid values. + rate: None or List of ints of length 2. Dilation rates in spatial + dimensions. + data_format: str or None. Format of data. + approx: None or str. Must be "diagonal" if non-None. + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. + + Raises: + ValueError: For improper value to 'approx'. + KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: If reuse == True and FisherBlock found but of the wrong type. 
+ """ + assert approx is None or approx == APPROX_DIAGONAL_NAME + assert data_format in [None, "NHWC"] + + block = self.register_block( + params, + fb.DepthwiseConvDiagonalFB( + layer_collection=self, + params=params, + strides=strides, + padding=padding, + rate=rate, + data_format=data_format), + reuse=reuse) + block.register_additional_minibatch(inputs, outputs) + + self._add_uses(params, 1) + + def register_separable_conv2d(self, + depthwise_params, + pointwise_params, + inputs, + depthwise_outputs, + pointwise_outputs, + strides, + padding, + rate=None, + data_format=None, + approx=None, + reuse=VARIABLE_SCOPE): + """Register a call to tf.nn.separable_conv2d(). + + Note: This requires access to intermediate outputs betwee depthwise and + pointwise convolutions. + + Args: + depthwise_params: 4-D Tensor of shape [filter_height, filter_width, + in_channels, channel_multiplier]. Filter for depthwise conv2d. + pointwise_params: 4-D Tensor of shape [1, 1, in_channels * + channel_multiplier, out_channels]. Filter for pointwise conv2d. + inputs: Tensor of shape [batch_size, input_height, input_width, + in_channels]. Inputs to layer. + depthwise_outputs: Tensor of shape [batch_size, output_height, + output_width, in_channels * channel_multiplier]. Output produced by + depthwise conv2d. + pointwise_outputs: Tensor of shape [batch_size, output_height, + output_width, out_channels]. Output produced by pointwise conv2d. + strides: List of ints of length 4. Strides for depthwise conv2d kernel in + all dimensions. + padding: string. see tf.nn.conv2d for valid values. + rate: None or List of ints of length 2. Dilation rate of depthwise conv2d + kernel in spatial dimensions. + data_format: str or None. Format of data. + approx: None or str. Must be "kron" if non-None. + reuse: bool or str. If True, reuse an existing FisherBlock. If False, + create a new FisherBlock. If "VARIABLE_SCOPE", use + tf.get_variable_scope().reuse. 
+ + Raises: + ValueError: For improper value to 'approx'. + KeyError: If reuse == True but no FisherBlock found for 'params'. + ValueError: If reuse == True and FisherBlock found but of the wrong type. + """ + self.register_depthwise_conv2d( + params=depthwise_params, + inputs=inputs, + outputs=depthwise_outputs, + strides=strides, + padding=padding, + rate=rate, + data_format=data_format, + approx=APPROX_DIAGONAL_NAME, + reuse=reuse) + + self.register_conv2d( + params=pointwise_params, + inputs=depthwise_outputs, + outputs=pointwise_outputs, + strides=[1, 1, 1, 1], + padding="VALID", + data_format=data_format, + approx=approx, + reuse=reuse) + def register_generic(self, params, batch_size, @@ -833,3 +1053,10 @@ class LayerCollection(object): with variable_scope.variable_scope(self._var_scope): self.fisher_factors[key] = cls(*args) return self.fisher_factors[key] + + @contextmanager + def as_default(self): + """Sets this LayerCollection as the default.""" + set_default_layer_collection(self) + yield + set_default_layer_collection(None) diff --git a/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py b/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py index f8aa230d9c..9f46853807 100644 --- a/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py +++ b/tensorflow/contrib/kfac/python/ops/layer_collection_lib.py @@ -30,6 +30,8 @@ from tensorflow.python.util.all_util import remove_undocumented # pylint: enable=unused-import,line-too-long,wildcard-import _allowed_symbols = [ + "get_default_layer_collection", + "set_default_layer_collection", "LayerParametersDict", "LayerCollection", "APPROX_KRONECKER_NAME", diff --git a/tensorflow/contrib/kfac/python/ops/utils.py b/tensorflow/contrib/kfac/python/ops/utils.py index 5ce5338a9f..af26f5e56b 100644 --- a/tensorflow/contrib/kfac/python/ops/utils.py +++ b/tensorflow/contrib/kfac/python/ops/utils.py @@ -30,6 +30,7 @@ from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import 
gradients_impl from tensorflow.python.ops import linalg_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import nn_ops from tensorflow.python.ops import random_ops from tensorflow.python.ops import resource_variable_ops from tensorflow.python.ops import variables @@ -431,6 +432,127 @@ def batch_execute(global_step, thunks, batch_size, name=None): return result +def extract_convolution_patches(inputs, + filter_shape, + padding, + strides=None, + dilation_rate=None, + name=None, + data_format=None): + """Extracts inputs to each output coordinate in tf.nn.convolution. + + This is a generalization of tf.extract_image_patches() to tf.nn.convolution(), + where the number of spatial dimensions may be something other than 2. + + Assumes, + - First dimension of inputs is batch_size + - Convolution filter is applied to all input channels. + + Args: + inputs: Tensor of shape [batch_size, ..spatial_image_shape.., + ..spatial_filter_shape.., in_channels]. Inputs to tf.nn.convolution(). + filter_shape: List of ints. Shape of filter passed to tf.nn.convolution(). + padding: string. Padding method. One of "VALID", "SAME". + strides: None or list of ints. Strides along spatial dimensions. + dilation_rate: None or list of ints. Dilation along spatial dimensions. + name: None or str. Name of Op. + data_format: None or str. Format of data. + + Returns: + Tensor of shape [batch_size, ..spatial_image_shape.., + ..spatial_filter_shape.., in_channels] + + Raises: + ValueError: If data_format does not put channel last. + ValueError: If inputs and filter disagree on in_channels. 
+ """ + if not is_data_format_channel_last(data_format): + raise ValueError("Channel must be last dimension.") + with ops.name_scope(name, "extract_convolution_patches", + [inputs, filter_shape, padding, strides, dilation_rate]): + batch_size = inputs.shape.as_list()[0] + in_channels = inputs.shape.as_list()[-1] + + # filter_shape = spatial_filter_shape + [in_channels, out_channels] + spatial_filter_shape = filter_shape[:-2] + if in_channels != filter_shape[-2]: + raise ValueError("inputs and filter_shape must agree on in_channels.") + + # Map each input feature to a location in the output. + out_channels = np.prod(spatial_filter_shape) * in_channels + filters = linalg_ops.eye(out_channels) + filters = array_ops.reshape( + filters, + list(spatial_filter_shape) + [in_channels, out_channels]) + + result = nn_ops.convolution( + inputs, + filters, + padding=padding, + strides=strides, + dilation_rate=dilation_rate) + spatial_output_shape = result.shape.as_list()[1:-1] + result = array_ops.reshape(result, + [batch_size or -1] + spatial_output_shape + + list(spatial_filter_shape) + [in_channels]) + + return result + + +def extract_pointwise_conv2d_patches(inputs, + filter_shape, + name=None, + data_format=None): + """Extract patches for a 1x1 conv2d. + + Args: + inputs: 4-D Tensor of shape [batch_size, height, width, in_channels]. + filter_shape: List of 4 ints. Shape of filter to apply with conv2d() + name: None or str. Name for Op. + data_format: None or str. Format for data. See 'data_format' in + tf.nn.conv2d() for details. + + Returns: + Tensor of shape [batch_size, ..spatial_input_shape.., + ..spatial_filter_shape.., in_channels] + + Raises: + ValueError: if inputs is not 4-D. + ValueError: if filter_shape is not [1, 1, ?, ?] + ValueError: if data_format is not channels-last. 
+ """ + if inputs.shape.ndims != 4: + raise ValueError("inputs must have 4 dims.") + if len(filter_shape) != 4: + raise ValueError("filter_shape must have 4 dims.") + if filter_shape[0] != 1 or filter_shape[1] != 1: + raise ValueError("filter_shape must have shape 1 along spatial dimensions.") + if not is_data_format_channel_last(data_format): + raise ValueError("data_format must be channels last.") + with ops.name_scope(name, "extract_pointwise_conv2d_patches", + [inputs, filter_shape]): + ksizes = [1, 1, 1, 1] # Spatial shape is 1x1. + strides = [1, 1, 1, 1] # Operate on all pixels. + rates = [1, 1, 1, 1] # Dilation has no meaning with spatial shape = 1. + padding = "VALID" # Doesn't matter. + result = array_ops.extract_image_patches(inputs, ksizes, strides, rates, + padding) + + batch_size, input_height, input_width, in_channels = inputs.shape.as_list() + filter_height, filter_width, in_channels, _ = filter_shape + return array_ops.reshape(result, [ + batch_size, input_height, input_width, filter_height, filter_width, + in_channels + ]) + + +def is_data_format_channel_last(data_format): + """True if data_format puts channel last.""" + if data_format is None: + return True + return data_format.endswith("C") + + def matmul_sparse_dense(A, B, name=None): # pylint: disable=invalid-name """Computes matmul(A, B) where A is sparse, B is dense. 
diff --git a/tensorflow/contrib/kfac/python/ops/utils_lib.py b/tensorflow/contrib/kfac/python/ops/utils_lib.py index 8e424a7946..330d222dbf 100644 --- a/tensorflow/contrib/kfac/python/ops/utils_lib.py +++ b/tensorflow/contrib/kfac/python/ops/utils_lib.py @@ -40,6 +40,9 @@ _allowed_symbols = [ "fwd_gradients", "ensure_sequence", "batch_execute", + "extract_convolution_patches", + "extract_pointwise_conv2d_patches", + "is_data_format_channel_last", "matmul_sparse_dense", "matmul_diag_sparse", ] -- GitLab From 6921d4fdbb7f10a0f9a6211eb0b1b535a417d081 Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Mon, 12 Mar 2018 14:52:24 -0700 Subject: [PATCH 755/884] Fix typo in the doc: tf.multiplytiply https://www.tensorflow.org/api_docs/python/tf/multiply PiperOrigin-RevId: 188782466 --- tensorflow/python/ops/math_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tensorflow/python/ops/math_ops.py b/tensorflow/python/ops/math_ops.py index 5130c50717..0063de52c7 100644 --- a/tensorflow/python/ops/math_ops.py +++ b/tensorflow/python/ops/math_ops.py @@ -328,7 +328,7 @@ def multiply(x, y, name=None): return gen_math_ops.mul(x, y, name) -multiply.__doc__ = gen_math_ops.mul.__doc__.replace("Mul", "`tf.multiply`") +multiply.__doc__ = gen_math_ops.mul.__doc__.replace("Multiply", "`tf.multiply`") # TODO(aselle): put deprecation in after another round of global code changes -- GitLab From c9956b9b6da05b5acea3f5d528ec5fed29f45092 Mon Sep 17 00:00:00 2001 From: Yu-Cheng Ling Date: Mon, 12 Mar 2018 15:05:11 -0700 Subject: [PATCH 756/884] TFLite Delegate: Expose input / output tensor indicies in `Init` PiperOrigin-RevId: 188784614 --- tensorflow/contrib/lite/context.h | 2 + tensorflow/contrib/lite/interpreter.cc | 86 ++++++++++++++++----- tensorflow/contrib/lite/interpreter_test.cc | 18 ++++- 3 files changed, 84 insertions(+), 22 deletions(-) diff --git a/tensorflow/contrib/lite/context.h b/tensorflow/contrib/lite/context.h index 6491d8c86a..45184b05ec 100644 --- 
a/tensorflow/contrib/lite/context.h +++ b/tensorflow/contrib/lite/context.h @@ -415,6 +415,8 @@ typedef struct _TfLiteDelegate { typedef struct { TfLiteDelegate* delegate; TfLiteIntArray* nodes_to_replace; + TfLiteIntArray* input_tensors; + TfLiteIntArray* output_tensors; } TfLiteDelegateParams; #ifdef __cplusplus diff --git a/tensorflow/contrib/lite/interpreter.cc b/tensorflow/contrib/lite/interpreter.cc index f03c1c9fe9..cee57bba5e 100644 --- a/tensorflow/contrib/lite/interpreter.cc +++ b/tensorflow/contrib/lite/interpreter.cc @@ -139,31 +139,76 @@ TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( namespace { +// Copy a std::vector to an existing TfLiteIntArray. +// This is a low-level data manipulation function, and it's caller's +// responsibility to ensure TfLiteIntArray has enough size. +void CopyVectorToTfLiteIntArray(const std::vector& vec, + TfLiteIntArray* arr) { + arr->size = vec.size(); + memcpy(arr->data, vec.data(), sizeof(int) * arr->size); +} + // This function allocates a continuous memory space that contains a -// TfLiteDelegateParams followed by a TfLiteIntArray. The pointer will be -// deallocated by C `free` function later. -TfLiteDelegateParams* CreateDelegateParams( - TfLiteDelegate* delegate, const std::vector& nodes_to_replace) { - int nodes_to_replace_size_in_bytes = - TfLiteIntArrayGetSizeInBytes(nodes_to_replace.size()); - void* allocation = - malloc(sizeof(TfLiteDelegateParams) + nodes_to_replace_size_in_bytes); +// TfLiteDelegateParams followed by a several TfLiteIntArray. +// When calling `free` at TfLiteDelegateParams*, all the allocated space +// will be freed together. 
+// +// +-----------------------------------+ +// | TfLiteDelegateParams | +// | TfLiteDelegate* delegate; | +// | TfLiteIntArray* nodes_to_replace; |--\ +// | TfLiteIntArray* input_tensors; |--+--\ +// | TfLiteIntArray* output_tensors; |--+--+--\ +// +-----------------------------------+ | | | +// | TfLiteIntArray (variable size) |<-/ | | +// +-----------------------------------+ | | +// | TfLiteIntArray (variable size) |<----/ | +// +-----------------------------------+ | +// | TfLiteIntArray (variable size) |<-------/ +// +-----------------------------------+ +TfLiteDelegateParams* CreateDelegateParams(TfLiteDelegate* delegate, + const Subgraph& subgraph) { + // Step 1: Calculate the allocation size. + int allocation_size = sizeof(TfLiteDelegateParams); + + int nodes_to_replace_size = + TfLiteIntArrayGetSizeInBytes(subgraph.nodes.size()); + allocation_size += nodes_to_replace_size; + + int input_tensors_size = + TfLiteIntArrayGetSizeInBytes(subgraph.input_tensors.size()); + allocation_size += input_tensors_size; + + int output_tensors_size = + TfLiteIntArrayGetSizeInBytes(subgraph.output_tensors.size()); + allocation_size += output_tensors_size; + + // Step 2: Allocate the memory. + // Use `char*` for conveniently step through the allocated space by bytes. + char* allocation = reinterpret_cast(malloc(allocation_size)); + + // Step 3: Fill all data structures structures. 
TfLiteDelegateParams* params = reinterpret_cast(allocation); - TfLiteIntArray* nodes_to_replace_arr = reinterpret_cast( - static_cast(allocation) + sizeof(TfLiteDelegateParams)); + params->delegate = delegate; + allocation += sizeof(TfLiteDelegateParams); - nodes_to_replace_arr->size = nodes_to_replace.size(); - for (int i = 0; i < nodes_to_replace.size(); ++i) { - nodes_to_replace_arr->data[i] = nodes_to_replace[i]; - } + params->nodes_to_replace = reinterpret_cast(allocation); + CopyVectorToTfLiteIntArray(subgraph.nodes, params->nodes_to_replace); + allocation += nodes_to_replace_size; + + params->input_tensors = reinterpret_cast(allocation); + CopyVectorToTfLiteIntArray(subgraph.input_tensors, params->input_tensors); + allocation += input_tensors_size; + + params->output_tensors = reinterpret_cast(allocation); + CopyVectorToTfLiteIntArray(subgraph.output_tensors, params->output_tensors); + allocation += output_tensors_size; - params->delegate = delegate; - params->nodes_to_replace = nodes_to_replace_arr; return params; } -} // Anonymous namespace +} // namespace TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace, @@ -192,8 +237,7 @@ TfLiteStatus Interpreter::ReplaceSubgraphsWithDelegateKernels( case Subgraph::kTfPartition: { int node_index; - TfLiteDelegateParams* params = - CreateDelegateParams(delegate, subgraph.nodes); + TfLiteDelegateParams* params = CreateDelegateParams(delegate, subgraph); AddNodeWithParameters(subgraph.input_tensors, subgraph.output_tensors, nullptr, 0, params, ®istration, &node_index); @@ -229,8 +273,8 @@ TfLiteStatus Interpreter::GetExecutionPlan(TfLiteIntArray** execution_plan) { *execution_plan = plan_cache_.get(); static_assert(sizeof(plan_cache_->data[0]) == sizeof(execution_plan_[0]), "TfLiteIntArray and execution_plan do not contain same type."); - memcpy(plan_cache_->data, execution_plan_.data(), - sizeof(plan_cache_->data[0]) * 
execution_plan_.size()); + std::memcpy(plan_cache_->data, execution_plan_.data(), + sizeof(plan_cache_->data[0]) * execution_plan_.size()); return kTfLiteOk; } diff --git a/tensorflow/contrib/lite/interpreter_test.cc b/tensorflow/contrib/lite/interpreter_test.cc index 17eb2f4b07..7a029c7df8 100644 --- a/tensorflow/contrib/lite/interpreter_test.cc +++ b/tensorflow/contrib/lite/interpreter_test.cc @@ -923,8 +923,24 @@ TEST_F(TestDelegate, BasicDelegate) { ASSERT_EQ(interpreter_->execution_plan().size(), 1); int node = interpreter_->execution_plan()[0]; const auto* node_and_reg = interpreter_->node_and_registration(node); - ASSERT_EQ(node_and_reg->second.custom_name, + EXPECT_EQ(node_and_reg->second.custom_name, SimpleDelegate::FakeFusedRegistration().custom_name); + + const TfLiteDelegateParams* params = + reinterpret_cast( + node_and_reg->first.builtin_data); + ASSERT_EQ(params->nodes_to_replace->size, 3); + EXPECT_EQ(params->nodes_to_replace->data[0], 0); + EXPECT_EQ(params->nodes_to_replace->data[1], 1); + EXPECT_EQ(params->nodes_to_replace->data[2], 2); + + ASSERT_EQ(params->input_tensors->size, 2); + EXPECT_EQ(params->input_tensors->data[0], 0); + EXPECT_EQ(params->input_tensors->data[1], 1); + + ASSERT_EQ(params->output_tensors->size, 2); + EXPECT_EQ(params->output_tensors->data[0], 3); + EXPECT_EQ(params->output_tensors->data[1], 4); } TEST_F(TestDelegate, ComplexDeligate) { -- GitLab From 8d327187577c797499d5697cdef79af6a5fc7823 Mon Sep 17 00:00:00 2001 From: Ben Barsdell Date: Mon, 12 Mar 2018 15:26:05 -0700 Subject: [PATCH 757/884] Enable CUDNN_TENSOR_OP_MATH for fp16 RNNs (#17367) - Speeds up CUDNN RNNs with fp16 input/output when possible on supported GPUs. Computations will fall back to pseudo-fp16 if tensor op math is not supported. - Enabled by default, but can be disabled by setting the environment variable TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH=1. 
--- tensorflow/stream_executor/cuda/cuda_dnn.cc | 33 ++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 0b3b060fe7..03e3e0857f 100644 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -274,7 +274,8 @@ CUDNN_DNN_ROUTINE_EACH_R6(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) // clang-format off #if CUDNN_VERSION >= 7000 #define CUDNN_DNN_ROUTINE_EACH_R7(__macro) \ - __macro(cudnnSetConvolutionMathType) + __macro(cudnnSetConvolutionMathType) \ + __macro(cudnnSetRNNMatrixMathType) // clang-format on CUDNN_DNN_ROUTINE_EACH_R7(PERFTOOLS_GPUTOOLS_CUDNN_WRAP) @@ -586,6 +587,19 @@ static bool TensorOpMathEnabled() { return is_enabled; } +// A helper function to decide whether to enable the TENSOR_OP_MATH math type +// for RNNs. +static bool RnnTensorOpMathEnabled() { + static bool is_enabled = [] { + bool is_disabled = false; + TF_CHECK_OK( + tensorflow::ReadBoolFromEnvVar("TF_DISABLE_CUDNN_RNN_TENSOR_OP_MATH", + /*default_val=*/false, &is_disabled)); + return !is_disabled; + }(); + return is_enabled; +} + // A helper function to decide whether to use CUDNN_BATCHNORM_SPATIAL_PERSISTENT // in batchnorm. This mode can be faster in some tasks because an optimized path // may be selected for CUDNN_DATA_FLOAT and CUDNN_DATA_HALF data types, compute @@ -1124,6 +1138,9 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { SetFailure(cudnn_params_desc_->Status()); return; } + if (data_type == CUDNN_DATA_HALF) { + set_use_tensor_op_math(true); + } } ~CudnnRnnDescriptor() override { if (rnn_desc_) { @@ -1132,6 +1149,20 @@ class CudnnRnnDescriptor : public CudnnDescriptorCommon { CUDNN_RETURN_IF_FAIL(status, "Unable to destroy RNN descriptor"); } } + void set_use_tensor_op_math(bool use_tensor_op_math) { +#if CUDNN_VERSION >= 7000 + cudnnMathType_t math_type = + (use_tensor_op_math ? 
CUDNN_TENSOR_OP_MATH : CUDNN_DEFAULT_MATH); + if (RnnTensorOpMathEnabled()) { + cudnnStatus_t status = + wrap::cudnnSetRNNMatrixMathType(parent_, rnn_desc_, math_type); + if (status != CUDNN_STATUS_SUCCESS) { + LOG(FATAL) << "could not set cudnn RNN math type: " + << ToString(status); + } + } +#endif + } cudnnRNNDescriptor_t handle() const { if (!ok()) return nullptr; return rnn_desc_; -- GitLab From 1db29b831dc66a98442ce7a00204e0128239c1dd Mon Sep 17 00:00:00 2001 From: Mark Daoust Date: Mon, 12 Mar 2018 15:29:51 -0700 Subject: [PATCH 758/884] Fix the script entry point for freeze_graph. The wrapper created by `setup.py` calls the entry point function with no arguments. `freeze_graph.main` expects the global `FLAGS` to be set, and one argument. This change adds a `run_main` function to use as the entry point, which expects no arguments and parses the flags. It also adds a `flags` argument to `main` so the flags can be passed directly without using a `global FLAGS` declaration. --- tensorflow/python/tools/freeze_graph.py | 36 +++++++++++++------------ tensorflow/tools/pip_package/setup.py | 2 +- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/tools/freeze_graph.py b/tensorflow/python/tools/freeze_graph.py index a52f325ddb..e9f1def48c 100644 --- a/tensorflow/python/tools/freeze_graph.py +++ b/tensorflow/python/tools/freeze_graph.py @@ -56,8 +56,6 @@ from tensorflow.python.saved_model import tag_constants from tensorflow.python.tools import saved_model_utils from tensorflow.python.training import saver as saver_lib -FLAGS = None - def freeze_graph_with_def_protos(input_graph_def, input_saver_def, @@ -256,25 +254,24 @@ def freeze_graph(input_graph, checkpoint_version=checkpoint_version) -def main(unused_args): - if FLAGS.checkpoint_version == 1: +def main(unused_args, flags): + if flags.checkpoint_version == 1: checkpoint_version = saver_pb2.SaverDef.V1 - elif FLAGS.checkpoint_version == 2: + elif flags.checkpoint_version == 2: 
checkpoint_version = saver_pb2.SaverDef.V2 else: print("Invalid checkpoint version (must be '1' or '2'): %d" % - FLAGS.checkpoint_version) + flags.checkpoint_version) return -1 - freeze_graph(FLAGS.input_graph, FLAGS.input_saver, FLAGS.input_binary, - FLAGS.input_checkpoint, FLAGS.output_node_names, - FLAGS.restore_op_name, FLAGS.filename_tensor_name, - FLAGS.output_graph, FLAGS.clear_devices, FLAGS.initializer_nodes, - FLAGS.variable_names_whitelist, FLAGS.variable_names_blacklist, - FLAGS.input_meta_graph, FLAGS.input_saved_model_dir, - FLAGS.saved_model_tags, checkpoint_version) - + freeze_graph(flags.input_graph, flags.input_saver, flags.input_binary, + flags.input_checkpoint, flags.output_node_names, + flags.restore_op_name, flags.filename_tensor_name, + flags.output_graph, flags.clear_devices, flags.initializer_nodes, + flags.variable_names_whitelist, flags.variable_names_blacklist, + flags.input_meta_graph, flags.input_saved_model_dir, + flags.saved_model_tags, checkpoint_version) -if __name__ == "__main__": +def run_main(): parser = argparse.ArgumentParser() parser.register("type", "bool", lambda v: v.lower() == "true") parser.add_argument( @@ -376,5 +373,10 @@ if __name__ == "__main__": separated by \',\'. 
For tag-set contains multiple tags, all tags \ must be passed in.\ """) - FLAGS, unparsed = parser.parse_known_args() - app.run(main=main, argv=[sys.argv[0]] + unparsed) + flags, unparsed = parser.parse_known_args() + + my_main = lambda unused_args: main(unused_args, flags) + app.run(main=my_main, argv=[sys.argv[0]] + unparsed) + +if __name__ == '__main__': + run_main() diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index 815ea8157d..7fdf0d8c17 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -72,7 +72,7 @@ if sys.version_info < (3, 4): # pylint: disable=line-too-long CONSOLE_SCRIPTS = [ - 'freeze_graph = tensorflow.python.tools.freeze_graph:main', + 'freeze_graph = tensorflow.python.tools.freeze_graph:run_main', 'toco_from_protos = tensorflow.contrib.lite.toco.python.toco_from_protos:main', 'toco = tensorflow.contrib.lite.toco.python.toco_wrapper:main', 'saved_model_cli = tensorflow.python.tools.saved_model_cli:main', -- GitLab From 8b4f7542cee2122eedc398a3d299e47e8f22f615 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 12 Mar 2018 15:40:47 -0700 Subject: [PATCH 759/884] [tf.data] Enable Dataset.make_one_shot_iterator() and Dataset.__iter__() in eager mode. This change partially replicates the code in `tf.contrib.eager.Iterator`. However, since that class depends on contrib-level functionality (viz. cross-device prefetching support), we cannot move it wholesale to core. 
PiperOrigin-RevId: 188790349 --- tensorflow/contrib/eager/python/datasets.py | 128 ++------------- .../contrib/eager/python/datasets_test.py | 21 +++ tensorflow/python/data/ops/BUILD | 2 + tensorflow/python/data/ops/dataset_ops.py | 25 ++- tensorflow/python/data/ops/iterator_ops.py | 148 ++++++++++++++++++ 5 files changed, 207 insertions(+), 117 deletions(-) diff --git a/tensorflow/contrib/eager/python/datasets.py b/tensorflow/contrib/eager/python/datasets.py index 30a7642dd3..332bada57b 100644 --- a/tensorflow/contrib/eager/python/datasets.py +++ b/tensorflow/contrib/eager/python/datasets.py @@ -27,7 +27,6 @@ from tensorflow.python.data.util import sparse from tensorflow.python.eager import context from tensorflow.python.framework import constant_op from tensorflow.python.framework import dtypes -from tensorflow.python.framework import errors from tensorflow.python.framework import function from tensorflow.python.framework import ops from tensorflow.python.ops import gen_dataset_ops @@ -45,8 +44,13 @@ def _generate_shared_name(prefix): return "{}{}".format(prefix, uid) -class Iterator(object): - """An iterator producing tf.Tensor objects from a tf.data.Dataset.""" +class Iterator(iterator_ops.EagerIterator): + """An iterator producing tf.Tensor objects from a tf.data.Dataset. + + NOTE: Unlike the iterator created by the + @{tf.data.Dataset.make_one_shot_iterator} method, this class enables + additional experimental functionality, such as prefetching to the GPU. + """ def __init__(self, dataset): """Creates a new iterator over the given dataset. @@ -67,37 +71,12 @@ class Iterator(object): Raises: RuntimeError: When invoked without eager execution enabled. """ - - if not context.executing_eagerly(): - raise RuntimeError( - "{} objects can only be used when eager execution is enabled, use " - "tf.data.Dataset.make_initializable_iterator or " - "tf.data.Dataset.make_one_shot_iterator for graph construction". 
- format(type(self))) - with ops.device("/device:CPU:0"): - ds_variant = dataset._as_variant_tensor() # pylint: disable=protected-access - self._output_classes = dataset.output_classes - self._output_types = dataset.output_types - self._output_shapes = dataset.output_shapes - self._flat_output_types = nest.flatten( - sparse.as_dense_types(self._output_types, self._output_classes)) - self._flat_output_shapes = nest.flatten( - sparse.as_dense_shapes(self._output_shapes, self._output_classes)) - self._resource = gen_dataset_ops.iterator( - shared_name="", - container=_generate_shared_name("eageriterator"), - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - gen_dataset_ops.make_iterator(ds_variant, self._resource) - # Delete the resource when this object is deleted - self._resource_deleter = resource_variable_ops.EagerResourceDeleter( - handle=self._resource, handle_device="/device:CPU:0") - self._device = context.context().device_name - self._buffer_resource_handle = None + super(Iterator, self).__init__(dataset) if not context.context().device_spec.device_type: is_remote_device = False else: is_remote_device = context.context().device_spec.device_type != "CPU" + self._buffer_resource_handle = None if is_remote_device: with ops.device("/device:CPU:0"): iter_string_handle = gen_dataset_ops.iterator_to_string_handle( @@ -106,7 +85,7 @@ class Iterator(object): @function.Defun(dtypes.string) def remote_fn(h): remote_iterator = iterator_ops.Iterator.from_string_handle( - h, self._output_types, self._output_shapes) + h, self.output_types, self.output_shapes, self.output_classes) return remote_iterator.get_next() remote_fn.add_to_graph(None) @@ -124,89 +103,16 @@ class Iterator(object): handle=self._buffer_resource_handle, handle_device=self._device) - def __iter__(self): - return self - - def __next__(self): # For Python 3 compatibility - return self.next() - def _next_internal(self): """Returns a nested structure of `tf.Tensor`s containing 
the next element. """ - with ops.device(self._device): - if self._buffer_resource_handle is not None: + if self._buffer_resource_handle is not None: + with ops.device(self._device): ret = prefetching_ops.function_buffering_resource_get_next( function_buffer_resource=self._buffer_resource_handle, output_types=self._flat_output_types) - else: - # TODO(ashankar): Consider removing this ops.device() contextmanager - # and instead mimic ops placement in graphs: Operations on resource - # handles execute on the same device as where the resource is placed. - # NOTE(mrry): Here we use the "_sync" variant of `iterator_get_next` - # because in eager mode this code will run synchronously on the calling - # thread. Therefore we do not need to make a defensive context switch - # to a background thread, and can achieve a small constant performance - # boost by invoking the iterator synchronously. - ret = gen_dataset_ops.iterator_get_next_sync( - self._resource, - output_types=self._flat_output_types, - output_shapes=self._flat_output_shapes) - - return sparse.deserialize_sparse_tensors( - nest.pack_sequence_as(self._output_types, ret), self._output_types, - self._output_shapes, self._output_classes) - - def next(self): - """Returns a nested structure of `tf.Tensor`s containing the next element. - """ - try: - return self._next_internal() - except errors.OutOfRangeError: - raise StopIteration - - @property - def output_classes(self): - """Returns the class of each component of an element of this iterator. - - The expected values are `tf.Tensor` and `tf.SparseTensor`. - - Returns: - A nested structure of Python `type` objects corresponding to each - component of an element of this dataset. - """ - return self._output_classes - - @property - def output_shapes(self): - """Returns the shape of each component of an element of this iterator. - - Returns: - A nested structure of `tf.TensorShape` objects corresponding to each - component of an element of this dataset. 
- """ - return self._output_shapes - - @property - def output_types(self): - """Returns the type of each component of an element of this iterator. - - Returns: - A nested structure of `tf.DType` objects corresponding to each component - of an element of this dataset. - """ - return self._output_types - - def get_next(self, name=None): - """Returns a nested structure of `tf.Tensor`s containing the next element. - - Args: - name: (Optional.) A name for the created operation. Currently unused. - - Returns: - A nested structure of `tf.Tensor` objects. - - Raises: - `tf.errors.OutOfRangeError`: If the end of the dataset has been reached. - """ - del name - return self._next_internal() + return sparse.deserialize_sparse_tensors( + nest.pack_sequence_as(self._output_types, ret), self._output_types, + self._output_shapes, self._output_classes) + else: + return super(Iterator, self)._next_internal() diff --git a/tensorflow/contrib/eager/python/datasets_test.py b/tensorflow/contrib/eager/python/datasets_test.py index 35c3c5d3fa..4afadd88f5 100644 --- a/tensorflow/contrib/eager/python/datasets_test.py +++ b/tensorflow/contrib/eager/python/datasets_test.py @@ -44,6 +44,18 @@ class IteratorTest(test.TestCase): got.append(t.numpy()) self.assertAllEqual([0, 1, 2, 3], got) + def testBasicOneShotIterator(self): + got = [] + for t in Dataset.range(4).make_one_shot_iterator(): + got.append(t.numpy()) + self.assertAllEqual([0, 1, 2, 3], got) + + def testBasicImplicitIterator(self): + got = [] + for t in Dataset.range(4): + got.append(t.numpy()) + self.assertAllEqual([0, 1, 2, 3], got) + def testGetNext(self): iterator = datasets.Iterator(Dataset.range(4)) self.assertEqual(0, iterator.get_next().numpy()) @@ -53,6 +65,15 @@ class IteratorTest(test.TestCase): with self.assertRaises(errors.OutOfRangeError): iterator.get_next() + def testGetNextOneShotIterator(self): + iterator = Dataset.range(4).make_one_shot_iterator() + self.assertEqual(0, iterator.get_next().numpy()) + 
self.assertEqual(1, iterator.get_next().numpy()) + self.assertEqual(2, iterator.get_next().numpy()) + self.assertEqual(3, iterator.get_next().numpy()) + with self.assertRaises(errors.OutOfRangeError): + iterator.get_next() + def testMultipleIteratorsOnTheSameDataset(self): ds = Dataset.range(4) it1 = datasets.Iterator(ds) diff --git a/tensorflow/python/data/ops/BUILD b/tensorflow/python/data/ops/BUILD index a8f2154db8..3119ab0037 100644 --- a/tensorflow/python/data/ops/BUILD +++ b/tensorflow/python/data/ops/BUILD @@ -52,9 +52,11 @@ py_library( "//tensorflow/python:dataset_ops_gen", "//tensorflow/python:dtypes", "//tensorflow/python:framework_ops", + "//tensorflow/python:resource_variable_ops", "//tensorflow/python:tensor_shape", "//tensorflow/python/data/util:nest", "//tensorflow/python/data/util:sparse", + "//tensorflow/python/eager:context", ], ) diff --git a/tensorflow/python/data/ops/dataset_ops.py b/tensorflow/python/data/ops/dataset_ops.py index e0d63b5ebc..390ce852b1 100644 --- a/tensorflow/python/data/ops/dataset_ops.py +++ b/tensorflow/python/data/ops/dataset_ops.py @@ -111,11 +111,11 @@ class Dataset(object): self.output_types, self.output_shapes, self.output_classes) - def make_one_shot_iterator(self): + def __iter__(self): """Creates an `Iterator` for enumerating the elements of this dataset. - Note: The returned iterator will be initialized automatically. - A "one-shot" iterator does not currently support re-initialization. + The returned iterator implements the Python iterator protocol and therefore + can only be used in eager mode. Returns: An `Iterator` over the elements of this dataset. @@ -124,9 +124,22 @@ class Dataset(object): RuntimeError: If eager execution is enabled. 
""" if context.executing_eagerly(): - raise RuntimeError( - "dataset.make_one_shot_iterator is not supported when eager " - "execution is enabled.") + return iterator_ops.EagerIterator(self) + else: + raise RuntimeError("dataset.__iter__() is only supported when eager " + "execution is enabled.") + + def make_one_shot_iterator(self): + """Creates an `Iterator` for enumerating the elements of this dataset. + + Note: The returned iterator will be initialized automatically. + A "one-shot" iterator does not currently support re-initialization. + + Returns: + An `Iterator` over the elements of this dataset. + """ + if context.executing_eagerly(): + return iterator_ops.EagerIterator(self) # NOTE(mrry): We capture by value here to ensure that `_make_dataset()` is # a 0-argument function. @function.Defun(capture_by_value=True) diff --git a/tensorflow/python/data/ops/iterator_ops.py b/tensorflow/python/data/ops/iterator_ops.py index 4756ec7482..d79b9d6011 100644 --- a/tensorflow/python/data/ops/iterator_ops.py +++ b/tensorflow/python/data/ops/iterator_ops.py @@ -17,14 +17,18 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import threading import warnings from tensorflow.python.data.util import nest from tensorflow.python.data.util import sparse +from tensorflow.python.eager import context from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors from tensorflow.python.framework import ops from tensorflow.python.framework import tensor_shape from tensorflow.python.ops import gen_dataset_ops +from tensorflow.python.ops import resource_variable_ops from tensorflow.python.util.tf_export import tf_export @@ -412,3 +416,147 @@ class Iterator(object): of an element of this dataset. 
""" return self._output_types + + +_uid_counter = 0 +_uid_lock = threading.Lock() + + +def _generate_shared_name(prefix): + with _uid_lock: + global _uid_counter + uid = _uid_counter + _uid_counter += 1 + return "{}{}".format(prefix, uid) + + +class EagerIterator(object): + """An iterator producing tf.Tensor objects from a tf.data.Dataset.""" + + def __init__(self, dataset): + """Creates a new iterator over the given dataset. + + For example: + ```python + dataset = tf.data.Dataset.range(4) + for x in Iterator(dataset): + print(x) + ``` + + Tensors produced will be placed on the device on which this iterator object + was created. + + Args: + dataset: A `tf.data.Dataset` object. + + Raises: + RuntimeError: When invoked without eager execution enabled. + """ + + if not context.executing_eagerly(): + raise RuntimeError( + "{} objects can only be used when eager execution is enabled, use " + "tf.data.Dataset.make_initializable_iterator or " + "tf.data.Dataset.make_one_shot_iterator for graph construction". 
+ format(type(self))) + with ops.device("/device:CPU:0"): + ds_variant = dataset._as_variant_tensor() # pylint: disable=protected-access + self._output_classes = dataset.output_classes + self._output_types = dataset.output_types + self._output_shapes = dataset.output_shapes + self._flat_output_types = nest.flatten( + sparse.as_dense_types(self._output_types, self._output_classes)) + self._flat_output_shapes = nest.flatten( + sparse.as_dense_shapes(self._output_shapes, self._output_classes)) + self._resource = gen_dataset_ops.iterator( + shared_name="", + container=_generate_shared_name("eageriterator"), + output_types=self._flat_output_types, + output_shapes=self._flat_output_shapes) + gen_dataset_ops.make_iterator(ds_variant, self._resource) + # Delete the resource when this object is deleted + self._resource_deleter = resource_variable_ops.EagerResourceDeleter( + handle=self._resource, handle_device="/device:CPU:0") + self._device = context.context().device_name + + def __iter__(self): + return self + + def __next__(self): # For Python 3 compatibility + return self.next() + + def _next_internal(self): + """Returns a nested structure of `tf.Tensor`s containing the next element. + """ + with ops.device(self._device): + # TODO(ashankar): Consider removing this ops.device() contextmanager + # and instead mimic ops placement in graphs: Operations on resource + # handles execute on the same device as where the resource is placed. + # NOTE(mrry): Here we use the "_sync" variant of `iterator_get_next` + # because in eager mode this code will run synchronously on the calling + # thread. Therefore we do not need to make a defensive context switch + # to a background thread, and can achieve a small constant performance + # boost by invoking the iterator synchronously. 
+ ret = gen_dataset_ops.iterator_get_next_sync( + self._resource, + output_types=self._flat_output_types, + output_shapes=self._flat_output_shapes) + + return sparse.deserialize_sparse_tensors( + nest.pack_sequence_as(self._output_types, ret), self._output_types, + self._output_shapes, self._output_classes) + + def next(self): + """Returns a nested structure of `tf.Tensor`s containing the next element. + """ + try: + return self._next_internal() + except errors.OutOfRangeError: + raise StopIteration + + @property + def output_classes(self): + """Returns the class of each component of an element of this iterator. + + The expected values are `tf.Tensor` and `tf.SparseTensor`. + + Returns: + A nested structure of Python `type` objects corresponding to each + component of an element of this dataset. + """ + return self._output_classes + + @property + def output_shapes(self): + """Returns the shape of each component of an element of this iterator. + + Returns: + A nested structure of `tf.TensorShape` objects corresponding to each + component of an element of this dataset. + """ + return self._output_shapes + + @property + def output_types(self): + """Returns the type of each component of an element of this iterator. + + Returns: + A nested structure of `tf.DType` objects corresponding to each component + of an element of this dataset. + """ + return self._output_types + + def get_next(self, name=None): + """Returns a nested structure of `tf.Tensor`s containing the next element. + + Args: + name: (Optional.) A name for the created operation. Currently unused. + + Returns: + A nested structure of `tf.Tensor` objects. + + Raises: + `tf.errors.OutOfRangeError`: If the end of the dataset has been reached. + """ + del name + return self._next_internal() -- GitLab From ddbd1ca1865739be448ad1d01d38e086c3a82856 Mon Sep 17 00:00:00 2001 From: Benoit Steiner Date: Mon, 12 Mar 2018 16:25:46 -0700 Subject: [PATCH 760/884] Don't use shapes unless it's safe to do so. 
PiperOrigin-RevId: 188796626 --- tensorflow/core/grappler/optimizers/constant_folding.cc | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 6cb0447355..4c9431deac 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -1529,6 +1529,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (replaceable) { ReplaceOperationWithIdentity(0, node, output); + continue; } } @@ -1569,11 +1570,12 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (replaceable) { ReplaceOperationWithIdentity(0, node, output); + continue; } } } - if (IsTile(*node) && + if (use_shape_info && IsTile(*node) && properties->GetInputProperties(node->name()).size() == 2) { const auto& m = properties->GetInputProperties(node->name())[1]; if (TensorShape::IsValid(m.shape()) && m.has_value()) { @@ -1597,11 +1599,12 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (replaceable) { ReplaceOperationWithIdentity(0, node, output); + continue; } } } - if (IsPad(*node) && + if (use_shape_info && IsPad(*node) && properties->GetInputProperties(node->name()).size() >= 2) { const auto& p = properties->GetInputProperties(node->name())[1]; if (TensorShape::IsValid(p.shape()) && p.has_value()) { @@ -1620,6 +1623,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (replaceable) { ReplaceOperationWithIdentity(0, node, output); + continue; } } } @@ -1639,6 +1643,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output, } if (replaceable) { ReplaceOperationWithIdentity(0, node, output); + continue; } } -- GitLab From 2bf6a50677983e88866c44a97a482a615eb52705 Mon Sep 17 00:00:00 2001 From: Frank Chen Date: Mon, 12 Mar 2018 16:41:23 -0700 Subject: [PATCH 761/884] fix build file via buildifier --- tensorflow/contrib/learn/BUILD | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tensorflow/contrib/learn/BUILD b/tensorflow/contrib/learn/BUILD index 44da18b181..b05f5eeaee 100644 --- a/tensorflow/contrib/learn/BUILD +++ b/tensorflow/contrib/learn/BUILD @@ -432,8 +432,8 @@ py_test( srcs = ["python/learn/estimators/kmeans_test.py"], srcs_version = "PY2AND3", tags = [ - "nomac", "noasan", # b/73741358 + "nomac", ], deps = [ ":learn", -- GitLab From 2277b19ee300640c58137bc43ad152f357b3d7c3 Mon Sep 17 00:00:00 2001 From: Derek Murray Date: Mon, 12 Mar 2018 16:46:48 -0700 Subject: [PATCH 762/884] Switch BuildGraphOptions to wrap CallableOptions. This change harmonizes the graph construction codepaths for DirectSession and MasterSession, which will make it easier to add new subgraph creation features. PiperOrigin-RevId: 188799932 --- .../common_runtime/build_graph_options.cc | 6 +- .../core/common_runtime/build_graph_options.h | 11 +-- .../core/common_runtime/direct_session.cc | 27 ++---- .../common_runtime/graph_execution_state.cc | 27 +++--- .../distributed_runtime/master_session.cc | 83 ++++++++++--------- tensorflow/core/graph/subgraph.cc | 19 +++++ tensorflow/core/graph/subgraph.h | 6 ++ 7 files changed, 101 insertions(+), 78 deletions(-) diff --git a/tensorflow/core/common_runtime/build_graph_options.cc b/tensorflow/core/common_runtime/build_graph_options.cc index 811d459758..a9dc6ca6cd 100644 --- a/tensorflow/core/common_runtime/build_graph_options.cc +++ b/tensorflow/core/common_runtime/build_graph_options.cc @@ -21,15 +21,15 @@ namespace tensorflow { string BuildGraphOptions::DebugString() const { string rv = "Feed endpoints: "; - for (auto& s : feed_endpoints) { + for (auto& s : callable_options.feed()) { strings::StrAppend(&rv, s, ", "); } strings::StrAppend(&rv, "\nFetch endpoints: "); - for (auto& s : fetch_endpoints) { + for (auto& s : callable_options.fetch()) { strings::StrAppend(&rv, s, ", "); } strings::StrAppend(&rv, "\nTarget nodes: "); - for (auto& s : target_nodes) { + for (auto& s : 
callable_options.target()) { strings::StrAppend(&rv, s, ", "); } return rv; diff --git a/tensorflow/core/common_runtime/build_graph_options.h b/tensorflow/core/common_runtime/build_graph_options.h index 5f0e8f170b..5ca170e922 100644 --- a/tensorflow/core/common_runtime/build_graph_options.h +++ b/tensorflow/core/common_runtime/build_graph_options.h @@ -19,25 +19,18 @@ limitations under the License. #include #include "tensorflow/core/platform/types.h" -#include "tensorflow/core/protobuf/debug.pb.h" +#include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { struct BuildGraphOptions { - std::vector feed_endpoints; - std::vector fetch_endpoints; - - // TODO(vrv): Remove this when we unify target_nodes and fetch_endpoint, - // the former via "ref" fetch_endpoints. - std::vector target_nodes; + CallableOptions callable_options; // If `true`, uses Arg/Retval to implement feeds/fetches; otherwise // uses Recv/Send to implement feeds/fetches. // TODO(mrry): Remove this when the distributed runtime supports Arg/Retval. 
bool use_function_convention = false; - DebugOptions debug_options; - string DebugString() const; }; diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 1fbc314e2e..25cfb9e524 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -1083,19 +1083,8 @@ Status DirectSession::CreateExecutors( std::unique_ptr* out_func_info, RunStateArgs* run_state_args) { BuildGraphOptions options; - options.feed_endpoints = std::vector(callable_options.feed().begin(), - callable_options.feed().end()); - options.fetch_endpoints = std::vector( - callable_options.fetch().begin(), callable_options.fetch().end()); - options.target_nodes = std::vector(callable_options.target().begin(), - callable_options.target().end()); + options.callable_options = callable_options; options.use_function_convention = !run_state_args->is_partial_run; - if (!callable_options.run_options() - .debug_options() - .debug_tensor_watch_opts() - .empty()) { - options.debug_options = callable_options.run_options().debug_options(); - } std::unique_ptr func_info(new FunctionInfo); std::unique_ptr ek(new ExecutorsAndKeys); @@ -1191,9 +1180,11 @@ Status DirectSession::CreateExecutors( /*shape_map=*/nullptr); // EXPERIMENTAL: tfdbg inserts debug nodes in the graph. 
- if (!options.debug_options.debug_tensor_watch_opts().empty()) { + const DebugOptions& debug_options = + options.callable_options.run_options().debug_options(); + if (!debug_options.debug_tensor_watch_opts().empty()) { TF_RETURN_IF_ERROR(DecorateAndPublishGraphForDebug( - options.debug_options, partition_graph.get(), params.device)); + debug_options, partition_graph.get(), params.device)); } TF_RETURN_IF_ERROR(EnsureMemoryTypes(DeviceType(device->device_type()), @@ -1384,19 +1375,19 @@ Status DirectSession::CreateGraphs( execution_state->BuildGraph(subgraph_options, &client_graph)); } - if (subgraph_options.feed_endpoints.size() != + if (subgraph_options.callable_options.feed_size() != client_graph->feed_types.size()) { return errors::Internal( "Graph pruning failed: requested number of feed endpoints = ", - subgraph_options.feed_endpoints.size(), + subgraph_options.callable_options.feed_size(), " versus number of pruned feed endpoints = ", client_graph->feed_types.size()); } - if (subgraph_options.fetch_endpoints.size() != + if (subgraph_options.callable_options.fetch_size() != client_graph->fetch_types.size()) { return errors::Internal( "Graph pruning failed: requested number of fetch endpoints = ", - subgraph_options.fetch_endpoints.size(), + subgraph_options.callable_options.fetch_size(), " versus number of pruned fetch endpoints = ", client_graph->fetch_types.size()); } diff --git a/tensorflow/core/common_runtime/graph_execution_state.cc b/tensorflow/core/common_runtime/graph_execution_state.cc index 785ec3d227..f5e3d78242 100644 --- a/tensorflow/core/common_runtime/graph_execution_state.cc +++ b/tensorflow/core/common_runtime/graph_execution_state.cc @@ -252,8 +252,8 @@ Status GraphExecutionState::InitBaseGraph(const BuildGraphOptions& options) { // Rewrite the graph before placement. 
rewrite_metadata_.reset(new subgraph::RewriteGraphMetadata); TF_RETURN_IF_ERROR(subgraph::RewriteGraphForExecution( - new_graph.get(), options.feed_endpoints, options.fetch_endpoints, - options.target_nodes, device_set_->client_device()->attributes(), + new_graph.get(), options.callable_options, + device_set_->client_device()->attributes(), options.use_function_convention, rewrite_metadata_.get())); } @@ -299,13 +299,16 @@ Status GraphExecutionState::OptimizeGraph( item.id = "tf_graph"; graph_->ToGraphDef(&item.graph); - item.fetch = options.fetch_endpoints; - item.fetch.insert(item.fetch.end(), options.target_nodes.begin(), - options.target_nodes.end()); + item.fetch.insert(item.fetch.end(), + options.callable_options.fetch().begin(), + options.callable_options.fetch().end()); + item.fetch.insert(item.fetch.end(), + options.callable_options.target().begin(), + options.callable_options.target().end()); - if (!options.feed_endpoints.empty()) { + if (!options.callable_options.feed().empty()) { std::unordered_set feeds; - for (const string& feed : options.feed_endpoints) { + for (const string& feed : options.callable_options.feed()) { TensorId id = ParseTensorName(feed); if (id.second != 0) { return errors::InvalidArgument("Unsupported feed: ", feed); @@ -404,8 +407,8 @@ Status GraphExecutionState::BuildGraph(const BuildGraphOptions& options, // Extract the subset of the graph that needs to be run, adding feed/fetch // ops as needed. 
TF_RETURN_IF_ERROR(subgraph::RewriteGraphForExecution( - ng.get(), options.feed_endpoints, options.fetch_endpoints, - options.target_nodes, device_set_->client_device()->attributes(), + ng.get(), options.callable_options, + device_set_->client_device()->attributes(), options.use_function_convention, &rewrite_metadata)); } else { // This GraphExecutionState represents a graph that was @@ -415,8 +418,10 @@ Status GraphExecutionState::BuildGraph(const BuildGraphOptions& options, rewrite_metadata = *rewrite_metadata_; } - CHECK_EQ(options.feed_endpoints.size(), rewrite_metadata.feed_types.size()); - CHECK_EQ(options.fetch_endpoints.size(), rewrite_metadata.fetch_types.size()); + CHECK_EQ(options.callable_options.feed_size(), + rewrite_metadata.feed_types.size()); + CHECK_EQ(options.callable_options.fetch_size(), + rewrite_metadata.fetch_types.size()); // Make a fresh copy of the function library for the client graph. std::unique_ptr flib( diff --git a/tensorflow/core/distributed_runtime/master_session.cc b/tensorflow/core/distributed_runtime/master_session.cc index 878a1398c9..01da54fcb3 100644 --- a/tensorflow/core/distributed_runtime/master_session.cc +++ b/tensorflow/core/distributed_runtime/master_session.cc @@ -72,7 +72,7 @@ class MasterSession::ReffedClientGraph : public core::RefCounted { client_graph_(std::move(cg)), session_opts_(session_opts), is_partial_(is_partial), - debug_opts_(bopts.debug_options), + debug_opts_(bopts.callable_options.run_options().debug_options()), worker_cache_(worker_cache), should_deregister_(should_deregister) { VLOG(1) << "Created ReffedClientGraph for node with " @@ -921,61 +921,70 @@ void MasterSession::ReffedClientGraph::DeregisterPartitions() { } } +namespace { +void CopyAndSortStrings(size_t size, + const std::function& input_accessor, + protobuf::RepeatedPtrField* output) { + std::vector temp; + temp.reserve(size); + for (size_t i = 0; i < size; ++i) { + output->Add(input_accessor(i)); + } + std::sort(output->begin(), 
output->end()); +} +} // namespace + void BuildBuildGraphOptions(const RunStepRequestWrapper& req, BuildGraphOptions* opts) { - for (size_t i = 0; i < req.num_feeds(); ++i) { - opts->feed_endpoints.push_back(req.feed_name(i)); - } - for (size_t i = 0; i < req.num_fetches(); ++i) { - opts->fetch_endpoints.push_back(req.fetch_name(i)); - } - for (size_t i = 0; i < req.num_targets(); ++i) { - opts->target_nodes.push_back(req.target_name(i)); - } + CallableOptions* callable_opts = &opts->callable_options; + CopyAndSortStrings(req.num_feeds(), + [&req](size_t i) { return req.feed_name(i); }, + callable_opts->mutable_feed()); + CopyAndSortStrings(req.num_fetches(), + [&req](size_t i) { return req.fetch_name(i); }, + callable_opts->mutable_fetch()); + CopyAndSortStrings(req.num_targets(), + [&req](size_t i) { return req.target_name(i); }, + callable_opts->mutable_target()); if (!req.options().debug_options().debug_tensor_watch_opts().empty()) { - opts->debug_options = req.options().debug_options(); + *callable_opts->mutable_run_options()->mutable_debug_options() = + req.options().debug_options(); } - - std::sort(opts->feed_endpoints.begin(), opts->feed_endpoints.end()); - std::sort(opts->target_nodes.begin(), opts->target_nodes.end()); - std::sort(opts->fetch_endpoints.begin(), opts->fetch_endpoints.end()); } void BuildBuildGraphOptions(const PartialRunSetupRequest& req, BuildGraphOptions* opts) { - for (const auto& feed : req.feed()) { - opts->feed_endpoints.push_back(feed); - } - for (const auto& fetch : req.fetch()) { - opts->fetch_endpoints.push_back(fetch); - } - for (const auto& target : req.target()) { - opts->target_nodes.push_back(target); - } + CallableOptions* callable_opts = &opts->callable_options; + CopyAndSortStrings(req.feed_size(), [&req](size_t i) { return req.feed(i); }, + callable_opts->mutable_feed()); + CopyAndSortStrings(req.fetch_size(), + [&req](size_t i) { return req.fetch(i); }, + callable_opts->mutable_fetch()); + 
CopyAndSortStrings(req.target_size(), + [&req](size_t i) { return req.target(i); }, + callable_opts->mutable_target()); // TODO(cais): Add TFDBG support to partial runs. - - std::sort(opts->feed_endpoints.begin(), opts->feed_endpoints.end()); - std::sort(opts->target_nodes.begin(), opts->target_nodes.end()); - std::sort(opts->fetch_endpoints.begin(), opts->fetch_endpoints.end()); } uint64 HashBuildGraphOptions(const BuildGraphOptions& opts) { uint64 h = 0x2b992ddfa23249d6ull; - for (const string& name : opts.feed_endpoints) { + for (const string& name : opts.callable_options.feed()) { h = Hash64(name.c_str(), name.size(), h); } - for (const string& name : opts.target_nodes) { + for (const string& name : opts.callable_options.target()) { h = Hash64(name.c_str(), name.size(), h); } - for (const string& name : opts.fetch_endpoints) { + for (const string& name : opts.callable_options.fetch()) { h = Hash64(name.c_str(), name.size(), h); } - if (!opts.debug_options.debug_tensor_watch_opts().empty()) { - const string watch_summary = SummarizeDebugTensorWatches( - opts.debug_options.debug_tensor_watch_opts()); + const DebugOptions& debug_options = + opts.callable_options.run_options().debug_options(); + if (!debug_options.debug_tensor_watch_opts().empty()) { + const string watch_summary = + SummarizeDebugTensorWatches(debug_options.debug_tensor_watch_opts()); h = Hash64(watch_summary.c_str(), watch_summary.size(), h); } @@ -984,15 +993,15 @@ uint64 HashBuildGraphOptions(const BuildGraphOptions& opts) { string BuildGraphOptionsString(const BuildGraphOptions& opts) { string buf; - for (const string& name : opts.feed_endpoints) { + for (const string& name : opts.callable_options.feed()) { strings::StrAppend(&buf, " FdE: ", name); } strings::StrAppend(&buf, "\n"); - for (const string& name : opts.target_nodes) { + for (const string& name : opts.callable_options.target()) { strings::StrAppend(&buf, " TN: ", name); } strings::StrAppend(&buf, "\n"); - for (const string& name : 
opts.fetch_endpoints) { + for (const string& name : opts.callable_options.fetch()) { strings::StrAppend(&buf, " FeE: ", name); } strings::StrAppend(&buf, "\n"); diff --git a/tensorflow/core/graph/subgraph.cc b/tensorflow/core/graph/subgraph.cc index 2a08bf8ca0..ca93d049d0 100644 --- a/tensorflow/core/graph/subgraph.cc +++ b/tensorflow/core/graph/subgraph.cc @@ -323,6 +323,25 @@ Status RewriteGraphForExecution( return Status::OK(); } +namespace { +template +std::vector ConvertToVector(StringContainer field) { + return std::vector(field.begin(), field.end()); +} +} // namespace + +Status RewriteGraphForExecution(Graph* g, + const CallableOptions& callable_options, + const DeviceAttributes& device_info, + bool use_function_convention, + RewriteGraphMetadata* out_metadata) { + return RewriteGraphForExecution(g, ConvertToVector(callable_options.feed()), + ConvertToVector(callable_options.fetch()), + ConvertToVector(callable_options.target()), + device_info, use_function_convention, + out_metadata); +} + } // namespace subgraph } // namespace tensorflow diff --git a/tensorflow/core/graph/subgraph.h b/tensorflow/core/graph/subgraph.h index 3c1f8870f5..0dc59582f4 100644 --- a/tensorflow/core/graph/subgraph.h +++ b/tensorflow/core/graph/subgraph.h @@ -22,6 +22,7 @@ limitations under the License. 
#include "tensorflow/core/graph/graph.h" #include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/gtl/array_slice.h" +#include "tensorflow/core/protobuf/config.pb.h" namespace tensorflow { namespace subgraph { @@ -70,6 +71,11 @@ Status RewriteGraphForExecution( const gtl::ArraySlice& target_node_names, const DeviceAttributes& device_info, bool use_function_convention, RewriteGraphMetadata* out_metadata); +Status RewriteGraphForExecution(Graph* g, + const CallableOptions& callable_options, + const DeviceAttributes& device_info, + bool use_function_convention, + RewriteGraphMetadata* out_metadata); typedef std::unordered_map NameIndex; -- GitLab From 6a125bbc0e6fd1e33c90cc6134b2466bbc81198a Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 16:54:10 -0700 Subject: [PATCH 763/884] BREAKING_CHANGE: Split out event_ndims=0 bijectors from Affine and CholeskyOuterProduct. - Deprecate event_ndims argument - Create a Square bijector for the scalar case of CholeskyOuterProduct (which now only operates on matrices). 
- Create a AffineScalar bijector for the scalar case of Affine (which now only operates on vectors) PiperOrigin-RevId: 188801116 --- tensorflow/contrib/distributions/BUILD | 38 +++ .../bijectors/affine_scalar_test.py | 153 ++++++++++ .../kernel_tests/bijectors/affine_test.py | 263 +----------------- .../bijectors/cholesky_outer_product_test.py | 47 +--- .../kernel_tests/bijectors/invert_test.py | 3 +- .../kernel_tests/bijectors/square_test.py | 58 ++++ .../transformed_distribution_test.py | 5 +- .../python/ops/bijectors/__init__.py | 4 + .../python/ops/bijectors/affine.py | 29 +- .../python/ops/bijectors/affine_scalar.py | 138 +++++++++ .../ops/bijectors/cholesky_outer_product.py | 40 +-- .../python/ops/bijectors/square.py | 84 ++++++ .../distributions/python/ops/sinh_arcsinh.py | 9 +- .../python/ops/vector_sinh_arcsinh_diag.py | 2 +- 14 files changed, 506 insertions(+), 367 deletions(-) create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py create mode 100644 tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py create mode 100644 tensorflow/contrib/distributions/python/ops/bijectors/square.py diff --git a/tensorflow/contrib/distributions/BUILD b/tensorflow/contrib/distributions/BUILD index 203fbf9931..6bd3f5f09b 100644 --- a/tensorflow/contrib/distributions/BUILD +++ b/tensorflow/contrib/distributions/BUILD @@ -816,6 +816,25 @@ cuda_py_test( tags = ["noasan"], # times out b/63678675 ) +cuda_py_test( + name = "affine_scalar_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/affine_scalar_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/contrib/linalg:linalg_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + 
"//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "affine_linear_operator_test", size = "small", @@ -1164,6 +1183,25 @@ cuda_py_test( ], ) +cuda_py_test( + name = "square_test", + size = "small", + srcs = ["python/kernel_tests/bijectors/square_test.py"], + additional_deps = [ + ":bijectors_py", + ":distributions_py", + "//third_party/py/numpy", + "@six_archive//:six", + "//tensorflow/contrib/linalg:linalg_py", + "//tensorflow/python:array_ops", + "//tensorflow/python:client_testlib", + "//tensorflow/python:framework_for_generated_wrappers", + "//tensorflow/python:framework_test_lib", + "//tensorflow/python:math_ops", + "//tensorflow/python:platform_test", + ], +) + cuda_py_test( name = "weibull_test", size = "small", diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py new file mode 100644 index 0000000000..16173a166f --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_scalar_test.py @@ -0,0 +1,153 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Affine Scalar Tests.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops.bijectors.affine_scalar import AffineScalar +from tensorflow.python.framework import dtypes +from tensorflow.python.ops import array_ops +from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency +from tensorflow.python.platform import test + + +class AffineScalarBijectorTest(test.TestCase): + """Tests correctness of the Y = scale @ x + shift transformation.""" + + def testProperties(self): + with self.test_session(): + mu = -1. + # scale corresponds to 1. + bijector = AffineScalar(shift=mu) + self.assertEqual("affine_scalar", bijector.name) + + def testNoBatchScalar(self): + with self.test_session() as sess: + + def static_run(fun, x): + return fun(x).eval() + + def dynamic_run(fun, x_value): + x_value = np.array(x_value) + x = array_ops.placeholder(dtypes.float32, name="x") + return sess.run(fun(x), feed_dict={x: x_value}) + + for run in (static_run, dynamic_run): + mu = -1. + # Corresponds to scale = 2 + bijector = AffineScalar(shift=mu, scale=2.) + x = [1., 2, 3] # Three scalar samples (no batches). + self.assertAllClose([1., 3, 5], run(bijector.forward, x)) + self.assertAllClose([1., 1.5, 2.], run(bijector.inverse, x)) + self.assertAllClose([-np.log(2.)] * 3, + run(bijector.inverse_log_det_jacobian, x)) + + def testOneBatchScalarViaIdentityIn64BitUserProvidesShiftOnly(self): + with self.test_session() as sess: + + def static_run(fun, x): + return fun(x).eval() + + def dynamic_run(fun, x_value): + x_value = np.array(x_value).astype(np.float64) + x = array_ops.placeholder(dtypes.float64, name="x") + return sess.run(fun(x), feed_dict={x: x_value}) + + for run in (static_run, dynamic_run): + mu = np.float64([1.]) + # One batch, scalar. 
+ # Corresponds to scale = 1. + bijector = AffineScalar(shift=mu) + x = np.float64([1.]) # One sample from one batches. + self.assertAllClose([2.], run(bijector.forward, x)) + self.assertAllClose([0.], run(bijector.inverse, x)) + self.assertAllClose([0.], run(bijector.inverse_log_det_jacobian, x)) + + def testOneBatchScalarViaIdentityIn64BitUserProvidesScaleOnly(self): + with self.test_session() as sess: + + def static_run(fun, x): + return fun(x).eval() + + def dynamic_run(fun, x_value): + x_value = np.array(x_value).astype(np.float64) + x = array_ops.placeholder(dtypes.float64, name="x") + return sess.run(fun(x), feed_dict={x: x_value}) + + for run in (static_run, dynamic_run): + multiplier = np.float64([2.]) + # One batch, scalar. + # Corresponds to scale = 2, shift = 0. + bijector = AffineScalar(scale=multiplier) + x = np.float64([1.]) # One sample from one batches. + self.assertAllClose([2.], run(bijector.forward, x)) + self.assertAllClose([0.5], run(bijector.inverse, x)) + self.assertAllClose([np.log(0.5)], + run(bijector.inverse_log_det_jacobian, x)) + + def testTwoBatchScalarIdentityViaIdentity(self): + with self.test_session() as sess: + + def static_run(fun, x): + return fun(x).eval() + + def dynamic_run(fun, x_value): + x_value = np.array(x_value) + x = array_ops.placeholder(dtypes.float32, name="x") + return sess.run(fun(x), feed_dict={x: x_value}) + + for run in (static_run, dynamic_run): + mu = [1., -1] + # Univariate, two batches. + # Corresponds to scale = 1. + bijector = AffineScalar(shift=mu) + x = [1., 1] # One sample from each of two batches. 
+ self.assertAllClose([2., 0], run(bijector.forward, x)) + self.assertAllClose([0., 2], run(bijector.inverse, x)) + self.assertAllClose([0., 0.], run(bijector.inverse_log_det_jacobian, x)) + + def testTwoBatchScalarIdentityViaScale(self): + with self.test_session() as sess: + + def static_run(fun, x): + return fun(x).eval() + + def dynamic_run(fun, x_value): + x_value = np.array(x_value) + x = array_ops.placeholder(dtypes.float32, name="x") + return sess.run(fun(x), feed_dict={x: x_value}) + + for run in (static_run, dynamic_run): + mu = [1., -1] + # Univariate, two batches. + # Corresponds to scale = 1. + bijector = AffineScalar(shift=mu, scale=[2., 1]) + x = [1., 1] # One sample from each of two batches. + self.assertAllClose([3., 0], run(bijector.forward, x)) + self.assertAllClose([0., 2], run(bijector.inverse, x)) + self.assertAllClose( + [-np.log(2), 0.], run(bijector.inverse_log_det_jacobian, x)) + + def testScalarCongruency(self): + with self.test_session(): + bijector = AffineScalar(shift=3.6, scale=0.42) + assert_scalar_congruency(bijector, lower_x=-2., upper_x=2.) + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py index c9158117f7..077e6176b4 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/affine_test.py @@ -25,7 +25,6 @@ import numpy as np from tensorflow.contrib.distributions.python.ops.bijectors.affine import Affine from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops -from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency from tensorflow.python.platform import test @@ -36,192 +35,9 @@ class AffineBijectorTest(test.TestCase): with self.test_session(): mu = -1. # scale corresponds to 1. 
- bijector = Affine(shift=mu, event_ndims=0) + bijector = Affine(shift=mu) self.assertEqual("affine", bijector.name) - def testNoBatchScalarViaIdentity(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - mu = -1. - # Corresponds to scale = 2 - bijector = Affine( - shift=mu, scale_identity_multiplier=2., event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = [1., 2, 3] # Three scalar samples (no batches). - self.assertAllClose([1., 3, 5], run(bijector.forward, x)) - self.assertAllClose([1., 1.5, 2.], run(bijector.inverse, x)) - self.assertAllClose(-np.log(2.), - run(bijector.inverse_log_det_jacobian, x)) - - def testNoBatchScalarViaDiag(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - mu = -1. - # Corresponds to scale = 2 - bijector = Affine(shift=mu, scale_identity_multiplier=2., event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = [1., 2, 3] # Three scalar samples (no batches). 
- self.assertAllClose([1., 3, 5], run(bijector.forward, x)) - self.assertAllClose([1., 1.5, 2.], run(bijector.inverse, x)) - self.assertAllClose(-np.log(2.), - run(bijector.inverse_log_det_jacobian, x)) - - def testWeirdSampleNoBatchScalarViaDiagMultiplier(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - mu = -1. - # Corresponds to scale = 2. - bijector = Affine( - shift=mu, scale_identity_multiplier=2., event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = [[1., 2, 3], [4, 5, 6]] # Weird sample shape. - self.assertAllClose([[1., 3, 5], - [7, 9, 11]], - run(bijector.forward, x)) - self.assertAllClose([[1., 1.5, 2.], - [2.5, 3, 3.5]], - run(bijector.inverse, x)) - self.assertAllClose(-np.log(2.), - run(bijector.inverse_log_det_jacobian, x)) - - def testOneBatchScalarViaIdentityIn64BitUserProvidesShiftOnly(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value).astype(np.float64) - x = array_ops.placeholder(dtypes.float64, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - mu = np.float64([1.]) - # One batch, scalar. - # Corresponds to scale = 1. - bijector = Affine(shift=mu, event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = np.float64([1.]) # One sample from one batches. 
- self.assertAllClose([2.], run(bijector.forward, x)) - self.assertAllClose([0.], run(bijector.inverse, x)) - self.assertAllClose(0., run(bijector.inverse_log_det_jacobian, x)) - - def testOneBatchScalarViaIdentityIn64BitUserProvidesMultiplierOnly(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value).astype(np.float64) - x = array_ops.placeholder(dtypes.float64, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - multiplier = np.float64([2.]) - # One batch, scalar. - # Corresponds to scale = 2, shift = 0. - bijector = Affine(scale_identity_multiplier=multiplier, event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = np.float64([1.]) # One sample from one batches. - self.assertAllClose([2.], run(bijector.forward, x)) - self.assertAllClose([0.5], run(bijector.inverse, x)) - self.assertAllClose([np.log(0.5)], - run(bijector.inverse_log_det_jacobian, x)) - - def testOneBatchScalarViaDiagMultiplier(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - mu = [1.] - # One batch, scalar. - # Corresponds to scale = 1. - bijector = Affine(shift=mu, scale_identity_multiplier=1., event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = [1.] # One sample from one batches. 
- self.assertAllClose([2.], run(bijector.forward, x)) - self.assertAllClose([0.], run(bijector.inverse, x)) - self.assertAllClose(0., run(bijector.inverse_log_det_jacobian, x)) - - def testTwoBatchScalarIdentityViaIdentity(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - mu = [1., -1] - # Univariate, two batches. - # Corresponds to scale = 1. - bijector = Affine(shift=mu, event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = [1., 1] # One sample from each of two batches. - self.assertAllClose([2., 0], run(bijector.forward, x)) - self.assertAllClose([0., 2], run(bijector.inverse, x)) - self.assertAllClose(0., run(bijector.inverse_log_det_jacobian, x)) - - def testTwoBatchScalarIdentityViaDiagMultiplier(self): - with self.test_session() as sess: - - def static_run(fun, x): - return fun(x).eval() - - def dynamic_run(fun, x_value): - x_value = np.array(x_value) - x = array_ops.placeholder(dtypes.float32, name="x") - return sess.run(fun(x), feed_dict={x: x_value}) - - for run in (static_run, dynamic_run): - mu = [1., -1] - # Univariate, two batches. - # Corresponds to scale = 1. - bijector = Affine(shift=mu, scale_identity_multiplier=1., event_ndims=0) - self.assertEqual(0, bijector.event_ndims.eval()) # "is scalar" - x = [1., 1] # One sample from each of two batches. 
- self.assertAllClose([2., 0], run(bijector.forward, x)) - self.assertAllClose([0., 2], run(bijector.inverse, x)) - self.assertAllClose(0., run(bijector.inverse_log_det_jacobian, x)) - def testNoBatchMultivariateIdentity(self): with self.test_session() as sess: @@ -238,7 +54,6 @@ class AffineBijectorTest(test.TestCase): # Multivariate # Corresponds to scale = [[1., 0], [0, 1.]] bijector = Affine(shift=mu) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [1., 1] # matmul(sigma, x) + shift # = [-1, -1] + [1, -1] @@ -269,7 +84,6 @@ class AffineBijectorTest(test.TestCase): # Multivariate # Corresponds to scale = [[2., 0], [0, 1.]] bijector = Affine(shift=mu, scale_diag=[2., 1]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [1., 1] # matmul(sigma, x) + shift # = [-1, -1] + [1, -1] @@ -297,22 +111,17 @@ class AffineBijectorTest(test.TestCase): x = array_ops.placeholder(dtypes.float32, name="x") mu = array_ops.placeholder(dtypes.float32, name="mu") scale_diag = array_ops.placeholder(dtypes.float32, name="scale_diag") - event_ndims = array_ops.placeholder(dtypes.int32, name="event_ndims") x_value = np.array([[1., 1]], dtype=np.float32) mu_value = np.array([1., -1], dtype=np.float32) scale_diag_value = np.array([2., 2], dtype=np.float32) - event_ndims_value = np.array(1, dtype=np.int32) feed_dict = { x: x_value, mu: mu_value, scale_diag: scale_diag_value, - event_ndims: event_ndims_value } - bijector = Affine( - shift=mu, scale_diag=scale_diag, event_ndims=event_ndims) - self.assertEqual(1, sess.run(bijector.event_ndims, feed_dict)) + bijector = Affine(shift=mu, scale_diag=scale_diag) self.assertAllClose([[3., 1]], sess.run(bijector.forward(x), feed_dict)) self.assertAllClose([[0., 1]], sess.run(bijector.inverse(x), feed_dict)) self.assertAllClose( @@ -335,7 +144,6 @@ class AffineBijectorTest(test.TestCase): # Corresponds to 1 2x2 matrix, with twos on the diagonal. scale = 2. 
bijector = Affine(shift=mu, scale_identity_multiplier=scale) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [[[1., 1]]] self.assertAllClose([[[3., 1]]], run(bijector.forward, x)) self.assertAllClose([[[0., 1]]], run(bijector.inverse, x)) @@ -358,7 +166,6 @@ class AffineBijectorTest(test.TestCase): # Corresponds to 1 2x2 matrix, with twos on the diagonal. scale_diag = [[2., 2]] bijector = Affine(shift=mu, scale_diag=scale_diag) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [[[1., 1]]] self.assertAllClose([[[3., 1]]], run(bijector.forward, x)) self.assertAllClose([[[0., 1]]], run(bijector.inverse, x)) @@ -370,23 +177,18 @@ class AffineBijectorTest(test.TestCase): x = array_ops.placeholder(dtypes.float32, name="x") mu = array_ops.placeholder(dtypes.float32, name="mu") scale_diag = array_ops.placeholder(dtypes.float32, name="scale_diag") - event_ndims = array_ops.placeholder(dtypes.int32, name="event_ndims") x_value = np.array([[[1., 1]]], dtype=np.float32) mu_value = np.array([[1., -1]], dtype=np.float32) scale_diag_value = np.array([[2., 2]], dtype=np.float32) - event_ndims_value = 1 feed_dict = { x: x_value, mu: mu_value, scale_diag: scale_diag_value, - event_ndims: event_ndims_value } - bijector = Affine( - shift=mu, scale_diag=scale_diag, event_ndims=event_ndims) - self.assertEqual(1, sess.run(bijector.event_ndims, feed_dict)) + bijector = Affine(shift=mu, scale_diag=scale_diag) self.assertAllClose([[[3., 1]]], sess.run(bijector.forward(x), feed_dict)) self.assertAllClose([[[0., 1]]], sess.run(bijector.inverse(x), feed_dict)) self.assertAllClose([-np.log(4)], @@ -410,9 +212,7 @@ class AffineBijectorTest(test.TestCase): bijector = Affine( shift=mu, scale_identity_multiplier=1., - scale_diag=[1., 1., 1.], - event_ndims=1) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" + scale_diag=[1., 1., 1.]) x = [1., 2, 3] # Three scalar samples (no batches). 
self.assertAllClose([1., 3, 5], run(bijector.forward, x)) self.assertAllClose([1., 1.5, 2.], run(bijector.inverse, x)) @@ -437,7 +237,6 @@ class AffineBijectorTest(test.TestCase): shift=mu, scale_identity_multiplier=1., scale_tril=[[1., 0], [2., 1]]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [[1., 2]] # One multivariate sample. self.assertAllClose([[1., 5]], run(bijector.forward, x)) self.assertAllClose([[1., 0.5]], run(bijector.inverse, x)) @@ -460,7 +259,6 @@ class AffineBijectorTest(test.TestCase): # scale = [[2., 0], [2, 3]] bijector = Affine( shift=mu, scale_diag=[1., 2.], scale_tril=[[1., 0], [2., 1]]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [[1., 2]] # One multivariate sample. self.assertAllClose([[1., 7]], run(bijector.forward, x)) self.assertAllClose([[1., 1 / 3.]], run(bijector.inverse, x)) @@ -486,7 +284,6 @@ class AffineBijectorTest(test.TestCase): scale_identity_multiplier=1.0, scale_diag=[1., 2.], scale_tril=[[1., 0], [2., 1]]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [[1., 2]] # One multivariate sample. self.assertAllClose([[2., 9]], run(bijector.forward, x)) self.assertAllClose([[2 / 3., 5 / 12.]], run(bijector.inverse, x)) @@ -514,7 +311,6 @@ class AffineBijectorTest(test.TestCase): scale_perturb_factor=[[2., 0], [0., 0], [0, 1]]) bijector_ref = Affine(shift=mu, scale_diag=[10., 2, 3]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [1., 2, 3] # Vector. self.assertAllClose([9., 3, 8], run(bijector.forward, x)) self.assertAllClose( @@ -550,7 +346,6 @@ class AffineBijectorTest(test.TestCase): scale_perturb_factor=[[2., 0], [0., 0], [0, 1]]) bijector_ref = Affine(shift=mu, scale_diag=[10., 3, 5]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [1., 2, 3] # Vector. 
self.assertAllClose([9., 5, 14], run(bijector.forward, x)) self.assertAllClose( @@ -586,7 +381,6 @@ class AffineBijectorTest(test.TestCase): bijector_ref = Affine( shift=mu, scale_tril=[[10., 0, 0], [1, 3, 0], [2, 3, 5]]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [1., 2, 3] # Vector. self.assertAllClose([9., 6, 22], run(bijector.forward, x)) self.assertAllClose( @@ -622,7 +416,6 @@ class AffineBijectorTest(test.TestCase): bijector_ref = Affine( shift=mu, scale_tril=[[6., 0, 0], [1, 3, 0], [2, 3, 5]]) - self.assertEqual(1, bijector.event_ndims.eval()) # "is vector" x = [1., 2, 3] # Vector. self.assertAllClose([5., 6, 22], run(bijector.forward, x)) self.assertAllClose( @@ -647,38 +440,6 @@ class AffineBijectorTest(test.TestCase): with self.assertRaisesOpError("diagonal part must be non-zero"): bijector.forward([1., 1.]).eval() - def testEventNdimsLargerThanOneRaises(self): - with self.test_session(): - mu = [1., -1] - with self.assertRaisesRegexp( - ValueError, (r"event_ndims\(2\) was not 0 or 1")): - # Scale corresponds to 2x2 identity matrix. - bijector = Affine(shift=mu, event_ndims=2, validate_args=True) - bijector.forward([1., 1.]).eval() - - def testScaleZeroScalarRaises(self): - with self.test_session(): - mu = -1. - # Check Identity matrix with zero scaling. - bijector = Affine( - shift=mu, - scale_identity_multiplier=0., - event_ndims=0, - validate_args=True) - with self.assertRaisesOpError("identity_multiplier should be non-zero"): - bijector.forward(1.).eval() - - def testScaleDiagAndEventNdimsZeroRaises(self): - # Check Diag matrix with zero scaling. - with self.assertRaisesRegexp(ValueError, "only scale argument"): - Affine(shift=None, scale_diag=[0.0], event_ndims=0, validate_args=True) - - def testScalarCongruency(self): - with self.test_session(): - bijector = Affine( - shift=3.6, scale_identity_multiplier=0.42, event_ndims=0) - assert_scalar_congruency(bijector, lower_x=-2., upper_x=2.) 
- def _makeScale(self, x, scale_identity_multiplier=None, @@ -747,14 +508,12 @@ class AffineBijectorTest(test.TestCase): scale_args = dict({"x": x}, **args) scale = self._makeScale(**scale_args) - bijector_args = dict({"event_ndims": 1}, **args) - # We haven't specified enough information for the scale. if scale is None: with self.assertRaisesRegexp(ValueError, ("must be specified.")): - bijector = Affine(shift=shift, **bijector_args) + bijector = Affine(shift=shift, **args) else: - bijector = Affine(shift=shift, **bijector_args) + bijector = Affine(shift=shift, **args) np_x = x # For the case a vector is passed in, we need to make the shape # match the matrix for matmul to work. @@ -829,15 +588,5 @@ class AffineBijectorTest(test.TestCase): x=np.array( [1., 2], dtype=np.float32)) - def testScalarEventIdentityScale(self): - with self.test_session() as sess: - doubler = Affine( - scale_identity_multiplier=2., - event_ndims=0) - doubler2 = doubler.inverse_log_det_jacobian(2.) - doubler2_ildj_ = sess.run([doubler2]) - self.assertAllClose([-np.log(2.)], doubler2_ildj_) - - if __name__ == "__main__": test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py index ab2338f4cb..f392e83d2c 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/cholesky_outer_product_test.py @@ -23,7 +23,6 @@ import numpy as np from tensorflow.contrib.distributions.python.ops import bijectors from tensorflow.python.framework import dtypes from tensorflow.python.ops import array_ops -from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency from tensorflow.python.platform import test @@ -32,8 +31,7 @@ class CholeskyOuterProductBijectorTest(test.TestCase): def testBijectorMatrix(self): 
with self.test_session(): - bijector = bijectors.CholeskyOuterProduct( - event_ndims=2, validate_args=True) + bijector = bijectors.CholeskyOuterProduct(validate_args=True) self.assertEqual("cholesky_outer_product", bijector.name) x = [[[1., 0], [2, 1]], [[np.sqrt(2.), 0], [np.sqrt(8.), 1]]] y = np.matmul(x, np.transpose(x, axes=(0, 2, 1))) @@ -60,39 +58,12 @@ class CholeskyOuterProductBijectorTest(test.TestCase): atol=0., rtol=1e-7) - def testBijectorScalar(self): - with self.test_session(): - bijector = bijectors.CholeskyOuterProduct( - event_ndims=0, validate_args=True) - self.assertEqual("cholesky_outer_product", bijector.name) - x = [[[1., 5], - [2, 1]], - [[np.sqrt(2.), 3], - [np.sqrt(8.), 1]]] - y = np.square(x) - ildj = -np.log(2.) - np.log(x) - self.assertAllClose(y, bijector.forward(x).eval()) - self.assertAllClose(x, bijector.inverse(y).eval()) - self.assertAllClose( - ildj, bijector.inverse_log_det_jacobian(y).eval(), atol=0., rtol=1e-7) - self.assertAllClose( - -bijector.inverse_log_det_jacobian(y).eval(), - bijector.forward_log_det_jacobian(x).eval(), - atol=0., - rtol=1e-7) - - def testScalarCongruency(self): - with self.test_session(): - bijector = bijectors.CholeskyOuterProduct( - event_ndims=0, validate_args=True) - assert_scalar_congruency(bijector, lower_x=1e-3, upper_x=1.5, rtol=0.05) - def testNoBatchStatic(self): x = np.array([[1., 0], [2, 1]]) # np.linalg.cholesky(y) y = np.array([[1., 2], [2, 5]]) # np.matmul(x, x.T) with self.test_session() as sess: - y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x) - x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y) + y_actual = bijectors.CholeskyOuterProduct().forward(x=x) + x_actual = bijectors.CholeskyOuterProduct().inverse(y=y) [y_actual_, x_actual_] = sess.run([y_actual, x_actual]) self.assertAllEqual([2, 2], y_actual.get_shape()) self.assertAllEqual([2, 2], x_actual.get_shape()) @@ -105,8 +76,8 @@ class CholeskyOuterProductBijectorTest(test.TestCase): with 
self.test_session() as sess: x_pl = array_ops.placeholder(dtypes.float32) y_pl = array_ops.placeholder(dtypes.float32) - y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x_pl) - x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y_pl) + y_actual = bijectors.CholeskyOuterProduct().forward(x=x_pl) + x_actual = bijectors.CholeskyOuterProduct().inverse(y=y_pl) [y_actual_, x_actual_] = sess.run([y_actual, x_actual], feed_dict={x_pl: x, y_pl: y}) self.assertEqual(None, y_actual.get_shape()) @@ -124,8 +95,8 @@ class CholeskyOuterProductBijectorTest(test.TestCase): [[9., 3], [3, 5]]]) # np.matmul(x, x.T) with self.test_session() as sess: - y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x) - x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y) + y_actual = bijectors.CholeskyOuterProduct().forward(x=x) + x_actual = bijectors.CholeskyOuterProduct().inverse(y=y) [y_actual_, x_actual_] = sess.run([y_actual, x_actual]) self.assertEqual([2, 2, 2], y_actual.get_shape()) self.assertEqual([2, 2, 2], x_actual.get_shape()) @@ -144,8 +115,8 @@ class CholeskyOuterProductBijectorTest(test.TestCase): with self.test_session() as sess: x_pl = array_ops.placeholder(dtypes.float32) y_pl = array_ops.placeholder(dtypes.float32) - y_actual = bijectors.CholeskyOuterProduct(event_ndims=2).forward(x=x_pl) - x_actual = bijectors.CholeskyOuterProduct(event_ndims=2).inverse(y=y_pl) + y_actual = bijectors.CholeskyOuterProduct().forward(x=x_pl) + x_actual = bijectors.CholeskyOuterProduct().inverse(y=y_pl) [y_actual_, x_actual_] = sess.run([y_actual, x_actual], feed_dict={x_pl: x, y_pl: y}) self.assertEqual(None, y_actual.get_shape()) diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py index 0ff3530428..28e3e31354 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py +++ 
b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/invert_test.py @@ -35,8 +35,7 @@ class InvertBijectorTest(test.TestCase): for fwd in [ bijectors.Identity(), bijectors.Exp(event_ndims=1), - bijectors.Affine( - shift=[0., 1.], scale_diag=[2., 3.], event_ndims=1), + bijectors.Affine(shift=[0., 1.], scale_diag=[2., 3.]), bijectors.Softplus(event_ndims=1), bijectors.SoftmaxCentered(event_ndims=1), bijectors.SigmoidCentered(), diff --git a/tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py new file mode 100644 index 0000000000..f03d6f1343 --- /dev/null +++ b/tensorflow/contrib/distributions/python/kernel_tests/bijectors/square_test.py @@ -0,0 +1,58 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Tests for Bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.distributions.python.ops import bijectors +from tensorflow.python.ops.distributions.bijector_test_util import assert_scalar_congruency +from tensorflow.python.platform import test + + +class SquareBijectorTest(test.TestCase): + """Tests the correctness of the Y = X ** 2 transformation.""" + + def testBijectorScalar(self): + with self.test_session(): + bijector = bijectors.Square(validate_args=True) + self.assertEqual("square", bijector.name) + x = [[[1., 5], + [2, 1]], + [[np.sqrt(2.), 3], + [np.sqrt(8.), 1]]] + y = np.square(x) + ildj = -np.log(2.) - np.log(x) + self.assertAllClose(y, bijector.forward(x).eval()) + self.assertAllClose(x, bijector.inverse(y).eval()) + self.assertAllClose( + ildj, bijector.inverse_log_det_jacobian(y).eval(), atol=0., rtol=1e-7) + self.assertAllClose( + -bijector.inverse_log_det_jacobian(y).eval(), + bijector.forward_log_det_jacobian(x).eval(), + atol=0., + rtol=1e-7) + + def testScalarCongruency(self): + with self.test_session(): + bijector = bijectors.Square(validate_args=True) + assert_scalar_congruency(bijector, lower_x=1e-3, upper_x=1.5, rtol=0.05) + + +if __name__ == "__main__": + test.main() diff --git a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py index cbaf74d3f6..af13553c32 100644 --- a/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py +++ b/tensorflow/contrib/distributions/python/kernel_tests/transformed_distribution_test.py @@ -245,9 +245,8 @@ class TransformedDistributionTest(test.TestCase): with self.test_session() as sess: exp2 = self._cls()( ds.Exponential(rate=0.25), - 
bijector=ds.bijectors.Affine( - scale_identity_multiplier=2., - event_ndims=0)) + bijector=ds.bijectors.AffineScalar(scale=2.) + ) log_prob = exp2.log_prob(1.) log_prob_ = sess.run(log_prob) base_log_prob = -0.5 * 0.25 + np.log(0.25) diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py index 46ec49754a..452f1caa30 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/__init__.py @@ -17,6 +17,7 @@ @@AbsoluteValue @@Affine @@AffineLinearOperator +@@AffineScalar @@Bijector @@BatchNormalization @@Chain @@ -38,6 +39,7 @@ @@SinhArcsinh @@SoftmaxCentered @@Softplus +@@Square @@Weibull @@masked_autoregressive_default_template @@ -54,6 +56,7 @@ from __future__ import print_function from tensorflow.contrib.distributions.python.ops.bijectors.absolute_value import * from tensorflow.contrib.distributions.python.ops.bijectors.affine import * from tensorflow.contrib.distributions.python.ops.bijectors.affine_linear_operator import * +from tensorflow.contrib.distributions.python.ops.bijectors.affine_scalar import * from tensorflow.contrib.distributions.python.ops.bijectors.batch_normalization import * from tensorflow.contrib.distributions.python.ops.bijectors.chain import * from tensorflow.contrib.distributions.python.ops.bijectors.cholesky_outer_product import * @@ -73,6 +76,7 @@ from tensorflow.contrib.distributions.python.ops.bijectors.sigmoid_centered impo from tensorflow.contrib.distributions.python.ops.bijectors.sinh_arcsinh import * from tensorflow.contrib.distributions.python.ops.bijectors.softmax_centered import * from tensorflow.contrib.distributions.python.ops.bijectors.softplus import * +from tensorflow.contrib.distributions.python.ops.bijectors.square import * from tensorflow.python.ops.distributions.bijector import * from tensorflow.python.ops.distributions.identity_bijector import Identity 
diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py index 05bb9c2f9b..7fe73ada44 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/affine.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine.py @@ -104,7 +104,6 @@ class Affine(bijector.Bijector): scale_tril=None, scale_perturb_factor=None, scale_perturb_diag=None, - event_ndims=1, validate_args=False, name="affine"): """Instantiates the `Affine` bijector. @@ -157,8 +156,6 @@ class Affine(bijector.Bijector): matrix. `scale_perturb_diag` has shape [N1, N2, ... r], which represents an `r x r` diagonal matrix. When `None` low rank updates will take the form `scale_perturb_factor * scale_perturb_factor.T`. - event_ndims: Scalar `int` `Tensor` indicating the number of dimensions - associated with a particular draw from the distribution. Must be 0 or 1. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. @@ -187,23 +184,6 @@ class Affine(bijector.Bijector): with self._name_scope("init", values=[ shift, scale_identity_multiplier, scale_diag, scale_tril, scale_perturb_diag, scale_perturb_factor]): - event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") - event_ndims_const = tensor_util.constant_value(event_ndims) - if event_ndims_const is not None and event_ndims_const not in (0, 1): - raise ValueError("event_ndims(%s) was not 0 or 1" % event_ndims_const) - else: - if validate_args: - # Shape tool will catch if event_ndims is negative. - event_ndims = control_flow_ops.with_dependencies( - [check_ops.assert_less( - event_ndims, 2, message="event_ndims must be 0 or 1")], - event_ndims) - - if event_ndims_const == 0 and not self._is_only_identity_multiplier: - raise ValueError( - "If event_ndims == 0, the only scale argument you can pass is " - "scale_identity_multiplier. 
All others operate on vectors.") - # In the absence of `loc` and `scale`, we'll assume `dtype` is `float32`. dtype = dtypes.float32 @@ -251,12 +231,11 @@ class Affine(bijector.Bijector): self._scale = scale self._shaper = _DistributionShape( batch_ndims=batch_ndims, - event_ndims=event_ndims, + event_ndims=1, validate_args=validate_args) super(Affine, self).__init__( - event_ndims=event_ndims, + event_ndims=1, graph_parents=( - [event_ndims] + [self._scale] if tensor_util.is_tensor(self._scale) else self._scale.graph_parents + [self._shift] if self._shift is not None else []), @@ -388,9 +367,7 @@ class Affine(bijector.Bijector): if self._is_only_identity_multiplier: # We don't pad in this case and instead let the fldj be applied # via broadcast. - event_size = distribution_util.pick_vector( - math_ops.equal(self._shaper.event_ndims, 0), - [1], array_ops.shape(x))[-1] + event_size = array_ops.shape(x)[-1] event_size = math_ops.cast(event_size, dtype=self._scale.dtype) return math_ops.log(math_ops.abs(self._scale)) * event_size return self.scale.log_abs_determinant() diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py b/tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py new file mode 100644 index 0000000000..8adaa54c84 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/affine_scalar.py @@ -0,0 +1,138 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Affine bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import bijector + + +__all__ = [ + "AffineScalar", +] + + +class AffineScalar(bijector.Bijector): + """Compute `Y = g(X; shift, scale) = scale * X + shift`. + + Examples: + + ```python + # Y = X + b = AffineScalar() + + # Y = X + shift + b = AffineScalar(shift=[1., 2, 3]) + + # Y = 2 * X + shift + b = AffineScalar( + shift=[1., 2, 3], + scale=2.) + ``` + + """ + + def __init__(self, + shift=None, + scale=None, + validate_args=False, + name="affine_scalar"): + """Instantiates the `AffineScalar` bijector. + + This `Bijector` is initialized with `shift` `Tensor` and `scale` arguments, + giving the forward operation: + + ```none + Y = g(X) = scale * X + shift + ``` + + if `scale` is not specified, then the bijector has the semantics of + `scale = 1.`. Similarly, if `shift` is not specified, then the bijector + has the semantics of `shift = 0.`. + + Args: + shift: Floating-point `Tensor`. If this is set to `None`, no shift is + applied. + scale: Floating-point `Tensor`. If this is set to `None`, no scale is + applied. + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. 
+ """ + self._graph_parents = [] + self._name = name + self._validate_args = validate_args + + with self._name_scope("init", values=[scale, shift]): + self._shift = shift + self._scale = scale + + if self._shift is not None: + self._shift = ops.convert_to_tensor(shift, name="shift") + + if self._scale is not None: + self._scale = ops.convert_to_tensor(self._scale, name="scale") + if validate_args: + self._scale = control_flow_ops.with_dependencies( + [check_ops.assert_none_equal( + self._scale, + array_ops.zeros([], dtype=self._scale.dtype))], + self._scale) + + super(AffineScalar, self).__init__( + event_ndims=0, + is_constant_jacobian=True, + validate_args=validate_args, + name=name) + + @property + def shift(self): + """The `shift` `Tensor` in `Y = scale * X + shift`.""" + return self._shift + + @property + def scale(self): + """The `scale` `Tensor` in `Y = scale * X + shift`.""" + return self._scale + + def _forward(self, x): + y = array_ops.identity(x) + if self.scale is not None: + y *= self.scale + if self.shift is not None: + y += self.shift + return y + + def _inverse(self, y): + x = array_ops.identity(y) + if self.shift is not None: + x -= self.shift + if self.scale is not None: + x /= self.scale + return x + + def _forward_log_det_jacobian(self, x): + log_det_jacobian = array_ops.zeros_like(x) + if self.scale is None: + return log_det_jacobian + log_det_jacobian += math_ops.log(math_ops.abs(self.scale)) + return log_det_jacobian diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py index cbd60f92a6..43208ff088 100644 --- a/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py +++ b/tensorflow/contrib/distributions/python/ops/bijectors/cholesky_outer_product.py @@ -20,8 +20,6 @@ from __future__ import print_function import numpy as np -from tensorflow.python.framework import ops -from
tensorflow.python.framework import tensor_util from tensorflow.python.ops import array_ops from tensorflow.python.ops import check_ops from tensorflow.python.ops import control_flow_ops @@ -39,8 +37,6 @@ __all__ = [ class CholeskyOuterProduct(bijector.Bijector): """Compute `g(X) = X @ X.T`; X is lower-triangular, positive-diagonal matrix. - `event_ndims` must be 0 or 2, i.e., scalar or matrix. - Note: the upper-triangular part of X is ignored (whether or not its zero). The surjectivity of g as a map from the set of n x n positive-diagonal @@ -64,46 +60,31 @@ class CholeskyOuterProduct(bijector.Bijector): Examples: ```python - bijector.CholeskyOuterProduct(event_ndims=2).forward(x=[[1., 0], [2, 1]]) + bijector.CholeskyOuterProduct().forward(x=[[1., 0], [2, 1]]) # Result: [[1., 2], [2, 5]], i.e., x @ x.T - bijector.CholeskyOuterProduct(event_ndims=2).inverse(y=[[1., 2], [2, 5]]) + bijector.CholeskyOuterProduct().inverse(y=[[1., 2], [2, 5]]) # Result: [[1., 0], [2, 1]], i.e., cholesky(y). ``` """ - def __init__(self, event_ndims=2, validate_args=False, - name="cholesky_outer_product"): + def __init__(self, validate_args=False, name="cholesky_outer_product"): """Instantiates the `CholeskyOuterProduct` bijector. Args: - event_ndims: `constant` `int32` scalar `Tensor` indicating the number of - dimensions associated with a particular draw from the distribution. Must - be 0 or 2. validate_args: Python `bool` indicating whether arguments should be checked for correctness. name: Python `str` name given to ops managed by this object. - - Raises: - ValueError: if event_ndims is neither 0 or 2. 
""" self._graph_parents = [] self._name = name - with self._name_scope("init", values=[event_ndims]): - event_ndims = ops.convert_to_tensor(event_ndims, name="event_ndims") - event_ndims = tensor_util.constant_value(event_ndims) - if event_ndims is None or event_ndims not in [0, 2]: - raise ValueError("`event_ndims` must be a TF constant which is 0 or 2") - self._static_event_ndims = event_ndims super(CholeskyOuterProduct, self).__init__( - event_ndims=event_ndims, + event_ndims=2, validate_args=validate_args, name=name) def _forward(self, x): - if self._static_event_ndims == 0: - return math_ops.square(x) if self.validate_args: is_matrix = check_ops.assert_rank_at_least(x, 2) shape = array_ops.shape(x) @@ -114,11 +95,7 @@ class CholeskyOuterProduct(bijector.Bijector): return math_ops.matmul(x, x, adjoint_b=True) def _inverse(self, y): - return (math_ops.sqrt(y) if self._static_event_ndims == 0 - else linalg_ops.cholesky(y)) - - def _inverse_log_det_jacobian(self, y): - return -self._forward_log_det_jacobian(x=self._inverse(y)) + return linalg_ops.cholesky(y) def _forward_log_det_jacobian(self, x): # Let Y be a symmetric, positive definite matrix and write: @@ -161,13 +138,6 @@ class CholeskyOuterProduct(bijector.Bijector): # Since there is a 2 X[j,j] term for every lower-triangular element of X we # conclude: # |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}. - if self._static_event_ndims == 0: - if self.validate_args: - is_positive = check_ops.assert_positive( - x, message="All elements must be positive.") - x = control_flow_ops.with_dependencies([is_positive], x) - return np.log(2.) + math_ops.log(x) - diag = array_ops.matrix_diag_part(x) # We now ensure diag is columnar. 
Eg, if `diag = [1, 2, 3]` then the output diff --git a/tensorflow/contrib/distributions/python/ops/bijectors/square.py b/tensorflow/contrib/distributions/python/ops/bijectors/square.py new file mode 100644 index 0000000000..2831a92df8 --- /dev/null +++ b/tensorflow/contrib/distributions/python/ops/bijectors/square.py @@ -0,0 +1,84 @@ +# Copyright 2016 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Square bijector.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import control_flow_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops.distributions import bijector + + +__all__ = [ + "Square", +] + + +class Square(bijector.Bijector): + """Compute `g(X) = X^2`; X is a positive real number. + + g is a bijection between the non-negative real numbers (R_+) and the + non-negative real numbers. + + Examples: + + ```python + bijector.Square().forward(x=[[1., 0], [2, 1]]) + # Result: [[1., 0], [4, 1]], i.e., x^2 + + bijector.Square().inverse(y=[[1., 4], [9, 1]]) + # Result: [[1., 2], [3, 1]], i.e., sqrt(y). + ``` + + """ + + def __init__(self, validate_args=False, name="square"): + """Instantiates the `Square` bijector. 
+ + Args: + validate_args: Python `bool` indicating whether arguments should be + checked for correctness. + name: Python `str` name given to ops managed by this object. + """ + self._name = name + super(Square, self).__init__( + event_ndims=0, + validate_args=validate_args, + name=name) + + def _forward(self, x): + x = self._maybe_assert_valid(x) + return math_ops.square(x) + + def _inverse(self, y): + y = self._maybe_assert_valid(y) + return math_ops.sqrt(y) + + def _forward_log_det_jacobian(self, x): + x = self._maybe_assert_valid(x) + return np.log(2.) + math_ops.log(x) + + def _maybe_assert_valid(self, t): + if not self.validate_args: + return t + is_valid = check_ops.assert_non_negative( + t, message="All elements must be non-negative.") + return control_flow_ops.with_dependencies([is_valid], t) + diff --git a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py index c4b8f055b7..0d8a192691 100644 --- a/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py +++ b/tensorflow/contrib/distributions/python/ops/sinh_arcsinh.py @@ -174,13 +174,12 @@ class SinhArcsinh(transformed_distribution.TransformedDistribution): skewness=skewness.dtype.as_numpy_dtype(0.), tailweight=tailweight, event_ndims=0) - # Make the Affine bijector, Z --> loc + scale * Z (2 / F_0(2)) + # Make the AffineScalar bijector, Z --> loc + scale * Z (2 / F_0(2)) c = 2 * scale / f_noskew.forward(ops.convert_to_tensor(2, dtype=dtype)) - affine = bijectors.Affine( + affine = bijectors.AffineScalar( shift=loc, - scale_identity_multiplier=c, - validate_args=validate_args, - event_ndims=0) + scale=c, + validate_args=validate_args) bijector = bijectors.Chain([affine, f]) diff --git a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py index e1ccf11645..003c66b941 100644 --- 
a/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py +++ b/tensorflow/contrib/distributions/python/ops/vector_sinh_arcsinh_diag.py @@ -227,7 +227,7 @@ class VectorSinhArcsinhDiag(transformed_distribution.TransformedDistribution): c = 2 * scale_diag_part / f_noskew.forward( ops.convert_to_tensor(2, dtype=dtype)) affine = bijectors.Affine( - shift=loc, scale_diag=c, validate_args=validate_args, event_ndims=1) + shift=loc, scale_diag=c, validate_args=validate_args) bijector = bijectors.Chain([affine, f]) -- GitLab From 097734e280eaee9a78f1cc0f1e0a95265a0cbe99 Mon Sep 17 00:00:00 2001 From: Brett Koonce Date: Mon, 12 Mar 2018 17:02:33 -0700 Subject: [PATCH 764/884] contrib/quantize: minor spelling --- tensorflow/contrib/quantize/python/fold_batch_norms.py | 4 ++-- tensorflow/contrib/quantize/python/quant_ops.py | 4 ++-- tensorflow/contrib/quantize/python/quantize.py | 2 +- tensorflow/contrib/quantize/python/quantize_graph.py | 2 +- .../quantize/python/quantize_parameterized_test.py | 8 ++++---- tensorflow/contrib/quantize/python/quantize_test.py | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tensorflow/contrib/quantize/python/fold_batch_norms.py b/tensorflow/contrib/quantize/python/fold_batch_norms.py index b278265639..1afcbb8504 100644 --- a/tensorflow/contrib/quantize/python/fold_batch_norms.py +++ b/tensorflow/contrib/quantize/python/fold_batch_norms.py @@ -237,7 +237,7 @@ def _FindFusedBatchNorms(graph): # The batch variance used during forward and backward prop is biased, # i.e it is calculated as: V=sum(x(k)-mu)^2/N. For the moving average # calculation, the variance is corrected by the term N/N-1 (Bessel's - # correction). The variance tensor read from FuseBatchNorm has bessel's + # correction). The variance tensor read from FuseBatchNorm has Bessel's # correction applied, so we undo it here. 
scope, sep, _ = bn_op.name.rpartition('/') g = ops.get_default_graph() @@ -306,7 +306,7 @@ def _ComputeBatchNormCorrections(context, match, freeze_batch_norm_delay, Args: context: The scope under which we look for batch norm params - match: Object containg required batch norm tensors for correction + match: Object containing required batch norm tensors for correction computation. freeze_batch_norm_delay: Delay in steps at which computation switches from regular batch norm to frozen mean and variance. diff --git a/tensorflow/contrib/quantize/python/quant_ops.py b/tensorflow/contrib/quantize/python/quant_ops.py index 0a8e35080c..a4f7b1b221 100644 --- a/tensorflow/contrib/quantize/python/quant_ops.py +++ b/tensorflow/contrib/quantize/python/quant_ops.py @@ -282,8 +282,8 @@ def _FakeQuantWithMinMaxVars(inputs, min_var, max_var, per_channel, num_bits, Args: inputs: a tensor containing values to be quantized. min_var: a variable containing quantization range lower end(s). - max_var: a variable containing quantization range lupper end(s). - per_channel: a boolean specifying whether to use per-channel quantizatioh. + max_var: a variable containing quantization range upper end(s). + per_channel: a boolean specifying whether to use per-channel quantization. num_bits: Number of bits to use for quantization, must be between 2 and 8. narrow_range: Whether to use the narrow quantization range [1; 2^num_bits - 1] or wide range [0; 2^num_bits - 1]. diff --git a/tensorflow/contrib/quantize/python/quantize.py b/tensorflow/contrib/quantize/python/quantize.py index 0608ab9302..ec721afbc8 100644 --- a/tensorflow/contrib/quantize/python/quantize.py +++ b/tensorflow/contrib/quantize/python/quantize.py @@ -267,7 +267,7 @@ def _InsertQuantOp(context, """Inserts a quant op between a producer op and (multiple) consumer ops. Args: - context: Context w,here producer and consumer operations are nested. + context: Context where producer and consumer operations are nested. 
name: Name for the new quantization op within the context. producer: Producer operation of the pairs where quantization will be inserted. diff --git a/tensorflow/contrib/quantize/python/quantize_graph.py b/tensorflow/contrib/quantize/python/quantize_graph.py index 5a3a74cec4..5abdcd2475 100644 --- a/tensorflow/contrib/quantize/python/quantize_graph.py +++ b/tensorflow/contrib/quantize/python/quantize_graph.py @@ -158,7 +158,7 @@ def experimental_create_training_graph(input_graph=None, often fail. Args: - input_graph: The tf.Graph to be transformed,if None then defaults to the + input_graph: The tf.Graph to be transformed, if None then defaults to the default graph. weight_bits: Number of bits to use for quantizing weights. activation_bits: Number of bits to use for quantizing activations. diff --git a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py index 0624cc878b..db745aa562 100644 --- a/tensorflow/contrib/quantize/python/quantize_parameterized_test.py +++ b/tensorflow/contrib/quantize/python/quantize_parameterized_test.py @@ -419,7 +419,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -470,7 +470,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. + # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -526,7 +526,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): normalizer_params=self._BatchNormParams(fused_batch_norm), scope=scope) - # Manually add a bypass (optionaly) and an activation. 
+ # Manually add a bypass (optional) and an activation. if with_bypass: node = math_ops.add(inputs, node, name='test/Add') @@ -565,7 +565,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initialzes with a truncated normal variable. + An initializer that initializes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) diff --git a/tensorflow/contrib/quantize/python/quantize_test.py b/tensorflow/contrib/quantize/python/quantize_test.py index ef59475167..b2e5707a6d 100644 --- a/tensorflow/contrib/quantize/python/quantize_test.py +++ b/tensorflow/contrib/quantize/python/quantize_test.py @@ -144,7 +144,7 @@ class QuantizeTest(test_util.TensorFlowTestCase): stddev: Standard deviation of normal variable. Returns: - An initialized that initialzes with a truncated normal variable. + An initializer that initializes with a truncated normal variable. """ return init_ops.truncated_normal_initializer(stddev=stddev) -- GitLab From 3b47d5f1e2048ad3721a946c054d7025d9f37a87 Mon Sep 17 00:00:00 2001 From: Skye Wanderman-Milne Date: Mon, 12 Mar 2018 17:12:32 -0700 Subject: [PATCH 765/884] Don't evaluate control flow in EvaluateConstantTensor. ExtractConstantSubgraph doesn't copy control edges, which are sometimes necessary to correctly evaluate conds (at the very least). Avoid evaluating conds at all to address this.
PiperOrigin-RevId: 188803649 --- .../core/common_runtime/eval_const_tensor.cc | 19 +++++++++++-------- .../python/framework/smart_cond_test.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/tensorflow/core/common_runtime/eval_const_tensor.cc b/tensorflow/core/common_runtime/eval_const_tensor.cc index 6370bb5028..c1542f1f57 100644 --- a/tensorflow/core/common_runtime/eval_const_tensor.cc +++ b/tensorflow/core/common_runtime/eval_const_tensor.cc @@ -128,12 +128,16 @@ Status ExtractConstantSubgraph( return Status::OK(); } + if (IsMerge(&target_node)) { + return Status::OK(); + } + if (target_node.type_string() == "PlaceholderWithDefault") { return Status::OK(); } - // TODO(skyewm): more of the filtering applied in input nodes below should be - // applied to target_node here + // TODO(skyewm): should more of the filtering applied in input nodes below be + // applied to target_node here? // Identify the possibly constant subgraph by recursively iterating backwards // through the inputs to 'target_node' until we either 1) find an already @@ -153,11 +157,8 @@ Status ExtractConstantSubgraph( // Add the target node's inputs to seed the recursion. std::deque edges_to_visit; for (const Edge* e : target_node.in_edges()) { - // TODO(vrv): What do we do about control edges? Based on our - // definition of a constant graph, we should be free to ignore - // control edges since the order in which a constant graph is - // executed should be the same regardless of when nodes run: we - // should only need to recurse down data edges. + // TODO(skyewm): control edges will be meaningful if/when we handle control + // flow (e.g. constants in cond branches are triggered via control edges). if (e->IsControlEdge()) continue; edges_to_visit.push_back(e); } @@ -177,7 +178,9 @@ Status ExtractConstantSubgraph( } // During construction or import from GraphConstructor, back edges may not - // be filled in. Don't constant fold through merges at all for now. 
+ // be filled in. In addition, control flow constructs may depend on control + // edges which aren't handled by this method. Don't constant fold through + // merges at all for now. if (IsMerge(current_node)) { *is_constant_graph = false; return Status::OK(); diff --git a/tensorflow/python/framework/smart_cond_test.py b/tensorflow/python/framework/smart_cond_test.py index 582ce81e7a..1170a41c99 100644 --- a/tensorflow/python/framework/smart_cond_test.py +++ b/tensorflow/python/framework/smart_cond_test.py @@ -24,6 +24,7 @@ from tensorflow.python.framework import ops from tensorflow.python.framework import smart_cond from tensorflow.python.framework import test_util from tensorflow.python.ops import array_ops +from tensorflow.python.ops import control_flow_ops from tensorflow.python.ops import math_ops from tensorflow.python.platform import googletest @@ -144,5 +145,22 @@ class SmartCaseTest(test_util.TensorFlowTestCase): self.assertEqual(sess.run(z, feed_dict={x: 0}), 3) +@test_util.with_c_api +class SmartConstantValueTest(test_util.TensorFlowTestCase): + + # TODO(skyewm): this is essentially a regression test for + # TF_TryEvaluateConstant, and is not really a valid smart_constant_value test + # (smart_constant_value is only supposed to return bools). Move the + # TF_TryEvaluateConstant call to tensor_util.constant_value and make this a + # constant_value test instead. + def testCond(self): + with ops.Graph().as_default(): + pred = array_ops.placeholder_with_default(True, shape=()) + x = control_flow_ops.cond(pred, + lambda: constant_op.constant(1), + lambda: constant_op.constant(2)) + self.assertIsNone(smart_cond.smart_constant_value(x)) + + if __name__ == "__main__": googletest.main() -- GitLab From d9cbe36d1b5b661475d2a3d11384cd0a83493a67 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 17:13:09 -0700 Subject: [PATCH 766/884] [XLA:Tools] Make hlo_runner understand --xla_hlo_profile. 
PiperOrigin-RevId: 188803724 --- tensorflow/compiler/xla/service/hlo_module_config.cc | 2 +- tensorflow/compiler/xla/service/hlo_module_config.h | 10 ++++------ tensorflow/compiler/xla/service/hlo_runner.cc | 4 ++-- tensorflow/compiler/xla/service/service.cc | 2 -- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/tensorflow/compiler/xla/service/hlo_module_config.cc b/tensorflow/compiler/xla/service/hlo_module_config.cc index 822e2f1f53..4205b0402c 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.cc +++ b/tensorflow/compiler/xla/service/hlo_module_config.cc @@ -40,7 +40,7 @@ void HloModuleConfig::SetDefaultComputationLayout( string HloModuleConfig::compilation_cache_key() const { string key = - tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled_); + tensorflow::strings::StrCat("profiling=", hlo_profiling_enabled()); StrAppend(&key, "::("); std::vector params; for (const ShapeLayout& param_layout : diff --git a/tensorflow/compiler/xla/service/hlo_module_config.h b/tensorflow/compiler/xla/service/hlo_module_config.h index d3c1fae592..586a03d412 100644 --- a/tensorflow/compiler/xla/service/hlo_module_config.h +++ b/tensorflow/compiler/xla/service/hlo_module_config.h @@ -63,9 +63,10 @@ class HloModuleConfig { return &(*entry_computation_layout_); } - // Sets/returns whether to enable HLO-level profiling. - bool hlo_profiling_enabled() const { return hlo_profiling_enabled_; } - void enable_hlo_profiling(bool enabled) { hlo_profiling_enabled_ = enabled; } + // Returns whether to enable HLO-level profiling. + bool hlo_profiling_enabled() const { + return debug_options_.xla_hlo_profile(); + } // Sets/returns whether this is a "host module". Host modules are used to // record the data- and control-flow dependencies of host side computation @@ -110,9 +111,6 @@ class HloModuleConfig { tensorflow::gtl::optional entry_computation_layout_; - // Whether to enable HLO-level profiling. 
- bool hlo_profiling_enabled_ = false; - // Whether this is a 'host module'. bool is_host_module_ = false; diff --git a/tensorflow/compiler/xla/service/hlo_runner.cc b/tensorflow/compiler/xla/service/hlo_runner.cc index d65befaf84..e5b1c2efa3 100644 --- a/tensorflow/compiler/xla/service/hlo_runner.cc +++ b/tensorflow/compiler/xla/service/hlo_runner.cc @@ -158,8 +158,8 @@ StatusOr> HloRunner::Execute( TF_ASSIGN_OR_RETURN( std::unique_ptr result, - executable->ExecuteOnStream(&service_run_options, argument_buffer_ptrs, - /*hlo_execution_profile=*/nullptr)); + executable->ExecuteOnStreamWrapper( + &service_run_options, /*profile=*/nullptr, argument_buffer_ptrs)); // Create a ScopedShapedBuffer of the result to manage deallocation. This will // deallocate all the device memory when it goes out of scope. diff --git a/tensorflow/compiler/xla/service/service.cc b/tensorflow/compiler/xla/service/service.cc index 8edd457281..0becc9d8f8 100644 --- a/tensorflow/compiler/xla/service/service.cc +++ b/tensorflow/compiler/xla/service/service.cc @@ -314,8 +314,6 @@ StatusOr> Service::CreateModuleConfig( if (execution_options != nullptr) { config->set_seed(execution_options->seed()); config->set_debug_options(execution_options->debug_options()); - config->enable_hlo_profiling( - execution_options->debug_options().xla_hlo_profile()); } else { config->set_debug_options(legacy_flags::GetDebugOptionsFromFlags()); } -- GitLab From a2643a983694a91ef0027650bc0ce28f2f760067 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 17:48:57 -0700 Subject: [PATCH 767/884] Make tf.add_to_collections visible. 
PiperOrigin-RevId: 188807786 --- tensorflow/python/framework/framework_lib.py | 1 + tensorflow/python/framework/ops.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tensorflow/python/framework/framework_lib.py b/tensorflow/python/framework/framework_lib.py index 3172f3c2c3..4bb030cb89 100644 --- a/tensorflow/python/framework/framework_lib.py +++ b/tensorflow/python/framework/framework_lib.py @@ -48,6 +48,7 @@ ## Graph collections @@add_to_collection +@@add_to_collections @@get_collection @@get_collection_ref @@GraphKeys diff --git a/tensorflow/python/framework/ops.py b/tensorflow/python/framework/ops.py index f5dde3a358..6174d32237 100644 --- a/tensorflow/python/framework/ops.py +++ b/tensorflow/python/framework/ops.py @@ -5602,7 +5602,7 @@ def add_to_collection(name, value): """ get_default_graph().add_to_collection(name, value) - +@tf_export("add_to_collections") def add_to_collections(names, value): """Wrapper for `Graph.add_to_collections()` using the default graph. -- GitLab From f5efe97603855c517795e3fe9fc6364b59502d8a Mon Sep 17 00:00:00 2001 From: Max Galkin Date: Mon, 12 Mar 2018 18:35:15 -0700 Subject: [PATCH 768/884] Demystify MaterializeShapes a bit. PiperOrigin-RevId: 188812445 --- .../grappler/optimizers/constant_folding.cc | 230 ++++++++++-------- 1 file changed, 123 insertions(+), 107 deletions(-) diff --git a/tensorflow/core/grappler/optimizers/constant_folding.cc b/tensorflow/core/grappler/optimizers/constant_folding.cc index 4c9431deac..a4d8376667 100644 --- a/tensorflow/core/grappler/optimizers/constant_folding.cc +++ b/tensorflow/core/grappler/optimizers/constant_folding.cc @@ -244,44 +244,41 @@ string ConstantFolding::AddControlDependency(const string& input_name, } } -Status ConvertShapeToConstant(const string& op, const DataType& type, - const PartialTensorShape& shp, Tensor* value) { +// Puts the given value into the tensor at the given "flat" index. 
+static Status PutValueIntoTensor(const int64 value, const DataType& type, + const int index, Tensor* tensor) { + if (type == DT_INT32) { + if (value >= INT_MAX) { + return Status(error::INVALID_ARGUMENT, "int32 overflow"); + } + tensor->flat()(index) = static_cast(value); + } else { + tensor->flat()(index) = value; + } + return Status::OK(); +} + +// Writes the given tensor shape into the given tensor. +// Op is assumed to be Shape, ShapeN, Size or Rank. +static Status ConvertShapeToConstant(const string& op, const DataType& type, + const PartialTensorShape& shp, + Tensor* tensor) { if (op == "Shape" || op == "ShapeN") { - *value = Tensor(type, TensorShape({shp.dims()})); + *tensor = Tensor(type, TensorShape({shp.dims()})); for (int i = 0; i < shp.dims(); ++i) { - if (type == DT_INT32) { - if (shp.dim_size(i) >= INT_MAX) { - return Status(error::INVALID_ARGUMENT, "Invalid dimension size"); - } - value->flat()(i) = shp.dim_size(i); - } else { - value->flat()(i) = shp.dim_size(i); - } + TF_RETURN_IF_ERROR(PutValueIntoTensor(shp.dim_size(i), type, i, tensor)); } } else if (op == "Size") { int64 size = 1; for (int i = 0; i < shp.dims(); ++i) { size *= shp.dim_size(i); } - *value = Tensor(type, TensorShape({})); - if (type == DT_INT32) { - if (size >= INT_MAX) { - return Status(error::INVALID_ARGUMENT, "Invalid dimension size"); - } - value->flat()(0) = size; - } else { - value->flat()(0) = size; - } + *tensor = Tensor(type, TensorShape({})); + TF_RETURN_IF_ERROR(PutValueIntoTensor(size, type, 0, tensor)); } else { - *value = Tensor(type, TensorShape({})); - if (type == DT_INT32) { - if (shp.dims() >= INT_MAX) { - return Status(error::INVALID_ARGUMENT, "Invalid dimension size"); - } - value->flat()(0) = shp.dims(); - } else { - value->flat()(0) = shp.dims(); - } + CHECK_EQ(op, "Rank"); + *tensor = Tensor(type, TensorShape({})); + TF_RETURN_IF_ERROR(PutValueIntoTensor(shp.dims(), type, 0, tensor)); } return Status::OK(); } @@ -306,13 +303,14 @@ bool 
ConstantFolding::IsReallyConstant(const NodeDef& node) const { return feed_nodes_.find(node.name()) == feed_nodes_.end(); } +// Materialize the shapes using constants whenever possible. Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { - // We may add some nodes to the graph to encode control dependencies: there is - // no need to process these, so only iterate over the nodes of the input - // graph. + // We may add some nodes to the graph to encode control dependencies and hold + // the materialized shapes: there is no need to process these added nodes, so + // only iterate over the nodes of the input graph. const int node_count = graph_->node_size(); - for (int i = 0; i < node_count; ++i) { - NodeDef* node = graph_->mutable_node(i); + for (int node_idx = 0; node_idx < node_count; ++node_idx) { + NodeDef* node = graph_->mutable_node(node_idx); const string op = node->op(); if (op != "Shape" && op != "Size" && op != "Rank" && op != "ShapeN") { continue; @@ -325,91 +323,109 @@ Status ConstantFolding::MaterializeShapes(const GraphProperties& properties) { if (input.empty() || output.empty()) { continue; } + if (op == "Shape" || op == "Size" || op == "Rank") { CHECK_EQ(1, output.size()); CHECK_EQ(1, input.size()); + + const DataType type = output[0].dtype(); + CHECK(type == DT_INT32 || type == DT_INT64); + const PartialTensorShape shape(input[0].shape()); + + if ((op != "Rank" && !shape.IsFullyDefined()) || + (op == "Rank" && shape.unknown_rank())) { + continue; + } + + Tensor constant_value(type); + if (!ConvertShapeToConstant(op, type, shape, &constant_value).ok()) { + continue; + } + + // Repurpose the existing node to be the constant. + // Device placement is preserved. 
+ node->set_op("Const"); + node->clear_attr(); + (*node->mutable_attr())["dtype"].set_type(type); + constant_value.AsProtoTensorContent( + (*node->mutable_attr())["value"].mutable_tensor()); + + // Turn the data input into a control dependency: this is needed to + // ensure that the constant value will only be run in the + // cases where the shape/rank/size would have been run in + // the original graph. + string ctrl_dep = + AddControlDependency(node->input(0), graph_, node_map_.get()); + node->set_input(0, ctrl_dep); + node_map_->AddOutput(NodeName(ctrl_dep), node->name()); + + // Done with the Shape/Size/Rank node, move to the next node. + continue; } - CHECK_EQ(input.size(), output.size()); - for (int j = 0; j < output.size(); ++j) { - const DataType type = output[j].dtype(); + // Handle ShapeN materialization case. + // It's possible that not all input tensors have known shapes. + CHECK_EQ(op, "ShapeN"); + CHECK_EQ(input.size(), output.size()); + const NodeDef* const shape_n_node = node; + for (int port_idx = 0; port_idx < output.size(); ++port_idx) { + const DataType type = output[port_idx].dtype(); CHECK(type == DT_INT32 || type == DT_INT64); - const TensorShapeProto shape = input[j].shape(); - // Materialize the shapes using constants whenever possible. - PartialTensorShape shp(shape); - if (shp.IsFullyDefined() || (!shp.unknown_rank() && op == "Rank")) { - Tensor value(type); - auto status = ConvertShapeToConstant(op, type, shp, &value); - if (!status.ok()) { - continue; - } - // We rewrite the existing node for the first const output and - // create new nodes for the remaining const outputs (Note that ShapeN - // could have multiple outputs). - if (op == "Shape" || op == "Size" || op == "Rank") { - // Replace the node with the corresponding constant. 
- node->set_op("Const"); - node->clear_attr(); - (*node->mutable_attr())["dtype"].set_type(type); - value.AsProtoTensorContent( - (*node->mutable_attr())["value"].mutable_tensor()); - - // Turn the data input into a control dependency: this is needed to - // ensure that the constant value will only be run in the - // cases where the shape/rank/size would have been run in - // the original graph. Additional inputs are extra control - string ctrl_dep = - AddControlDependency(node->input(0), graph_, node_map_.get()); - node->set_input(0, ctrl_dep); - node_map_->AddOutput(NodeName(ctrl_dep), node->name()); - } else { - auto outputs = node_map_->GetOutputs(node->name()); - for (NodeDef* output : outputs) { - for (int k = 0; k < output->input_size(); ++k) { - int port; - string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node->name() && port == j) { - // Create a const node as ShapeN's output if not already. - const string const_name = - OptimizedNodeName(*node, strings::StrCat("-matshapes-", j)); - if (node_map_->GetNode(const_name) == nullptr) { - NodeDef* added_node = graph_->add_node(); - added_node->set_name(const_name); - added_node->set_op("Const"); - added_node->set_device(node->device()); - node_map_->AddNode(added_node->name(), added_node); - (*added_node->mutable_attr())["dtype"].set_type(type); - value.AsProtoTensorContent( - (*added_node->mutable_attr())["value"].mutable_tensor()); - // We add a control dependency to the original ShapeN node, - // so that the node will only be run if all inputs of the - // original ShapeN node are run. 
- string ctrl_dep = AddControlDependency(node->name(), graph_, - node_map_.get()); - *added_node->add_input() = ctrl_dep; - node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); - } - *output->mutable_input(k) = const_name; - node_map_->AddOutput(const_name, output->name()); - } - } - bool remove_output = true; - for (int k = 0; k < output->input_size(); ++k) { - int port; - string node_name = ParseNodeName(output->input(k), &port); - if (node_name == node->name()) { - remove_output = false; - break; - } - } - if (remove_output) { - node_map_->RemoveOutput(node->name(), output->name()); + const PartialTensorShape shape(input[port_idx].shape()); + if (!shape.IsFullyDefined()) { + continue; + } + Tensor constant_value(type); + auto status = ConvertShapeToConstant(op, type, shape, &constant_value); + if (!status.ok()) { + continue; + } + + // Find all nodes consuming this shape and connect them through the new + // constant node instead. + auto outputs = node_map_->GetOutputs(shape_n_node->name()); + for (NodeDef* output : outputs) { + // Track whether there are any direct edges left between shape_n_node + // and this output node after the transformation. + bool direct_edges_exist = false; + for (int k = 0; k < output->input_size(); ++k) { + int port; + const string node_name = ParseNodeName(output->input(k), &port); + if (node_name == shape_n_node->name() && port == port_idx) { + // Create a const node as ShapeN's output if not already. 
+ const string const_name = OptimizedNodeName( + *shape_n_node, strings::StrCat("-matshapes-", port_idx)); + if (node_map_->GetNode(const_name) == nullptr) { + NodeDef* added_node = graph_->add_node(); + added_node->set_name(const_name); + added_node->set_op("Const"); + added_node->set_device(shape_n_node->device()); + node_map_->AddNode(added_node->name(), added_node); + (*added_node->mutable_attr())["dtype"].set_type(type); + constant_value.AsProtoTensorContent( + (*added_node->mutable_attr())["value"].mutable_tensor()); + // We add a control dependency to the original ShapeN node, + // so that the node will only be run if all inputs of the + // original ShapeN node are run. + string ctrl_dep = AddControlDependency(shape_n_node->name(), + graph_, node_map_.get()); + *added_node->add_input() = ctrl_dep; + node_map_->AddOutput(NodeName(ctrl_dep), added_node->name()); } + *output->mutable_input(k) = const_name; + node_map_->AddOutput(const_name, output->name()); } + if (node_name == shape_n_node->name() && port != port_idx) { + direct_edges_exist = true; + } + } + if (!direct_edges_exist) { + node_map_->RemoveOutput(node->name(), output->name()); } } } } + return Status::OK(); } -- GitLab From 2bda52d485c9715dcd17f49526cea7890e091cb8 Mon Sep 17 00:00:00 2001 From: "A. Unique TensorFlower" Date: Mon, 12 Mar 2018 19:09:28 -0700 Subject: [PATCH 769/884] Remove integration_tests directory and associated files. 
PiperOrigin-RevId: 188815493 --- tensorflow/BUILD | 1 - .../integration_tests/gcs_smoke_test/BUILD | 67 ----- .../gcs_smoke_test/gcs_smoke.py | 253 ------------------ .../integration_tests/gcs_smoke_test/setup.sh | 20 -- .../gcs_smoke_test/teardown.sh | 26 -- .../gcs_smoke_test/test_wrapper.sh | 21 -- tensorflow/workspace.bzl | 10 - 7 files changed, 398 deletions(-) delete mode 100755 tensorflow/tools/integration_tests/gcs_smoke_test/BUILD delete mode 100755 tensorflow/tools/integration_tests/gcs_smoke_test/gcs_smoke.py delete mode 100755 tensorflow/tools/integration_tests/gcs_smoke_test/setup.sh delete mode 100755 tensorflow/tools/integration_tests/gcs_smoke_test/teardown.sh delete mode 100755 tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh diff --git a/tensorflow/BUILD b/tensorflow/BUILD index 4b2facd6b3..a4e7602bea 100644 --- a/tensorflow/BUILD +++ b/tensorflow/BUILD @@ -674,7 +674,6 @@ filegroup( "//tensorflow/tools/docs:all_files", "//tensorflow/tools/git:all_files", "//tensorflow/tools/graph_transforms:all_files", - "//tensorflow/tools/integration_tests/gcs_smoke_test:all_files", "//tensorflow/tools/mlpbtxt:all_files", "//tensorflow/tools/proto_text:all_files", "//tensorflow/tools/quantization:all_files", diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD b/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD deleted file mode 100755 index 0acc139df9..0000000000 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/BUILD +++ /dev/null @@ -1,67 +0,0 @@ -package(default_visibility = ["//visibility:public"]) - -load("@rbe_integration_test//skylark:integration_tests.bzl", "sut_component", "integration_test") -load("@rbe_integration_test//skylark:toolchains.bzl", "toolchain_container_images") - -sut_component( - name = "gcs", - docker_image = toolchain_container_images()["tensorflow"], - setups = [{ - "program": "setup.sh", - "args": [ - "gs://tensorflow-test-bucket/tf-gcs-test", - ], - "output_properties": ["gcs_path"], - 
"timeout_seconds": 100, - }], - teardowns = [{ - "program": "teardown.sh", - "args": ["{gcs_path}"], - "timeout_seconds": 100, - }], -) - -py_binary( - name = "gcs_smoke", - srcs = ["gcs_smoke.py"], -) - -sh_binary( - name = "test_wrapper", - srcs = ["test_wrapper.sh"], - data = [ - "gcs_smoke", - ], -) - -integration_test( - name = "gcs_smoke_test", - sut_deps = { - ":gcs": "gcs", - }, - tags = [ - "manual", - "notap", - ], - test = { - "program": ":test_wrapper", - "args": [ - "--gcs_bucket_url={gcs#gcs_path}", - "--num_examples=20", - ], - "timeout_seconds": 250, - }, - test_docker_image = toolchain_container_images()["tensorflow"], - test_type = "MultiMachine", -) - -filegroup( - name = "all_files", - srcs = glob( - ["**/*"], - exclude = [ - "**/METADATA", - "**/OWNERS", - ], - ), -) diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/gcs_smoke.py b/tensorflow/tools/integration_tests/gcs_smoke_test/gcs_smoke.py deleted file mode 100755 index 8438c2156c..0000000000 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/gcs_smoke.py +++ /dev/null @@ -1,253 +0,0 @@ -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Smoke test for reading records from GCS to TensorFlow.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys -import time - -import numpy as np -import tensorflow as tf -from tensorflow.core.example import example_pb2 -from tensorflow.python.lib.io import file_io - -flags = tf.app.flags -flags.DEFINE_string("gcs_bucket_url", "", - "The URL to the GCS bucket in which the temporary " - "tfrecord file is to be written and read, e.g., " - "gs://my-gcs-bucket/test-directory") -flags.DEFINE_integer("num_examples", 10, "Number of examples to generate") - -FLAGS = flags.FLAGS - - -def create_examples(num_examples, input_mean): - """Create ExampleProto's containing data.""" - ids = np.arange(num_examples).reshape([num_examples, 1]) - inputs = np.random.randn(num_examples, 1) + input_mean - target = inputs - input_mean - examples = [] - for row in range(num_examples): - ex = example_pb2.Example() - ex.features.feature["id"].bytes_list.value.append(str(ids[row, 0])) - ex.features.feature["target"].float_list.value.append(target[row, 0]) - ex.features.feature["inputs"].float_list.value.append(inputs[row, 0]) - examples.append(ex) - return examples - - -def create_dir_test(): - """Verifies file_io directory handling methods.""" - - # Test directory creation. - starttime_ms = int(round(time.time() * 1000)) - dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime_ms) - print("Creating dir %s" % dir_name) - file_io.create_dir(dir_name) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("Created directory in: %d milliseconds" % elapsed_ms) - - # Check that the directory exists. - dir_exists = file_io.is_directory(dir_name) - assert dir_exists - print("%s directory exists: %s" % (dir_name, dir_exists)) - - # Test recursive directory creation. 
- starttime_ms = int(round(time.time() * 1000)) - recursive_dir_name = "%s/%s/%s" % (dir_name, - "nested_dir1", - "nested_dir2") - print("Creating recursive dir %s" % recursive_dir_name) - file_io.recursive_create_dir(recursive_dir_name) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("Created directory recursively in: %d milliseconds" % elapsed_ms) - - # Check that the directory exists. - recursive_dir_exists = file_io.is_directory(recursive_dir_name) - assert recursive_dir_exists - print("%s directory exists: %s" % (recursive_dir_name, recursive_dir_exists)) - - # Create some contents in the just created directory and list the contents. - num_files = 10 - files_to_create = ["file_%d.txt" % n for n in range(num_files)] - for file_num in files_to_create: - file_name = "%s/%s" % (dir_name, file_num) - print("Creating file %s." % file_name) - file_io.write_string_to_file(file_name, "test file.") - - print("Listing directory %s." % dir_name) - starttime_ms = int(round(time.time() * 1000)) - directory_contents = file_io.list_directory(dir_name) - print(directory_contents) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("Listed directory %s in %s milliseconds" % (dir_name, elapsed_ms)) - assert set(directory_contents) == set(files_to_create + ["nested_dir1/"]) - - # Test directory renaming. 
- dir_to_rename = "%s/old_dir" % dir_name - new_dir_name = "%s/new_dir" % dir_name - file_io.create_dir(dir_to_rename) - assert file_io.is_directory(dir_to_rename) - assert not file_io.is_directory(new_dir_name) - - starttime_ms = int(round(time.time() * 1000)) - print("Will try renaming directory %s to %s" % (dir_to_rename, new_dir_name)) - file_io.rename(dir_to_rename, new_dir_name) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("Renamed directory %s to %s in %s milliseconds" % ( - dir_to_rename, new_dir_name, elapsed_ms)) - assert not file_io.is_directory(dir_to_rename) - assert file_io.is_directory(new_dir_name) - - # Test Delete directory recursively. - print("Deleting directory recursively %s." % dir_name) - starttime_ms = int(round(time.time() * 1000)) - file_io.delete_recursively(dir_name) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - dir_exists = file_io.is_directory(dir_name) - assert not dir_exists - print("Deleted directory recursively %s in %s milliseconds" % ( - dir_name, elapsed_ms)) - - -def create_object_test(): - """Verifies file_io's object manipulation methods .""" - starttime_ms = int(round(time.time() * 1000)) - dir_name = "%s/tf_gcs_test_%s" % (FLAGS.gcs_bucket_url, starttime_ms) - print("Creating dir %s." % dir_name) - file_io.create_dir(dir_name) - - num_files = 5 - # Create files of 2 different patterns in this directory. - files_pattern_1 = ["%s/test_file_%d.txt" % (dir_name, n) - for n in range(num_files)] - files_pattern_2 = ["%s/testfile%d.txt" % (dir_name, n) - for n in range(num_files)] - - starttime_ms = int(round(time.time() * 1000)) - files_to_create = files_pattern_1 + files_pattern_2 - for file_name in files_to_create: - print("Creating file %s." 
% file_name) - file_io.write_string_to_file(file_name, "test file creation.") - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("Created %d files in %s milliseconds" % - (len(files_to_create), elapsed_ms)) - - # Listing files of pattern1. - list_files_pattern = "%s/test_file*.txt" % dir_name - print("Getting files matching pattern %s." % list_files_pattern) - starttime_ms = int(round(time.time() * 1000)) - files_list = file_io.get_matching_files(list_files_pattern) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("Listed files in %s milliseconds" % elapsed_ms) - print(files_list) - assert set(files_list) == set(files_pattern_1) - - # Listing files of pattern2. - list_files_pattern = "%s/testfile*.txt" % dir_name - print("Getting files matching pattern %s." % list_files_pattern) - starttime_ms = int(round(time.time() * 1000)) - files_list = file_io.get_matching_files(list_files_pattern) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("Listed files in %s milliseconds" % elapsed_ms) - print(files_list) - assert set(files_list) == set(files_pattern_2) - - # Test renaming file. - file_to_rename = "%s/oldname.txt" % dir_name - file_new_name = "%s/newname.txt" % dir_name - file_io.write_string_to_file(file_to_rename, "test file.") - assert file_io.file_exists(file_to_rename) - assert not file_io.file_exists(file_new_name) - - print("Will try renaming file %s to %s" % (file_to_rename, file_new_name)) - starttime_ms = int(round(time.time() * 1000)) - file_io.rename(file_to_rename, file_new_name) - elapsed_ms = int(round(time.time() * 1000)) - starttime_ms - print("File %s renamed to %s in %s milliseconds" % ( - file_to_rename, file_new_name, elapsed_ms)) - assert not file_io.file_exists(file_to_rename) - assert file_io.file_exists(file_new_name) - - # Delete directory. - print("Deleting directory %s." % dir_name) - file_io.delete_recursively(dir_name) - - -def main(argv): - del argv # Unused. 
- # Sanity check on the GCS bucket URL. - if not FLAGS.gcs_bucket_url or not FLAGS.gcs_bucket_url.startswith("gs://"): - print("ERROR: Invalid GCS bucket URL: \"%s\"" % FLAGS.gcs_bucket_url) - sys.exit(1) - - # Verify that writing to the records file in GCS works. - print("\n=== Testing writing and reading of GCS record file... ===") - example_data = create_examples(FLAGS.num_examples, 5) - with tf.python_io.TFRecordWriter(FLAGS.gcs_bucket_url) as hf: - for e in example_data: - hf.write(e.SerializeToString()) - - print("Data written to: %s" % FLAGS.gcs_bucket_url) - - # Verify that reading from the tfrecord file works and that - # tf_record_iterator works. - record_iter = tf.python_io.tf_record_iterator(FLAGS.gcs_bucket_url) - read_count = 0 - for _ in record_iter: - read_count += 1 - print("Read %d records using tf_record_iterator" % read_count) - - if read_count != FLAGS.num_examples: - print("FAIL: The number of records read from tf_record_iterator (%d) " - "differs from the expected number (%d)" % (read_count, - FLAGS.num_examples)) - sys.exit(1) - - # Verify that running the read op in a session works. - print("\n=== Testing TFRecordReader.read op in a session... ===") - with tf.Graph().as_default() as _: - filename_queue = tf.train.string_input_producer([FLAGS.gcs_bucket_url], - num_epochs=1) - reader = tf.TFRecordReader() - _, serialized_example = reader.read(filename_queue) - - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - sess.run(tf.local_variables_initializer()) - tf.train.start_queue_runners() - index = 0 - for _ in range(FLAGS.num_examples): - print("Read record: %d" % index) - sess.run(serialized_example) - index += 1 - - # Reading one more record should trigger an exception. 
- try: - sess.run(serialized_example) - print("FAIL: Failed to catch the expected OutOfRangeError while " - "reading one more record than is available") - sys.exit(1) - except tf.errors.OutOfRangeError: - print("Successfully caught the expected OutOfRangeError while " - "reading one more record than is available") - - create_dir_test() - create_object_test() - -if __name__ == "__main__": - tf.app.run(main) diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/setup.sh b/tensorflow/tools/integration_tests/gcs_smoke_test/setup.sh deleted file mode 100755 index 6553ba5e30..0000000000 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/setup.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -GCS_NUMBER=$(cat /dev/urandom | tr -dc 'A-F0-9' | fold -w 8 | head -n 1) -GCS_PATH="$1"/"$GCS_NUMBER".tfrecord - -echo "gcs_path=$GCS_PATH" > "$_SETUP_OUTPUT" -touch "$_SETUP_DONE" diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/teardown.sh b/tensorflow/tools/integration_tests/gcs_smoke_test/teardown.sh deleted file mode 100755 index 852486d167..0000000000 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/teardown.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -GSUTIL_BIN="/var/gcloud/google-cloud-sdk/bin/gsutil" - -echo "Got teardown argument $1" - -if "${GSUTIL_BIN}" rm "$1" -then - echo "Cleaned up new tfrecord file in GCS: '$1'" -else - echo "FAIL: Unable to clean up new tfrecord file in GCS: '$1'" - exit 1 -fi diff --git a/tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh b/tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh deleted file mode 100755 index d4b6524a81..0000000000 --- a/tensorflow/tools/integration_tests/gcs_smoke_test/test_wrapper.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -# This is a python2 only test. -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# Test Tensorflow package installation. 
-/usr/local/bin/pip install --user tf-nightly - -# Test Tensorflow interaction with GCS. -python tensorflow/tools/integration_tests/gcs_smoke_test/gcs_smoke.py "$@" diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index e231ba8016..f9bd558b97 100644 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -696,16 +696,6 @@ def tf_workspace(path_prefix="", tf_repo_name=""): sha256 = "699b55a6916c687f4b7dc092dbbf5f64672cde0dc965f79717735ec4e5416556", ) - tf_http_archive( - name = "rbe_integration_test", - urls = [ - "http://mirror.bazel.build/github.com/google/rbe-integration-test/archive/78a6194c7dda200b9522cf07707e3bc695804d1e.tar.gz", - "https://github.com/google/rbe-integration-test/archive/78a6194c7dda200b9522cf07707e3bc695804d1e.tar.gz", - ], - sha256 = "66d93b3919a165d486c31f5290d312abe9fda2685242f812c110653c124e1db4", - strip_prefix = "rbe-integration-test-78a6194c7dda200b9522cf07707e3bc695804d1e", - ) - tf_http_archive( name = "arm_neon_2_x86_sse", sha256 = "c8d90aa4357f8079d427e87a6f4c493da1fa4140aee926c05902d7ec1533d9a5", -- GitLab From 7144571f2fc59c8705e4e3d7b922fa0ebf44f3fa Mon Sep 17 00:00:00 2001 From: Shanqing Cai Date: Mon, 12 Mar 2018 19:33:52 -0700 Subject: [PATCH 770/884] Merge changes from github. 
PiperOrigin-RevId: 188817194 --- README.md | 37 +- SECURITY.md | 6 +- configure | 3 +- configure.py | 76 +-- tensorflow/cc/gradients/nn_grad.cc | 64 +++ tensorflow/cc/gradients/nn_grad_test.cc | 44 +- tensorflow/cc/profiler/profiler.h | 6 +- .../cmake/tests/cuda/compatibility_test.cc | 4 +- .../sequential_feature_column.py | 325 ++++++++++++ .../sequential_feature_column_test.py | 471 +++++++++++++++++ .../gan/python/eval/python/summaries_test.py | 9 +- .../contrib/layers/python/layers/layers.py | 15 +- .../layers/python/layers/layers_test.py | 15 +- .../tflitecamerademo/ImageClassifier.java | 8 +- .../ImageClassifierFloatInception.java | 14 +- .../ImageClassifierQuantizedMobileNet.java | 9 +- .../internal/optimized/neon_tensor_utils.cc | 1 + .../contrib/lite/testing/generate_examples.py | 1 + .../rnn/python/kernel_tests/rnn_cell_test.py | 34 +- .../seq2seq/python/ops/beam_search_decoder.py | 2 +- .../slim/python/slim/data/parallel_reader.py | 2 +- .../tensor_forest/kernels/v4/grow_stats.h | 2 +- tensorflow/contrib/tensorrt/BUILD | 44 +- .../contrib/tensorrt/convert/convert_nodes.cc | 14 +- .../contrib/tensorrt/kernels/trt_calib_op.cc | 129 +++++ .../contrib/tensorrt/kernels/trt_calib_op.h | 52 ++ .../contrib/tensorrt/ops/trt_calib_op.cc | 37 ++ .../tensorrt/resources/trt_int8_calibrator.cc | 119 +++++ .../tensorrt/resources/trt_int8_calibrator.h | 65 +++ .../resources/trt_resource_manager.cc | 39 ++ .../tensorrt/resources/trt_resource_manager.h | 49 ++ .../tensorrt/resources/trt_resources.h | 95 ++++ .../timeseries/python/timeseries/BUILD | 2 + .../timeseries/python/timeseries/head.py | 58 ++- tensorflow/contrib/verbs/README.md | 2 +- .../verbs/patch_notes_verbs_with_0_copies.md | 2 +- tensorflow/contrib/verbs/rdma.cc | 1 + .../base_api/api_def_UniqueWithCountsV2.pbtxt | 85 +++ .../base_api/api_def_UnsortedSegmentMax.pbtxt | 13 +- .../base_api/api_def_UnsortedSegmentMin.pbtxt | 33 ++ .../api_def_UnsortedSegmentProd.pbtxt | 32 ++ 
.../python_api/api_def_UniqueWithCounts.pbtxt | 4 + .../api_def_UniqueWithCountsV2.pbtxt | 4 + .../core/common_runtime/gpu/gpu_device.h | 2 +- .../core/distributed_runtime/session_mgr.cc | 4 +- tensorflow/core/framework/numeric_types.h | 42 +- .../core/framework/variant_op_registry.h | 4 +- tensorflow/core/grappler/optimizers/BUILD | 6 + .../grappler/optimizers/loop_optimizer.cc | 381 +++++++++++++- .../core/grappler/optimizers/loop_optimizer.h | 26 + .../optimizers/loop_optimizer_test.cc | 489 +++++++++++++++++- tensorflow/core/kernels/BUILD | 15 +- tensorflow/core/kernels/cwise_op_maximum.cc | 4 +- .../core/kernels/mkl_fused_batch_norm_op.cc | 96 ++-- tensorflow/core/kernels/mkl_relu_op.cc | 20 +- tensorflow/core/kernels/reshape_op.cc | 1 - .../core/kernels/segment_reduction_ops.cc | 305 ++++++----- .../core/kernels/segment_reduction_ops.h | 117 +++-- .../kernels/segment_reduction_ops_gpu.cu.cc | 143 +++-- tensorflow/core/kernels/unique_op.cc | 10 + tensorflow/core/kernels/unravel_index_op.cc | 2 +- tensorflow/core/ops/array_ops.cc | 17 + tensorflow/core/ops/math_ops.cc | 20 + tensorflow/core/platform/s3/s3_file_system.cc | 1 + tensorflow/core/platform/windows/port.cc | 14 +- tensorflow/core/util/cuda_device_functions.h | 148 +++++- tensorflow/core/util/cuda_kernel_helper.h | 54 -- .../docs_src/get_started/checkpoints.md | 4 +- .../docs_src/get_started/custom_estimators.md | 2 +- .../performance/xla/operation_semantics.md | 20 +- .../docs_src/programmers_guide/saved_model.md | 60 +-- .../docs_src/programmers_guide/variables.md | 5 +- tensorflow/examples/speech_commands/train.py | 6 +- tensorflow/python/framework/test_util.py | 3 +- .../keras/_impl/keras/layers/lstm_test.py | 4 +- .../linalg/linear_operator_diag_test.py | 2 +- .../segment_reduction_ops_test.py | 165 +++--- .../python/kernel_tests/unique_op_test.py | 33 ++ tensorflow/python/ops/array_ops.py | 12 + tensorflow/python/ops/bitwise_ops_test.py | 6 +- tensorflow/python/ops/check_ops.py | 6 +- 
tensorflow/python/ops/confusion_matrix.py | 18 +- .../python/ops/distributions/special_math.py | 2 +- tensorflow/python/ops/hidden_ops.txt | 2 + tensorflow/python/ops/image_ops_impl.py | 2 - tensorflow/python/ops/image_ops_test.py | 2 +- .../python/ops/linalg/linear_operator_diag.py | 2 +- tensorflow/python/ops/losses/losses_impl.py | 2 +- tensorflow/python/ops/math_grad.py | 136 ++++- tensorflow/python/ops/math_ops.py | 118 +++++ tensorflow/python/ops/nn_impl.py | 2 +- tensorflow/python/tools/saved_model_cli.py | 68 ++- .../python/tools/saved_model_cli_test.py | 141 ++--- .../python/training/checkpoint_utils.py | 6 +- .../python/training/checkpoint_utils_test.py | 4 +- tensorflow/tools/api/golden/tensorflow.pbtxt | 12 + .../tools/ci_build/builds/with_the_same_user | 7 +- .../tools/ci_build/install/install_bazel.sh | 2 +- tensorflow/tools/docker/Dockerfile.devel | 2 +- tensorflow/tools/docker/Dockerfile.devel-gpu | 2 +- tensorflow/tools/graph_transforms/BUILD | 2 +- .../remove_control_dependencies.cc | 29 +- tensorflow/tools/lib_package/BUILD | 16 +- 103 files changed, 4051 insertions(+), 802 deletions(-) create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py create mode 100644 tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/kernels/trt_calib_op.h create mode 100644 tensorflow/contrib/tensorrt/ops/trt_calib_op.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h create mode 100644 tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc create mode 100644 tensorflow/contrib/tensorrt/resources/trt_resource_manager.h create mode 100644 tensorflow/contrib/tensorrt/resources/trt_resources.h create mode 100644 
tensorflow/core/api_def/base_api/api_def_UniqueWithCountsV2.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMin.pbtxt create mode 100644 tensorflow/core/api_def/base_api/api_def_UnsortedSegmentProd.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_UniqueWithCounts.pbtxt create mode 100644 tensorflow/core/api_def/python_api/api_def_UniqueWithCountsV2.pbtxt diff --git a/README.md b/README.md index 916e5200b2..ef5bdc66ef 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ ----------------- -| **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | -|-----------------|---------------------|------------------|-------------------|---------------| -| [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) | + +| **`Documentation`** | **`Linux CPU`** | **`Linux GPU`** | **`Mac OS CPU`** | **`Windows CPU`** | **`Android`** | +|-----------------|---------------------|------------------|-------------------|---------------|---------------| +| 
[![Documentation](https://img.shields.io/badge/api-reference-blue.svg)](https://www.tensorflow.org/api_docs/) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-cpu)](https://ci.tensorflow.org/job/tensorflow-master-cpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-linux-gpu)](https://ci.tensorflow.org/job/tensorflow-master-linux-gpu) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-mac)](https://ci.tensorflow.org/job/tensorflow-master-mac) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-win-cmake-py)](https://ci.tensorflow.org/job/tensorflow-master-win-cmake-py) | [![Build Status](https://ci.tensorflow.org/buildStatus/icon?job=tensorflow-master-android)](https://ci.tensorflow.org/job/tensorflow-master-android) [ ![Download](https://api.bintray.com/packages/google/tensorflow/tensorflow/images/download.svg) ](https://bintray.com/google/tensorflow/tensorflow/_latestVersion) **TensorFlow** is an open source software library for numerical computation using data flow graphs. The graph nodes represent mathematical operations, while @@ -21,20 +22,6 @@ organization for the purposes of conducting machine learning and deep neural networks research. The system is general enough to be applicable in a wide variety of other domains, as well. -**If you want to contribute to TensorFlow, be sure to review the [contribution -guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's -[code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to -uphold this code.** - -**We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for -tracking requests and bugs. 
So please see -[TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions -and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).** - -The TensorFlow project strives to abide by generally accepted best practices in open-source software development: - -[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) - ## Installation *See [Installing TensorFlow](https://www.tensorflow.org/get_started/os_setup.html) for instructions on how to install our release binaries or how to build from source.* @@ -75,6 +62,22 @@ $ python >>> sess.close() ``` +## Contribution guidelines + +**If you want to contribute to TensorFlow, be sure to review the [contribution +guidelines](CONTRIBUTING.md). This project adheres to TensorFlow's +[code of conduct](CODE_OF_CONDUCT.md). By participating, you are expected to +uphold this code.** + +**We use [GitHub issues](https://github.com/tensorflow/tensorflow/issues) for +tracking requests and bugs. 
So please see +[TensorFlow Discuss](https://groups.google.com/a/tensorflow.org/forum/#!forum/discuss) for general questions +and discussion, and please direct specific questions to [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow).** + +The TensorFlow project strives to abide by generally accepted best practices in open-source software development: + +[![CII Best Practices](https://bestpractices.coreinfrastructure.org/projects/1486/badge)](https://bestpractices.coreinfrastructure.org/projects/1486) + ## For more information * [TensorFlow Website](https://www.tensorflow.org) diff --git a/SECURITY.md b/SECURITY.md index 6ddac1f964..fea24b2739 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -233,7 +233,7 @@ v//Fw6ZeY+HmRDFdirjD7wXtIuER4vqCryIqR6Xe9X8oJXz9L/Jhslc= ### Known vulnerabilities -| Type | Versions affected | Reported by | Additional Information | -|------|:-----------------:|---------------------------------------| -| out of bounds read| <=1.4 | TenCent Blade Team | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | +| Type | Versions affected | Reported by | Additional Information | +|-------------------|:-----------------:|--------------------|-----------------------------| +| out of bounds read| <=1.4 | TenCent Blade Team | [issue report](https://github.com/tensorflow/tensorflow/issues/14959) | diff --git a/configure b/configure index 9c21d2b03a..66b66ba54e 100755 --- a/configure +++ b/configure @@ -8,7 +8,8 @@ if [ -z "$PYTHON_BIN_PATH" ]; then fi # Set all env variables -"$PYTHON_BIN_PATH" configure.py +CONFIGURE_DIR=$(dirname "$0") +"$PYTHON_BIN_PATH" "${CONFIGURE_DIR}/configure.py" "$@" echo "Configuration finished" diff --git a/configure.py b/configure.py index 9744f6ac81..97f46757ee 100644 --- a/configure.py +++ b/configure.py @@ -18,6 +18,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import argparse import errno import os import platform @@ 
-32,10 +33,6 @@ except ImportError: from distutils.spawn import find_executable as which # pylint: enable=g-import-not-at-top -_TF_BAZELRC = os.path.join(os.path.dirname(os.path.abspath(__file__)), - '.tf_configure.bazelrc') -_TF_WORKSPACE = os.path.join(os.path.dirname(os.path.abspath(__file__)), - 'WORKSPACE') _DEFAULT_CUDA_VERSION = '9.0' _DEFAULT_CUDNN_VERSION = '7' _DEFAULT_CUDA_COMPUTE_CAPABILITIES = '3.5,5.2' @@ -51,6 +48,11 @@ _SUPPORTED_ANDROID_NDK_VERSIONS = [10, 11, 12, 13, 14, 15] _DEFAULT_PROMPT_ASK_ATTEMPTS = 10 +_TF_WORKSPACE_ROOT = os.path.abspath(os.path.dirname(__file__)) +_TF_BAZELRC_FILENAME = '.tf_configure.bazelrc' +_TF_BAZELRC = os.path.join(_TF_WORKSPACE_ROOT, _TF_BAZELRC_FILENAME) +_TF_WORKSPACE = os.path.join(_TF_WORKSPACE_ROOT, 'WORKSPACE') + class UserInputError(Exception): pass @@ -119,22 +121,6 @@ def sed_in_place(filename, old, new): f.write(newdata) -def remove_line_with(filename, token): - """Remove lines that contain token from file. - - Args: - filename: string for filename. - token: string token to check if to remove a line from file or not. 
- """ - with open(filename, 'r') as f: - filedata = f.read() - - with open(filename, 'w') as f: - for line in filedata.strip().split('\n'): - if token not in line: - f.write(line + '\n') - - def write_to_bazelrc(line): with open(_TF_BAZELRC, 'a') as f: f.write(line + '\n') @@ -245,25 +231,30 @@ def setup_python(environ_cp): environ_cp['PYTHON_BIN_PATH'] = python_bin_path # Write tools/python_bin_path.sh - with open('tools/python_bin_path.sh', 'w') as f: + with open(os.path.join( + _TF_WORKSPACE_ROOT, 'tools', 'python_bin_path.sh'), 'w') as f: f.write('export PYTHON_BIN_PATH="%s"' % python_bin_path) -def reset_tf_configure_bazelrc(): +def reset_tf_configure_bazelrc(workspace_path): """Reset file that contains customized config settings.""" open(_TF_BAZELRC, 'w').close() - - home = os.path.expanduser('~') - if not os.path.exists('.bazelrc'): - if os.path.exists(os.path.join(home, '.bazelrc')): - with open('.bazelrc', 'a') as f: - f.write('import %s/.bazelrc\n' % home.replace('\\', '/')) + bazelrc_path = os.path.join(workspace_path, '.bazelrc') + + data = [] + if os.path.exists(bazelrc_path): + with open(bazelrc_path, 'r') as f: + data = f.read().splitlines() + with open(bazelrc_path, 'w') as f: + for l in data: + if _TF_BAZELRC_FILENAME in l: + continue + f.write('%s\n' % l) + if is_windows(): + tf_bazelrc_path = _TF_BAZELRC.replace("\\", "/") else: - open('.bazelrc', 'w').close() - - remove_line_with('.bazelrc', 'tf_configure') - with open('.bazelrc', 'a') as f: - f.write('import %workspace%/.tf_configure.bazelrc\n') + tf_bazelrc_path = _TF_BAZELRC + f.write('import %s\n' % tf_bazelrc_path) def cleanup_makefile(): @@ -271,7 +262,8 @@ def cleanup_makefile(): These files could interfere with Bazel parsing. 
""" - makefile_download_dir = 'tensorflow/contrib/makefile/downloads' + makefile_download_dir = os.path.join( + _TF_WORKSPACE_ROOT, 'tensorflow', 'contrib', 'makefile', 'downloads') if os.path.isdir(makefile_download_dir): for root, _, filenames in os.walk(makefile_download_dir): for f in filenames: @@ -456,7 +448,7 @@ def check_bazel_version(min_version): if which('bazel') is None: print('Cannot find bazel. Please install bazel.') sys.exit(0) - curr_version = run_shell(['bazel', '--batch', 'version']) + curr_version = run_shell(['bazel', '--batch', '--bazelrc=/dev/null', 'version']) for line in curr_version.split('\n'): if 'Build label: ' in line: @@ -502,7 +494,8 @@ def set_cc_opt_flags(environ_cp): for opt in cc_opt_flags.split(): write_to_bazelrc('build:opt --copt=%s' % opt) # It should be safe on the same build host. - write_to_bazelrc('build:opt --host_copt=-march=native') + if not is_ppc64le(): + write_to_bazelrc('build:opt --host_copt=-march=native') write_to_bazelrc('build:opt --define with_default_optimizations=true') # TODO(mikecase): Remove these default defines once we are able to get # TF Lite targets building without them. @@ -1229,7 +1222,7 @@ def set_host_c_compiler(environ_cp): environ_cp, var_name='HOST_C_COMPILER', var_default=default_c_host_compiler, - ask_for_var=('Please specify which C compiler should be used as the host' + ask_for_var=('Please specify which C compiler should be used as the host ' 'C compiler.'), check_success=os.path.exists, error_msg='Invalid C compiler path. %s cannot be found.', @@ -1373,13 +1366,20 @@ def config_info_line(name, help_text): def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--workspace", + type=str, + default=_TF_WORKSPACE_ROOT, + help="The absolute path to your active Bazel workspace.") + args = parser.parse_args() + # Make a copy of os.environ to be clear when functions and getting and setting # environment variables. 
environ_cp = dict(os.environ) check_bazel_version('0.5.4') - reset_tf_configure_bazelrc() + reset_tf_configure_bazelrc(args.workspace) cleanup_makefile() setup_python(environ_cp) diff --git a/tensorflow/cc/gradients/nn_grad.cc b/tensorflow/cc/gradients/nn_grad.cc index 9b732421e5..0cb3132e94 100644 --- a/tensorflow/cc/gradients/nn_grad.cc +++ b/tensorflow/cc/gradients/nn_grad.cc @@ -182,6 +182,70 @@ Status MaxPoolGradV2Helper(const Scope& scope, const Operation& op, } REGISTER_GRADIENT_OP("MaxPoolV2", MaxPoolGradV2Helper); +Status MaxPool3DGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + std::vector ksize; + std::vector strides; + string padding; + string data_format; + auto attrs = op.output(0).node()->attrs(); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "ksize", &ksize)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "data_format", &data_format)); + MaxPool3DGrad::Attrs grad_attrs; + auto dx = MaxPool3DGrad(scope, op.input(0), op.output(0), grad_inputs[0], + ksize, strides, padding, + grad_attrs.DataFormat(data_format)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("MaxPool3D", MaxPool3DGradHelper); + +Status AvgPoolGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + std::vector ksize; + std::vector strides; + string padding; + string data_format; + auto attrs = op.output(0).node()->attrs(); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "ksize", &ksize)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "data_format", &data_format)); + internal::AvgPoolGrad::Attrs grad_attrs; + auto dx = + internal::AvgPoolGrad(scope, Shape(scope, op.input(0)), grad_inputs[0], + ksize, 
strides, padding, + grad_attrs.DataFormat(data_format)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("AvgPool", AvgPoolGradHelper); + +Status AvgPool3DGradHelper(const Scope& scope, const Operation& op, + const std::vector& grad_inputs, + std::vector* grad_outputs) { + std::vector ksize; + std::vector strides; + string padding; + string data_format; + auto attrs = op.output(0).node()->attrs(); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "ksize", &ksize)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "strides", &strides)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "padding", &padding)); + TF_RETURN_IF_ERROR(GetNodeAttr(attrs, "data_format", &data_format)); + AvgPool3DGrad::Attrs grad_attrs; + auto dx = AvgPool3DGrad(scope, Shape(scope, op.input(0)), grad_inputs[0], + ksize, strides, padding, + grad_attrs.DataFormat(data_format)); + grad_outputs->push_back(dx); + return scope.status(); +} +REGISTER_GRADIENT_OP("AvgPool3D", AvgPool3DGradHelper); + Status LRNGradHelper(const Scope& scope, const Operation& op, const std::vector& grad_inputs, std::vector* grad_outputs) { diff --git a/tensorflow/cc/gradients/nn_grad_test.cc b/tensorflow/cc/gradients/nn_grad_test.cc index 0cfe5f6e3c..c4eba7ecb0 100644 --- a/tensorflow/cc/gradients/nn_grad_test.cc +++ b/tensorflow/cc/gradients/nn_grad_test.cc @@ -31,8 +31,11 @@ using ops::Elu; using ops::L2Loss; using ops::LogSoftmax; using ops::LRN; +using ops::AvgPool; +using ops::AvgPool3D; using ops::MaxPool; using ops::MaxPoolV2; +using ops::MaxPool3D; using ops::Placeholder; using ops::Relu; using ops::Relu6; @@ -70,9 +73,9 @@ class NNGradTest : public ::testing::Test { // Sets tensor with random values, ensuring that the max value is largest by // a reasonable amount. - // This is an issue for MaxPool and MaxPoolV2, in which perturbations by the - // numeric gradient computation in the gradient checker can change the max - // value if values are too close together. 
+ // This is an issue for MaxPool, MaxPoolV2 and MaxPool3D, in which + // perturbations by the numeric gradient computation in the gradient checker + // can change the max value if values are too close together. template void SetRandomValuesWithBumpedMax(Tensor* tensor) { auto tensor_flat = tensor->flat(); @@ -203,6 +206,41 @@ TEST_F(NNGradTest, MaxPoolGradV2Helper) { RunTest(x, x_init_value, y, y_shape); } +TEST_F(NNGradTest, MaxPool3DGradHelper) { + TensorShape x_shape({1, 3, 3, 3, 1}); + TensorShape y_shape({1, 1, 1, 1, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Setup window and strides so that we only do one MaxPool3D. + const std::vector ksize{1, 3, 3, 3, 1}; + const std::vector strides{1, 3, 3, 3, 1}; + auto y = MaxPool3D(scope_, x, ksize, strides, "VALID"); + Tensor x_init_value = Tensor(DT_FLOAT, x_shape); + SetRandomValuesWithBumpedMax(&x_init_value); + RunTest(x, x_init_value, y, y_shape); +} + +TEST_F(NNGradTest, AvgPoolGradHelper) { + TensorShape x_shape({1, 2, 2, 1}); + TensorShape y_shape({1, 1, 1, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Setup window and strides so that we only do one AvgPool. + const std::vector ksize{1, 2, 2, 1}; + const std::vector strides{1, 2, 2, 1}; + auto y = AvgPool(scope_, x, ksize, strides, "SAME"); + RunTest(x, x_shape, y, y_shape); +} + +TEST_F(NNGradTest, AvgPool3DGradHelper) { + TensorShape x_shape({1, 3, 3, 3, 1}); + TensorShape y_shape({1, 1, 1, 1, 1}); + auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); + // Setup window and strides so that we only do one AvgPool3D. 
+ const std::vector ksize{1, 3, 3, 3, 1}; + const std::vector strides{1, 3, 3, 3, 1}; + auto y = AvgPool3D(scope_, x, ksize, strides, "SAME"); + RunTest(x, x_shape, y, y_shape); +} + TEST_F(NNGradTest, LRN){ TensorShape x_shape({1, 1, 2, 1}); auto x = Placeholder(scope_, DT_FLOAT, Placeholder::Shape(x_shape)); diff --git a/tensorflow/cc/profiler/profiler.h b/tensorflow/cc/profiler/profiler.h index 6077c45c58..64edbb5766 100644 --- a/tensorflow/cc/profiler/profiler.h +++ b/tensorflow/cc/profiler/profiler.h @@ -61,18 +61,18 @@ class Profiler { /// Adds tracing information `run_meta` to profiler. A `run_meta` is /// generated by a TensorFlow session run call. `step` is the key /// to the `run_meta`. When calling ProfileXXX methods, caller can specify - /// `step` in `options` to seletively profile the corresponding `run_meta`. + /// `step` in `options` to selectively profile the corresponding `run_meta`. /// Multiple different `run_meta` can be keyed by the same `step` in order /// to group them together. void AddStep(int64 step, const RunMetadata& run_meta); /// Profiles the model by organizing nodes in graph structure. - /// Each node is an op and the nodes are contected by the op inputs/outputs. + /// Each node is an op and the nodes are connected by the op inputs/outputs. GraphNodeProto ProfileGraph(const Options& options); /// Profiles the model by organizing nodes in name scope structure. /// Each node is an op, and nodes are organized by the ops' name - /// scope, similar to a filesystem tree. + /// scope, similar to a file system tree. /// E.g. /foo is the root of operation /foo/matmul_1 and foo/conv_2. 
GraphNodeProto ProfileNameScope(const Options& options); diff --git a/tensorflow/contrib/cmake/tests/cuda/compatibility_test.cc b/tensorflow/contrib/cmake/tests/cuda/compatibility_test.cc index a50461cafd..beb574061b 100644 --- a/tensorflow/contrib/cmake/tests/cuda/compatibility_test.cc +++ b/tensorflow/contrib/cmake/tests/cuda/compatibility_test.cc @@ -17,4 +17,6 @@ limitations under the License. #define __CUDACC__ #include "crt/host_config.h" -int main(void) { return 0; } +int main(void) { + return 0; +} diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py new file mode 100644 index 0000000000..4ed7268e7a --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column.py @@ -0,0 +1,325 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Experimental methods for tf.feature_column sequence input.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + + +import abc +import collections + + +from tensorflow.python.feature_column import feature_column as fc +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_shape +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import check_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import variable_scope + +# TODO(b/73160931): Fix pydoc. +# pylint: disable=g-doc-args,missing-docstring,protected-access +# TODO(b/73827486): Support SequenceExample. + + +def sequence_input_layer( + features, + feature_columns, + weight_collections=None, + trainable=True, + scope=None): + """"Builds input layer for sequence input. + + All `feature_columns` must be sequence dense columns with the same + `sequence_length`. The output of this method can be fed into sequence + networks, such as RNN. + + The output of this method is a 3D `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ from + batch to batch. + + If multiple `feature_columns` are given with `Di` `num_elements` each, their + outputs are concatenated. So, the final `Tensor` has shape + `[batch_size, T, D0 + D1 + ... + Dn]`. 
+ + Example: + + ```python + rating = sequence_numeric_column('rating') + watches = sequence_categorical_column_with_identity( + 'watches', num_buckets=1000) + watches_embedding = embedding_column(watches, dimension=10) + columns = [rating, watches] + + features = tf.parse_example(..., features=make_parse_example_spec(columns)) + input_layer, sequence_length = sequence_input_layer(features, columns) + + rnn_cell = tf.nn.rnn_cell.BasicRNNCell(hidden_size) + outputs, state = tf.nn.dynamic_rnn( + rnn_cell, inputs=input_layer, sequence_length=sequence_length) + ``` + + Returns: + An `(input_layer, sequence_length)` tuple where: + - input_layer: A float `Tensor` of shape `[batch_size, T, D]`. + `T` is the maximum sequence length for this batch, which could differ + from batch to batch. `D` is the sum of `num_elements` for all + `feature_columns`. + - sequence_length: An int `Tensor` of shape `[batch_size]`. The sequence + length for each example. + Raises: + ValueError: If any of the `feature_columns` is the wrong type. + """ + feature_columns = fc._clean_feature_columns(feature_columns) + for c in feature_columns: + if not isinstance(c, _SequenceDenseColumn): + raise ValueError( + 'All feature_columns must be of type _SequenceDenseColumn. ' + 'Given (type {}): {}'.format(type(c), c)) + + with variable_scope.variable_scope( + scope, default_name='sequence_input_layer', values=features.values()): + builder = fc._LazyBuilder(features) + output_tensors = [] + sequence_lengths = [] + ordered_columns = [] + for column in sorted(feature_columns, key=lambda x: x.name): + ordered_columns.append(column) + with variable_scope.variable_scope( + None, default_name=column._var_scope_name): + dense_tensor, sequence_length = column._get_sequence_dense_tensor( + builder, + weight_collections=weight_collections, + trainable=trainable) + # Flattens the final dimension to produce a 3D Tensor. 
+ num_elements = column._variable_shape.num_elements() + shape = array_ops.shape(dense_tensor) + output_tensors.append( + array_ops.reshape( + dense_tensor, + shape=array_ops.concat([shape[:2], [num_elements]], axis=0))) + sequence_lengths.append(sequence_length) + fc._verify_static_batch_size_equality(output_tensors, ordered_columns) + # TODO(b/73160931): Verify sequence_length equality. + return array_ops.concat(output_tensors, -1), sequence_lengths[0] + + +# TODO(b/73160931): Add remaining categorical columns. +def sequence_categorical_column_with_identity( + key, num_buckets, default_value=None): + return _SequenceCategoricalColumn( + fc.categorical_column_with_identity( + key=key, + num_buckets=num_buckets, + default_value=default_value)) + + +# TODO(b/73160931): Merge with embedding_column +def _sequence_embedding_column( + categorical_column, dimension, initializer=None, ckpt_to_load_from=None, + tensor_name_in_ckpt=None, max_norm=None, trainable=True): + if not isinstance(categorical_column, _SequenceCategoricalColumn): + raise ValueError( + 'categorical_column must be of type _SequenceCategoricalColumn. ' + 'Given (type {}): {}'.format( + type(categorical_column), categorical_column)) + return _SequenceEmbeddingColumn( + fc.embedding_column( + categorical_column, + dimension=dimension, + initializer=initializer, + ckpt_to_load_from=ckpt_to_load_from, + tensor_name_in_ckpt=tensor_name_in_ckpt, + max_norm=max_norm, + trainable=trainable)) + + +def sequence_numeric_column( + key, + shape=(1,), + default_value=0., + dtype=dtypes.float32): + # TODO(b/73160931): Add validations. 
+ return _SequenceNumericColumn( + key, + shape=shape, + default_value=default_value, + dtype=dtype) + + +class _SequenceDenseColumn(fc._FeatureColumn): + """Represents dense sequence data.""" + + __metaclass__ = abc.ABCMeta + + TensorSequenceLengthPair = collections.namedtuple( # pylint: disable=invalid-name + 'TensorSequenceLengthPair', ['dense_tensor', 'sequence_length']) + + @abc.abstractproperty + def _variable_shape(self): + """`TensorShape` without batch and sequence dimensions.""" + pass + + @abc.abstractmethod + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + """Returns a `TensorSequenceLengthPair`.""" + pass + + +def _sequence_length_from_sparse_tensor(sp_tensor, num_elements=1): + with ops.name_scope(None, 'sequence_length') as name_scope: + row_ids = sp_tensor.indices[:, 0] + column_ids = sp_tensor.indices[:, 1] + column_ids += array_ops.ones_like(column_ids) + seq_length = ( + math_ops.segment_max(column_ids, segment_ids=row_ids) / num_elements) + # If the last n rows do not have ids, seq_length will have shape + # [batch_size - n]. Pad the remaining values with zeros. 
+ n_pad = array_ops.shape(sp_tensor)[:1] - array_ops.shape(seq_length)[:1] + padding = array_ops.zeros(n_pad, dtype=seq_length.dtype) + return array_ops.concat([seq_length, padding], axis=0, name=name_scope) + + +class _SequenceCategoricalColumn( + fc._CategoricalColumn, + collections.namedtuple( + '_SequenceCategoricalColumn', ['categorical_column'])): + + @property + def name(self): + return self.categorical_column.name + + @property + def _parse_example_spec(self): + return self.categorical_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.categorical_column._transform_feature(inputs) + + @property + def _num_buckets(self): + return self.categorical_column._num_buckets + + def _get_sparse_tensors(self, inputs, weight_collections=None, + trainable=None): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + id_tensor = sparse_tensors.id_tensor + weight_tensor = sparse_tensors.weight_tensor + # Expands final dimension, so that embeddings are not combined during + # embedding lookup. + check_id_rank = check_ops.assert_equal( + array_ops.rank(id_tensor), 2, + data=[ + 'Column {} expected ID tensor of rank 2. 
'.format(self.name), + 'id_tensor shape: ', array_ops.shape(id_tensor)]) + with ops.control_dependencies([check_id_rank]): + id_tensor = sparse_ops.sparse_reshape( + id_tensor, + shape=array_ops.concat([id_tensor.dense_shape, [1]], axis=0)) + if weight_tensor is not None: + check_weight_rank = check_ops.assert_equal( + array_ops.rank(weight_tensor), 2, + data=[ + 'Column {} expected weight tensor of rank 2.'.format(self.name), + 'weight_tensor shape:', array_ops.shape(weight_tensor)]) + with ops.control_dependencies([check_weight_rank]): + weight_tensor = sparse_ops.sparse_reshape( + weight_tensor, + shape=array_ops.concat([weight_tensor.dense_shape, [1]], axis=0)) + return fc._CategoricalColumn.IdWeightPair(id_tensor, weight_tensor) + + def _sequence_length(self, inputs): + sparse_tensors = self.categorical_column._get_sparse_tensors(inputs) + return _sequence_length_from_sparse_tensor(sparse_tensors.id_tensor) + + +class _SequenceEmbeddingColumn( + _SequenceDenseColumn, + collections.namedtuple('_SequenceEmbeddingColumn', ['embedding_column'])): + + @property + def name(self): + return self.embedding_column.name + + @property + def _parse_example_spec(self): + return self.embedding_column._parse_example_spec + + def _transform_feature(self, inputs): + return self.embedding_column._transform_feature(inputs) + + @property + def _variable_shape(self): + return self.embedding_column._variable_shape + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + dense_tensor = self.embedding_column._get_dense_tensor( + inputs=inputs, + weight_collections=weight_collections, + trainable=trainable) + sequence_length = self.embedding_column.categorical_column._sequence_length( + inputs) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + + +class _SequenceNumericColumn( + _SequenceDenseColumn, + collections.namedtuple( + '_SequenceNumericColumn', + ['key', 'shape', 
'default_value', 'dtype'])): + + @property + def name(self): + return self.key + + @property + def _parse_example_spec(self): + return {self.key: parsing_ops.VarLenFeature(self.dtype)} + + def _transform_feature(self, inputs): + return inputs.get(self.key) + + @property + def _variable_shape(self): + return tensor_shape.TensorShape(self.shape) + + def _get_sequence_dense_tensor( + self, inputs, weight_collections=None, trainable=None): + # Do nothing with weight_collections and trainable since no variables are + # created in this function. + del weight_collections + del trainable + sp_tensor = inputs.get(self) + dense_tensor = sparse_ops.sparse_tensor_to_dense( + sp_tensor, default_value=self.default_value) + # Reshape into [batch_size, T, variable_shape]. + dense_shape = array_ops.concat( + [array_ops.shape(dense_tensor)[:1], [-1], self._variable_shape], + axis=0) + dense_tensor = array_ops.reshape(dense_tensor, shape=dense_shape) + sequence_length = _sequence_length_from_sparse_tensor( + sp_tensor, num_elements=self._variable_shape.num_elements()) + return _SequenceDenseColumn.TensorSequenceLengthPair( + dense_tensor=dense_tensor, sequence_length=sequence_length) + +# pylint: enable=g-doc-args,missing-docstring,protected-access diff --git a/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py new file mode 100644 index 0000000000..59674869a2 --- /dev/null +++ b/tensorflow/contrib/feature_column/python/feature_column/sequential_feature_column_test.py @@ -0,0 +1,471 @@ +# Copyright 2018 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Tests for sequential_feature_column.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + +from tensorflow.contrib.feature_column.python.feature_column import sequential_feature_column as sfc +from tensorflow.python.feature_column.feature_column import _LazyBuilder +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import sparse_tensor +from tensorflow.python.platform import test +from tensorflow.python.training import monitored_session + + +class SequenceInputLayerTest(test.TestCase): + + def test_embedding_column(self): + vocabulary_size = 3 + sparse_input_a = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + sparse_input_b = sparse_tensor.SparseTensorValue( + # example 0, ids [1] + # example 1, ids [2, 0] + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + + embedding_dimension_a = 2 + embedding_values_a = ( + (1., 2.), # id 0 + (3., 4.), # id 1 + (5., 6.) # id 2 + ) + embedding_dimension_b = 3 + embedding_values_b = ( + (11., 12., 13.), # id 0 + (14., 15., 16.), # id 1 + (17., 18., 19.) 
# id 2 + ) + def _get_initializer(embedding_dimension, embedding_values): + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + return _initializer + + expected_input_layer = [ + # example 0, ids_a [2], ids_b [1] + [[5., 6., 14., 15., 16.], [0., 0., 0., 0., 0.]], + # example 1, ids_a [0, 1], ids_b [2, 0] + [[1., 2., 17., 18., 19.], [3., 4., 11., 12., 13.]], + ] + expected_sequence_length = [1, 2] + + categorical_column_a = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column_a = sfc._sequence_embedding_column( + categorical_column_a, dimension=embedding_dimension_a, + initializer=_get_initializer(embedding_dimension_a, embedding_values_a)) + categorical_column_b = sfc.sequence_categorical_column_with_identity( + key='bbb', num_buckets=vocabulary_size) + embedding_column_b = sfc._sequence_embedding_column( + categorical_column_b, dimension=embedding_dimension_b, + initializer=_get_initializer(embedding_dimension_b, embedding_values_b)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={ + 'aaa': sparse_input_a, + 'bbb': sparse_input_b, + }, + # Test that columns are reordered alphabetically. 
+ feature_columns=[embedding_column_b, embedding_column_a]) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('sequence_input_layer/aaa_embedding/embedding_weights:0', + 'sequence_input_layer/bbb_embedding/embedding_weights:0'), + tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values_a, global_vars[0].eval(session=sess)) + self.assertAllEqual(embedding_values_b, global_vars[1].eval(session=sess)) + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_input_layer = [ + [[0.], [1.]], + [[10.], [0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_numeric_column_multi_dim(self): + """Tests sequence_input_layer for multi-dimensional numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + # The output of numeric_column._get_dense_tensor should be flattened. 
+ expected_input_layer = [ + [[0., 1., 2., 3.], [4., 5., 6., 7.]], + [[10., 11., 12., 13.], [0., 0., 0., 0.]], + ] + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + input_layer, sequence_length = sfc.sequence_input_layer( + features={'aaa': sparse_input}, + feature_columns=[numeric_column]) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(expected_input_layer, input_layer.eval(session=sess)) + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +def _assert_sparse_tensor_value(test_case, expected, actual): + test_case.assertEqual(np.int64, np.array(actual.indices).dtype) + test_case.assertAllEqual(expected.indices, actual.indices) + + test_case.assertEqual( + np.array(expected.values).dtype, np.array(actual.values).dtype) + test_case.assertAllEqual(expected.values, actual.values) + + test_case.assertEqual(np.int64, np.array(actual.dense_shape).dtype) + test_case.assertAllEqual(expected.dense_shape, actual.dense_shape) + + +class SequenceCategoricalColumnWithIdentityTest(test.TestCase): + + def test_get_sparse_tensors(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sparse_ids = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=np.array((1, 2, 0), dtype=np.int64), + dense_shape=(2, 2, 1)) + + id_weight_pair = column._get_sparse_tensors(_LazyBuilder({'aaa': inputs})) + + self.assertIsNone(id_weight_pair.weight_tensor) + with monitored_session.MonitoredSession() as sess: + _assert_sparse_tensor_value( + self, + expected_sparse_ids, + id_weight_pair.id_tensor.eval(session=sess)) + + def test_get_sparse_tensors_inputs3d(self): + """Tests _get_sparse_tensors when the input is already 3D Tensor.""" + column = 
sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0, 0), (1, 0, 0), (1, 1, 0)), + values=(1, 2, 0), + dense_shape=(2, 2, 1)) + + with self.assertRaisesRegexp( + errors.InvalidArgumentError, + r'Column aaa expected ID tensor of rank 2\.\s*' + r'id_tensor shape:\s*\[2 2 1\]'): + id_weight_pair = column._get_sparse_tensors( + _LazyBuilder({'aaa': inputs})) + with monitored_session.MonitoredSession() as sess: + id_weight_pair.id_tensor.eval(session=sess) + + def test_sequence_length(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((0, 0), (1, 0), (1, 1)), + values=(1, 2, 0), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_zeros(self): + column = sfc.sequence_categorical_column_with_identity( + 'aaa', num_buckets=3) + inputs = sparse_tensor.SparseTensorValue( + indices=((1, 0), (3, 0), (3, 1)), + values=(1, 2, 0), + dense_shape=(5, 2)) + expected_sequence_length = [0, 1, 0, 2, 0] + + sequence_length = column._sequence_length(_LazyBuilder({'aaa': inputs})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceEmbeddingColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + # example 2, ids [] + # example 3, ids [1] + indices=((0, 0), (1, 0), (1, 1), (3, 0)), + values=(2, 0, 1, 1), + dense_shape=(4, 2)) + + embedding_dimension = 2 + embedding_values = ( + (1., 2.), # id 0 + (3., 5.), # id 1 + (7., 
11.) # id 2 + ) + def _initializer(shape, dtype, partition_info): + self.assertAllEqual((vocabulary_size, embedding_dimension), shape) + self.assertEqual(dtypes.float32, dtype) + self.assertIsNone(partition_info) + return embedding_values + + expected_lookups = [ + # example 0, ids [2] + [[7., 11.], [0., 0.]], + # example 1, ids [0, 1] + [[1., 2.], [3., 5.]], + # example 2, ids [] + [[0., 0.], [0., 0.]], + # example 3, ids [1] + [[3., 5.], [0., 0.]], + ] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=embedding_dimension, + initializer=_initializer) + + embedding_lookup, _ = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + global_vars = ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES) + self.assertItemsEqual( + ('embedding_weights:0',), tuple([v.name for v in global_vars])) + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual(embedding_values, global_vars[0].eval(session=sess)) + self.assertAllEqual(expected_lookups, embedding_lookup.eval(session=sess)) + + def test_sequence_length(self): + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [2] + # example 1, ids [0, 1] + indices=((0, 0), (1, 0), (1, 1)), + values=(2, 0, 1), + dense_shape=(2, 2)) + expected_sequence_length = [1, 2] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some 
examples do not have ids.""" + vocabulary_size = 3 + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, ids [] + # example 1, ids [2] + # example 2, ids [0, 1] + # example 3, ids [] + # example 4, ids [1] + # example 5, ids [] + indices=((1, 0), (2, 0), (2, 1), (4, 0)), + values=(2, 0, 1, 1), + dense_shape=(6, 2)) + expected_sequence_length = [0, 1, 2, 0, 1, 0] + + categorical_column = sfc.sequence_categorical_column_with_identity( + key='aaa', num_buckets=vocabulary_size) + embedding_column = sfc._sequence_embedding_column( + categorical_column, dimension=2) + + _, sequence_length = embedding_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +class SequenceNumericColumnTest(test.TestCase): + + def test_get_sequence_dense_tensor(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_dense_tensor = [ + [[0.], [1.]], + [[10.], [0.]], + ] + numeric_column = sfc.sequence_numeric_column('aaa') + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_sequence_dense_tensor_with_shape(self): + """Tests get_sequence_dense_tensor with shape !=(1,).""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_dense_tensor = [ + [[0., 1., 2.], [3., 4., 5.]], + [[10., 11., 12.], [0., 0., 0.]], + ] + numeric_column 
= sfc.sequence_numeric_column('aaa', shape=(3,)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_get_dense_tensor_multi_dim(self): + """Tests get_sequence_dense_tensor for multi-dim numeric_column.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]] + # example 1, [[[10., 11.], [12., 13.]]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6), (0, 7), + (1, 0), (1, 1), (1, 2), (1, 3)), + values=(0., 1., 2., 3., 4., 5., 6., 7., 10., 11., 12., 13.), + dense_shape=(2, 8)) + expected_dense_tensor = [ + [[[0., 1.], [2., 3.]], [[4., 5.], [6., 7.]]], + [[[10., 11.], [12., 13.]], [[0., 0.], [0., 0.]]], + ] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(2, 2)) + + dense_tensor, _ = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_dense_tensor, dense_tensor.eval(session=sess)) + + def test_sequence_length(self): + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [[0., 1., 2.], [3., 4., 5.]] + # example 1, [[10., 11., 12.]] + indices=((0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (0, 5), + (1, 0), (1, 1), (1, 2)), + values=(0., 1., 2., 3., 4., 5., 10., 11., 12.), + dense_shape=(2, 6)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa', shape=(3,)) + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_shape(self): + """Tests _sequence_length with shape !=(1,).""" + sparse_input = 
sparse_tensor.SparseTensorValue( + # example 0, values [[0.], [1]] + # example 1, [[10.]] + indices=((0, 0), (0, 1), (1, 0)), + values=(0., 1., 10.), + dense_shape=(2, 2)) + expected_sequence_length = [2, 1] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + def test_sequence_length_with_empty_rows(self): + """Tests _sequence_length when some examples do not have ids.""" + sparse_input = sparse_tensor.SparseTensorValue( + # example 0, values [] + # example 1, values [[0.], [1.]] + # example 2, [[2.]] + # example 3, values [] + # example 4, [[3.]] + # example 5, values [] + indices=((1, 0), (1, 1), (2, 0), (4, 0)), + values=(0., 1., 2., 3.), + dense_shape=(6, 2)) + expected_sequence_length = [0, 2, 1, 0, 1, 0] + numeric_column = sfc.sequence_numeric_column('aaa') + + _, sequence_length = numeric_column._get_sequence_dense_tensor( + _LazyBuilder({'aaa': sparse_input})) + + with monitored_session.MonitoredSession() as sess: + self.assertAllEqual( + expected_sequence_length, sequence_length.eval(session=sess)) + + +if __name__ == '__main__': + test.main() diff --git a/tensorflow/contrib/gan/python/eval/python/summaries_test.py b/tensorflow/contrib/gan/python/eval/python/summaries_test.py index 5549df971d..45eb108586 100644 --- a/tensorflow/contrib/gan/python/eval/python/summaries_test.py +++ b/tensorflow/contrib/gan/python/eval/python/summaries_test.py @@ -71,10 +71,11 @@ def get_cyclegan_model(): class SummariesTest(test.TestCase): - def _test_add_gan_model_image_summaries_impl( - self, get_model_fn, expected_num_summary_ops, model_summaries): - summaries.add_gan_model_image_summaries( - get_model_fn(), grid_size=2, model_summaries=model_summaries) + def _test_add_gan_model_image_summaries_impl(self, get_model_fn, + 
expected_num_summary_ops, + model_summaries): + summaries.add_gan_model_image_summaries(get_model_fn(), grid_size=2, + model_summaries=model_summaries) self.assertEquals(expected_num_summary_ops, len(ops.get_collection(ops.GraphKeys.SUMMARIES))) diff --git a/tensorflow/contrib/layers/python/layers/layers.py b/tensorflow/contrib/layers/python/layers/layers.py index 559c0c63da..350bcb3bca 100644 --- a/tensorflow/contrib/layers/python/layers/layers.py +++ b/tensorflow/contrib/layers/python/layers/layers.py @@ -58,12 +58,12 @@ __all__ = [ 'avg_pool2d', 'avg_pool3d', 'batch_norm', 'bias_add', 'conv2d', 'conv3d', 'conv2d_in_plane', 'conv2d_transpose', 'conv3d_transpose', 'convolution', 'convolution2d', 'convolution2d_in_plane', 'convolution2d_transpose', - 'convolution3d', 'convolution3d_transpose', 'dense_to_sparse', 'dropout', - 'elu', 'flatten', 'fully_connected', 'GDN', 'gdn', 'images_to_sequence', - 'layer_norm', 'linear', 'pool', 'max_pool2d', 'max_pool3d', - 'one_hot_encoding', 'relu', 'relu6', 'repeat', 'scale_gradient', - 'separable_conv2d', 'separable_convolution2d', 'sequence_to_images', - 'softmax', 'spatial_softmax', 'stack', 'unit_norm', + 'convolution3d', 'convolution3d_transpose', 'dense_to_sparse', + 'dropout', 'elu', 'flatten', 'fully_connected', 'GDN', 'gdn', + 'images_to_sequence', 'layer_norm', 'linear', 'pool', 'max_pool2d', + 'max_pool3d', 'one_hot_encoding', 'relu', 'relu6', 'repeat', + 'scale_gradient', 'separable_conv2d', 'separable_convolution2d', + 'sequence_to_images', 'softmax', 'spatial_softmax', 'stack', 'unit_norm', 'legacy_fully_connected', 'legacy_linear', 'legacy_relu', 'maxout' ] @@ -2718,7 +2718,8 @@ def sequence_to_images(inputs, num_batches = -1 else: num_batches = num_batches // height - reshaped = array_ops.reshape(inputs, [width, num_batches, height, depth]) + reshaped = array_ops.reshape(inputs, + [width, num_batches, height, depth]) if output_data_format == 'channels_first': outputs = array_ops.transpose(reshaped, [1, 3, 2, 
0]) else: diff --git a/tensorflow/contrib/layers/python/layers/layers_test.py b/tensorflow/contrib/layers/python/layers/layers_test.py index ba70432c48..997f910a2a 100644 --- a/tensorflow/contrib/layers/python/layers/layers_test.py +++ b/tensorflow/contrib/layers/python/layers/layers_test.py @@ -3447,8 +3447,9 @@ class SequenceToImagesTest(test.TestCase): num_time_steps = 11 num_channels = 5 desired_height = 7 - sequence = np.random.uniform( - size=(num_time_steps, num_batches, num_channels)).astype(np.float32) + sequence = np.random.uniform(size=(num_time_steps, + num_batches, + num_channels)).astype(np.float32) output = _layers.sequence_to_images(sequence, desired_height) self.assertListEqual(output.get_shape().as_list(), [2, 7, 11, 5]) @@ -3457,10 +3458,12 @@ class SequenceToImagesTest(test.TestCase): num_time_steps = 11 num_channels = 5 desired_height = 7 - sequence = np.random.uniform( - size=(num_time_steps, num_batches, num_channels)).astype(np.float32) - output = _layers.sequence_to_images( - sequence, desired_height, output_data_format='channels_first') + sequence = np.random.uniform(size=(num_time_steps, + num_batches, + num_channels)).astype(np.float32) + output = _layers.sequence_to_images(sequence, + desired_height, + output_data_format='channels_first') self.assertListEqual(output.get_shape().as_list(), [2, 5, 7, 11]) diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java index 2c91be9d62..c57bb348c5 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifier.java @@ -20,6 +20,9 @@ import android.content.res.AssetFileDescriptor; import android.graphics.Bitmap; import android.os.SystemClock; import 
android.util.Log; + +import org.tensorflow.lite.Interpreter; + import java.io.BufferedReader; import java.io.FileInputStream; import java.io.IOException; @@ -34,9 +37,10 @@ import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.PriorityQueue; -import org.tensorflow.lite.Interpreter; -/** Classifies images with Tensorflow Lite. */ +/** + * Classifies images with Tensorflow Lite. + */ public abstract class ImageClassifier { /** Tag for the {@link Log}. */ diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java index 3108422952..be17b85e0c 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierFloatInception.java @@ -16,22 +16,24 @@ limitations under the License. package com.example.android.tflitecamerademo; import android.app.Activity; + import java.io.IOException; /** - * This classifier works with the Inception-v3 slim model. It applies floating point inference - * rather than using a quantized model. + * This classifier works with the Inception-v3 slim model. + * It applies floating point inference rather than using a quantized model. */ public class ImageClassifierFloatInception extends ImageClassifier { - /** The inception net requires additional normalization of the used input. */ + /** + * The inception net requires additional normalization of the used input. + */ private static final int IMAGE_MEAN = 128; - private static final float IMAGE_STD = 128.0f; /** - * An array to hold inference results, to be feed into Tensorflow Lite as outputs. This isn't part - * of the super class, because we need a primitive array here. 
+ * An array to hold inference results, to be feed into Tensorflow Lite as outputs. + * This isn't part of the super class, because we need a primitive array here. */ private float[][] labelProbArray = null; diff --git a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java index ee89dbd375..c533de7927 100644 --- a/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java +++ b/tensorflow/contrib/lite/java/demo/app/src/main/java/com/example/android/tflitecamerademo/ImageClassifierQuantizedMobileNet.java @@ -16,14 +16,17 @@ limitations under the License. package com.example.android.tflitecamerademo; import android.app.Activity; + import java.io.IOException; -/** This classifier works with the quantized MobileNet model. */ +/** + * This classifier works with the quantized MobileNet model. + */ public class ImageClassifierQuantizedMobileNet extends ImageClassifier { /** - * An array to hold inference results, to be feed into Tensorflow Lite as outputs. This isn't part - * of the super class, because we need a primitive array here. + * An array to hold inference results, to be feed into Tensorflow Lite as outputs. + * This isn't part of the super class, because we need a primitive array here. */ private byte[][] labelProbArray = null; diff --git a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc index 883c7f270d..780401e052 100644 --- a/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc +++ b/tensorflow/contrib/lite/kernels/internal/optimized/neon_tensor_utils.cc @@ -15,6 +15,7 @@ limitations under the License. 
#include #include "tensorflow/contrib/lite/builtin_op_data.h" +#include "tensorflow/contrib/lite/kernels/internal/common.h" #include "tensorflow/contrib/lite/kernels/activation_functor.h" #include "tensorflow/contrib/lite/kernels/internal/common.h" #include "tensorflow/contrib/lite/kernels/internal/optimized/tensor_utils_impl.h" diff --git a/tensorflow/contrib/lite/testing/generate_examples.py b/tensorflow/contrib/lite/testing/generate_examples.py index 2481add769..5488b71fcf 100644 --- a/tensorflow/contrib/lite/testing/generate_examples.py +++ b/tensorflow/contrib/lite/testing/generate_examples.py @@ -36,6 +36,7 @@ import traceback import zipfile import numpy as np from six import StringIO +from six.moves import xrange # TODO(aselle): Disable GPU for now os.environ["CUDA_VISIBLE_DEVICES"] = "-1" diff --git a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py index f21915ffbc..63fdd91d36 100644 --- a/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py +++ b/tensorflow/contrib/rnn/python/kernel_tests/rnn_cell_test.py @@ -1585,7 +1585,8 @@ class WeightNormLSTMCellTest(test.TestCase): with self.test_session() as sess: init = init_ops.constant_initializer(0.5) - with variable_scope.variable_scope("root", initializer=init): + with variable_scope.variable_scope("root", + initializer=init): x = array_ops.zeros([1, 2]) c0 = array_ops.zeros([1, 2]) h0 = array_ops.zeros([1, 2]) @@ -1595,12 +1596,11 @@ class WeightNormLSTMCellTest(test.TestCase): xout, sout = cell()(x, state0) sess.run([variables.global_variables_initializer()]) - res = sess.run( - [xout, sout], { - x.name: np.array([[1., 1.]]), - c0.name: 0.1 * np.asarray([[0, 1]]), - h0.name: 0.1 * np.asarray([[2, 3]]), - }) + res = sess.run([xout, sout], { + x.name: np.array([[1., 1.]]), + c0.name: 0.1 * np.asarray([[0, 1]]), + h0.name: 0.1 * np.asarray([[2, 3]]), + }) actual_state_c = res[1].c actual_state_h = res[1].h @@ -1611,8 +1611,9 @@ 
class WeightNormLSTMCellTest(test.TestCase): """Tests cell w/o peepholes and w/o normalisation.""" def cell(): - return contrib_rnn_cell.WeightNormLSTMCell( - 2, norm=False, use_peepholes=False) + return contrib_rnn_cell.WeightNormLSTMCell(2, + norm=False, + use_peepholes=False) actual_c, actual_h = self._cell_output(cell) @@ -1626,8 +1627,9 @@ class WeightNormLSTMCellTest(test.TestCase): """Tests cell with peepholes and w/o normalisation.""" def cell(): - return contrib_rnn_cell.WeightNormLSTMCell( - 2, norm=False, use_peepholes=True) + return contrib_rnn_cell.WeightNormLSTMCell(2, + norm=False, + use_peepholes=True) actual_c, actual_h = self._cell_output(cell) @@ -1641,8 +1643,9 @@ class WeightNormLSTMCellTest(test.TestCase): """Tests cell w/o peepholes and with normalisation.""" def cell(): - return contrib_rnn_cell.WeightNormLSTMCell( - 2, norm=True, use_peepholes=False) + return contrib_rnn_cell.WeightNormLSTMCell(2, + norm=True, + use_peepholes=False) actual_c, actual_h = self._cell_output(cell) @@ -1656,8 +1659,9 @@ class WeightNormLSTMCellTest(test.TestCase): """Tests cell with peepholes and with normalisation.""" def cell(): - return contrib_rnn_cell.WeightNormLSTMCell( - 2, norm=True, use_peepholes=True) + return contrib_rnn_cell.WeightNormLSTMCell(2, + norm=True, + use_peepholes=True) actual_c, actual_h = self._cell_output(cell) diff --git a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py index 6e57ccd6dd..03fe31abf7 100644 --- a/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py +++ b/tensorflow/contrib/seq2seq/python/ops/beam_search_decoder.py @@ -722,7 +722,7 @@ def _mask_probs(probs, eos_token, finished): eos_token, vocab_size, dtype=probs.dtype, - on_value=0., + on_value=ops.convert_to_tensor(0., dtype=probs.dtype), off_value=probs.dtype.min) finished_probs = array_ops.tile( array_ops.reshape(finished_row, [1, 1, -1]), diff --git 
a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py index ad5e985487..b3343aef47 100644 --- a/tensorflow/contrib/slim/python/slim/data/parallel_reader.py +++ b/tensorflow/contrib/slim/python/slim/data/parallel_reader.py @@ -221,7 +221,7 @@ def parallel_read(data_sources, the data will be cycled through indefinitely. num_readers: a integer, number of Readers to create. reader_kwargs: an optional dict, of kwargs for the reader. - shuffle: boolean, wether should shuffle the files and the records by using + shuffle: boolean, whether should shuffle the files and the records by using RandomShuffleQueue as common_queue. dtypes: A list of types. The length of dtypes must equal the number of elements in each record. If it is None it will default to diff --git a/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.h b/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.h index 04e6b0a735..dc3e9fe79d 100644 --- a/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.h +++ b/tensorflow/contrib/tensor_forest/kernels/v4/grow_stats.h @@ -468,7 +468,7 @@ class FixedSizeSparseClassificationGrowStats : public ClassificationStats { void PackToProto(FertileSlot* slot) const override; void InitLeafClassStats(int best_split_index, LeafStat* left_stats, - LeafStat* right_stats) const; + LeafStat* right_stats) const override; protected: void ClassificationAddSplitStats() override { diff --git a/tensorflow/contrib/tensorrt/BUILD b/tensorflow/contrib/tensorrt/BUILD index 3b7b68f61b..c832c6f2e0 100644 --- a/tensorflow/contrib/tensorrt/BUILD +++ b/tensorflow/contrib/tensorrt/BUILD @@ -47,7 +47,10 @@ tf_cuda_cc_test( tf_custom_op_library( name = "python/ops/_trt_engine_op.so", - srcs = ["ops/trt_engine_op.cc"], + srcs = [ + "ops/trt_calib_op.cc", + "ops/trt_engine_op.cc", + ], deps = [ ":trt_engine_op_kernel", ":trt_shape_function", @@ -71,11 +74,18 @@ tf_cuda_library( cc_library( name = "trt_engine_op_kernel", - 
srcs = ["kernels/trt_engine_op.cc"], - hdrs = ["kernels/trt_engine_op.h"], + srcs = [ + "kernels/trt_calib_op.cc", + "kernels/trt_engine_op.cc", + ], + hdrs = [ + "kernels/trt_calib_op.h", + "kernels/trt_engine_op.h", + ], copts = tf_copts(), deps = [ ":trt_logging", + ":trt_resources", "//tensorflow/core:gpu_headers_lib", "//tensorflow/core:lib_proto_parsing", "//tensorflow/core:stream_executor_headers_lib", @@ -87,7 +97,10 @@ cc_library( ) tf_gen_op_libs( - op_lib_names = ["trt_engine_op"], + op_lib_names = [ + "trt_engine_op", + "trt_calib_op", + ], deps = if_tensorrt([ "@local_config_tensorrt//:nv_infer", ]), @@ -109,6 +122,7 @@ tf_gen_op_wrapper_py( name = "trt_engine_op", gen_locally = True, deps = [ + ":trt_calib_op_op_lib", ":trt_engine_op_op_lib", ":trt_logging", ":trt_shape_function", @@ -172,6 +186,27 @@ tf_py_wrap_cc( ], ) +tf_cuda_library( + name = "trt_resources", + srcs = [ + "resources/trt_int8_calibrator.cc", + "resources/trt_resource_manager.cc", + ], + hdrs = [ + "resources/trt_int8_calibrator.h", + "resources/trt_resource_manager.h", + "resources/trt_resources.h", + ], + deps = [ + ":trt_logging", + "//tensorflow/core:framework_headers_lib", + "//tensorflow/core:framework_lite", + "//tensorflow/core:lib_proto_parsing", + ] + if_tensorrt([ + "@local_config_tensorrt//:nv_infer", + ]), +) + # Library for the node-level conversion portion of TensorRT operation creation tf_cuda_library( name = "trt_conversion", @@ -186,6 +221,7 @@ tf_cuda_library( deps = [ ":segment", ":trt_logging", + ":trt_resources", "//tensorflow/core/grappler:grappler_item", "//tensorflow/core/grappler:utils", "//tensorflow/core:framework", diff --git a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc index 4003ba056d..9ee717dd7f 100644 --- a/tensorflow/contrib/tensorrt/convert/convert_nodes.cc +++ b/tensorflow/contrib/tensorrt/convert/convert_nodes.cc @@ -809,9 +809,9 @@ tensorflow::Status BinaryTensorOpTensor( 
CHECK_EQ_TYPE(tensor_r->getType(), dtype); auto op_pair = ops.find(node_def.op()); if (op_pair == ops.end()) - return tensorflow::errors::Unimplemented( - "binary op: " + node_def.op() + - " not supported at: " + node_def.name()); + return tensorflow::errors::Unimplemented("binary op: " + node_def.op() + + " not supported at: " + + node_def.name()); nvinfer1::IElementWiseLayer* layer = ctx.network()->addElementWise( *const_cast(tensor_l), @@ -1471,13 +1471,13 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( << std::to_string(op_info_vec.size()); // TODO(ben,jie): update TRT input format/dimension - nvinfer1::DimsCHW input_dim_pseudo_chw; - for (int i = 0; i < 3; i++) input_dim_pseudo_chw.d[i] = 1; + nvinfer1::DimsCHW input_dim_psuedo_chw; + for (int i = 0; i < 3; i++) input_dim_psuedo_chw.d[i] = 1; for (int i = 1; i < op_info.shape().dim_size(); i++) { VLOG(2) << "dimension: " << i << " , size: " << op_info.shape().dim(i).size(); - input_dim_pseudo_chw.d[i - 1] = op_info.shape().dim(i).size(); + input_dim_psuedo_chw.d[i - 1] = op_info.shape().dim(i).size(); } // TODO(ben,jie): proper way to restore input tensor name? @@ -1486,7 +1486,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef( input_tensor_name = node_name + ":" + std::to_string(output_idx); nvinfer1::ITensor* input_tensor = converter.network()->addInput( - input_tensor_name.c_str(), dtype, input_dim_pseudo_chw); + input_tensor_name.c_str(), dtype, input_dim_psuedo_chw); if (!input_tensor) return tensorflow::errors::InvalidArgument( diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc new file mode 100644 index 0000000000..1dcb87e768 --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.cc @@ -0,0 +1,129 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/kernels/trt_calib_op.h" +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/contrib/tensorrt/resources/trt_resources.h" +#include "tensorflow/core/framework/tensor.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/framework/tensor_types.h" +#include "tensorflow/core/framework/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { + +TRTCalibOp::TRTCalibOp(OpKernelConstruction* context) : OpKernel(context) { + OP_REQUIRES_OK(context, context->GetAttr("segment_nodes", &segment_nodes_)); + OP_REQUIRES_OK(context, context->GetAttr("input_names", &input_names_)); + OP_REQUIRES_OK(context, context->GetAttr("resource_name", &resource_name_)); +}; + +#define TYPECASE(dt, X, Y) \ + case dt: { \ + return (void*)X->flat::Type>().data(); \ + } + +void* GetTensorAddress(const Tensor* tensor_ptr) { + auto tensor_type = tensor_ptr->dtype(); + switch (tensor_type) { + TYPECASE(tensorflow::DT_FLOAT, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_HALF, tensor_ptr, dest_ptr); + TYPECASE(tensorflow::DT_INT8, tensor_ptr, dest_ptr); + default: { + LOG(FATAL) << "Unsupported Data type " + << tensorflow::DataTypeString(tensor_type); + return nullptr; + } + } +} + +void TRTCalibOp::Compute(tensorflow::OpKernelContext* 
ctx) { + // TODO(aaroey): make sure ctx->resource_mgr() is used in future PR. + auto trt_rm = tensorflow::tensorrt::TRTResourceManager::instance(); + auto res_mgr = trt_rm->getManager("TRTCalibOps"); + tensorflow::tensorrt::TRTCalibrationResource* calib_res = nullptr; + auto status = res_mgr->Lookup(resource_name_, resource_name_, &calib_res); + + if (!status.ok()) { + ctx->SetStatus(status); + return; + } + int num_inputs = ctx->num_inputs(); + // first run instantiate calibrator + if (calib_res->calibrator_ == nullptr) { + dev_tensors_.resize(num_inputs); + int batch_size = ctx->input(0).dim_size(0); + VLOG(1) << " Constructing calibrator"; + for (int i = 0; i < num_inputs; i++) { + // allocate workspace on device for inputs + const tensorflow::Tensor& t = ctx->input(i); + OP_REQUIRES_OK(ctx, + ctx->allocate_persistent(t.dtype(), t.shape(), + &dev_tensors_.at(i), nullptr)); + const auto device_tensor = dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), device_tensor->TotalBytes()); + void* device_address = GetTensorAddress(device_tensor); + device_buffers_.emplace(input_names_.at(i), + std::pair( + device_address, device_tensor->TotalBytes())); + } + + calib_res->calibrator_ = + new TRTInt8Calibrator(device_buffers_, batch_size, resource_name_); + string label(resource_name_); + calib_res->thr_ = new std::thread([calib_res, label]() { + VLOG(1) << "Starting calibration thread, Calibration Resource @ " + << calib_res; + calib_res->builder_->setInt8Calibrator(calib_res->calibrator_); + calib_res->builder_->setInt8Mode(true); + calib_res->engine_ = calib_res->builder_->buildCudaEngine( + *calib_res->network_); // will loop until we terminate calibrator + VLOG(1) << "Calibration loop terminated " << label; + }); + VLOG(1) << "initialized calibrator resource"; + } // calibrator initialized + + // Pass input data to calibrator + std::unordered_map input_data; + for (int i = 0; i < num_inputs; i++) { + const Tensor& t = ctx->input(i); + void* data_address 
= GetTensorAddress(&t); + const auto device_tensor = dev_tensors_.at(i).AccessTensor(ctx); + CHECK_EQ(t.TotalBytes(), + device_tensor->TotalBytes()); // use the tensor so FW keeps it + input_data.emplace(input_names_.at(i), data_address); + ctx->set_output(i, t); + } + VLOG(2) << "Filled map for sending"; + calib_res->calibrator_->setBatch(input_data); + VLOG(2) << "Passed calibration data"; + // TODO(aaroey): make sure we wait for the completion of calibration on the + // last batch in future PR. +}; + +#undef TYPECASE + +REGISTER_KERNEL_BUILDER(Name("TRTCalibOp").Device(DEVICE_GPU), TRTCalibOp); + +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h new file mode 100644 index 0000000000..23df9db32f --- /dev/null +++ b/tensorflow/contrib/tensorrt/kernels/trt_calib_op.h @@ -0,0 +1,52 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H +#define TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H + +#include +#include +#include +#include +#include +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/tensor_shape.h" +#include "tensorflow/core/platform/types.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +namespace tensorflow { +namespace tensorrt { +// TODO(sami): Convert this to async kernel! +class TRTCalibOp : public OpKernel { + public: + explicit TRTCalibOp(OpKernelConstruction* context); + + void Compute(OpKernelContext* context) override; + + private: + string resource_name_; + std::vector segment_nodes_; + std::vector input_names_; + std::vector shapes_; + std::unordered_map> device_buffers_; + std::vector dev_tensors_; +}; +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_KERNELS_TRT_CALIB_OP_H diff --git a/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc new file mode 100644 index 0000000000..4835e50650 --- /dev/null +++ b/tensorflow/contrib/tensorrt/ops/trt_calib_op.cc @@ -0,0 +1,37 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/shape_inference.h" +namespace tensorflow { + +REGISTER_OP("TRTCalibOp") + .Attr("segment_nodes: list(string)") // names of the ops in segment + .Attr("segment_output_names: list(string)") // names of the output ops in + // segment + .Attr("input_names: list(string)") // names of the inputs for + // passing into tensorrt + .Attr("resource_name: string") + .Attr("InT: list({int8, float16, float32})") + .Input("in_tensor: InT") + .Output("out_tensor: InT") + .SetShapeFn([](tensorflow::shape_inference::InferenceContext* c) { + for (int i = 0; i < c->num_inputs(); i++) { + c->set_output(i, c->input(i)); + } + return Status::OK(); + }); + +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc new file mode 100644 index 0000000000..3d5cc76c42 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.cc @@ -0,0 +1,119 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" + +#include +#include +#include + +#include "tensorflow/core/platform/logging.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "cuda_runtime_api.h" + +namespace tensorflow { +namespace tensorrt { + +// set the batch size before constructing the thread to execute engine +int TRTInt8Calibrator::getBatchSize() const { return batch_size_; } + +TRTInt8Calibrator::TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engine_name) + : batch_size_(batch_size), + done_(false), + dev_buffers_(dev_buffers), + calib_running_(false), + engine_name_(engine_name) {} + +bool TRTInt8Calibrator::setBatch( + const std::unordered_map& data) { + // TODO(aaroey): make sure that in future PR: + // 1. the mutex_lock is outside of the loop + // 2. wait() is used instead of wait_for() + // 3. done_ is to be protected by the mutex + // 4. the first batch is not missed + if (done_) return false; + while (calib_running_.load( + std::memory_order_acquire)) { // wait while calibration is running + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } + VLOG(1) << "Set Batch Waiting finished"; + for (const auto it : data) { + auto devptr = dev_buffers_.find(it.first); + if (devptr == dev_buffers_.end()) { + LOG(FATAL) << "FATAL " << engine_name_ << " input name '" << it.first + << "' does not match with the buffer names"; + } + const auto& d = devptr->second; + + // TODO(aaroey): we should not use sync copy on default stream. Make sure + // stream->ThenMemcpy() is used in future PRs. 
+ auto status = + cudaMemcpy(d.first, it.second, d.second, cudaMemcpyDeviceToDevice); + if (status != cudaSuccess) { + LOG(FATAL) << "cudaMemcpy " << engine_name_ << " for '" << it.first + << "' failed with " << status; + } + } + calib_running_.store(true, std::memory_order_release); // release builder + cond_.notify_all(); + return true; +} + +bool TRTInt8Calibrator::getBatch(void** bindings, const char** names, + int num_bindings) { + calib_running_.store(false, std::memory_order_release); // wait for new batch + cond_.notify_all(); + while (!calib_running_.load( + std::memory_order_acquire)) { // wait until new batch arrives + tensorflow::mutex_lock l(cond_mtx_); + cond_.wait_for(l, std::chrono::milliseconds(50)); + if (done_) return false; + } + if (done_) { + return false; + } + + for (int i = 0; i < num_bindings; i++) { + auto it = dev_buffers_.find(names[i]); + if (it == dev_buffers_.end()) { + LOG(FATAL) << "Calibration engine asked for unknown tensor name '" + << names[i] << "' at position " << i; + } + + bindings[i] = it->second.first; + } + return true; +} + +const void* TRTInt8Calibrator::readCalibrationCache(std::size_t& length) { + return nullptr; +} + +void TRTInt8Calibrator::writeCalibrationCache(const void* ptr, + std::size_t length) {} +TRTInt8Calibrator::~TRTInt8Calibrator() { + VLOG(1) << "Destroying calibrator for " << engine_name_; +} + +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif diff --git a/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h new file mode 100644 index 0000000000..8830f7efe7 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h @@ -0,0 +1,65 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ + +#include +#include +#include +#include +#include "tensorflow/core/platform/mutex.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorrt/include/NvInfer.h" +namespace tensorflow { +namespace tensorrt { +// This class provides a 1-element queue to match TF's push model to +// TRT's pull model for calibration. When TRT implements a means for +// push calibration, this class should be updated accordingly. + +struct TRTInt8Calibrator : public nvinfer1::IInt8EntropyCalibrator { + public: + TRTInt8Calibrator( + const std::unordered_map>& dev_buffers, + int batch_size, string engine_name); + int getBatchSize() const override; + bool getBatch(void* bindings[], const char* names[], + int num_bindings) override; + bool setBatch(const std::unordered_map& data); + void setDone() { done_ = true; } + const void* readCalibrationCache(std::size_t& length) override; + void writeCalibrationCache(const void* ptr, std::size_t length) override; + ~TRTInt8Calibrator(); + + private: + const int batch_size_; + tensorflow::mutex cond_mtx_; // mutex for condition_variable + tensorflow::condition_variable cond_; // condition variable to implement + // producer-consumer queue for + // calibration + bool done_; + const std::unordered_map> + dev_buffers_; // map to keep tensorrt input buffers and sizes keyed with + // buffer names + std::atomic_bool calib_running_; + string 
engine_name_; +}; +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_INT8_CALIBRATOR_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc new file mode 100644 index 0000000000..e663eed4dd --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.cc @@ -0,0 +1,39 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/contrib/tensorrt/resources/trt_resource_manager.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace tensorrt { + +std::shared_ptr +tensorflow::tensorrt::TRTResourceManager::getManager(const string& op_name) { + // mutex is held for lookup only. Most instantiations where mutex will be held + // longer will be during op creation and should be ok. 
+ tensorflow::mutex_lock lock(map_mutex_); + auto s = managers_.find(op_name); + if (s == managers_.end()) { + auto it = managers_.emplace( + op_name, std::make_shared(op_name)); + VLOG(1) << "Returning a new manager " << op_name; + return it.first->second; + } + VLOG(1) << "Returning old manager " << op_name; + return s->second; +} + +} // namespace tensorrt +} // namespace tensorflow diff --git a/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h new file mode 100644 index 0000000000..5f8ad491d3 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resource_manager.h @@ -0,0 +1,49 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ +#include + +#include +#include +#include "tensorflow/core/framework/resource_mgr.h" +#include "tensorflow/core/platform/mutex.h" + +namespace tensorflow { +namespace tensorrt { + +class TRTResourceManager { + TRTResourceManager() = default; + + public: + static std::shared_ptr instance() { + static std::shared_ptr instance_( + new TRTResourceManager); + return instance_; + } + // returns the manager for the given op; if it doesn't exist, it creates one + std::shared_ptr getManager(const string& op_name); + + private: + std::unordered_map> + managers_; + tensorflow::mutex map_mutex_; +}; + +} // namespace tensorrt +} // namespace tensorflow + +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRT_RESOURCE_MANAGER_H_ diff --git a/tensorflow/contrib/tensorrt/resources/trt_resources.h b/tensorflow/contrib/tensorrt/resources/trt_resources.h new file mode 100644 index 0000000000..3c85968ae7 --- /dev/null +++ b/tensorflow/contrib/tensorrt/resources/trt_resources.h @@ -0,0 +1,95 @@ +/* Copyright 2018 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +#ifndef TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ +#define TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ + +#include +#include +#include +#include +#include +#include "tensorflow/contrib/tensorrt/log/trt_logger.h" +#include "tensorflow/core/framework/resource_mgr.h" + +#if GOOGLE_CUDA +#if GOOGLE_TENSORRT +#include "tensorflow/contrib/tensorrt/resources/trt_int8_calibrator.h" +#include "tensorrt/include/NvInfer.h" + +namespace tensorflow { +namespace tensorrt { +class TRTCalibrationResource : public tensorflow::ResourceBase { + public: + TRTCalibrationResource() + : calibrator_(nullptr), + builder_(nullptr), + network_(nullptr), + engine_(nullptr), + logger_(nullptr), + thr_(nullptr) {} + string DebugString() override { + std::stringstream oss; + oss << " Calibrator = " << std::hex << calibrator_ << std::dec << std::endl + << " Builder = " << std::hex << builder_ << std::dec << std::endl + << " Network = " << std::hex << network_ << std::dec << std::endl + << " Engine = " << std::hex << engine_ << std::dec << std::endl + << " Logger = " << std::hex << logger_ << std::dec << std::endl + << " Thread = " << std::hex << thr_ << std::dec << std::endl; + return oss.str(); + } + ~TRTCalibrationResource() { + VLOG(0) << "Destroying Calibration Resource " << std::endl << DebugString(); + } + TRTInt8Calibrator* calibrator_; + nvinfer1::IBuilder* builder_; + nvinfer1::INetworkDefinition* network_; + nvinfer1::ICudaEngine* engine_; + tensorflow::tensorrt::Logger* logger_; + // TODO(sami): Use threadpool threads! 
+ std::thread* thr_; +}; + +class TRTWeightStore : public tensorflow::ResourceBase { + public: + TRTWeightStore() {} + std::list> store_; + string DebugString() override { + std::stringstream oss; + size_t lenBytes = 0; + for (const auto& v : store_) { + lenBytes += v.size() * sizeof(uint8_t); + } + oss << " Number of entries = " << store_.size() << std::endl + << " Total number of bytes = " + << store_.size() * sizeof(std::vector) + lenBytes << std::endl; + return oss.str(); + } + virtual ~TRTWeightStore() { VLOG(1) << "Destroying store" << DebugString(); } +}; + +class TRTEngineResource : public tensorflow::ResourceBase { + public: + TRTEngineResource() : runtime_(nullptr), ctx_(nullptr){}; + string DebugString() override { return string(""); } + nvinfer1::IRuntime* runtime_; + nvinfer1::IExecutionContext* ctx_; +}; + +} // namespace tensorrt +} // namespace tensorflow +#endif +#endif +#endif // TENSORFLOW_CONTRIB_TENSORRT_RESOURCES_TRTRESOURCES_H_ diff --git a/tensorflow/contrib/timeseries/python/timeseries/BUILD b/tensorflow/contrib/timeseries/python/timeseries/BUILD index fff972c1f3..ed3ed4c0e1 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/BUILD +++ b/tensorflow/contrib/timeseries/python/timeseries/BUILD @@ -140,11 +140,13 @@ py_library( "//tensorflow/python:framework_ops", "//tensorflow/python:math_ops", "//tensorflow/python:state_ops", + "//tensorflow/python:summary", "//tensorflow/python:util", "//tensorflow/python:variable_scope", "//tensorflow/python/estimator:estimator_py", "//tensorflow/python/estimator:export", "//tensorflow/python/estimator:head", + "//tensorflow/python/estimator:metric_keys", ], ) diff --git a/tensorflow/contrib/timeseries/python/timeseries/head.py b/tensorflow/contrib/timeseries/python/timeseries/head.py index 8731b10923..f4d9351432 100644 --- a/tensorflow/contrib/timeseries/python/timeseries/head.py +++ b/tensorflow/contrib/timeseries/python/timeseries/head.py @@ -26,6 +26,7 @@ from 
tensorflow.contrib.timeseries.python.timeseries import feature_keys from tensorflow.python.estimator import estimator_lib from tensorflow.python.estimator.canned import head as head_lib +from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.estimator.export import export_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops @@ -35,6 +36,7 @@ from tensorflow.python.ops import math_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.util import nest +from tensorflow.python.summary import summary def time_series_regression_head(model, @@ -71,14 +73,34 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc self.input_statistics_generator = input_statistics_generator self._name = name + @property + def name(self): + return self._name + + # TODO(terrytangyuan): consolidate `model_outputs` and `_Head.LossSpec` + # once `_Head.create_loss` becomes extendable + def create_loss(self, features, mode, logits=None, labels=None): + """See `_Head`.""" + model_outputs = self.state_manager.define_loss( + self.model, features, mode) + summary.scalar( + head_lib._summary_key(self._name, metric_keys.MetricKeys.LOSS), + model_outputs.loss) + return model_outputs + + @property + def logits_dimension(self): + """See `_Head`.""" + return 1 + def _train_ops(self, features): """Add training ops to the graph.""" + mode = estimator_lib.ModeKeys.TRAIN with variable_scope.variable_scope( "model", # Use ResourceVariables to avoid race conditions. 
use_resource=True): - model_outputs = self.state_manager.define_loss( - self.model, features, estimator_lib.ModeKeys.TRAIN) + model_outputs = self.create_loss(features, mode) train_op = optimizers.optimize_loss( model_outputs.loss, @@ -88,31 +110,14 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc learning_rate=None) return estimator_lib.EstimatorSpec( loss=model_outputs.loss, - mode=estimator_lib.ModeKeys.TRAIN, + mode=mode, train_op=train_op) - # TODO(terrytangyuan): suffix summary and metrics keys by `"/" + name` - @property - def name(self): - return self._name - - # TODO(terrytangyuan): unused for now. Need to decouple - # `state_manager.define_loss` to satisfy the extendable return signature of - # `_Head.create_loss`. - def create_loss(self, features, mode, logits, labels): - """See `_Head`.""" - return None - - # TODO(terrytangyuan): check label dimension - @property - def logits_dimension(self): - return None - def _evaluate_ops(self, features): """Add ops for evaluation (aka filtering) to the graph.""" + mode = estimator_lib.ModeKeys.EVAL with variable_scope.variable_scope("model", use_resource=True): - model_outputs = self.state_manager.define_loss( - self.model, features, estimator_lib.ModeKeys.EVAL) + model_outputs = self.create_loss(features, mode) metrics = {} # Just output in-sample predictions for the last chunk seen for prediction_key, prediction_value in model_outputs.predictions.items(): @@ -125,7 +130,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc model_outputs.end_state)) return estimator_lib.EstimatorSpec( loss=model_outputs.loss, - mode=estimator_lib.ModeKeys.EVAL, + mode=mode, eval_metric_ops=metrics, predictions={}) @@ -143,9 +148,8 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc with variable_scope.variable_scope("model", use_resource=True): prediction_outputs = self.model.predict(features=features) with 
variable_scope.variable_scope("model", reuse=True): - filtering_outputs = self.state_manager.define_loss( - self.model, features, estimator_lib.ModeKeys.EVAL) - + filtering_outputs = self.create_loss( + features, estimator_lib.ModeKeys.EVAL) return estimator_lib.EstimatorSpec( mode=estimator_lib.ModeKeys.PREDICT, export_outputs={ @@ -194,7 +198,7 @@ class _TimeSeriesRegressionHead(head_lib._Head): # pylint:disable=protected-acc def create_estimator_spec(self, features, mode, labels=None): """Performs basic error checking and returns an EstimatorSpec.""" - with ops.name_scope("head"): + with ops.name_scope(self._name, "head"): if labels: raise ValueError( "The model received a `labels` dictionary, which is " diff --git a/tensorflow/contrib/verbs/README.md b/tensorflow/contrib/verbs/README.md index 58fed4e5cb..4b6104a8b4 100644 --- a/tensorflow/contrib/verbs/README.md +++ b/tensorflow/contrib/verbs/README.md @@ -93,7 +93,7 @@ When the receiver receives the RDMA write, it will locate the relevant **RdmaTen 1. When the sender receives a tensor request, the source tensor may or may not be ready yet. The situation is handled through a process of tag matching: * If the request arrives before the tensor is ready, then a callback is put in a local table, and will be invoked once the tensor arrives. - * If the tensor is ready before the request arives, than the tensor is put in a local table. When the request arrives, it will invoke the callback immediately. + * If the tensor is ready before the request arrives, then the tensor is put in a local table. When the request arrives, it will invoke the callback immediately. In code it is done by calling **RecvLocalAsync()**, which receives the tensor's key, step-id, and the callback. 2. When the callback is invoked, the relevant tensor is removed from the tag matching table. In the case where we need to send the tensor's meta-data, the **RdmaTensorResponse** will store a copy of the tensor until the re-request arrives. 3. 
The sending of protocol messages (**RDMA_MESSAGE_TENSOR_REQUEST**, **RDMA_MESSAGE_META_DATA_RESPONSE** and **RDMA_MESSAGE_TENSOR_RE_REQUEST**) is done by the class **RdmaMessageBuffer**. All messages are sent using RDMA writes from/to fixed messages buffers. This implies that we cannot send on a specific channel more than one message at a time. In order to synchronize the messages, the **RdmaMessageBuffer** holds the a local and remote buffer statuses which can be either busy or idle. When a write is issued, both statuses will be changed to busy. When the write-complete event is received, the local status is changed to idle. When the write is received on the remote side, the remote side will parse the message, and return an ACK back to the sending side on which the sending side will update the remote status to idle. When both the local and remote statuses are idle, the next message can be sent. diff --git a/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md b/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md index 956b8f2147..da6fdd48e1 100644 --- a/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md +++ b/tensorflow/contrib/verbs/patch_notes_verbs_with_0_copies.md @@ -64,7 +64,7 @@ The protocol messages themselves will remain mostly unchanged at the first stage * type - The message type. * request_index - Request index. * is_dead/data_type/tensor_shape/tensor_bytes - The up-to-date meta-data. -* **RDMA_MESSAGE_BUFFER_RESPONSE** - (receiver ==> sender) Tensor re-requset after meta-data update and reallocation of result/proxy tensors. +* **RDMA_MESSAGE_BUFFER_RESPONSE** - (receiver ==> sender) Tensor re-request after meta-data update and reallocation of result/proxy tensors. * type - The message type. * name (name_size) - Name of the requested tensor. * step_id - Step ID. 
diff --git a/tensorflow/contrib/verbs/rdma.cc b/tensorflow/contrib/verbs/rdma.cc index 7d95b6522c..86350a08e5 100644 --- a/tensorflow/contrib/verbs/rdma.cc +++ b/tensorflow/contrib/verbs/rdma.cc @@ -30,6 +30,7 @@ limitations under the License. #include "tensorflow/core/distributed_runtime/rendezvous_mgr_interface.h" #include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/distributed_runtime/session_mgr.h" +#include "tensorflow/core/distributed_runtime/rpc/grpc_util.h" #include "tensorflow/core/framework/rendezvous.h" #include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/lib/core/status.h" diff --git a/tensorflow/core/api_def/base_api/api_def_UniqueWithCountsV2.pbtxt b/tensorflow/core/api_def/base_api/api_def_UniqueWithCountsV2.pbtxt new file mode 100644 index 0000000000..e21f56ba5b --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UniqueWithCountsV2.pbtxt @@ -0,0 +1,85 @@ +op { + graph_op_name: "UniqueWithCountsV2" + in_arg { + name: "x" + description: < [1, 2, 4, 7, 8] +idx ==> [0, 0, 1, 2, 2, 2, 3, 4, 4] +count ==> [2, 1, 3, 1, 2] +``` + +For a `2-D` tensor `x` with `axis = 0`: + +``` +# tensor 'x' is [[1, 0, 0], +# [1, 0, 0], +# [2, 0, 0]] +y, idx, count = unique_with_counts(x, axis=0) +y ==> [[1, 0, 0], + [2, 0, 0]] +idx ==> [0, 0, 1] +count ==> [2, 1] +``` + +For a `2-D` tensor `x` with `axis = 1`: + +``` +# tensor 'x' is [[1, 0, 0], +# [1, 0, 0], +# [2, 0, 0]] +y, idx, count = unique_with_counts(x, axis=1) +y ==> [[1, 0], + [1, 0], + [2, 0]] +idx ==> [0, 1, 1] +count ==> [1, 2] +``` +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMax.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMax.pbtxt index 4e69e0bc63..4ca6780c95 100644 --- a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMax.pbtxt +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMax.pbtxt @@ -14,20 +14,21 @@ Has same shape as data, except for dimension 0 which has size 
`num_segments`. END } - summary: "Computes the Max along segments of a tensor." + summary: "Computes the maximum along segments of a tensor." description: <::min()`. +If the maximum is empty for a given segment ID `i`, it outputs the smallest +possible value for the specific numeric type, +`output[i] = numeric_limits::lowest()`.
diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMin.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMin.pbtxt new file mode 100644 index 0000000000..55ea69b5dd --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentMin.pbtxt @@ -0,0 +1,33 @@ +op { + graph_op_name: "UnsortedSegmentMin" + in_arg { + name: "segment_ids" + description: <::max()`. +END +} diff --git a/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentProd.pbtxt b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentProd.pbtxt new file mode 100644 index 0000000000..577ff53d60 --- /dev/null +++ b/tensorflow/core/api_def/base_api/api_def_UnsortedSegmentProd.pbtxt @@ -0,0 +1,32 @@ +op { + graph_op_name: "UnsortedSegmentProd" + in_arg { + name: "segment_ids" + description: <